cuda-cccl 0.1.3.2.0.dev438__cp311-cp311-manylinux_2_24_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cuda-cccl might be problematic. Click here for more details.

Files changed (1962) hide show
  1. cuda/cccl/__init__.py +27 -0
  2. cuda/cccl/_cuda_version_utils.py +24 -0
  3. cuda/cccl/cooperative/__init__.py +3 -0
  4. cuda/cccl/cooperative/experimental/__init__.py +8 -0
  5. cuda/cccl/cooperative/experimental/_caching.py +48 -0
  6. cuda/cccl/cooperative/experimental/_common.py +275 -0
  7. cuda/cccl/cooperative/experimental/_nvrtc.py +91 -0
  8. cuda/cccl/cooperative/experimental/_scan_op.py +181 -0
  9. cuda/cccl/cooperative/experimental/_types.py +937 -0
  10. cuda/cccl/cooperative/experimental/_typing.py +107 -0
  11. cuda/cccl/cooperative/experimental/block/__init__.py +39 -0
  12. cuda/cccl/cooperative/experimental/block/_block_exchange.py +251 -0
  13. cuda/cccl/cooperative/experimental/block/_block_load_store.py +215 -0
  14. cuda/cccl/cooperative/experimental/block/_block_merge_sort.py +125 -0
  15. cuda/cccl/cooperative/experimental/block/_block_radix_sort.py +214 -0
  16. cuda/cccl/cooperative/experimental/block/_block_reduce.py +294 -0
  17. cuda/cccl/cooperative/experimental/block/_block_scan.py +983 -0
  18. cuda/cccl/cooperative/experimental/warp/__init__.py +9 -0
  19. cuda/cccl/cooperative/experimental/warp/_warp_merge_sort.py +92 -0
  20. cuda/cccl/cooperative/experimental/warp/_warp_reduce.py +153 -0
  21. cuda/cccl/cooperative/experimental/warp/_warp_scan.py +78 -0
  22. cuda/cccl/headers/__init__.py +7 -0
  23. cuda/cccl/headers/include/__init__.py +1 -0
  24. cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +262 -0
  25. cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +1185 -0
  26. cuda/cccl/headers/include/cub/agent/agent_for.cuh +84 -0
  27. cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +927 -0
  28. cuda/cccl/headers/include/cub/agent/agent_merge.cuh +232 -0
  29. cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +730 -0
  30. cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +766 -0
  31. cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +289 -0
  32. cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +706 -0
  33. cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +558 -0
  34. cuda/cccl/headers/include/cub/agent/agent_reduce.cuh +619 -0
  35. cuda/cccl/headers/include/cub/agent/agent_reduce_by_key.cuh +806 -0
  36. cuda/cccl/headers/include/cub/agent/agent_rle.cuh +1127 -0
  37. cuda/cccl/headers/include/cub/agent/agent_scan.cuh +585 -0
  38. cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +477 -0
  39. cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +292 -0
  40. cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +1120 -0
  41. cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +341 -0
  42. cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +609 -0
  43. cuda/cccl/headers/include/cub/agent/agent_topk.cuh +764 -0
  44. cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +614 -0
  45. cuda/cccl/headers/include/cub/agent/single_pass_scan_operators.cuh +1424 -0
  46. cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +965 -0
  47. cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +1217 -0
  48. cuda/cccl/headers/include/cub/block/block_exchange.cuh +1308 -0
  49. cuda/cccl/headers/include/cub/block/block_histogram.cuh +420 -0
  50. cuda/cccl/headers/include/cub/block/block_load.cuh +1260 -0
  51. cuda/cccl/headers/include/cub/block/block_merge_sort.cuh +800 -0
  52. cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +1220 -0
  53. cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +2194 -0
  54. cuda/cccl/headers/include/cub/block/block_raking_layout.cuh +150 -0
  55. cuda/cccl/headers/include/cub/block/block_reduce.cuh +666 -0
  56. cuda/cccl/headers/include/cub/block/block_run_length_decode.cuh +434 -0
  57. cuda/cccl/headers/include/cub/block/block_scan.cuh +2584 -0
  58. cuda/cccl/headers/include/cub/block/block_shuffle.cuh +346 -0
  59. cuda/cccl/headers/include/cub/block/block_store.cuh +1246 -0
  60. cuda/cccl/headers/include/cub/block/radix_rank_sort_operations.cuh +624 -0
  61. cuda/cccl/headers/include/cub/block/specializations/block_histogram_atomic.cuh +86 -0
  62. cuda/cccl/headers/include/cub/block/specializations/block_histogram_sort.cuh +240 -0
  63. cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking.cuh +252 -0
  64. cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking_commutative_only.cuh +238 -0
  65. cuda/cccl/headers/include/cub/block/specializations/block_reduce_warp_reductions.cuh +281 -0
  66. cuda/cccl/headers/include/cub/block/specializations/block_scan_raking.cuh +790 -0
  67. cuda/cccl/headers/include/cub/block/specializations/block_scan_warp_scans.cuh +538 -0
  68. cuda/cccl/headers/include/cub/config.cuh +53 -0
  69. cuda/cccl/headers/include/cub/cub.cuh +120 -0
  70. cuda/cccl/headers/include/cub/detail/array_utils.cuh +78 -0
  71. cuda/cccl/headers/include/cub/detail/choose_offset.cuh +161 -0
  72. cuda/cccl/headers/include/cub/detail/detect_cuda_runtime.cuh +74 -0
  73. cuda/cccl/headers/include/cub/detail/device_double_buffer.cuh +96 -0
  74. cuda/cccl/headers/include/cub/detail/device_memory_resource.cuh +61 -0
  75. cuda/cccl/headers/include/cub/detail/fast_modulo_division.cuh +253 -0
  76. cuda/cccl/headers/include/cub/detail/integer_utils.cuh +88 -0
  77. cuda/cccl/headers/include/cub/detail/launcher/cuda_driver.cuh +142 -0
  78. cuda/cccl/headers/include/cub/detail/launcher/cuda_runtime.cuh +100 -0
  79. cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +118 -0
  80. cuda/cccl/headers/include/cub/detail/ptx-json/README.md +71 -0
  81. cuda/cccl/headers/include/cub/detail/ptx-json/array.h +68 -0
  82. cuda/cccl/headers/include/cub/detail/ptx-json/json.h +62 -0
  83. cuda/cccl/headers/include/cub/detail/ptx-json/object.h +100 -0
  84. cuda/cccl/headers/include/cub/detail/ptx-json/string.h +53 -0
  85. cuda/cccl/headers/include/cub/detail/ptx-json/value.h +95 -0
  86. cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +63 -0
  87. cuda/cccl/headers/include/cub/detail/rfa.cuh +731 -0
  88. cuda/cccl/headers/include/cub/detail/strong_load.cuh +189 -0
  89. cuda/cccl/headers/include/cub/detail/strong_store.cuh +220 -0
  90. cuda/cccl/headers/include/cub/detail/temporary_storage.cuh +384 -0
  91. cuda/cccl/headers/include/cub/detail/type_traits.cuh +187 -0
  92. cuda/cccl/headers/include/cub/detail/uninitialized_copy.cuh +73 -0
  93. cuda/cccl/headers/include/cub/detail/unsafe_bitcast.cuh +56 -0
  94. cuda/cccl/headers/include/cub/device/device_adjacent_difference.cuh +596 -0
  95. cuda/cccl/headers/include/cub/device/device_copy.cuh +187 -0
  96. cuda/cccl/headers/include/cub/device/device_for.cuh +985 -0
  97. cuda/cccl/headers/include/cub/device/device_histogram.cuh +1509 -0
  98. cuda/cccl/headers/include/cub/device/device_memcpy.cuh +195 -0
  99. cuda/cccl/headers/include/cub/device/device_merge.cuh +203 -0
  100. cuda/cccl/headers/include/cub/device/device_merge_sort.cuh +979 -0
  101. cuda/cccl/headers/include/cub/device/device_partition.cuh +664 -0
  102. cuda/cccl/headers/include/cub/device/device_radix_sort.cuh +3437 -0
  103. cuda/cccl/headers/include/cub/device/device_reduce.cuh +2519 -0
  104. cuda/cccl/headers/include/cub/device/device_run_length_encode.cuh +370 -0
  105. cuda/cccl/headers/include/cub/device/device_scan.cuh +2205 -0
  106. cuda/cccl/headers/include/cub/device/device_segmented_radix_sort.cuh +1496 -0
  107. cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +1520 -0
  108. cuda/cccl/headers/include/cub/device/device_segmented_sort.cuh +2811 -0
  109. cuda/cccl/headers/include/cub/device/device_select.cuh +1228 -0
  110. cuda/cccl/headers/include/cub/device/device_topk.cuh +511 -0
  111. cuda/cccl/headers/include/cub/device/device_transform.cuh +637 -0
  112. cuda/cccl/headers/include/cub/device/dispatch/dispatch_adjacent_difference.cuh +315 -0
  113. cuda/cccl/headers/include/cub/device/dispatch/dispatch_advance_iterators.cuh +111 -0
  114. cuda/cccl/headers/include/cub/device/dispatch/dispatch_batch_memcpy.cuh +719 -0
  115. cuda/cccl/headers/include/cub/device/dispatch/dispatch_common.cuh +43 -0
  116. cuda/cccl/headers/include/cub/device/dispatch/dispatch_for.cuh +198 -0
  117. cuda/cccl/headers/include/cub/device/dispatch/dispatch_histogram.cuh +1046 -0
  118. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +304 -0
  119. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge_sort.cuh +474 -0
  120. cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +1753 -0
  121. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +1327 -0
  122. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_by_key.cuh +655 -0
  123. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +536 -0
  124. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh +314 -0
  125. cuda/cccl/headers/include/cub/device/dispatch/dispatch_rle.cuh +615 -0
  126. cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan.cuh +500 -0
  127. cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan_by_key.cuh +602 -0
  128. cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +917 -0
  129. cuda/cccl/headers/include/cub/device/dispatch/dispatch_select_if.cuh +842 -0
  130. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +342 -0
  131. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +441 -0
  132. cuda/cccl/headers/include/cub/device/dispatch/dispatch_three_way_partition.cuh +389 -0
  133. cuda/cccl/headers/include/cub/device/dispatch/dispatch_topk.cuh +629 -0
  134. cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +561 -0
  135. cuda/cccl/headers/include/cub/device/dispatch/dispatch_unique_by_key.cuh +545 -0
  136. cuda/cccl/headers/include/cub/device/dispatch/kernels/for_each.cuh +226 -0
  137. cuda/cccl/headers/include/cub/device/dispatch/kernels/histogram.cuh +505 -0
  138. cuda/cccl/headers/include/cub/device/dispatch/kernels/merge_sort.cuh +334 -0
  139. cuda/cccl/headers/include/cub/device/dispatch/kernels/radix_sort.cuh +803 -0
  140. cuda/cccl/headers/include/cub/device/dispatch/kernels/reduce.cuh +578 -0
  141. cuda/cccl/headers/include/cub/device/dispatch/kernels/scan.cuh +192 -0
  142. cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_reduce.cuh +324 -0
  143. cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_sort.cuh +475 -0
  144. cuda/cccl/headers/include/cub/device/dispatch/kernels/three_way_partition.cuh +201 -0
  145. cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +1009 -0
  146. cuda/cccl/headers/include/cub/device/dispatch/kernels/unique_by_key.cuh +176 -0
  147. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +70 -0
  148. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +121 -0
  149. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +63 -0
  150. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +278 -0
  151. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +79 -0
  152. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +118 -0
  153. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +1068 -0
  154. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce.cuh +493 -0
  155. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +945 -0
  156. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +676 -0
  157. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +621 -0
  158. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +1013 -0
  159. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +249 -0
  160. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_select_if.cuh +1588 -0
  161. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +443 -0
  162. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_topk.cuh +85 -0
  163. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +454 -0
  164. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +874 -0
  165. cuda/cccl/headers/include/cub/grid/grid_even_share.cuh +227 -0
  166. cuda/cccl/headers/include/cub/grid/grid_mapping.cuh +106 -0
  167. cuda/cccl/headers/include/cub/grid/grid_queue.cuh +202 -0
  168. cuda/cccl/headers/include/cub/iterator/arg_index_input_iterator.cuh +254 -0
  169. cuda/cccl/headers/include/cub/iterator/cache_modified_input_iterator.cuh +259 -0
  170. cuda/cccl/headers/include/cub/iterator/cache_modified_output_iterator.cuh +250 -0
  171. cuda/cccl/headers/include/cub/iterator/tex_obj_input_iterator.cuh +320 -0
  172. cuda/cccl/headers/include/cub/thread/thread_load.cuh +349 -0
  173. cuda/cccl/headers/include/cub/thread/thread_operators.cuh +688 -0
  174. cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +541 -0
  175. cuda/cccl/headers/include/cub/thread/thread_scan.cuh +498 -0
  176. cuda/cccl/headers/include/cub/thread/thread_search.cuh +199 -0
  177. cuda/cccl/headers/include/cub/thread/thread_simd.cuh +458 -0
  178. cuda/cccl/headers/include/cub/thread/thread_sort.cuh +102 -0
  179. cuda/cccl/headers/include/cub/thread/thread_store.cuh +365 -0
  180. cuda/cccl/headers/include/cub/util_allocator.cuh +921 -0
  181. cuda/cccl/headers/include/cub/util_arch.cuh +167 -0
  182. cuda/cccl/headers/include/cub/util_cpp_dialect.cuh +95 -0
  183. cuda/cccl/headers/include/cub/util_debug.cuh +207 -0
  184. cuda/cccl/headers/include/cub/util_device.cuh +784 -0
  185. cuda/cccl/headers/include/cub/util_macro.cuh +97 -0
  186. cuda/cccl/headers/include/cub/util_math.cuh +118 -0
  187. cuda/cccl/headers/include/cub/util_namespace.cuh +176 -0
  188. cuda/cccl/headers/include/cub/util_policy_wrapper_t.cuh +55 -0
  189. cuda/cccl/headers/include/cub/util_ptx.cuh +513 -0
  190. cuda/cccl/headers/include/cub/util_temporary_storage.cuh +122 -0
  191. cuda/cccl/headers/include/cub/util_type.cuh +1120 -0
  192. cuda/cccl/headers/include/cub/util_vsmem.cuh +253 -0
  193. cuda/cccl/headers/include/cub/version.cuh +89 -0
  194. cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_shfl.cuh +329 -0
  195. cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_smem.cuh +177 -0
  196. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +736 -0
  197. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +407 -0
  198. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +952 -0
  199. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_smem.cuh +715 -0
  200. cuda/cccl/headers/include/cub/warp/warp_exchange.cuh +405 -0
  201. cuda/cccl/headers/include/cub/warp/warp_load.cuh +614 -0
  202. cuda/cccl/headers/include/cub/warp/warp_merge_sort.cuh +169 -0
  203. cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +824 -0
  204. cuda/cccl/headers/include/cub/warp/warp_scan.cuh +1886 -0
  205. cuda/cccl/headers/include/cub/warp/warp_store.cuh +520 -0
  206. cuda/cccl/headers/include/cub/warp/warp_utils.cuh +61 -0
  207. cuda/cccl/headers/include/cuda/__algorithm/common.h +68 -0
  208. cuda/cccl/headers/include/cuda/__algorithm/copy.h +196 -0
  209. cuda/cccl/headers/include/cuda/__algorithm/fill.h +107 -0
  210. cuda/cccl/headers/include/cuda/__annotated_ptr/access_property.h +165 -0
  211. cuda/cccl/headers/include/cuda/__annotated_ptr/access_property_encoding.h +172 -0
  212. cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr.h +217 -0
  213. cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr_base.h +100 -0
  214. cuda/cccl/headers/include/cuda/__annotated_ptr/apply_access_property.h +83 -0
  215. cuda/cccl/headers/include/cuda/__annotated_ptr/associate_access_property.h +128 -0
  216. cuda/cccl/headers/include/cuda/__annotated_ptr/createpolicy.h +210 -0
  217. cuda/cccl/headers/include/cuda/__atomic/atomic.h +145 -0
  218. cuda/cccl/headers/include/cuda/__barrier/async_contract_fulfillment.h +39 -0
  219. cuda/cccl/headers/include/cuda/__barrier/barrier.h +65 -0
  220. cuda/cccl/headers/include/cuda/__barrier/barrier_arrive_tx.h +102 -0
  221. cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +468 -0
  222. cuda/cccl/headers/include/cuda/__barrier/barrier_expect_tx.h +74 -0
  223. cuda/cccl/headers/include/cuda/__barrier/barrier_native_handle.h +45 -0
  224. cuda/cccl/headers/include/cuda/__barrier/barrier_thread_scope.h +60 -0
  225. cuda/cccl/headers/include/cuda/__bit/bit_reverse.h +171 -0
  226. cuda/cccl/headers/include/cuda/__bit/bitfield.h +122 -0
  227. cuda/cccl/headers/include/cuda/__bit/bitmask.h +90 -0
  228. cuda/cccl/headers/include/cuda/__cccl_config +36 -0
  229. cuda/cccl/headers/include/cuda/__cmath/ceil_div.h +124 -0
  230. cuda/cccl/headers/include/cuda/__cmath/fast_modulo_division.h +249 -0
  231. cuda/cccl/headers/include/cuda/__cmath/ilog.h +195 -0
  232. cuda/cccl/headers/include/cuda/__cmath/ipow.h +107 -0
  233. cuda/cccl/headers/include/cuda/__cmath/isqrt.h +80 -0
  234. cuda/cccl/headers/include/cuda/__cmath/neg.h +47 -0
  235. cuda/cccl/headers/include/cuda/__cmath/pow2.h +74 -0
  236. cuda/cccl/headers/include/cuda/__cmath/round_down.h +102 -0
  237. cuda/cccl/headers/include/cuda/__cmath/round_up.h +104 -0
  238. cuda/cccl/headers/include/cuda/__cmath/uabs.h +57 -0
  239. cuda/cccl/headers/include/cuda/__complex/complex.h +238 -0
  240. cuda/cccl/headers/include/cuda/__complex/get_real_imag.h +93 -0
  241. cuda/cccl/headers/include/cuda/__complex/traits.h +64 -0
  242. cuda/cccl/headers/include/cuda/__complex_ +28 -0
  243. cuda/cccl/headers/include/cuda/__device/all_devices.h +240 -0
  244. cuda/cccl/headers/include/cuda/__device/arch_traits.h +613 -0
  245. cuda/cccl/headers/include/cuda/__device/attributes.h +721 -0
  246. cuda/cccl/headers/include/cuda/__device/device_ref.h +185 -0
  247. cuda/cccl/headers/include/cuda/__device/physical_device.h +168 -0
  248. cuda/cccl/headers/include/cuda/__driver/driver_api.h +541 -0
  249. cuda/cccl/headers/include/cuda/__event/event.h +171 -0
  250. cuda/cccl/headers/include/cuda/__event/event_ref.h +158 -0
  251. cuda/cccl/headers/include/cuda/__event/timed_event.h +118 -0
  252. cuda/cccl/headers/include/cuda/__execution/determinism.h +91 -0
  253. cuda/cccl/headers/include/cuda/__execution/output_ordering.h +89 -0
  254. cuda/cccl/headers/include/cuda/__execution/require.h +75 -0
  255. cuda/cccl/headers/include/cuda/__execution/tune.h +70 -0
  256. cuda/cccl/headers/include/cuda/__functional/address_stability.h +131 -0
  257. cuda/cccl/headers/include/cuda/__functional/for_each_canceled.h +321 -0
  258. cuda/cccl/headers/include/cuda/__functional/maximum.h +58 -0
  259. cuda/cccl/headers/include/cuda/__functional/minimum.h +58 -0
  260. cuda/cccl/headers/include/cuda/__functional/proclaim_return_type.h +108 -0
  261. cuda/cccl/headers/include/cuda/__fwd/barrier.h +38 -0
  262. cuda/cccl/headers/include/cuda/__fwd/barrier_native_handle.h +42 -0
  263. cuda/cccl/headers/include/cuda/__fwd/complex.h +48 -0
  264. cuda/cccl/headers/include/cuda/__fwd/get_stream.h +38 -0
  265. cuda/cccl/headers/include/cuda/__fwd/pipeline.h +37 -0
  266. cuda/cccl/headers/include/cuda/__fwd/zip_iterator.h +49 -0
  267. cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +300 -0
  268. cuda/cccl/headers/include/cuda/__iterator/counting_iterator.h +483 -0
  269. cuda/cccl/headers/include/cuda/__iterator/discard_iterator.h +324 -0
  270. cuda/cccl/headers/include/cuda/__iterator/permutation_iterator.h +456 -0
  271. cuda/cccl/headers/include/cuda/__iterator/shuffle_iterator.h +334 -0
  272. cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +386 -0
  273. cuda/cccl/headers/include/cuda/__iterator/tabulate_output_iterator.h +344 -0
  274. cuda/cccl/headers/include/cuda/__iterator/transform_input_output_iterator.h +498 -0
  275. cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +501 -0
  276. cuda/cccl/headers/include/cuda/__iterator/transform_output_iterator.h +461 -0
  277. cuda/cccl/headers/include/cuda/__iterator/zip_function.h +112 -0
  278. cuda/cccl/headers/include/cuda/__iterator/zip_iterator.h +673 -0
  279. cuda/cccl/headers/include/cuda/__latch/latch.h +44 -0
  280. cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +462 -0
  281. cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +63 -0
  282. cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +122 -0
  283. cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +51 -0
  284. cuda/cccl/headers/include/cuda/__memcpy_async/check_preconditions.h +79 -0
  285. cuda/cccl/headers/include/cuda/__memcpy_async/completion_mechanism.h +47 -0
  286. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_bulk_shared_global.h +60 -0
  287. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_fallback.h +72 -0
  288. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_shared_global.h +148 -0
  289. cuda/cccl/headers/include/cuda/__memcpy_async/dispatch_memcpy_async.h +165 -0
  290. cuda/cccl/headers/include/cuda/__memcpy_async/is_local_smem_barrier.h +53 -0
  291. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async.h +179 -0
  292. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_barrier.h +99 -0
  293. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_tx.h +104 -0
  294. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_completion.h +170 -0
  295. cuda/cccl/headers/include/cuda/__memcpy_async/try_get_barrier_handle.h +59 -0
  296. cuda/cccl/headers/include/cuda/__memory/address_space.h +227 -0
  297. cuda/cccl/headers/include/cuda/__memory/align_down.h +56 -0
  298. cuda/cccl/headers/include/cuda/__memory/align_up.h +56 -0
  299. cuda/cccl/headers/include/cuda/__memory/aligned_size.h +61 -0
  300. cuda/cccl/headers/include/cuda/__memory/check_address.h +111 -0
  301. cuda/cccl/headers/include/cuda/__memory/discard_memory.h +64 -0
  302. cuda/cccl/headers/include/cuda/__memory/get_device_address.h +58 -0
  303. cuda/cccl/headers/include/cuda/__memory/is_aligned.h +47 -0
  304. cuda/cccl/headers/include/cuda/__memory/ptr_rebind.h +75 -0
  305. cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +82 -0
  306. cuda/cccl/headers/include/cuda/__memory_resource/get_property.h +153 -0
  307. cuda/cccl/headers/include/cuda/__memory_resource/properties.h +69 -0
  308. cuda/cccl/headers/include/cuda/__memory_resource/resource.h +125 -0
  309. cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +654 -0
  310. cuda/cccl/headers/include/cuda/__numeric/add_overflow.h +306 -0
  311. cuda/cccl/headers/include/cuda/__numeric/narrow.h +108 -0
  312. cuda/cccl/headers/include/cuda/__numeric/overflow_cast.h +59 -0
  313. cuda/cccl/headers/include/cuda/__numeric/overflow_result.h +43 -0
  314. cuda/cccl/headers/include/cuda/__nvtx/nvtx.h +120 -0
  315. cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2982 -0
  316. cuda/cccl/headers/include/cuda/__ptx/instructions/barrier_cluster.h +43 -0
  317. cuda/cccl/headers/include/cuda/__ptx/instructions/bfind.h +41 -0
  318. cuda/cccl/headers/include/cuda/__ptx/instructions/bmsk.h +41 -0
  319. cuda/cccl/headers/include/cuda/__ptx/instructions/clusterlaunchcontrol.h +41 -0
  320. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk.h +44 -0
  321. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_commit_group.h +43 -0
  322. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_tensor.h +45 -0
  323. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_wait_group.h +43 -0
  324. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_mbarrier_arrive.h +42 -0
  325. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk.h +60 -0
  326. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk_tensor.h +43 -0
  327. cuda/cccl/headers/include/cuda/__ptx/instructions/elect_sync.h +41 -0
  328. cuda/cccl/headers/include/cuda/__ptx/instructions/exit.h +41 -0
  329. cuda/cccl/headers/include/cuda/__ptx/instructions/fence.h +49 -0
  330. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/barrier_cluster.h +115 -0
  331. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bfind.h +190 -0
  332. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bmsk.h +54 -0
  333. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/clusterlaunchcontrol.h +242 -0
  334. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk.h +197 -0
  335. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_commit_group.h +25 -0
  336. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_multicast.h +54 -0
  337. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor.h +997 -0
  338. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_gather_scatter.h +318 -0
  339. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_multicast.h +671 -0
  340. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_wait_group.h +46 -0
  341. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive.h +26 -0
  342. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive_noinc.h +26 -0
  343. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk.h +1470 -0
  344. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_bf16.h +132 -0
  345. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_f16.h +132 -0
  346. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_tensor.h +601 -0
  347. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/elect_sync.h +36 -0
  348. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/exit.h +25 -0
  349. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence.h +208 -0
  350. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_mbarrier_init.h +31 -0
  351. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_alias.h +25 -0
  352. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async.h +58 -0
  353. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async_generic_sync_restrict.h +64 -0
  354. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_tensormap_generic.h +102 -0
  355. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_sync_restrict.h +64 -0
  356. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/get_sreg.h +949 -0
  357. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/getctarank.h +32 -0
  358. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/ld.h +5542 -0
  359. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive.h +399 -0
  360. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_expect_tx.h +184 -0
  361. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_no_complete.h +34 -0
  362. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_expect_tx.h +102 -0
  363. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_init.h +27 -0
  364. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait.h +143 -0
  365. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait_parity.h +144 -0
  366. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait.h +286 -0
  367. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait_parity.h +290 -0
  368. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_ld_reduce.h +2202 -0
  369. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_red.h +1362 -0
  370. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_st.h +236 -0
  371. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/prmt.h +230 -0
  372. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/red_async.h +460 -0
  373. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shl.h +96 -0
  374. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shr.h +168 -0
  375. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st.h +1490 -0
  376. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_async.h +123 -0
  377. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_bulk.h +31 -0
  378. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_alloc.h +132 -0
  379. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_commit.h +99 -0
  380. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_cp.h +765 -0
  381. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_fence.h +58 -0
  382. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_ld.h +4927 -0
  383. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma.h +4291 -0
  384. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma_ws.h +7110 -0
  385. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_shift.h +42 -0
  386. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_st.h +5063 -0
  387. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_wait.h +56 -0
  388. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_cp_fenceproxy.h +71 -0
  389. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_replace.h +1030 -0
  390. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/trap.h +25 -0
  391. cuda/cccl/headers/include/cuda/__ptx/instructions/get_sreg.h +43 -0
  392. cuda/cccl/headers/include/cuda/__ptx/instructions/getctarank.h +43 -0
  393. cuda/cccl/headers/include/cuda/__ptx/instructions/ld.h +41 -0
  394. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_arrive.h +45 -0
  395. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_expect_tx.h +41 -0
  396. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_init.h +43 -0
  397. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_wait.h +46 -0
  398. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_ld_reduce.h +41 -0
  399. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_red.h +41 -0
  400. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_st.h +41 -0
  401. cuda/cccl/headers/include/cuda/__ptx/instructions/prmt.h +41 -0
  402. cuda/cccl/headers/include/cuda/__ptx/instructions/red_async.h +43 -0
  403. cuda/cccl/headers/include/cuda/__ptx/instructions/shfl_sync.h +244 -0
  404. cuda/cccl/headers/include/cuda/__ptx/instructions/shl.h +41 -0
  405. cuda/cccl/headers/include/cuda/__ptx/instructions/shr.h +41 -0
  406. cuda/cccl/headers/include/cuda/__ptx/instructions/st.h +41 -0
  407. cuda/cccl/headers/include/cuda/__ptx/instructions/st_async.h +43 -0
  408. cuda/cccl/headers/include/cuda/__ptx/instructions/st_bulk.h +41 -0
  409. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_alloc.h +41 -0
  410. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_commit.h +41 -0
  411. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_cp.h +41 -0
  412. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_fence.h +41 -0
  413. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_ld.h +41 -0
  414. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma.h +41 -0
  415. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma_ws.h +41 -0
  416. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_shift.h +41 -0
  417. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_st.h +41 -0
  418. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_wait.h +41 -0
  419. cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_cp_fenceproxy.h +43 -0
  420. cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_replace.h +43 -0
  421. cuda/cccl/headers/include/cuda/__ptx/instructions/trap.h +41 -0
  422. cuda/cccl/headers/include/cuda/__ptx/ptx_dot_variants.h +230 -0
  423. cuda/cccl/headers/include/cuda/__ptx/ptx_helper_functions.h +176 -0
  424. cuda/cccl/headers/include/cuda/__random/feistel_bijection.h +105 -0
  425. cuda/cccl/headers/include/cuda/__random/random_bijection.h +88 -0
  426. cuda/cccl/headers/include/cuda/__runtime/ensure_current_context.h +97 -0
  427. cuda/cccl/headers/include/cuda/__runtime/types.h +41 -0
  428. cuda/cccl/headers/include/cuda/__semaphore/counting_semaphore.h +53 -0
  429. cuda/cccl/headers/include/cuda/__stream/get_stream.h +110 -0
  430. cuda/cccl/headers/include/cuda/__stream/stream.h +142 -0
  431. cuda/cccl/headers/include/cuda/__stream/stream_ref.h +296 -0
  432. cuda/cccl/headers/include/cuda/__type_traits/is_floating_point.h +47 -0
  433. cuda/cccl/headers/include/cuda/__type_traits/is_specialization_of.h +37 -0
  434. cuda/cccl/headers/include/cuda/__utility/__basic_any/access.h +88 -0
  435. cuda/cccl/headers/include/cuda/__utility/__basic_any/any_cast.h +83 -0
  436. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_base.h +148 -0
  437. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_from.h +96 -0
  438. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_fwd.h +128 -0
  439. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_ptr.h +304 -0
  440. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_ref.h +337 -0
  441. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_value.h +590 -0
  442. cuda/cccl/headers/include/cuda/__utility/__basic_any/conversions.h +169 -0
  443. cuda/cccl/headers/include/cuda/__utility/__basic_any/dynamic_any_cast.h +107 -0
  444. cuda/cccl/headers/include/cuda/__utility/__basic_any/interfaces.h +359 -0
  445. cuda/cccl/headers/include/cuda/__utility/__basic_any/iset.h +142 -0
  446. cuda/cccl/headers/include/cuda/__utility/__basic_any/overrides.h +64 -0
  447. cuda/cccl/headers/include/cuda/__utility/__basic_any/rtti.h +257 -0
  448. cuda/cccl/headers/include/cuda/__utility/__basic_any/semiregular.h +322 -0
  449. cuda/cccl/headers/include/cuda/__utility/__basic_any/storage.h +79 -0
  450. cuda/cccl/headers/include/cuda/__utility/__basic_any/tagged_ptr.h +58 -0
  451. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtcall.h +162 -0
  452. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_functions.h +184 -0
  453. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_ptrs.h +80 -0
  454. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_tables.h +155 -0
  455. cuda/cccl/headers/include/cuda/__utility/basic_any.h +507 -0
  456. cuda/cccl/headers/include/cuda/__utility/immovable.h +50 -0
  457. cuda/cccl/headers/include/cuda/__utility/inherit.h +36 -0
  458. cuda/cccl/headers/include/cuda/__utility/no_init.h +29 -0
  459. cuda/cccl/headers/include/cuda/__utility/static_for.h +79 -0
  460. cuda/cccl/headers/include/cuda/__warp/lane_mask.h +326 -0
  461. cuda/cccl/headers/include/cuda/__warp/warp_match_all.h +65 -0
  462. cuda/cccl/headers/include/cuda/__warp/warp_shuffle.h +251 -0
  463. cuda/cccl/headers/include/cuda/access_property +26 -0
  464. cuda/cccl/headers/include/cuda/algorithm +27 -0
  465. cuda/cccl/headers/include/cuda/annotated_ptr +29 -0
  466. cuda/cccl/headers/include/cuda/atomic +27 -0
  467. cuda/cccl/headers/include/cuda/barrier +267 -0
  468. cuda/cccl/headers/include/cuda/bit +29 -0
  469. cuda/cccl/headers/include/cuda/cmath +36 -0
  470. cuda/cccl/headers/include/cuda/devices +20 -0
  471. cuda/cccl/headers/include/cuda/discard_memory +32 -0
  472. cuda/cccl/headers/include/cuda/functional +32 -0
  473. cuda/cccl/headers/include/cuda/iterator +38 -0
  474. cuda/cccl/headers/include/cuda/latch +27 -0
  475. cuda/cccl/headers/include/cuda/mdspan +28 -0
  476. cuda/cccl/headers/include/cuda/memory +34 -0
  477. cuda/cccl/headers/include/cuda/memory_resource +35 -0
  478. cuda/cccl/headers/include/cuda/numeric +29 -0
  479. cuda/cccl/headers/include/cuda/pipeline +579 -0
  480. cuda/cccl/headers/include/cuda/ptx +128 -0
  481. cuda/cccl/headers/include/cuda/semaphore +31 -0
  482. cuda/cccl/headers/include/cuda/std/__algorithm/adjacent_find.h +59 -0
  483. cuda/cccl/headers/include/cuda/std/__algorithm/all_of.h +45 -0
  484. cuda/cccl/headers/include/cuda/std/__algorithm/any_of.h +45 -0
  485. cuda/cccl/headers/include/cuda/std/__algorithm/binary_search.h +53 -0
  486. cuda/cccl/headers/include/cuda/std/__algorithm/clamp.h +48 -0
  487. cuda/cccl/headers/include/cuda/std/__algorithm/comp.h +58 -0
  488. cuda/cccl/headers/include/cuda/std/__algorithm/comp_ref_type.h +85 -0
  489. cuda/cccl/headers/include/cuda/std/__algorithm/copy.h +142 -0
  490. cuda/cccl/headers/include/cuda/std/__algorithm/copy_backward.h +80 -0
  491. cuda/cccl/headers/include/cuda/std/__algorithm/copy_if.h +47 -0
  492. cuda/cccl/headers/include/cuda/std/__algorithm/copy_n.h +73 -0
  493. cuda/cccl/headers/include/cuda/std/__algorithm/count.h +49 -0
  494. cuda/cccl/headers/include/cuda/std/__algorithm/count_if.h +49 -0
  495. cuda/cccl/headers/include/cuda/std/__algorithm/equal.h +128 -0
  496. cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +101 -0
  497. cuda/cccl/headers/include/cuda/std/__algorithm/fill.h +58 -0
  498. cuda/cccl/headers/include/cuda/std/__algorithm/fill_n.h +51 -0
  499. cuda/cccl/headers/include/cuda/std/__algorithm/find.h +62 -0
  500. cuda/cccl/headers/include/cuda/std/__algorithm/find_end.h +225 -0
  501. cuda/cccl/headers/include/cuda/std/__algorithm/find_first_of.h +73 -0
  502. cuda/cccl/headers/include/cuda/std/__algorithm/find_if.h +46 -0
  503. cuda/cccl/headers/include/cuda/std/__algorithm/find_if_not.h +46 -0
  504. cuda/cccl/headers/include/cuda/std/__algorithm/for_each.h +42 -0
  505. cuda/cccl/headers/include/cuda/std/__algorithm/for_each_n.h +48 -0
  506. cuda/cccl/headers/include/cuda/std/__algorithm/generate.h +41 -0
  507. cuda/cccl/headers/include/cuda/std/__algorithm/generate_n.h +46 -0
  508. cuda/cccl/headers/include/cuda/std/__algorithm/half_positive.h +49 -0
  509. cuda/cccl/headers/include/cuda/std/__algorithm/in_fun_result.h +55 -0
  510. cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +92 -0
  511. cuda/cccl/headers/include/cuda/std/__algorithm/is_heap.h +50 -0
  512. cuda/cccl/headers/include/cuda/std/__algorithm/is_heap_until.h +83 -0
  513. cuda/cccl/headers/include/cuda/std/__algorithm/is_partitioned.h +57 -0
  514. cuda/cccl/headers/include/cuda/std/__algorithm/is_permutation.h +252 -0
  515. cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted.h +49 -0
  516. cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted_until.h +68 -0
  517. cuda/cccl/headers/include/cuda/std/__algorithm/iter_swap.h +82 -0
  518. cuda/cccl/headers/include/cuda/std/__algorithm/iterator_operations.h +185 -0
  519. cuda/cccl/headers/include/cuda/std/__algorithm/lexicographical_compare.h +68 -0
  520. cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +82 -0
  521. cuda/cccl/headers/include/cuda/std/__algorithm/make_heap.h +70 -0
  522. cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +96 -0
  523. cuda/cccl/headers/include/cuda/std/__algorithm/max.h +62 -0
  524. cuda/cccl/headers/include/cuda/std/__algorithm/max_element.h +67 -0
  525. cuda/cccl/headers/include/cuda/std/__algorithm/merge.h +89 -0
  526. cuda/cccl/headers/include/cuda/std/__algorithm/min.h +62 -0
  527. cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +87 -0
  528. cuda/cccl/headers/include/cuda/std/__algorithm/minmax.h +66 -0
  529. cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +140 -0
  530. cuda/cccl/headers/include/cuda/std/__algorithm/mismatch.h +83 -0
  531. cuda/cccl/headers/include/cuda/std/__algorithm/move.h +86 -0
  532. cuda/cccl/headers/include/cuda/std/__algorithm/move_backward.h +84 -0
  533. cuda/cccl/headers/include/cuda/std/__algorithm/next_permutation.h +88 -0
  534. cuda/cccl/headers/include/cuda/std/__algorithm/none_of.h +45 -0
  535. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort.h +102 -0
  536. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +122 -0
  537. cuda/cccl/headers/include/cuda/std/__algorithm/partition.h +120 -0
  538. cuda/cccl/headers/include/cuda/std/__algorithm/partition_copy.h +59 -0
  539. cuda/cccl/headers/include/cuda/std/__algorithm/partition_point.h +61 -0
  540. cuda/cccl/headers/include/cuda/std/__algorithm/pop_heap.h +93 -0
  541. cuda/cccl/headers/include/cuda/std/__algorithm/prev_permutation.h +88 -0
  542. cuda/cccl/headers/include/cuda/std/__algorithm/push_heap.h +100 -0
  543. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_for_each.h +84 -0
  544. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_for_each_n.h +68 -0
  545. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_iterator_concept.h +65 -0
  546. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min.h +98 -0
  547. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min_element.h +68 -0
  548. cuda/cccl/headers/include/cuda/std/__algorithm/remove.h +55 -0
  549. cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy.h +47 -0
  550. cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy_if.h +47 -0
  551. cuda/cccl/headers/include/cuda/std/__algorithm/remove_if.h +56 -0
  552. cuda/cccl/headers/include/cuda/std/__algorithm/replace.h +45 -0
  553. cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy.h +54 -0
  554. cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy_if.h +50 -0
  555. cuda/cccl/headers/include/cuda/std/__algorithm/replace_if.h +45 -0
  556. cuda/cccl/headers/include/cuda/std/__algorithm/reverse.h +81 -0
  557. cuda/cccl/headers/include/cuda/std/__algorithm/reverse_copy.h +43 -0
  558. cuda/cccl/headers/include/cuda/std/__algorithm/rotate.h +261 -0
  559. cuda/cccl/headers/include/cuda/std/__algorithm/rotate_copy.h +40 -0
  560. cuda/cccl/headers/include/cuda/std/__algorithm/search.h +185 -0
  561. cuda/cccl/headers/include/cuda/std/__algorithm/search_n.h +163 -0
  562. cuda/cccl/headers/include/cuda/std/__algorithm/set_difference.h +95 -0
  563. cuda/cccl/headers/include/cuda/std/__algorithm/set_intersection.h +122 -0
  564. cuda/cccl/headers/include/cuda/std/__algorithm/set_symmetric_difference.h +134 -0
  565. cuda/cccl/headers/include/cuda/std/__algorithm/set_union.h +128 -0
  566. cuda/cccl/headers/include/cuda/std/__algorithm/shift_left.h +84 -0
  567. cuda/cccl/headers/include/cuda/std/__algorithm/shift_right.h +144 -0
  568. cuda/cccl/headers/include/cuda/std/__algorithm/sift_down.h +139 -0
  569. cuda/cccl/headers/include/cuda/std/__algorithm/sort_heap.h +70 -0
  570. cuda/cccl/headers/include/cuda/std/__algorithm/swap_ranges.h +78 -0
  571. cuda/cccl/headers/include/cuda/std/__algorithm/transform.h +59 -0
  572. cuda/cccl/headers/include/cuda/std/__algorithm/unique.h +76 -0
  573. cuda/cccl/headers/include/cuda/std/__algorithm/unique_copy.h +155 -0
  574. cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_iter.h +95 -0
  575. cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_range.h +126 -0
  576. cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +83 -0
  577. cuda/cccl/headers/include/cuda/std/__algorithm_ +26 -0
  578. cuda/cccl/headers/include/cuda/std/__atomic/api/common.h +192 -0
  579. cuda/cccl/headers/include/cuda/std/__atomic/api/owned.h +136 -0
  580. cuda/cccl/headers/include/cuda/std/__atomic/api/reference.h +118 -0
  581. cuda/cccl/headers/include/cuda/std/__atomic/functions/common.h +58 -0
  582. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_local.h +208 -0
  583. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_derived.h +401 -0
  584. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated.h +3971 -0
  585. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated_helper.h +177 -0
  586. cuda/cccl/headers/include/cuda/std/__atomic/functions/host.h +211 -0
  587. cuda/cccl/headers/include/cuda/std/__atomic/functions.h +33 -0
  588. cuda/cccl/headers/include/cuda/std/__atomic/order.h +159 -0
  589. cuda/cccl/headers/include/cuda/std/__atomic/platform/msvc_to_builtins.h +654 -0
  590. cuda/cccl/headers/include/cuda/std/__atomic/platform.h +93 -0
  591. cuda/cccl/headers/include/cuda/std/__atomic/scopes.h +105 -0
  592. cuda/cccl/headers/include/cuda/std/__atomic/types/base.h +249 -0
  593. cuda/cccl/headers/include/cuda/std/__atomic/types/common.h +104 -0
  594. cuda/cccl/headers/include/cuda/std/__atomic/types/locked.h +225 -0
  595. cuda/cccl/headers/include/cuda/std/__atomic/types/reference.h +72 -0
  596. cuda/cccl/headers/include/cuda/std/__atomic/types/small.h +228 -0
  597. cuda/cccl/headers/include/cuda/std/__atomic/types.h +52 -0
  598. cuda/cccl/headers/include/cuda/std/__atomic/wait/notify_wait.h +95 -0
  599. cuda/cccl/headers/include/cuda/std/__atomic/wait/polling.h +65 -0
  600. cuda/cccl/headers/include/cuda/std/__barrier/barrier.h +227 -0
  601. cuda/cccl/headers/include/cuda/std/__barrier/empty_completion.h +37 -0
  602. cuda/cccl/headers/include/cuda/std/__barrier/poll_tester.h +82 -0
  603. cuda/cccl/headers/include/cuda/std/__bit/bit_cast.h +76 -0
  604. cuda/cccl/headers/include/cuda/std/__bit/byteswap.h +185 -0
  605. cuda/cccl/headers/include/cuda/std/__bit/countl.h +167 -0
  606. cuda/cccl/headers/include/cuda/std/__bit/countr.h +185 -0
  607. cuda/cccl/headers/include/cuda/std/__bit/endian.h +39 -0
  608. cuda/cccl/headers/include/cuda/std/__bit/has_single_bit.h +43 -0
  609. cuda/cccl/headers/include/cuda/std/__bit/integral.h +126 -0
  610. cuda/cccl/headers/include/cuda/std/__bit/popcount.h +154 -0
  611. cuda/cccl/headers/include/cuda/std/__bit/reference.h +1272 -0
  612. cuda/cccl/headers/include/cuda/std/__bit/rotate.h +94 -0
  613. cuda/cccl/headers/include/cuda/std/__cccl/architecture.h +78 -0
  614. cuda/cccl/headers/include/cuda/std/__cccl/assert.h +161 -0
  615. cuda/cccl/headers/include/cuda/std/__cccl/attributes.h +206 -0
  616. cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +676 -0
  617. cuda/cccl/headers/include/cuda/std/__cccl/compiler.h +217 -0
  618. cuda/cccl/headers/include/cuda/std/__cccl/cuda_capabilities.h +51 -0
  619. cuda/cccl/headers/include/cuda/std/__cccl/cuda_toolkit.h +56 -0
  620. cuda/cccl/headers/include/cuda/std/__cccl/deprecated.h +88 -0
  621. cuda/cccl/headers/include/cuda/std/__cccl/diagnostic.h +131 -0
  622. cuda/cccl/headers/include/cuda/std/__cccl/dialect.h +123 -0
  623. cuda/cccl/headers/include/cuda/std/__cccl/epilogue.h +344 -0
  624. cuda/cccl/headers/include/cuda/std/__cccl/exceptions.h +79 -0
  625. cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +68 -0
  626. cuda/cccl/headers/include/cuda/std/__cccl/extended_data_types.h +160 -0
  627. cuda/cccl/headers/include/cuda/std/__cccl/is_non_narrowing_convertible.h +73 -0
  628. cuda/cccl/headers/include/cuda/std/__cccl/os.h +54 -0
  629. cuda/cccl/headers/include/cuda/std/__cccl/preprocessor.h +1284 -0
  630. cuda/cccl/headers/include/cuda/std/__cccl/prologue.h +281 -0
  631. cuda/cccl/headers/include/cuda/std/__cccl/ptx_isa.h +253 -0
  632. cuda/cccl/headers/include/cuda/std/__cccl/rtti.h +72 -0
  633. cuda/cccl/headers/include/cuda/std/__cccl/sequence_access.h +87 -0
  634. cuda/cccl/headers/include/cuda/std/__cccl/system_header.h +38 -0
  635. cuda/cccl/headers/include/cuda/std/__cccl/unreachable.h +31 -0
  636. cuda/cccl/headers/include/cuda/std/__cccl/version.h +26 -0
  637. cuda/cccl/headers/include/cuda/std/__cccl/visibility.h +171 -0
  638. cuda/cccl/headers/include/cuda/std/__charconv/chars_format.h +81 -0
  639. cuda/cccl/headers/include/cuda/std/__charconv/from_chars.h +154 -0
  640. cuda/cccl/headers/include/cuda/std/__charconv/from_chars_result.h +56 -0
  641. cuda/cccl/headers/include/cuda/std/__charconv/to_chars.h +148 -0
  642. cuda/cccl/headers/include/cuda/std/__charconv/to_chars_result.h +56 -0
  643. cuda/cccl/headers/include/cuda/std/__charconv_ +31 -0
  644. cuda/cccl/headers/include/cuda/std/__chrono/calendar.h +54 -0
  645. cuda/cccl/headers/include/cuda/std/__chrono/day.h +162 -0
  646. cuda/cccl/headers/include/cuda/std/__chrono/duration.h +503 -0
  647. cuda/cccl/headers/include/cuda/std/__chrono/file_clock.h +55 -0
  648. cuda/cccl/headers/include/cuda/std/__chrono/high_resolution_clock.h +46 -0
  649. cuda/cccl/headers/include/cuda/std/__chrono/month.h +187 -0
  650. cuda/cccl/headers/include/cuda/std/__chrono/steady_clock.h +60 -0
  651. cuda/cccl/headers/include/cuda/std/__chrono/system_clock.h +80 -0
  652. cuda/cccl/headers/include/cuda/std/__chrono/time_point.h +258 -0
  653. cuda/cccl/headers/include/cuda/std/__chrono/year.h +186 -0
  654. cuda/cccl/headers/include/cuda/std/__cmath/abs.h +127 -0
  655. cuda/cccl/headers/include/cuda/std/__cmath/copysign.h +88 -0
  656. cuda/cccl/headers/include/cuda/std/__cmath/error_functions.h +200 -0
  657. cuda/cccl/headers/include/cuda/std/__cmath/exponential_functions.h +784 -0
  658. cuda/cccl/headers/include/cuda/std/__cmath/fdim.h +118 -0
  659. cuda/cccl/headers/include/cuda/std/__cmath/fma.h +125 -0
  660. cuda/cccl/headers/include/cuda/std/__cmath/fpclassify.h +231 -0
  661. cuda/cccl/headers/include/cuda/std/__cmath/gamma.h +205 -0
  662. cuda/cccl/headers/include/cuda/std/__cmath/hyperbolic_functions.h +286 -0
  663. cuda/cccl/headers/include/cuda/std/__cmath/hypot.h +221 -0
  664. cuda/cccl/headers/include/cuda/std/__cmath/inverse_hyperbolic_functions.h +286 -0
  665. cuda/cccl/headers/include/cuda/std/__cmath/inverse_trigonometric_functions.h +371 -0
  666. cuda/cccl/headers/include/cuda/std/__cmath/isfinite.h +167 -0
  667. cuda/cccl/headers/include/cuda/std/__cmath/isinf.h +205 -0
  668. cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +180 -0
  669. cuda/cccl/headers/include/cuda/std/__cmath/isnormal.h +138 -0
  670. cuda/cccl/headers/include/cuda/std/__cmath/lerp.h +101 -0
  671. cuda/cccl/headers/include/cuda/std/__cmath/logarithms.h +534 -0
  672. cuda/cccl/headers/include/cuda/std/__cmath/min_max.h +260 -0
  673. cuda/cccl/headers/include/cuda/std/__cmath/modulo.h +208 -0
  674. cuda/cccl/headers/include/cuda/std/__cmath/nan.h +54 -0
  675. cuda/cccl/headers/include/cuda/std/__cmath/remainder.h +206 -0
  676. cuda/cccl/headers/include/cuda/std/__cmath/roots.h +199 -0
  677. cuda/cccl/headers/include/cuda/std/__cmath/rounding_functions.h +984 -0
  678. cuda/cccl/headers/include/cuda/std/__cmath/signbit.h +56 -0
  679. cuda/cccl/headers/include/cuda/std/__cmath/traits.h +238 -0
  680. cuda/cccl/headers/include/cuda/std/__cmath/trigonometric_functions.h +328 -0
  681. cuda/cccl/headers/include/cuda/std/__complex/arg.h +84 -0
  682. cuda/cccl/headers/include/cuda/std/__complex/complex.h +674 -0
  683. cuda/cccl/headers/include/cuda/std/__complex/exponential_functions.h +411 -0
  684. cuda/cccl/headers/include/cuda/std/__complex/hyperbolic_functions.h +117 -0
  685. cuda/cccl/headers/include/cuda/std/__complex/inverse_hyperbolic_functions.h +216 -0
  686. cuda/cccl/headers/include/cuda/std/__complex/inverse_trigonometric_functions.h +131 -0
  687. cuda/cccl/headers/include/cuda/std/__complex/literals.h +106 -0
  688. cuda/cccl/headers/include/cuda/std/__complex/logarithms.h +303 -0
  689. cuda/cccl/headers/include/cuda/std/__complex/math.h +159 -0
  690. cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +322 -0
  691. cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +321 -0
  692. cuda/cccl/headers/include/cuda/std/__complex/roots.h +214 -0
  693. cuda/cccl/headers/include/cuda/std/__complex/trigonometric_functions.h +61 -0
  694. cuda/cccl/headers/include/cuda/std/__complex/tuple.h +107 -0
  695. cuda/cccl/headers/include/cuda/std/__complex/vector_support.h +130 -0
  696. cuda/cccl/headers/include/cuda/std/__concepts/arithmetic.h +56 -0
  697. cuda/cccl/headers/include/cuda/std/__concepts/assignable.h +64 -0
  698. cuda/cccl/headers/include/cuda/std/__concepts/boolean_testable.h +63 -0
  699. cuda/cccl/headers/include/cuda/std/__concepts/class_or_enum.h +45 -0
  700. cuda/cccl/headers/include/cuda/std/__concepts/common_reference_with.h +69 -0
  701. cuda/cccl/headers/include/cuda/std/__concepts/common_with.h +82 -0
  702. cuda/cccl/headers/include/cuda/std/__concepts/concept_macros.h +341 -0
  703. cuda/cccl/headers/include/cuda/std/__concepts/constructible.h +174 -0
  704. cuda/cccl/headers/include/cuda/std/__concepts/convertible_to.h +70 -0
  705. cuda/cccl/headers/include/cuda/std/__concepts/copyable.h +60 -0
  706. cuda/cccl/headers/include/cuda/std/__concepts/derived_from.h +56 -0
  707. cuda/cccl/headers/include/cuda/std/__concepts/destructible.h +76 -0
  708. cuda/cccl/headers/include/cuda/std/__concepts/different_from.h +38 -0
  709. cuda/cccl/headers/include/cuda/std/__concepts/equality_comparable.h +100 -0
  710. cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +80 -0
  711. cuda/cccl/headers/include/cuda/std/__concepts/movable.h +58 -0
  712. cuda/cccl/headers/include/cuda/std/__concepts/predicate.h +54 -0
  713. cuda/cccl/headers/include/cuda/std/__concepts/regular.h +54 -0
  714. cuda/cccl/headers/include/cuda/std/__concepts/relation.h +77 -0
  715. cuda/cccl/headers/include/cuda/std/__concepts/same_as.h +39 -0
  716. cuda/cccl/headers/include/cuda/std/__concepts/semiregular.h +54 -0
  717. cuda/cccl/headers/include/cuda/std/__concepts/swappable.h +206 -0
  718. cuda/cccl/headers/include/cuda/std/__concepts/totally_ordered.h +101 -0
  719. cuda/cccl/headers/include/cuda/std/__cstddef/byte.h +113 -0
  720. cuda/cccl/headers/include/cuda/std/__cstddef/types.h +52 -0
  721. cuda/cccl/headers/include/cuda/std/__cstdlib/abs.h +57 -0
  722. cuda/cccl/headers/include/cuda/std/__cstdlib/aligned_alloc.h +66 -0
  723. cuda/cccl/headers/include/cuda/std/__cstdlib/div.h +96 -0
  724. cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +69 -0
  725. cuda/cccl/headers/include/cuda/std/__cstring/memcpy.h +61 -0
  726. cuda/cccl/headers/include/cuda/std/__cstring/memset.h +46 -0
  727. cuda/cccl/headers/include/cuda/std/__cuda/api_wrapper.h +62 -0
  728. cuda/cccl/headers/include/cuda/std/__cuda/ensure_current_device.h +72 -0
  729. cuda/cccl/headers/include/cuda/std/__exception/cuda_error.h +146 -0
  730. cuda/cccl/headers/include/cuda/std/__exception/terminate.h +73 -0
  731. cuda/cccl/headers/include/cuda/std/__execution/env.h +455 -0
  732. cuda/cccl/headers/include/cuda/std/__execution/policy.h +88 -0
  733. cuda/cccl/headers/include/cuda/std/__expected/bad_expected_access.h +127 -0
  734. cuda/cccl/headers/include/cuda/std/__expected/expected.h +1956 -0
  735. cuda/cccl/headers/include/cuda/std/__expected/expected_base.h +1050 -0
  736. cuda/cccl/headers/include/cuda/std/__expected/unexpect.h +37 -0
  737. cuda/cccl/headers/include/cuda/std/__expected/unexpected.h +172 -0
  738. cuda/cccl/headers/include/cuda/std/__floating_point/arithmetic.h +56 -0
  739. cuda/cccl/headers/include/cuda/std/__floating_point/cast.h +809 -0
  740. cuda/cccl/headers/include/cuda/std/__floating_point/cccl_fp.h +125 -0
  741. cuda/cccl/headers/include/cuda/std/__floating_point/common_type.h +48 -0
  742. cuda/cccl/headers/include/cuda/std/__floating_point/constants.h +376 -0
  743. cuda/cccl/headers/include/cuda/std/__floating_point/conversion_rank_order.h +124 -0
  744. cuda/cccl/headers/include/cuda/std/__floating_point/cuda_fp_types.h +113 -0
  745. cuda/cccl/headers/include/cuda/std/__floating_point/decompose.h +69 -0
  746. cuda/cccl/headers/include/cuda/std/__floating_point/format.h +162 -0
  747. cuda/cccl/headers/include/cuda/std/__floating_point/fp.h +40 -0
  748. cuda/cccl/headers/include/cuda/std/__floating_point/mask.h +78 -0
  749. cuda/cccl/headers/include/cuda/std/__floating_point/native_type.h +81 -0
  750. cuda/cccl/headers/include/cuda/std/__floating_point/overflow_handler.h +139 -0
  751. cuda/cccl/headers/include/cuda/std/__floating_point/properties.h +229 -0
  752. cuda/cccl/headers/include/cuda/std/__floating_point/storage.h +248 -0
  753. cuda/cccl/headers/include/cuda/std/__floating_point/traits.h +172 -0
  754. cuda/cccl/headers/include/cuda/std/__format/buffer.h +48 -0
  755. cuda/cccl/headers/include/cuda/std/__format/concepts.h +69 -0
  756. cuda/cccl/headers/include/cuda/std/__format/format_arg.h +282 -0
  757. cuda/cccl/headers/include/cuda/std/__format/format_arg_store.h +279 -0
  758. cuda/cccl/headers/include/cuda/std/__format/format_args.h +122 -0
  759. cuda/cccl/headers/include/cuda/std/__format/format_context.h +92 -0
  760. cuda/cccl/headers/include/cuda/std/__format/format_error.h +76 -0
  761. cuda/cccl/headers/include/cuda/std/__format/format_integral.h +237 -0
  762. cuda/cccl/headers/include/cuda/std/__format/format_parse_context.h +124 -0
  763. cuda/cccl/headers/include/cuda/std/__format/format_spec_parser.h +1230 -0
  764. cuda/cccl/headers/include/cuda/std/__format/formatter.h +59 -0
  765. cuda/cccl/headers/include/cuda/std/__format/formatters/bool.h +101 -0
  766. cuda/cccl/headers/include/cuda/std/__format/formatters/char.h +124 -0
  767. cuda/cccl/headers/include/cuda/std/__format/formatters/fp.h +101 -0
  768. cuda/cccl/headers/include/cuda/std/__format/formatters/int.h +174 -0
  769. cuda/cccl/headers/include/cuda/std/__format/formatters/ptr.h +104 -0
  770. cuda/cccl/headers/include/cuda/std/__format/formatters/str.h +178 -0
  771. cuda/cccl/headers/include/cuda/std/__format/output_utils.h +272 -0
  772. cuda/cccl/headers/include/cuda/std/__format/parse_arg_id.h +138 -0
  773. cuda/cccl/headers/include/cuda/std/__format_ +45 -0
  774. cuda/cccl/headers/include/cuda/std/__functional/binary_function.h +63 -0
  775. cuda/cccl/headers/include/cuda/std/__functional/binary_negate.h +65 -0
  776. cuda/cccl/headers/include/cuda/std/__functional/bind.h +337 -0
  777. cuda/cccl/headers/include/cuda/std/__functional/bind_back.h +80 -0
  778. cuda/cccl/headers/include/cuda/std/__functional/bind_front.h +73 -0
  779. cuda/cccl/headers/include/cuda/std/__functional/binder1st.h +74 -0
  780. cuda/cccl/headers/include/cuda/std/__functional/binder2nd.h +74 -0
  781. cuda/cccl/headers/include/cuda/std/__functional/compose.h +68 -0
  782. cuda/cccl/headers/include/cuda/std/__functional/default_searcher.h +75 -0
  783. cuda/cccl/headers/include/cuda/std/__functional/function.h +1278 -0
  784. cuda/cccl/headers/include/cuda/std/__functional/hash.h +649 -0
  785. cuda/cccl/headers/include/cuda/std/__functional/identity.h +57 -0
  786. cuda/cccl/headers/include/cuda/std/__functional/invoke.h +560 -0
  787. cuda/cccl/headers/include/cuda/std/__functional/is_transparent.h +41 -0
  788. cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +67 -0
  789. cuda/cccl/headers/include/cuda/std/__functional/mem_fun_ref.h +211 -0
  790. cuda/cccl/headers/include/cuda/std/__functional/not_fn.h +120 -0
  791. cuda/cccl/headers/include/cuda/std/__functional/operations.h +534 -0
  792. cuda/cccl/headers/include/cuda/std/__functional/perfect_forward.h +128 -0
  793. cuda/cccl/headers/include/cuda/std/__functional/pointer_to_binary_function.h +64 -0
  794. cuda/cccl/headers/include/cuda/std/__functional/pointer_to_unary_function.h +63 -0
  795. cuda/cccl/headers/include/cuda/std/__functional/ranges_operations.h +113 -0
  796. cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +113 -0
  797. cuda/cccl/headers/include/cuda/std/__functional/unary_function.h +62 -0
  798. cuda/cccl/headers/include/cuda/std/__functional/unary_negate.h +65 -0
  799. cuda/cccl/headers/include/cuda/std/__functional/unwrap_ref.h +56 -0
  800. cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +268 -0
  801. cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +35 -0
  802. cuda/cccl/headers/include/cuda/std/__fwd/array.h +42 -0
  803. cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +49 -0
  804. cuda/cccl/headers/include/cuda/std/__fwd/complex.h +66 -0
  805. cuda/cccl/headers/include/cuda/std/__fwd/format.h +84 -0
  806. cuda/cccl/headers/include/cuda/std/__fwd/fp.h +37 -0
  807. cuda/cccl/headers/include/cuda/std/__fwd/get.h +123 -0
  808. cuda/cccl/headers/include/cuda/std/__fwd/hash.h +34 -0
  809. cuda/cccl/headers/include/cuda/std/__fwd/iterator.h +43 -0
  810. cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +90 -0
  811. cuda/cccl/headers/include/cuda/std/__fwd/memory_resource.h +37 -0
  812. cuda/cccl/headers/include/cuda/std/__fwd/optional.h +39 -0
  813. cuda/cccl/headers/include/cuda/std/__fwd/pair.h +34 -0
  814. cuda/cccl/headers/include/cuda/std/__fwd/reference_wrapper.h +34 -0
  815. cuda/cccl/headers/include/cuda/std/__fwd/span.h +45 -0
  816. cuda/cccl/headers/include/cuda/std/__fwd/string.h +83 -0
  817. cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +59 -0
  818. cuda/cccl/headers/include/cuda/std/__fwd/subrange.h +55 -0
  819. cuda/cccl/headers/include/cuda/std/__fwd/tuple.h +34 -0
  820. cuda/cccl/headers/include/cuda/std/__internal/cpp_dialect.h +44 -0
  821. cuda/cccl/headers/include/cuda/std/__internal/features.h +77 -0
  822. cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +122 -0
  823. cuda/cccl/headers/include/cuda/std/__iterator/access.h +128 -0
  824. cuda/cccl/headers/include/cuda/std/__iterator/advance.h +228 -0
  825. cuda/cccl/headers/include/cuda/std/__iterator/back_insert_iterator.h +163 -0
  826. cuda/cccl/headers/include/cuda/std/__iterator/bounded_iter.h +253 -0
  827. cuda/cccl/headers/include/cuda/std/__iterator/concepts.h +645 -0
  828. cuda/cccl/headers/include/cuda/std/__iterator/counted_iterator.h +464 -0
  829. cuda/cccl/headers/include/cuda/std/__iterator/data.h +61 -0
  830. cuda/cccl/headers/include/cuda/std/__iterator/default_sentinel.h +36 -0
  831. cuda/cccl/headers/include/cuda/std/__iterator/distance.h +126 -0
  832. cuda/cccl/headers/include/cuda/std/__iterator/empty.h +53 -0
  833. cuda/cccl/headers/include/cuda/std/__iterator/erase_if_container.h +53 -0
  834. cuda/cccl/headers/include/cuda/std/__iterator/front_insert_iterator.h +99 -0
  835. cuda/cccl/headers/include/cuda/std/__iterator/incrementable_traits.h +143 -0
  836. cuda/cccl/headers/include/cuda/std/__iterator/indirectly_comparable.h +55 -0
  837. cuda/cccl/headers/include/cuda/std/__iterator/insert_iterator.h +107 -0
  838. cuda/cccl/headers/include/cuda/std/__iterator/istream_iterator.h +146 -0
  839. cuda/cccl/headers/include/cuda/std/__iterator/istreambuf_iterator.h +161 -0
  840. cuda/cccl/headers/include/cuda/std/__iterator/iter_move.h +161 -0
  841. cuda/cccl/headers/include/cuda/std/__iterator/iter_swap.h +163 -0
  842. cuda/cccl/headers/include/cuda/std/__iterator/iterator.h +44 -0
  843. cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +847 -0
  844. cuda/cccl/headers/include/cuda/std/__iterator/mergeable.h +72 -0
  845. cuda/cccl/headers/include/cuda/std/__iterator/move_iterator.h +432 -0
  846. cuda/cccl/headers/include/cuda/std/__iterator/move_sentinel.h +73 -0
  847. cuda/cccl/headers/include/cuda/std/__iterator/next.h +101 -0
  848. cuda/cccl/headers/include/cuda/std/__iterator/ostream_iterator.h +95 -0
  849. cuda/cccl/headers/include/cuda/std/__iterator/ostreambuf_iterator.h +100 -0
  850. cuda/cccl/headers/include/cuda/std/__iterator/permutable.h +54 -0
  851. cuda/cccl/headers/include/cuda/std/__iterator/prev.h +90 -0
  852. cuda/cccl/headers/include/cuda/std/__iterator/projected.h +61 -0
  853. cuda/cccl/headers/include/cuda/std/__iterator/readable_traits.h +156 -0
  854. cuda/cccl/headers/include/cuda/std/__iterator/reverse_access.h +142 -0
  855. cuda/cccl/headers/include/cuda/std/__iterator/reverse_iterator.h +371 -0
  856. cuda/cccl/headers/include/cuda/std/__iterator/size.h +69 -0
  857. cuda/cccl/headers/include/cuda/std/__iterator/sortable.h +55 -0
  858. cuda/cccl/headers/include/cuda/std/__iterator/unreachable_sentinel.h +84 -0
  859. cuda/cccl/headers/include/cuda/std/__iterator/wrap_iter.h +245 -0
  860. cuda/cccl/headers/include/cuda/std/__latch/latch.h +88 -0
  861. cuda/cccl/headers/include/cuda/std/__limits/numeric_limits.h +617 -0
  862. cuda/cccl/headers/include/cuda/std/__limits/numeric_limits_ext.h +753 -0
  863. cuda/cccl/headers/include/cuda/std/__linalg/conj_if_needed.h +78 -0
  864. cuda/cccl/headers/include/cuda/std/__linalg/conjugate_transposed.h +54 -0
  865. cuda/cccl/headers/include/cuda/std/__linalg/conjugated.h +139 -0
  866. cuda/cccl/headers/include/cuda/std/__linalg/scaled.h +132 -0
  867. cuda/cccl/headers/include/cuda/std/__linalg/transposed.h +321 -0
  868. cuda/cccl/headers/include/cuda/std/__mdspan/aligned_accessor.h +97 -0
  869. cuda/cccl/headers/include/cuda/std/__mdspan/concepts.h +144 -0
  870. cuda/cccl/headers/include/cuda/std/__mdspan/default_accessor.h +73 -0
  871. cuda/cccl/headers/include/cuda/std/__mdspan/empty_base.h +352 -0
  872. cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +758 -0
  873. cuda/cccl/headers/include/cuda/std/__mdspan/layout_left.h +314 -0
  874. cuda/cccl/headers/include/cuda/std/__mdspan/layout_right.h +307 -0
  875. cuda/cccl/headers/include/cuda/std/__mdspan/layout_stride.h +605 -0
  876. cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +497 -0
  877. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_extents.h +193 -0
  878. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_helper.h +189 -0
  879. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_mapping.h +344 -0
  880. cuda/cccl/headers/include/cuda/std/__memory/addressof.h +67 -0
  881. cuda/cccl/headers/include/cuda/std/__memory/align.h +67 -0
  882. cuda/cccl/headers/include/cuda/std/__memory/allocate_at_least.h +81 -0
  883. cuda/cccl/headers/include/cuda/std/__memory/allocation_guard.h +100 -0
  884. cuda/cccl/headers/include/cuda/std/__memory/allocator.h +320 -0
  885. cuda/cccl/headers/include/cuda/std/__memory/allocator_arg_t.h +84 -0
  886. cuda/cccl/headers/include/cuda/std/__memory/allocator_destructor.h +59 -0
  887. cuda/cccl/headers/include/cuda/std/__memory/allocator_traits.h +532 -0
  888. cuda/cccl/headers/include/cuda/std/__memory/assume_aligned.h +60 -0
  889. cuda/cccl/headers/include/cuda/std/__memory/builtin_new_allocator.h +87 -0
  890. cuda/cccl/headers/include/cuda/std/__memory/compressed_pair.h +225 -0
  891. cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +248 -0
  892. cuda/cccl/headers/include/cuda/std/__memory/destruct_n.h +91 -0
  893. cuda/cccl/headers/include/cuda/std/__memory/is_sufficiently_aligned.h +46 -0
  894. cuda/cccl/headers/include/cuda/std/__memory/pointer_traits.h +246 -0
  895. cuda/cccl/headers/include/cuda/std/__memory/runtime_assume_aligned.h +62 -0
  896. cuda/cccl/headers/include/cuda/std/__memory/temporary_buffer.h +92 -0
  897. cuda/cccl/headers/include/cuda/std/__memory/uninitialized_algorithms.h +678 -0
  898. cuda/cccl/headers/include/cuda/std/__memory/unique_ptr.h +765 -0
  899. cuda/cccl/headers/include/cuda/std/__memory/uses_allocator.h +54 -0
  900. cuda/cccl/headers/include/cuda/std/__memory/voidify.h +41 -0
  901. cuda/cccl/headers/include/cuda/std/__memory_ +34 -0
  902. cuda/cccl/headers/include/cuda/std/__new/allocate.h +126 -0
  903. cuda/cccl/headers/include/cuda/std/__new/bad_alloc.h +57 -0
  904. cuda/cccl/headers/include/cuda/std/__new/launder.h +53 -0
  905. cuda/cccl/headers/include/cuda/std/__new_ +29 -0
  906. cuda/cccl/headers/include/cuda/std/__numeric/accumulate.h +56 -0
  907. cuda/cccl/headers/include/cuda/std/__numeric/adjacent_difference.h +72 -0
  908. cuda/cccl/headers/include/cuda/std/__numeric/exclusive_scan.h +66 -0
  909. cuda/cccl/headers/include/cuda/std/__numeric/gcd_lcm.h +78 -0
  910. cuda/cccl/headers/include/cuda/std/__numeric/inclusive_scan.h +73 -0
  911. cuda/cccl/headers/include/cuda/std/__numeric/inner_product.h +62 -0
  912. cuda/cccl/headers/include/cuda/std/__numeric/iota.h +42 -0
  913. cuda/cccl/headers/include/cuda/std/__numeric/midpoint.h +97 -0
  914. cuda/cccl/headers/include/cuda/std/__numeric/partial_sum.h +69 -0
  915. cuda/cccl/headers/include/cuda/std/__numeric/reduce.h +60 -0
  916. cuda/cccl/headers/include/cuda/std/__numeric/transform_exclusive_scan.h +51 -0
  917. cuda/cccl/headers/include/cuda/std/__numeric/transform_inclusive_scan.h +65 -0
  918. cuda/cccl/headers/include/cuda/std/__numeric/transform_reduce.h +72 -0
  919. cuda/cccl/headers/include/cuda/std/__optional/bad_optional_access.h +74 -0
  920. cuda/cccl/headers/include/cuda/std/__optional/hash.h +53 -0
  921. cuda/cccl/headers/include/cuda/std/__optional/make_optional.h +61 -0
  922. cuda/cccl/headers/include/cuda/std/__optional/nullopt.h +43 -0
  923. cuda/cccl/headers/include/cuda/std/__optional/optional.h +859 -0
  924. cuda/cccl/headers/include/cuda/std/__optional/optional_base.h +432 -0
  925. cuda/cccl/headers/include/cuda/std/__optional/optional_ref.h +324 -0
  926. cuda/cccl/headers/include/cuda/std/__random/generate_canonical.h +56 -0
  927. cuda/cccl/headers/include/cuda/std/__random/is_seed_sequence.h +39 -0
  928. cuda/cccl/headers/include/cuda/std/__random/is_valid.h +106 -0
  929. cuda/cccl/headers/include/cuda/std/__random/linear_congruential_engine.h +398 -0
  930. cuda/cccl/headers/include/cuda/std/__random/uniform_int_distribution.h +335 -0
  931. cuda/cccl/headers/include/cuda/std/__random/uniform_real_distribution.h +183 -0
  932. cuda/cccl/headers/include/cuda/std/__random_ +29 -0
  933. cuda/cccl/headers/include/cuda/std/__ranges/access.h +303 -0
  934. cuda/cccl/headers/include/cuda/std/__ranges/all.h +98 -0
  935. cuda/cccl/headers/include/cuda/std/__ranges/concepts.h +314 -0
  936. cuda/cccl/headers/include/cuda/std/__ranges/counted.h +90 -0
  937. cuda/cccl/headers/include/cuda/std/__ranges/dangling.h +54 -0
  938. cuda/cccl/headers/include/cuda/std/__ranges/data.h +136 -0
  939. cuda/cccl/headers/include/cuda/std/__ranges/empty.h +109 -0
  940. cuda/cccl/headers/include/cuda/std/__ranges/empty_view.h +77 -0
  941. cuda/cccl/headers/include/cuda/std/__ranges/enable_borrowed_range.h +41 -0
  942. cuda/cccl/headers/include/cuda/std/__ranges/enable_view.h +78 -0
  943. cuda/cccl/headers/include/cuda/std/__ranges/from_range.h +36 -0
  944. cuda/cccl/headers/include/cuda/std/__ranges/iota_view.h +266 -0
  945. cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +410 -0
  946. cuda/cccl/headers/include/cuda/std/__ranges/owning_view.h +161 -0
  947. cuda/cccl/headers/include/cuda/std/__ranges/range_adaptor.h +110 -0
  948. cuda/cccl/headers/include/cuda/std/__ranges/rbegin.h +175 -0
  949. cuda/cccl/headers/include/cuda/std/__ranges/ref_view.h +121 -0
  950. cuda/cccl/headers/include/cuda/std/__ranges/rend.h +182 -0
  951. cuda/cccl/headers/include/cuda/std/__ranges/repeat_view.h +345 -0
  952. cuda/cccl/headers/include/cuda/std/__ranges/single_view.h +155 -0
  953. cuda/cccl/headers/include/cuda/std/__ranges/size.h +201 -0
  954. cuda/cccl/headers/include/cuda/std/__ranges/subrange.h +513 -0
  955. cuda/cccl/headers/include/cuda/std/__ranges/take_view.h +476 -0
  956. cuda/cccl/headers/include/cuda/std/__ranges/take_while_view.h +259 -0
  957. cuda/cccl/headers/include/cuda/std/__ranges/transform_view.h +522 -0
  958. cuda/cccl/headers/include/cuda/std/__ranges/unwrap_end.h +53 -0
  959. cuda/cccl/headers/include/cuda/std/__ranges/view_interface.h +183 -0
  960. cuda/cccl/headers/include/cuda/std/__ranges/views.h +38 -0
  961. cuda/cccl/headers/include/cuda/std/__semaphore/atomic_semaphore.h +234 -0
  962. cuda/cccl/headers/include/cuda/std/__semaphore/counting_semaphore.h +51 -0
  963. cuda/cccl/headers/include/cuda/std/__string/char_traits.h +191 -0
  964. cuda/cccl/headers/include/cuda/std/__string/constexpr_c_functions.h +581 -0
  965. cuda/cccl/headers/include/cuda/std/__string/helper_functions.h +296 -0
  966. cuda/cccl/headers/include/cuda/std/__string/string_view.h +244 -0
  967. cuda/cccl/headers/include/cuda/std/__string_ +29 -0
  968. cuda/cccl/headers/include/cuda/std/__system_error/errc.h +51 -0
  969. cuda/cccl/headers/include/cuda/std/__system_error_ +26 -0
  970. cuda/cccl/headers/include/cuda/std/__thread/threading_support.h +106 -0
  971. cuda/cccl/headers/include/cuda/std/__thread/threading_support_cuda.h +47 -0
  972. cuda/cccl/headers/include/cuda/std/__thread/threading_support_external.h +41 -0
  973. cuda/cccl/headers/include/cuda/std/__thread/threading_support_pthread.h +143 -0
  974. cuda/cccl/headers/include/cuda/std/__thread/threading_support_win32.h +87 -0
  975. cuda/cccl/headers/include/cuda/std/__tuple_dir/ignore.h +51 -0
  976. cuda/cccl/headers/include/cuda/std/__tuple_dir/make_tuple_types.h +98 -0
  977. cuda/cccl/headers/include/cuda/std/__tuple_dir/sfinae_helpers.h +260 -0
  978. cuda/cccl/headers/include/cuda/std/__tuple_dir/structured_bindings.h +218 -0
  979. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_element.h +70 -0
  980. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_indices.h +44 -0
  981. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +80 -0
  982. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like_ext.h +64 -0
  983. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_size.h +79 -0
  984. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_types.h +35 -0
  985. cuda/cccl/headers/include/cuda/std/__tuple_dir/vector_types.h +290 -0
  986. cuda/cccl/headers/include/cuda/std/__type_traits/add_const.h +40 -0
  987. cuda/cccl/headers/include/cuda/std/__type_traits/add_cv.h +40 -0
  988. cuda/cccl/headers/include/cuda/std/__type_traits/add_lvalue_reference.h +62 -0
  989. cuda/cccl/headers/include/cuda/std/__type_traits/add_pointer.h +65 -0
  990. cuda/cccl/headers/include/cuda/std/__type_traits/add_rvalue_reference.h +62 -0
  991. cuda/cccl/headers/include/cuda/std/__type_traits/add_volatile.h +40 -0
  992. cuda/cccl/headers/include/cuda/std/__type_traits/aligned_storage.h +149 -0
  993. cuda/cccl/headers/include/cuda/std/__type_traits/aligned_union.h +62 -0
  994. cuda/cccl/headers/include/cuda/std/__type_traits/alignment_of.h +41 -0
  995. cuda/cccl/headers/include/cuda/std/__type_traits/always_false.h +35 -0
  996. cuda/cccl/headers/include/cuda/std/__type_traits/can_extract_key.h +68 -0
  997. cuda/cccl/headers/include/cuda/std/__type_traits/common_reference.h +262 -0
  998. cuda/cccl/headers/include/cuda/std/__type_traits/common_type.h +173 -0
  999. cuda/cccl/headers/include/cuda/std/__type_traits/conditional.h +65 -0
  1000. cuda/cccl/headers/include/cuda/std/__type_traits/conjunction.h +67 -0
  1001. cuda/cccl/headers/include/cuda/std/__type_traits/copy_cv.h +50 -0
  1002. cuda/cccl/headers/include/cuda/std/__type_traits/copy_cvref.h +148 -0
  1003. cuda/cccl/headers/include/cuda/std/__type_traits/decay.h +83 -0
  1004. cuda/cccl/headers/include/cuda/std/__type_traits/dependent_type.h +35 -0
  1005. cuda/cccl/headers/include/cuda/std/__type_traits/disjunction.h +77 -0
  1006. cuda/cccl/headers/include/cuda/std/__type_traits/enable_if.h +43 -0
  1007. cuda/cccl/headers/include/cuda/std/__type_traits/extent.h +68 -0
  1008. cuda/cccl/headers/include/cuda/std/__type_traits/fold.h +47 -0
  1009. cuda/cccl/headers/include/cuda/std/__type_traits/has_unique_object_representation.h +46 -0
  1010. cuda/cccl/headers/include/cuda/std/__type_traits/has_virtual_destructor.h +42 -0
  1011. cuda/cccl/headers/include/cuda/std/__type_traits/integral_constant.h +62 -0
  1012. cuda/cccl/headers/include/cuda/std/__type_traits/is_abstract.h +42 -0
  1013. cuda/cccl/headers/include/cuda/std/__type_traits/is_aggregate.h +42 -0
  1014. cuda/cccl/headers/include/cuda/std/__type_traits/is_allocator.h +46 -0
  1015. cuda/cccl/headers/include/cuda/std/__type_traits/is_arithmetic.h +42 -0
  1016. cuda/cccl/headers/include/cuda/std/__type_traits/is_array.h +62 -0
  1017. cuda/cccl/headers/include/cuda/std/__type_traits/is_assignable.h +78 -0
  1018. cuda/cccl/headers/include/cuda/std/__type_traits/is_base_of.h +42 -0
  1019. cuda/cccl/headers/include/cuda/std/__type_traits/is_bounded_array.h +44 -0
  1020. cuda/cccl/headers/include/cuda/std/__type_traits/is_callable.h +60 -0
  1021. cuda/cccl/headers/include/cuda/std/__type_traits/is_char_like_type.h +38 -0
  1022. cuda/cccl/headers/include/cuda/std/__type_traits/is_class.h +42 -0
  1023. cuda/cccl/headers/include/cuda/std/__type_traits/is_compound.h +58 -0
  1024. cuda/cccl/headers/include/cuda/std/__type_traits/is_const.h +56 -0
  1025. cuda/cccl/headers/include/cuda/std/__type_traits/is_constant_evaluated.h +51 -0
  1026. cuda/cccl/headers/include/cuda/std/__type_traits/is_constructible.h +174 -0
  1027. cuda/cccl/headers/include/cuda/std/__type_traits/is_convertible.h +211 -0
  1028. cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_assignable.h +43 -0
  1029. cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_constructible.h +43 -0
  1030. cuda/cccl/headers/include/cuda/std/__type_traits/is_core_convertible.h +47 -0
  1031. cuda/cccl/headers/include/cuda/std/__type_traits/is_corresponding_member.h +42 -0
  1032. cuda/cccl/headers/include/cuda/std/__type_traits/is_default_constructible.h +40 -0
  1033. cuda/cccl/headers/include/cuda/std/__type_traits/is_destructible.h +115 -0
  1034. cuda/cccl/headers/include/cuda/std/__type_traits/is_empty.h +42 -0
  1035. cuda/cccl/headers/include/cuda/std/__type_traits/is_enum.h +42 -0
  1036. cuda/cccl/headers/include/cuda/std/__type_traits/is_execution_policy.h +81 -0
  1037. cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_arithmetic.h +38 -0
  1038. cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_floating_point.h +79 -0
  1039. cuda/cccl/headers/include/cuda/std/__type_traits/is_final.h +42 -0
  1040. cuda/cccl/headers/include/cuda/std/__type_traits/is_floating_point.h +53 -0
  1041. cuda/cccl/headers/include/cuda/std/__type_traits/is_function.h +61 -0
  1042. cuda/cccl/headers/include/cuda/std/__type_traits/is_fundamental.h +56 -0
  1043. cuda/cccl/headers/include/cuda/std/__type_traits/is_implicitly_default_constructible.h +57 -0
  1044. cuda/cccl/headers/include/cuda/std/__type_traits/is_integer.h +45 -0
  1045. cuda/cccl/headers/include/cuda/std/__type_traits/is_integral.h +123 -0
  1046. cuda/cccl/headers/include/cuda/std/__type_traits/is_layout_compatible.h +45 -0
  1047. cuda/cccl/headers/include/cuda/std/__type_traits/is_literal_type.h +42 -0
  1048. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_function_pointer.h +79 -0
  1049. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_object_pointer.h +57 -0
  1050. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_pointer.h +57 -0
  1051. cuda/cccl/headers/include/cuda/std/__type_traits/is_move_assignable.h +43 -0
  1052. cuda/cccl/headers/include/cuda/std/__type_traits/is_move_constructible.h +42 -0
  1053. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_assignable.h +70 -0
  1054. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_constructible.h +84 -0
  1055. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_convertible.h +59 -0
  1056. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_assignable.h +60 -0
  1057. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_constructible.h +43 -0
  1058. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_default_constructible.h +54 -0
  1059. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_destructible.h +82 -0
  1060. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_assignable.h +60 -0
  1061. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_constructible.h +42 -0
  1062. cuda/cccl/headers/include/cuda/std/__type_traits/is_null_pointer.h +43 -0
  1063. cuda/cccl/headers/include/cuda/std/__type_traits/is_object.h +57 -0
  1064. cuda/cccl/headers/include/cuda/std/__type_traits/is_one_of.h +37 -0
  1065. cuda/cccl/headers/include/cuda/std/__type_traits/is_pod.h +42 -0
  1066. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer.h +60 -0
  1067. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_base_of.h +84 -0
  1068. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_with_class.h +42 -0
  1069. cuda/cccl/headers/include/cuda/std/__type_traits/is_polymorphic.h +42 -0
  1070. cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +119 -0
  1071. cuda/cccl/headers/include/cuda/std/__type_traits/is_reference.h +95 -0
  1072. cuda/cccl/headers/include/cuda/std/__type_traits/is_reference_wrapper.h +50 -0
  1073. cuda/cccl/headers/include/cuda/std/__type_traits/is_referenceable.h +55 -0
  1074. cuda/cccl/headers/include/cuda/std/__type_traits/is_same.h +88 -0
  1075. cuda/cccl/headers/include/cuda/std/__type_traits/is_scalar.h +60 -0
  1076. cuda/cccl/headers/include/cuda/std/__type_traits/is_scoped_enum.h +49 -0
  1077. cuda/cccl/headers/include/cuda/std/__type_traits/is_signed.h +65 -0
  1078. cuda/cccl/headers/include/cuda/std/__type_traits/is_signed_integer.h +59 -0
  1079. cuda/cccl/headers/include/cuda/std/__type_traits/is_standard_layout.h +42 -0
  1080. cuda/cccl/headers/include/cuda/std/__type_traits/is_swappable.h +202 -0
  1081. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivial.h +42 -0
  1082. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_assignable.h +43 -0
  1083. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_constructible.h +43 -0
  1084. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_assignable.h +46 -0
  1085. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_constructible.h +45 -0
  1086. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copyable.h +42 -0
  1087. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_default_constructible.h +42 -0
  1088. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_destructible.h +58 -0
  1089. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_assignable.h +45 -0
  1090. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_constructible.h +44 -0
  1091. cuda/cccl/headers/include/cuda/std/__type_traits/is_unbounded_array.h +43 -0
  1092. cuda/cccl/headers/include/cuda/std/__type_traits/is_union.h +42 -0
  1093. cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned.h +66 -0
  1094. cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned_integer.h +59 -0
  1095. cuda/cccl/headers/include/cuda/std/__type_traits/is_valid_expansion.h +41 -0
  1096. cuda/cccl/headers/include/cuda/std/__type_traits/is_void.h +55 -0
  1097. cuda/cccl/headers/include/cuda/std/__type_traits/is_volatile.h +56 -0
  1098. cuda/cccl/headers/include/cuda/std/__type_traits/lazy.h +35 -0
  1099. cuda/cccl/headers/include/cuda/std/__type_traits/make_const_lvalue_ref.h +36 -0
  1100. cuda/cccl/headers/include/cuda/std/__type_traits/make_nbit_int.h +107 -0
  1101. cuda/cccl/headers/include/cuda/std/__type_traits/make_signed.h +140 -0
  1102. cuda/cccl/headers/include/cuda/std/__type_traits/make_unsigned.h +151 -0
  1103. cuda/cccl/headers/include/cuda/std/__type_traits/maybe_const.h +36 -0
  1104. cuda/cccl/headers/include/cuda/std/__type_traits/nat.h +39 -0
  1105. cuda/cccl/headers/include/cuda/std/__type_traits/negation.h +44 -0
  1106. cuda/cccl/headers/include/cuda/std/__type_traits/num_bits.h +122 -0
  1107. cuda/cccl/headers/include/cuda/std/__type_traits/promote.h +162 -0
  1108. cuda/cccl/headers/include/cuda/std/__type_traits/rank.h +60 -0
  1109. cuda/cccl/headers/include/cuda/std/__type_traits/reference_constructs_from_temporary.h +57 -0
  1110. cuda/cccl/headers/include/cuda/std/__type_traits/reference_converts_from_temporary.h +56 -0
  1111. cuda/cccl/headers/include/cuda/std/__type_traits/remove_all_extents.h +66 -0
  1112. cuda/cccl/headers/include/cuda/std/__type_traits/remove_const.h +59 -0
  1113. cuda/cccl/headers/include/cuda/std/__type_traits/remove_const_ref.h +37 -0
  1114. cuda/cccl/headers/include/cuda/std/__type_traits/remove_cv.h +57 -0
  1115. cuda/cccl/headers/include/cuda/std/__type_traits/remove_cvref.h +57 -0
  1116. cuda/cccl/headers/include/cuda/std/__type_traits/remove_extent.h +65 -0
  1117. cuda/cccl/headers/include/cuda/std/__type_traits/remove_pointer.h +73 -0
  1118. cuda/cccl/headers/include/cuda/std/__type_traits/remove_reference.h +72 -0
  1119. cuda/cccl/headers/include/cuda/std/__type_traits/remove_volatile.h +58 -0
  1120. cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +47 -0
  1121. cuda/cccl/headers/include/cuda/std/__type_traits/type_identity.h +40 -0
  1122. cuda/cccl/headers/include/cuda/std/__type_traits/type_list.h +1067 -0
  1123. cuda/cccl/headers/include/cuda/std/__type_traits/type_set.h +131 -0
  1124. cuda/cccl/headers/include/cuda/std/__type_traits/underlying_type.h +52 -0
  1125. cuda/cccl/headers/include/cuda/std/__type_traits/void_t.h +34 -0
  1126. cuda/cccl/headers/include/cuda/std/__utility/as_const.h +52 -0
  1127. cuda/cccl/headers/include/cuda/std/__utility/auto_cast.h +34 -0
  1128. cuda/cccl/headers/include/cuda/std/__utility/cmp.h +116 -0
  1129. cuda/cccl/headers/include/cuda/std/__utility/convert_to_integral.h +101 -0
  1130. cuda/cccl/headers/include/cuda/std/__utility/declval.h +76 -0
  1131. cuda/cccl/headers/include/cuda/std/__utility/exception_guard.h +161 -0
  1132. cuda/cccl/headers/include/cuda/std/__utility/exchange.h +46 -0
  1133. cuda/cccl/headers/include/cuda/std/__utility/forward.h +59 -0
  1134. cuda/cccl/headers/include/cuda/std/__utility/forward_like.h +55 -0
  1135. cuda/cccl/headers/include/cuda/std/__utility/in_place.h +106 -0
  1136. cuda/cccl/headers/include/cuda/std/__utility/integer_sequence.h +251 -0
  1137. cuda/cccl/headers/include/cuda/std/__utility/monostate.h +99 -0
  1138. cuda/cccl/headers/include/cuda/std/__utility/move.h +74 -0
  1139. cuda/cccl/headers/include/cuda/std/__utility/pair.h +796 -0
  1140. cuda/cccl/headers/include/cuda/std/__utility/piecewise_construct.h +37 -0
  1141. cuda/cccl/headers/include/cuda/std/__utility/pod_tuple.h +527 -0
  1142. cuda/cccl/headers/include/cuda/std/__utility/priority_tag.h +40 -0
  1143. cuda/cccl/headers/include/cuda/std/__utility/rel_ops.h +63 -0
  1144. cuda/cccl/headers/include/cuda/std/__utility/swap.h +64 -0
  1145. cuda/cccl/headers/include/cuda/std/__utility/to_underlying.h +40 -0
  1146. cuda/cccl/headers/include/cuda/std/__utility/typeid.h +421 -0
  1147. cuda/cccl/headers/include/cuda/std/__utility/undefined.h +34 -0
  1148. cuda/cccl/headers/include/cuda/std/__utility/unreachable.h +37 -0
  1149. cuda/cccl/headers/include/cuda/std/array +518 -0
  1150. cuda/cccl/headers/include/cuda/std/atomic +810 -0
  1151. cuda/cccl/headers/include/cuda/std/barrier +42 -0
  1152. cuda/cccl/headers/include/cuda/std/bit +35 -0
  1153. cuda/cccl/headers/include/cuda/std/bitset +994 -0
  1154. cuda/cccl/headers/include/cuda/std/cassert +28 -0
  1155. cuda/cccl/headers/include/cuda/std/ccomplex +15 -0
  1156. cuda/cccl/headers/include/cuda/std/cfloat +59 -0
  1157. cuda/cccl/headers/include/cuda/std/chrono +26 -0
  1158. cuda/cccl/headers/include/cuda/std/climits +61 -0
  1159. cuda/cccl/headers/include/cuda/std/cmath +87 -0
  1160. cuda/cccl/headers/include/cuda/std/complex +50 -0
  1161. cuda/cccl/headers/include/cuda/std/concepts +48 -0
  1162. cuda/cccl/headers/include/cuda/std/cstddef +28 -0
  1163. cuda/cccl/headers/include/cuda/std/cstdint +178 -0
  1164. cuda/cccl/headers/include/cuda/std/cstdlib +30 -0
  1165. cuda/cccl/headers/include/cuda/std/cstring +110 -0
  1166. cuda/cccl/headers/include/cuda/std/ctime +154 -0
  1167. cuda/cccl/headers/include/cuda/std/detail/__config +45 -0
  1168. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +204 -0
  1169. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/algorithm +1721 -0
  1170. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/chrono +2509 -0
  1171. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/iosfwd +128 -0
  1172. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/stdexcept +120 -0
  1173. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/tuple +1365 -0
  1174. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +2142 -0
  1175. cuda/cccl/headers/include/cuda/std/execution +29 -0
  1176. cuda/cccl/headers/include/cuda/std/expected +30 -0
  1177. cuda/cccl/headers/include/cuda/std/functional +56 -0
  1178. cuda/cccl/headers/include/cuda/std/initializer_list +36 -0
  1179. cuda/cccl/headers/include/cuda/std/inplace_vector +2170 -0
  1180. cuda/cccl/headers/include/cuda/std/iterator +70 -0
  1181. cuda/cccl/headers/include/cuda/std/latch +34 -0
  1182. cuda/cccl/headers/include/cuda/std/limits +28 -0
  1183. cuda/cccl/headers/include/cuda/std/linalg +30 -0
  1184. cuda/cccl/headers/include/cuda/std/mdspan +38 -0
  1185. cuda/cccl/headers/include/cuda/std/memory +39 -0
  1186. cuda/cccl/headers/include/cuda/std/numbers +341 -0
  1187. cuda/cccl/headers/include/cuda/std/numeric +41 -0
  1188. cuda/cccl/headers/include/cuda/std/optional +31 -0
  1189. cuda/cccl/headers/include/cuda/std/ranges +69 -0
  1190. cuda/cccl/headers/include/cuda/std/ratio +416 -0
  1191. cuda/cccl/headers/include/cuda/std/semaphore +31 -0
  1192. cuda/cccl/headers/include/cuda/std/source_location +83 -0
  1193. cuda/cccl/headers/include/cuda/std/span +628 -0
  1194. cuda/cccl/headers/include/cuda/std/string_view +799 -0
  1195. cuda/cccl/headers/include/cuda/std/tuple +26 -0
  1196. cuda/cccl/headers/include/cuda/std/type_traits +177 -0
  1197. cuda/cccl/headers/include/cuda/std/utility +70 -0
  1198. cuda/cccl/headers/include/cuda/std/variant +25 -0
  1199. cuda/cccl/headers/include/cuda/std/version +243 -0
  1200. cuda/cccl/headers/include/cuda/stream +31 -0
  1201. cuda/cccl/headers/include/cuda/stream_ref +54 -0
  1202. cuda/cccl/headers/include/cuda/type_traits +27 -0
  1203. cuda/cccl/headers/include/cuda/utility +27 -0
  1204. cuda/cccl/headers/include/cuda/version +16 -0
  1205. cuda/cccl/headers/include/cuda/warp +28 -0
  1206. cuda/cccl/headers/include/cuda/work_stealing +26 -0
  1207. cuda/cccl/headers/include/nv/detail/__preprocessor +169 -0
  1208. cuda/cccl/headers/include/nv/detail/__target_macros +718 -0
  1209. cuda/cccl/headers/include/nv/target +235 -0
  1210. cuda/cccl/headers/include/thrust/addressof.h +22 -0
  1211. cuda/cccl/headers/include/thrust/adjacent_difference.h +254 -0
  1212. cuda/cccl/headers/include/thrust/advance.h +57 -0
  1213. cuda/cccl/headers/include/thrust/allocate_unique.h +299 -0
  1214. cuda/cccl/headers/include/thrust/binary_search.h +1910 -0
  1215. cuda/cccl/headers/include/thrust/complex.h +858 -0
  1216. cuda/cccl/headers/include/thrust/copy.h +506 -0
  1217. cuda/cccl/headers/include/thrust/count.h +245 -0
  1218. cuda/cccl/headers/include/thrust/detail/adjacent_difference.inl +95 -0
  1219. cuda/cccl/headers/include/thrust/detail/algorithm_wrapper.h +37 -0
  1220. cuda/cccl/headers/include/thrust/detail/alignment.h +81 -0
  1221. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +350 -0
  1222. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.inl +371 -0
  1223. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +45 -0
  1224. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.inl +242 -0
  1225. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +39 -0
  1226. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.inl +137 -0
  1227. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +39 -0
  1228. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.inl +99 -0
  1229. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +53 -0
  1230. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.inl +68 -0
  1231. cuda/cccl/headers/include/thrust/detail/allocator/no_throw_allocator.h +76 -0
  1232. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +102 -0
  1233. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.inl +86 -0
  1234. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +79 -0
  1235. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.inl +79 -0
  1236. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +39 -0
  1237. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.inl +98 -0
  1238. cuda/cccl/headers/include/thrust/detail/allocator_aware_execution_policy.h +99 -0
  1239. cuda/cccl/headers/include/thrust/detail/binary_search.inl +525 -0
  1240. cuda/cccl/headers/include/thrust/detail/caching_allocator.h +47 -0
  1241. cuda/cccl/headers/include/thrust/detail/complex/arithmetic.h +255 -0
  1242. cuda/cccl/headers/include/thrust/detail/complex/c99math.h +64 -0
  1243. cuda/cccl/headers/include/thrust/detail/complex/catrig.h +875 -0
  1244. cuda/cccl/headers/include/thrust/detail/complex/catrigf.h +589 -0
  1245. cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +233 -0
  1246. cuda/cccl/headers/include/thrust/detail/complex/ccoshf.h +161 -0
  1247. cuda/cccl/headers/include/thrust/detail/complex/cexp.h +195 -0
  1248. cuda/cccl/headers/include/thrust/detail/complex/cexpf.h +173 -0
  1249. cuda/cccl/headers/include/thrust/detail/complex/clog.h +223 -0
  1250. cuda/cccl/headers/include/thrust/detail/complex/clogf.h +210 -0
  1251. cuda/cccl/headers/include/thrust/detail/complex/complex.inl +263 -0
  1252. cuda/cccl/headers/include/thrust/detail/complex/cpow.h +50 -0
  1253. cuda/cccl/headers/include/thrust/detail/complex/cproj.h +81 -0
  1254. cuda/cccl/headers/include/thrust/detail/complex/csinh.h +228 -0
  1255. cuda/cccl/headers/include/thrust/detail/complex/csinhf.h +168 -0
  1256. cuda/cccl/headers/include/thrust/detail/complex/csqrt.h +178 -0
  1257. cuda/cccl/headers/include/thrust/detail/complex/csqrtf.h +174 -0
  1258. cuda/cccl/headers/include/thrust/detail/complex/ctanh.h +208 -0
  1259. cuda/cccl/headers/include/thrust/detail/complex/ctanhf.h +133 -0
  1260. cuda/cccl/headers/include/thrust/detail/complex/math_private.h +138 -0
  1261. cuda/cccl/headers/include/thrust/detail/complex/stream.h +73 -0
  1262. cuda/cccl/headers/include/thrust/detail/config/compiler.h +38 -0
  1263. cuda/cccl/headers/include/thrust/detail/config/config.h +43 -0
  1264. cuda/cccl/headers/include/thrust/detail/config/cpp_dialect.h +78 -0
  1265. cuda/cccl/headers/include/thrust/detail/config/device_system.h +55 -0
  1266. cuda/cccl/headers/include/thrust/detail/config/host_system.h +48 -0
  1267. cuda/cccl/headers/include/thrust/detail/config/memory_resource.h +41 -0
  1268. cuda/cccl/headers/include/thrust/detail/config/namespace.h +162 -0
  1269. cuda/cccl/headers/include/thrust/detail/config/simple_defines.h +48 -0
  1270. cuda/cccl/headers/include/thrust/detail/config.h +36 -0
  1271. cuda/cccl/headers/include/thrust/detail/contiguous_storage.h +228 -0
  1272. cuda/cccl/headers/include/thrust/detail/contiguous_storage.inl +273 -0
  1273. cuda/cccl/headers/include/thrust/detail/copy.h +72 -0
  1274. cuda/cccl/headers/include/thrust/detail/copy.inl +129 -0
  1275. cuda/cccl/headers/include/thrust/detail/copy_if.h +62 -0
  1276. cuda/cccl/headers/include/thrust/detail/copy_if.inl +102 -0
  1277. cuda/cccl/headers/include/thrust/detail/count.h +55 -0
  1278. cuda/cccl/headers/include/thrust/detail/count.inl +89 -0
  1279. cuda/cccl/headers/include/thrust/detail/device_delete.inl +52 -0
  1280. cuda/cccl/headers/include/thrust/detail/device_free.inl +47 -0
  1281. cuda/cccl/headers/include/thrust/detail/device_new.inl +61 -0
  1282. cuda/cccl/headers/include/thrust/detail/device_ptr.inl +48 -0
  1283. cuda/cccl/headers/include/thrust/detail/equal.inl +93 -0
  1284. cuda/cccl/headers/include/thrust/detail/event_error.h +160 -0
  1285. cuda/cccl/headers/include/thrust/detail/execute_with_allocator.h +80 -0
  1286. cuda/cccl/headers/include/thrust/detail/execute_with_allocator_fwd.h +61 -0
  1287. cuda/cccl/headers/include/thrust/detail/execution_policy.h +120 -0
  1288. cuda/cccl/headers/include/thrust/detail/extrema.inl +184 -0
  1289. cuda/cccl/headers/include/thrust/detail/fill.inl +86 -0
  1290. cuda/cccl/headers/include/thrust/detail/find.inl +113 -0
  1291. cuda/cccl/headers/include/thrust/detail/for_each.inl +84 -0
  1292. cuda/cccl/headers/include/thrust/detail/function.h +49 -0
  1293. cuda/cccl/headers/include/thrust/detail/functional/actor.h +214 -0
  1294. cuda/cccl/headers/include/thrust/detail/functional/operators.h +386 -0
  1295. cuda/cccl/headers/include/thrust/detail/gather.inl +173 -0
  1296. cuda/cccl/headers/include/thrust/detail/generate.inl +86 -0
  1297. cuda/cccl/headers/include/thrust/detail/get_iterator_value.h +62 -0
  1298. cuda/cccl/headers/include/thrust/detail/inner_product.inl +118 -0
  1299. cuda/cccl/headers/include/thrust/detail/integer_math.h +130 -0
  1300. cuda/cccl/headers/include/thrust/detail/internal_functional.h +328 -0
  1301. cuda/cccl/headers/include/thrust/detail/logical.inl +113 -0
  1302. cuda/cccl/headers/include/thrust/detail/malloc_and_free.h +77 -0
  1303. cuda/cccl/headers/include/thrust/detail/malloc_and_free_fwd.h +45 -0
  1304. cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +209 -0
  1305. cuda/cccl/headers/include/thrust/detail/memory_wrapper.h +40 -0
  1306. cuda/cccl/headers/include/thrust/detail/merge.inl +276 -0
  1307. cuda/cccl/headers/include/thrust/detail/mismatch.inl +94 -0
  1308. cuda/cccl/headers/include/thrust/detail/numeric_wrapper.h +37 -0
  1309. cuda/cccl/headers/include/thrust/detail/overlapped_copy.h +124 -0
  1310. cuda/cccl/headers/include/thrust/detail/partition.inl +378 -0
  1311. cuda/cccl/headers/include/thrust/detail/pointer.h +309 -0
  1312. cuda/cccl/headers/include/thrust/detail/preprocessor.h +652 -0
  1313. cuda/cccl/headers/include/thrust/detail/random_bijection.h +177 -0
  1314. cuda/cccl/headers/include/thrust/detail/range/head_flags.h +116 -0
  1315. cuda/cccl/headers/include/thrust/detail/range/tail_flags.h +130 -0
  1316. cuda/cccl/headers/include/thrust/detail/raw_pointer_cast.h +52 -0
  1317. cuda/cccl/headers/include/thrust/detail/raw_reference_cast.h +192 -0
  1318. cuda/cccl/headers/include/thrust/detail/reduce.inl +377 -0
  1319. cuda/cccl/headers/include/thrust/detail/reference.h +494 -0
  1320. cuda/cccl/headers/include/thrust/detail/reference_forward_declaration.h +35 -0
  1321. cuda/cccl/headers/include/thrust/detail/remove.inl +213 -0
  1322. cuda/cccl/headers/include/thrust/detail/replace.inl +231 -0
  1323. cuda/cccl/headers/include/thrust/detail/reverse.inl +88 -0
  1324. cuda/cccl/headers/include/thrust/detail/scan.inl +518 -0
  1325. cuda/cccl/headers/include/thrust/detail/scatter.inl +157 -0
  1326. cuda/cccl/headers/include/thrust/detail/seq.h +66 -0
  1327. cuda/cccl/headers/include/thrust/detail/sequence.inl +109 -0
  1328. cuda/cccl/headers/include/thrust/detail/set_operations.inl +981 -0
  1329. cuda/cccl/headers/include/thrust/detail/shuffle.inl +86 -0
  1330. cuda/cccl/headers/include/thrust/detail/sort.inl +373 -0
  1331. cuda/cccl/headers/include/thrust/detail/static_assert.h +58 -0
  1332. cuda/cccl/headers/include/thrust/detail/static_map.h +167 -0
  1333. cuda/cccl/headers/include/thrust/detail/swap_ranges.inl +65 -0
  1334. cuda/cccl/headers/include/thrust/detail/tabulate.inl +62 -0
  1335. cuda/cccl/headers/include/thrust/detail/temporary_array.h +153 -0
  1336. cuda/cccl/headers/include/thrust/detail/temporary_array.inl +120 -0
  1337. cuda/cccl/headers/include/thrust/detail/temporary_buffer.h +81 -0
  1338. cuda/cccl/headers/include/thrust/detail/transform_reduce.inl +69 -0
  1339. cuda/cccl/headers/include/thrust/detail/transform_scan.inl +161 -0
  1340. cuda/cccl/headers/include/thrust/detail/trivial_sequence.h +130 -0
  1341. cuda/cccl/headers/include/thrust/detail/tuple_meta_transform.h +61 -0
  1342. cuda/cccl/headers/include/thrust/detail/type_deduction.h +62 -0
  1343. cuda/cccl/headers/include/thrust/detail/type_traits/has_member_function.h +47 -0
  1344. cuda/cccl/headers/include/thrust/detail/type_traits/has_nested_type.h +43 -0
  1345. cuda/cccl/headers/include/thrust/detail/type_traits/is_call_possible.h +167 -0
  1346. cuda/cccl/headers/include/thrust/detail/type_traits/is_commutative.h +69 -0
  1347. cuda/cccl/headers/include/thrust/detail/type_traits/is_metafunction_defined.h +39 -0
  1348. cuda/cccl/headers/include/thrust/detail/type_traits/is_thrust_pointer.h +59 -0
  1349. cuda/cccl/headers/include/thrust/detail/type_traits/iterator/is_output_iterator.h +46 -0
  1350. cuda/cccl/headers/include/thrust/detail/type_traits/minimum_type.h +89 -0
  1351. cuda/cccl/headers/include/thrust/detail/type_traits/pointer_traits.h +332 -0
  1352. cuda/cccl/headers/include/thrust/detail/type_traits.h +136 -0
  1353. cuda/cccl/headers/include/thrust/detail/uninitialized_copy.inl +90 -0
  1354. cuda/cccl/headers/include/thrust/detail/uninitialized_fill.inl +86 -0
  1355. cuda/cccl/headers/include/thrust/detail/unique.inl +373 -0
  1356. cuda/cccl/headers/include/thrust/detail/use_default.h +34 -0
  1357. cuda/cccl/headers/include/thrust/detail/vector_base.h +613 -0
  1358. cuda/cccl/headers/include/thrust/detail/vector_base.inl +1210 -0
  1359. cuda/cccl/headers/include/thrust/device_allocator.h +134 -0
  1360. cuda/cccl/headers/include/thrust/device_delete.h +59 -0
  1361. cuda/cccl/headers/include/thrust/device_free.h +72 -0
  1362. cuda/cccl/headers/include/thrust/device_make_unique.h +56 -0
  1363. cuda/cccl/headers/include/thrust/device_malloc.h +84 -0
  1364. cuda/cccl/headers/include/thrust/device_malloc_allocator.h +190 -0
  1365. cuda/cccl/headers/include/thrust/device_new.h +91 -0
  1366. cuda/cccl/headers/include/thrust/device_new_allocator.h +179 -0
  1367. cuda/cccl/headers/include/thrust/device_ptr.h +196 -0
  1368. cuda/cccl/headers/include/thrust/device_reference.h +983 -0
  1369. cuda/cccl/headers/include/thrust/device_vector.h +576 -0
  1370. cuda/cccl/headers/include/thrust/distance.h +43 -0
  1371. cuda/cccl/headers/include/thrust/equal.h +247 -0
  1372. cuda/cccl/headers/include/thrust/execution_policy.h +251 -0
  1373. cuda/cccl/headers/include/thrust/extrema.h +657 -0
  1374. cuda/cccl/headers/include/thrust/fill.h +200 -0
  1375. cuda/cccl/headers/include/thrust/find.h +382 -0
  1376. cuda/cccl/headers/include/thrust/for_each.h +261 -0
  1377. cuda/cccl/headers/include/thrust/functional.h +395 -0
  1378. cuda/cccl/headers/include/thrust/gather.h +464 -0
  1379. cuda/cccl/headers/include/thrust/generate.h +193 -0
  1380. cuda/cccl/headers/include/thrust/host_vector.h +576 -0
  1381. cuda/cccl/headers/include/thrust/inner_product.h +264 -0
  1382. cuda/cccl/headers/include/thrust/iterator/constant_iterator.h +221 -0
  1383. cuda/cccl/headers/include/thrust/iterator/counting_iterator.h +335 -0
  1384. cuda/cccl/headers/include/thrust/iterator/detail/any_assign.h +48 -0
  1385. cuda/cccl/headers/include/thrust/iterator/detail/any_system_tag.h +43 -0
  1386. cuda/cccl/headers/include/thrust/iterator/detail/device_system_tag.h +38 -0
  1387. cuda/cccl/headers/include/thrust/iterator/detail/host_system_tag.h +38 -0
  1388. cuda/cccl/headers/include/thrust/iterator/detail/iterator_adaptor_base.h +81 -0
  1389. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_system.h +60 -0
  1390. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_traversal.h +65 -0
  1391. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h +57 -0
  1392. cuda/cccl/headers/include/thrust/iterator/detail/iterator_facade_category.h +182 -0
  1393. cuda/cccl/headers/include/thrust/iterator/detail/minimum_system.h +58 -0
  1394. cuda/cccl/headers/include/thrust/iterator/detail/normal_iterator.h +69 -0
  1395. cuda/cccl/headers/include/thrust/iterator/detail/retag.h +104 -0
  1396. cuda/cccl/headers/include/thrust/iterator/detail/tagged_iterator.h +81 -0
  1397. cuda/cccl/headers/include/thrust/iterator/detail/tuple_of_iterator_references.h +174 -0
  1398. cuda/cccl/headers/include/thrust/iterator/discard_iterator.h +163 -0
  1399. cuda/cccl/headers/include/thrust/iterator/iterator_adaptor.h +251 -0
  1400. cuda/cccl/headers/include/thrust/iterator/iterator_categories.h +211 -0
  1401. cuda/cccl/headers/include/thrust/iterator/iterator_facade.h +659 -0
  1402. cuda/cccl/headers/include/thrust/iterator/iterator_traits.h +323 -0
  1403. cuda/cccl/headers/include/thrust/iterator/iterator_traversal_tags.h +64 -0
  1404. cuda/cccl/headers/include/thrust/iterator/offset_iterator.h +194 -0
  1405. cuda/cccl/headers/include/thrust/iterator/permutation_iterator.h +204 -0
  1406. cuda/cccl/headers/include/thrust/iterator/retag.h +72 -0
  1407. cuda/cccl/headers/include/thrust/iterator/reverse_iterator.h +51 -0
  1408. cuda/cccl/headers/include/thrust/iterator/shuffle_iterator.h +185 -0
  1409. cuda/cccl/headers/include/thrust/iterator/strided_iterator.h +152 -0
  1410. cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +149 -0
  1411. cuda/cccl/headers/include/thrust/iterator/transform_input_output_iterator.h +226 -0
  1412. cuda/cccl/headers/include/thrust/iterator/transform_iterator.h +351 -0
  1413. cuda/cccl/headers/include/thrust/iterator/transform_output_iterator.h +190 -0
  1414. cuda/cccl/headers/include/thrust/iterator/zip_iterator.h +359 -0
  1415. cuda/cccl/headers/include/thrust/logical.h +290 -0
  1416. cuda/cccl/headers/include/thrust/memory.h +299 -0
  1417. cuda/cccl/headers/include/thrust/merge.h +725 -0
  1418. cuda/cccl/headers/include/thrust/mismatch.h +261 -0
  1419. cuda/cccl/headers/include/thrust/mr/allocator.h +229 -0
  1420. cuda/cccl/headers/include/thrust/mr/device_memory_resource.h +41 -0
  1421. cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +526 -0
  1422. cuda/cccl/headers/include/thrust/mr/disjoint_sync_pool.h +118 -0
  1423. cuda/cccl/headers/include/thrust/mr/disjoint_tls_pool.h +67 -0
  1424. cuda/cccl/headers/include/thrust/mr/fancy_pointer_resource.h +67 -0
  1425. cuda/cccl/headers/include/thrust/mr/host_memory_resource.h +38 -0
  1426. cuda/cccl/headers/include/thrust/mr/memory_resource.h +217 -0
  1427. cuda/cccl/headers/include/thrust/mr/new.h +100 -0
  1428. cuda/cccl/headers/include/thrust/mr/polymorphic_adaptor.h +63 -0
  1429. cuda/cccl/headers/include/thrust/mr/pool.h +526 -0
  1430. cuda/cccl/headers/include/thrust/mr/pool_options.h +174 -0
  1431. cuda/cccl/headers/include/thrust/mr/sync_pool.h +114 -0
  1432. cuda/cccl/headers/include/thrust/mr/tls_pool.h +64 -0
  1433. cuda/cccl/headers/include/thrust/mr/universal_memory_resource.h +29 -0
  1434. cuda/cccl/headers/include/thrust/mr/validator.h +56 -0
  1435. cuda/cccl/headers/include/thrust/pair.h +99 -0
  1436. cuda/cccl/headers/include/thrust/partition.h +1391 -0
  1437. cuda/cccl/headers/include/thrust/per_device_resource.h +98 -0
  1438. cuda/cccl/headers/include/thrust/random/detail/discard_block_engine.inl +184 -0
  1439. cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine.inl +155 -0
  1440. cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine_discard.h +104 -0
  1441. cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine.inl +151 -0
  1442. cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h +53 -0
  1443. cuda/cccl/headers/include/thrust/random/detail/mod.h +101 -0
  1444. cuda/cccl/headers/include/thrust/random/detail/normal_distribution.inl +187 -0
  1445. cuda/cccl/headers/include/thrust/random/detail/normal_distribution_base.h +160 -0
  1446. cuda/cccl/headers/include/thrust/random/detail/random_core_access.h +63 -0
  1447. cuda/cccl/headers/include/thrust/random/detail/subtract_with_carry_engine.inl +201 -0
  1448. cuda/cccl/headers/include/thrust/random/detail/uniform_int_distribution.inl +198 -0
  1449. cuda/cccl/headers/include/thrust/random/detail/uniform_real_distribution.inl +200 -0
  1450. cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine.inl +183 -0
  1451. cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine_max.h +187 -0
  1452. cuda/cccl/headers/include/thrust/random/discard_block_engine.h +240 -0
  1453. cuda/cccl/headers/include/thrust/random/linear_congruential_engine.h +289 -0
  1454. cuda/cccl/headers/include/thrust/random/linear_feedback_shift_engine.h +217 -0
  1455. cuda/cccl/headers/include/thrust/random/normal_distribution.h +257 -0
  1456. cuda/cccl/headers/include/thrust/random/subtract_with_carry_engine.h +247 -0
  1457. cuda/cccl/headers/include/thrust/random/uniform_int_distribution.h +261 -0
  1458. cuda/cccl/headers/include/thrust/random/uniform_real_distribution.h +258 -0
  1459. cuda/cccl/headers/include/thrust/random/xor_combine_engine.h +255 -0
  1460. cuda/cccl/headers/include/thrust/random.h +120 -0
  1461. cuda/cccl/headers/include/thrust/reduce.h +1113 -0
  1462. cuda/cccl/headers/include/thrust/remove.h +768 -0
  1463. cuda/cccl/headers/include/thrust/replace.h +826 -0
  1464. cuda/cccl/headers/include/thrust/reverse.h +215 -0
  1465. cuda/cccl/headers/include/thrust/scan.h +1671 -0
  1466. cuda/cccl/headers/include/thrust/scatter.h +446 -0
  1467. cuda/cccl/headers/include/thrust/sequence.h +277 -0
  1468. cuda/cccl/headers/include/thrust/set_operations.h +3026 -0
  1469. cuda/cccl/headers/include/thrust/shuffle.h +182 -0
  1470. cuda/cccl/headers/include/thrust/sort.h +1320 -0
  1471. cuda/cccl/headers/include/thrust/swap.h +147 -0
  1472. cuda/cccl/headers/include/thrust/system/cpp/detail/adjacent_difference.h +30 -0
  1473. cuda/cccl/headers/include/thrust/system/cpp/detail/assign_value.h +30 -0
  1474. cuda/cccl/headers/include/thrust/system/cpp/detail/binary_search.h +32 -0
  1475. cuda/cccl/headers/include/thrust/system/cpp/detail/copy.h +30 -0
  1476. cuda/cccl/headers/include/thrust/system/cpp/detail/copy_if.h +30 -0
  1477. cuda/cccl/headers/include/thrust/system/cpp/detail/count.h +29 -0
  1478. cuda/cccl/headers/include/thrust/system/cpp/detail/equal.h +29 -0
  1479. cuda/cccl/headers/include/thrust/system/cpp/detail/execution_policy.h +109 -0
  1480. cuda/cccl/headers/include/thrust/system/cpp/detail/extrema.h +30 -0
  1481. cuda/cccl/headers/include/thrust/system/cpp/detail/fill.h +29 -0
  1482. cuda/cccl/headers/include/thrust/system/cpp/detail/find.h +30 -0
  1483. cuda/cccl/headers/include/thrust/system/cpp/detail/for_each.h +30 -0
  1484. cuda/cccl/headers/include/thrust/system/cpp/detail/gather.h +29 -0
  1485. cuda/cccl/headers/include/thrust/system/cpp/detail/generate.h +29 -0
  1486. cuda/cccl/headers/include/thrust/system/cpp/detail/get_value.h +30 -0
  1487. cuda/cccl/headers/include/thrust/system/cpp/detail/inner_product.h +29 -0
  1488. cuda/cccl/headers/include/thrust/system/cpp/detail/iter_swap.h +30 -0
  1489. cuda/cccl/headers/include/thrust/system/cpp/detail/logical.h +29 -0
  1490. cuda/cccl/headers/include/thrust/system/cpp/detail/malloc_and_free.h +30 -0
  1491. cuda/cccl/headers/include/thrust/system/cpp/detail/memory.inl +60 -0
  1492. cuda/cccl/headers/include/thrust/system/cpp/detail/merge.h +30 -0
  1493. cuda/cccl/headers/include/thrust/system/cpp/detail/mismatch.h +29 -0
  1494. cuda/cccl/headers/include/thrust/system/cpp/detail/partition.h +30 -0
  1495. cuda/cccl/headers/include/thrust/system/cpp/detail/per_device_resource.h +29 -0
  1496. cuda/cccl/headers/include/thrust/system/cpp/detail/reduce.h +30 -0
  1497. cuda/cccl/headers/include/thrust/system/cpp/detail/reduce_by_key.h +30 -0
  1498. cuda/cccl/headers/include/thrust/system/cpp/detail/remove.h +30 -0
  1499. cuda/cccl/headers/include/thrust/system/cpp/detail/replace.h +29 -0
  1500. cuda/cccl/headers/include/thrust/system/cpp/detail/reverse.h +29 -0
  1501. cuda/cccl/headers/include/thrust/system/cpp/detail/scan.h +30 -0
  1502. cuda/cccl/headers/include/thrust/system/cpp/detail/scan_by_key.h +30 -0
  1503. cuda/cccl/headers/include/thrust/system/cpp/detail/scatter.h +29 -0
  1504. cuda/cccl/headers/include/thrust/system/cpp/detail/sequence.h +29 -0
  1505. cuda/cccl/headers/include/thrust/system/cpp/detail/set_operations.h +30 -0
  1506. cuda/cccl/headers/include/thrust/system/cpp/detail/sort.h +30 -0
  1507. cuda/cccl/headers/include/thrust/system/cpp/detail/swap_ranges.h +29 -0
  1508. cuda/cccl/headers/include/thrust/system/cpp/detail/tabulate.h +29 -0
  1509. cuda/cccl/headers/include/thrust/system/cpp/detail/temporary_buffer.h +29 -0
  1510. cuda/cccl/headers/include/thrust/system/cpp/detail/transform.h +29 -0
  1511. cuda/cccl/headers/include/thrust/system/cpp/detail/transform_reduce.h +29 -0
  1512. cuda/cccl/headers/include/thrust/system/cpp/detail/transform_scan.h +29 -0
  1513. cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_copy.h +29 -0
  1514. cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_fill.h +29 -0
  1515. cuda/cccl/headers/include/thrust/system/cpp/detail/unique.h +30 -0
  1516. cuda/cccl/headers/include/thrust/system/cpp/detail/unique_by_key.h +30 -0
  1517. cuda/cccl/headers/include/thrust/system/cpp/execution_policy.h +63 -0
  1518. cuda/cccl/headers/include/thrust/system/cpp/memory.h +106 -0
  1519. cuda/cccl/headers/include/thrust/system/cpp/memory_resource.h +72 -0
  1520. cuda/cccl/headers/include/thrust/system/cpp/pointer.h +120 -0
  1521. cuda/cccl/headers/include/thrust/system/cpp/vector.h +96 -0
  1522. cuda/cccl/headers/include/thrust/system/cuda/config.h +126 -0
  1523. cuda/cccl/headers/include/thrust/system/cuda/detail/adjacent_difference.h +219 -0
  1524. cuda/cccl/headers/include/thrust/system/cuda/detail/assign_value.h +124 -0
  1525. cuda/cccl/headers/include/thrust/system/cuda/detail/binary_search.h +29 -0
  1526. cuda/cccl/headers/include/thrust/system/cuda/detail/cdp_dispatch.h +72 -0
  1527. cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +240 -0
  1528. cuda/cccl/headers/include/thrust/system/cuda/detail/copy_if.h +255 -0
  1529. cuda/cccl/headers/include/thrust/system/cuda/detail/core/agent_launcher.h +289 -0
  1530. cuda/cccl/headers/include/thrust/system/cuda/detail/core/triple_chevron_launch.h +191 -0
  1531. cuda/cccl/headers/include/thrust/system/cuda/detail/core/util.h +593 -0
  1532. cuda/cccl/headers/include/thrust/system/cuda/detail/count.h +75 -0
  1533. cuda/cccl/headers/include/thrust/system/cuda/detail/cross_system.h +243 -0
  1534. cuda/cccl/headers/include/thrust/system/cuda/detail/dispatch.h +210 -0
  1535. cuda/cccl/headers/include/thrust/system/cuda/detail/equal.h +64 -0
  1536. cuda/cccl/headers/include/thrust/system/cuda/detail/error.inl +96 -0
  1537. cuda/cccl/headers/include/thrust/system/cuda/detail/execution_policy.h +264 -0
  1538. cuda/cccl/headers/include/thrust/system/cuda/detail/extrema.h +476 -0
  1539. cuda/cccl/headers/include/thrust/system/cuda/detail/fill.h +100 -0
  1540. cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +272 -0
  1541. cuda/cccl/headers/include/thrust/system/cuda/detail/for_each.h +83 -0
  1542. cuda/cccl/headers/include/thrust/system/cuda/detail/gather.h +91 -0
  1543. cuda/cccl/headers/include/thrust/system/cuda/detail/generate.h +60 -0
  1544. cuda/cccl/headers/include/thrust/system/cuda/detail/get_value.h +65 -0
  1545. cuda/cccl/headers/include/thrust/system/cuda/detail/inner_product.h +75 -0
  1546. cuda/cccl/headers/include/thrust/system/cuda/detail/iter_swap.h +80 -0
  1547. cuda/cccl/headers/include/thrust/system/cuda/detail/logical.h +29 -0
  1548. cuda/cccl/headers/include/thrust/system/cuda/detail/make_unsigned_special.h +61 -0
  1549. cuda/cccl/headers/include/thrust/system/cuda/detail/malloc_and_free.h +121 -0
  1550. cuda/cccl/headers/include/thrust/system/cuda/detail/memory.inl +57 -0
  1551. cuda/cccl/headers/include/thrust/system/cuda/detail/merge.h +228 -0
  1552. cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +217 -0
  1553. cuda/cccl/headers/include/thrust/system/cuda/detail/parallel_for.h +81 -0
  1554. cuda/cccl/headers/include/thrust/system/cuda/detail/partition.h +405 -0
  1555. cuda/cccl/headers/include/thrust/system/cuda/detail/per_device_resource.h +72 -0
  1556. cuda/cccl/headers/include/thrust/system/cuda/detail/reduce.h +785 -0
  1557. cuda/cccl/headers/include/thrust/system/cuda/detail/reduce_by_key.h +1001 -0
  1558. cuda/cccl/headers/include/thrust/system/cuda/detail/remove.h +107 -0
  1559. cuda/cccl/headers/include/thrust/system/cuda/detail/replace.h +122 -0
  1560. cuda/cccl/headers/include/thrust/system/cuda/detail/reverse.h +87 -0
  1561. cuda/cccl/headers/include/thrust/system/cuda/detail/scan.h +342 -0
  1562. cuda/cccl/headers/include/thrust/system/cuda/detail/scan_by_key.h +414 -0
  1563. cuda/cccl/headers/include/thrust/system/cuda/detail/scatter.h +91 -0
  1564. cuda/cccl/headers/include/thrust/system/cuda/detail/sequence.h +29 -0
  1565. cuda/cccl/headers/include/thrust/system/cuda/detail/set_operations.h +1734 -0
  1566. cuda/cccl/headers/include/thrust/system/cuda/detail/sort.h +470 -0
  1567. cuda/cccl/headers/include/thrust/system/cuda/detail/swap_ranges.h +98 -0
  1568. cuda/cccl/headers/include/thrust/system/cuda/detail/tabulate.h +75 -0
  1569. cuda/cccl/headers/include/thrust/system/cuda/detail/temporary_buffer.h +132 -0
  1570. cuda/cccl/headers/include/thrust/system/cuda/detail/terminate.h +53 -0
  1571. cuda/cccl/headers/include/thrust/system/cuda/detail/transform.h +429 -0
  1572. cuda/cccl/headers/include/thrust/system/cuda/detail/transform_reduce.h +143 -0
  1573. cuda/cccl/headers/include/thrust/system/cuda/detail/transform_scan.h +119 -0
  1574. cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_copy.h +117 -0
  1575. cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_fill.h +105 -0
  1576. cuda/cccl/headers/include/thrust/system/cuda/detail/unique.h +289 -0
  1577. cuda/cccl/headers/include/thrust/system/cuda/detail/unique_by_key.h +310 -0
  1578. cuda/cccl/headers/include/thrust/system/cuda/detail/util.h +253 -0
  1579. cuda/cccl/headers/include/thrust/system/cuda/error.h +168 -0
  1580. cuda/cccl/headers/include/thrust/system/cuda/execution_policy.h +15 -0
  1581. cuda/cccl/headers/include/thrust/system/cuda/memory.h +122 -0
  1582. cuda/cccl/headers/include/thrust/system/cuda/memory_resource.h +122 -0
  1583. cuda/cccl/headers/include/thrust/system/cuda/pointer.h +160 -0
  1584. cuda/cccl/headers/include/thrust/system/cuda/vector.h +108 -0
  1585. cuda/cccl/headers/include/thrust/system/detail/adl/adjacent_difference.h +51 -0
  1586. cuda/cccl/headers/include/thrust/system/detail/adl/assign_value.h +51 -0
  1587. cuda/cccl/headers/include/thrust/system/detail/adl/binary_search.h +51 -0
  1588. cuda/cccl/headers/include/thrust/system/detail/adl/copy.h +51 -0
  1589. cuda/cccl/headers/include/thrust/system/detail/adl/copy_if.h +52 -0
  1590. cuda/cccl/headers/include/thrust/system/detail/adl/count.h +51 -0
  1591. cuda/cccl/headers/include/thrust/system/detail/adl/equal.h +51 -0
  1592. cuda/cccl/headers/include/thrust/system/detail/adl/extrema.h +51 -0
  1593. cuda/cccl/headers/include/thrust/system/detail/adl/fill.h +51 -0
  1594. cuda/cccl/headers/include/thrust/system/detail/adl/find.h +51 -0
  1595. cuda/cccl/headers/include/thrust/system/detail/adl/for_each.h +51 -0
  1596. cuda/cccl/headers/include/thrust/system/detail/adl/gather.h +51 -0
  1597. cuda/cccl/headers/include/thrust/system/detail/adl/generate.h +51 -0
  1598. cuda/cccl/headers/include/thrust/system/detail/adl/get_value.h +51 -0
  1599. cuda/cccl/headers/include/thrust/system/detail/adl/inner_product.h +51 -0
  1600. cuda/cccl/headers/include/thrust/system/detail/adl/iter_swap.h +51 -0
  1601. cuda/cccl/headers/include/thrust/system/detail/adl/logical.h +51 -0
  1602. cuda/cccl/headers/include/thrust/system/detail/adl/malloc_and_free.h +51 -0
  1603. cuda/cccl/headers/include/thrust/system/detail/adl/merge.h +51 -0
  1604. cuda/cccl/headers/include/thrust/system/detail/adl/mismatch.h +51 -0
  1605. cuda/cccl/headers/include/thrust/system/detail/adl/partition.h +51 -0
  1606. cuda/cccl/headers/include/thrust/system/detail/adl/per_device_resource.h +51 -0
  1607. cuda/cccl/headers/include/thrust/system/detail/adl/reduce.h +51 -0
  1608. cuda/cccl/headers/include/thrust/system/detail/adl/reduce_by_key.h +51 -0
  1609. cuda/cccl/headers/include/thrust/system/detail/adl/remove.h +51 -0
  1610. cuda/cccl/headers/include/thrust/system/detail/adl/replace.h +51 -0
  1611. cuda/cccl/headers/include/thrust/system/detail/adl/reverse.h +51 -0
  1612. cuda/cccl/headers/include/thrust/system/detail/adl/scan.h +51 -0
  1613. cuda/cccl/headers/include/thrust/system/detail/adl/scan_by_key.h +51 -0
  1614. cuda/cccl/headers/include/thrust/system/detail/adl/scatter.h +51 -0
  1615. cuda/cccl/headers/include/thrust/system/detail/adl/sequence.h +51 -0
  1616. cuda/cccl/headers/include/thrust/system/detail/adl/set_operations.h +51 -0
  1617. cuda/cccl/headers/include/thrust/system/detail/adl/sort.h +51 -0
  1618. cuda/cccl/headers/include/thrust/system/detail/adl/swap_ranges.h +51 -0
  1619. cuda/cccl/headers/include/thrust/system/detail/adl/tabulate.h +51 -0
  1620. cuda/cccl/headers/include/thrust/system/detail/adl/temporary_buffer.h +51 -0
  1621. cuda/cccl/headers/include/thrust/system/detail/adl/transform.h +51 -0
  1622. cuda/cccl/headers/include/thrust/system/detail/adl/transform_reduce.h +51 -0
  1623. cuda/cccl/headers/include/thrust/system/detail/adl/transform_scan.h +51 -0
  1624. cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_copy.h +51 -0
  1625. cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_fill.h +51 -0
  1626. cuda/cccl/headers/include/thrust/system/detail/adl/unique.h +51 -0
  1627. cuda/cccl/headers/include/thrust/system/detail/adl/unique_by_key.h +51 -0
  1628. cuda/cccl/headers/include/thrust/system/detail/bad_alloc.h +61 -0
  1629. cuda/cccl/headers/include/thrust/system/detail/errno.h +120 -0
  1630. cuda/cccl/headers/include/thrust/system/detail/error_category.inl +302 -0
  1631. cuda/cccl/headers/include/thrust/system/detail/error_code.inl +173 -0
  1632. cuda/cccl/headers/include/thrust/system/detail/error_condition.inl +121 -0
  1633. cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.h +53 -0
  1634. cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.inl +79 -0
  1635. cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.h +161 -0
  1636. cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.inl +384 -0
  1637. cuda/cccl/headers/include/thrust/system/detail/generic/copy.h +45 -0
  1638. cuda/cccl/headers/include/thrust/system/detail/generic/copy.inl +64 -0
  1639. cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.h +58 -0
  1640. cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.inl +146 -0
  1641. cuda/cccl/headers/include/thrust/system/detail/generic/count.h +48 -0
  1642. cuda/cccl/headers/include/thrust/system/detail/generic/count.inl +84 -0
  1643. cuda/cccl/headers/include/thrust/system/detail/generic/equal.h +49 -0
  1644. cuda/cccl/headers/include/thrust/system/detail/generic/equal.inl +60 -0
  1645. cuda/cccl/headers/include/thrust/system/detail/generic/extrema.h +66 -0
  1646. cuda/cccl/headers/include/thrust/system/detail/generic/extrema.inl +252 -0
  1647. cuda/cccl/headers/include/thrust/system/detail/generic/fill.h +54 -0
  1648. cuda/cccl/headers/include/thrust/system/detail/generic/find.h +49 -0
  1649. cuda/cccl/headers/include/thrust/system/detail/generic/find.inl +137 -0
  1650. cuda/cccl/headers/include/thrust/system/detail/generic/for_each.h +58 -0
  1651. cuda/cccl/headers/include/thrust/system/detail/generic/gather.h +73 -0
  1652. cuda/cccl/headers/include/thrust/system/detail/generic/gather.inl +96 -0
  1653. cuda/cccl/headers/include/thrust/system/detail/generic/generate.h +45 -0
  1654. cuda/cccl/headers/include/thrust/system/detail/generic/generate.inl +63 -0
  1655. cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.h +60 -0
  1656. cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.inl +72 -0
  1657. cuda/cccl/headers/include/thrust/system/detail/generic/logical.h +59 -0
  1658. cuda/cccl/headers/include/thrust/system/detail/generic/memory.h +64 -0
  1659. cuda/cccl/headers/include/thrust/system/detail/generic/memory.inl +86 -0
  1660. cuda/cccl/headers/include/thrust/system/detail/generic/merge.h +99 -0
  1661. cuda/cccl/headers/include/thrust/system/detail/generic/merge.inl +148 -0
  1662. cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.h +49 -0
  1663. cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.inl +68 -0
  1664. cuda/cccl/headers/include/thrust/system/detail/generic/partition.h +129 -0
  1665. cuda/cccl/headers/include/thrust/system/detail/generic/partition.inl +207 -0
  1666. cuda/cccl/headers/include/thrust/system/detail/generic/per_device_resource.h +43 -0
  1667. cuda/cccl/headers/include/thrust/system/detail/generic/reduce.h +71 -0
  1668. cuda/cccl/headers/include/thrust/system/detail/generic/reduce.inl +100 -0
  1669. cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.h +83 -0
  1670. cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.inl +186 -0
  1671. cuda/cccl/headers/include/thrust/system/detail/generic/remove.h +86 -0
  1672. cuda/cccl/headers/include/thrust/system/detail/generic/remove.inl +121 -0
  1673. cuda/cccl/headers/include/thrust/system/detail/generic/replace.h +95 -0
  1674. cuda/cccl/headers/include/thrust/system/detail/generic/replace.inl +175 -0
  1675. cuda/cccl/headers/include/thrust/system/detail/generic/reverse.h +48 -0
  1676. cuda/cccl/headers/include/thrust/system/detail/generic/reverse.inl +67 -0
  1677. cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.h +63 -0
  1678. cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.inl +126 -0
  1679. cuda/cccl/headers/include/thrust/system/detail/generic/scan.h +72 -0
  1680. cuda/cccl/headers/include/thrust/system/detail/generic/scan.inl +85 -0
  1681. cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.h +126 -0
  1682. cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.inl +232 -0
  1683. cuda/cccl/headers/include/thrust/system/detail/generic/scatter.h +73 -0
  1684. cuda/cccl/headers/include/thrust/system/detail/generic/scatter.inl +85 -0
  1685. cuda/cccl/headers/include/thrust/system/detail/generic/select_system.h +104 -0
  1686. cuda/cccl/headers/include/thrust/system/detail/generic/sequence.h +70 -0
  1687. cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.h +282 -0
  1688. cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.inl +476 -0
  1689. cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.h +54 -0
  1690. cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.inl +125 -0
  1691. cuda/cccl/headers/include/thrust/system/detail/generic/sort.h +113 -0
  1692. cuda/cccl/headers/include/thrust/system/detail/generic/sort.inl +175 -0
  1693. cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.h +44 -0
  1694. cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.inl +76 -0
  1695. cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.h +41 -0
  1696. cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.inl +54 -0
  1697. cuda/cccl/headers/include/thrust/system/detail/generic/tag.h +47 -0
  1698. cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.h +54 -0
  1699. cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.inl +82 -0
  1700. cuda/cccl/headers/include/thrust/system/detail/generic/transform.h +395 -0
  1701. cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.h +50 -0
  1702. cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.inl +56 -0
  1703. cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.h +80 -0
  1704. cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.inl +113 -0
  1705. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.h +45 -0
  1706. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.inl +166 -0
  1707. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.h +45 -0
  1708. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.inl +115 -0
  1709. cuda/cccl/headers/include/thrust/system/detail/generic/unique.h +71 -0
  1710. cuda/cccl/headers/include/thrust/system/detail/generic/unique.inl +113 -0
  1711. cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.h +81 -0
  1712. cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.inl +126 -0
  1713. cuda/cccl/headers/include/thrust/system/detail/internal/decompose.h +117 -0
  1714. cuda/cccl/headers/include/thrust/system/detail/sequential/adjacent_difference.h +70 -0
  1715. cuda/cccl/headers/include/thrust/system/detail/sequential/assign_value.h +42 -0
  1716. cuda/cccl/headers/include/thrust/system/detail/sequential/binary_search.h +136 -0
  1717. cuda/cccl/headers/include/thrust/system/detail/sequential/copy.h +49 -0
  1718. cuda/cccl/headers/include/thrust/system/detail/sequential/copy.inl +119 -0
  1719. cuda/cccl/headers/include/thrust/system/detail/sequential/copy_backward.h +49 -0
  1720. cuda/cccl/headers/include/thrust/system/detail/sequential/copy_if.h +71 -0
  1721. cuda/cccl/headers/include/thrust/system/detail/sequential/count.h +29 -0
  1722. cuda/cccl/headers/include/thrust/system/detail/sequential/equal.h +29 -0
  1723. cuda/cccl/headers/include/thrust/system/detail/sequential/execution_policy.h +52 -0
  1724. cuda/cccl/headers/include/thrust/system/detail/sequential/extrema.h +110 -0
  1725. cuda/cccl/headers/include/thrust/system/detail/sequential/fill.h +29 -0
  1726. cuda/cccl/headers/include/thrust/system/detail/sequential/find.h +62 -0
  1727. cuda/cccl/headers/include/thrust/system/detail/sequential/for_each.h +74 -0
  1728. cuda/cccl/headers/include/thrust/system/detail/sequential/gather.h +29 -0
  1729. cuda/cccl/headers/include/thrust/system/detail/sequential/general_copy.h +123 -0
  1730. cuda/cccl/headers/include/thrust/system/detail/sequential/generate.h +29 -0
  1731. cuda/cccl/headers/include/thrust/system/detail/sequential/get_value.h +43 -0
  1732. cuda/cccl/headers/include/thrust/system/detail/sequential/inner_product.h +29 -0
  1733. cuda/cccl/headers/include/thrust/system/detail/sequential/insertion_sort.h +141 -0
  1734. cuda/cccl/headers/include/thrust/system/detail/sequential/iter_swap.h +45 -0
  1735. cuda/cccl/headers/include/thrust/system/detail/sequential/logical.h +29 -0
  1736. cuda/cccl/headers/include/thrust/system/detail/sequential/malloc_and_free.h +50 -0
  1737. cuda/cccl/headers/include/thrust/system/detail/sequential/merge.h +75 -0
  1738. cuda/cccl/headers/include/thrust/system/detail/sequential/merge.inl +145 -0
  1739. cuda/cccl/headers/include/thrust/system/detail/sequential/mismatch.h +29 -0
  1740. cuda/cccl/headers/include/thrust/system/detail/sequential/partition.h +301 -0
  1741. cuda/cccl/headers/include/thrust/system/detail/sequential/per_device_resource.h +29 -0
  1742. cuda/cccl/headers/include/thrust/system/detail/sequential/reduce.h +64 -0
  1743. cuda/cccl/headers/include/thrust/system/detail/sequential/reduce_by_key.h +98 -0
  1744. cuda/cccl/headers/include/thrust/system/detail/sequential/remove.h +179 -0
  1745. cuda/cccl/headers/include/thrust/system/detail/sequential/replace.h +29 -0
  1746. cuda/cccl/headers/include/thrust/system/detail/sequential/reverse.h +29 -0
  1747. cuda/cccl/headers/include/thrust/system/detail/sequential/scan.h +154 -0
  1748. cuda/cccl/headers/include/thrust/system/detail/sequential/scan_by_key.h +145 -0
  1749. cuda/cccl/headers/include/thrust/system/detail/sequential/scatter.h +29 -0
  1750. cuda/cccl/headers/include/thrust/system/detail/sequential/sequence.h +29 -0
  1751. cuda/cccl/headers/include/thrust/system/detail/sequential/set_operations.h +206 -0
  1752. cuda/cccl/headers/include/thrust/system/detail/sequential/sort.h +59 -0
  1753. cuda/cccl/headers/include/thrust/system/detail/sequential/sort.inl +116 -0
  1754. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.h +55 -0
  1755. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.inl +356 -0
  1756. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.h +48 -0
  1757. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.inl +124 -0
  1758. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.h +48 -0
  1759. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.inl +586 -0
  1760. cuda/cccl/headers/include/thrust/system/detail/sequential/swap_ranges.h +29 -0
  1761. cuda/cccl/headers/include/thrust/system/detail/sequential/tabulate.h +29 -0
  1762. cuda/cccl/headers/include/thrust/system/detail/sequential/temporary_buffer.h +29 -0
  1763. cuda/cccl/headers/include/thrust/system/detail/sequential/transform.h +29 -0
  1764. cuda/cccl/headers/include/thrust/system/detail/sequential/transform_reduce.h +29 -0
  1765. cuda/cccl/headers/include/thrust/system/detail/sequential/transform_scan.h +29 -0
  1766. cuda/cccl/headers/include/thrust/system/detail/sequential/trivial_copy.h +58 -0
  1767. cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_copy.h +29 -0
  1768. cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_fill.h +29 -0
  1769. cuda/cccl/headers/include/thrust/system/detail/sequential/unique.h +115 -0
  1770. cuda/cccl/headers/include/thrust/system/detail/sequential/unique_by_key.h +106 -0
  1771. cuda/cccl/headers/include/thrust/system/detail/system_error.inl +108 -0
  1772. cuda/cccl/headers/include/thrust/system/error_code.h +512 -0
  1773. cuda/cccl/headers/include/thrust/system/omp/detail/adjacent_difference.h +54 -0
  1774. cuda/cccl/headers/include/thrust/system/omp/detail/assign_value.h +30 -0
  1775. cuda/cccl/headers/include/thrust/system/omp/detail/binary_search.h +77 -0
  1776. cuda/cccl/headers/include/thrust/system/omp/detail/copy.h +50 -0
  1777. cuda/cccl/headers/include/thrust/system/omp/detail/copy.inl +74 -0
  1778. cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.h +56 -0
  1779. cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.inl +59 -0
  1780. cuda/cccl/headers/include/thrust/system/omp/detail/count.h +30 -0
  1781. cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.h +50 -0
  1782. cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.inl +65 -0
  1783. cuda/cccl/headers/include/thrust/system/omp/detail/equal.h +30 -0
  1784. cuda/cccl/headers/include/thrust/system/omp/detail/execution_policy.h +127 -0
  1785. cuda/cccl/headers/include/thrust/system/omp/detail/extrema.h +66 -0
  1786. cuda/cccl/headers/include/thrust/system/omp/detail/fill.h +30 -0
  1787. cuda/cccl/headers/include/thrust/system/omp/detail/find.h +53 -0
  1788. cuda/cccl/headers/include/thrust/system/omp/detail/for_each.h +56 -0
  1789. cuda/cccl/headers/include/thrust/system/omp/detail/for_each.inl +87 -0
  1790. cuda/cccl/headers/include/thrust/system/omp/detail/gather.h +30 -0
  1791. cuda/cccl/headers/include/thrust/system/omp/detail/generate.h +30 -0
  1792. cuda/cccl/headers/include/thrust/system/omp/detail/get_value.h +30 -0
  1793. cuda/cccl/headers/include/thrust/system/omp/detail/inner_product.h +30 -0
  1794. cuda/cccl/headers/include/thrust/system/omp/detail/iter_swap.h +30 -0
  1795. cuda/cccl/headers/include/thrust/system/omp/detail/logical.h +30 -0
  1796. cuda/cccl/headers/include/thrust/system/omp/detail/malloc_and_free.h +30 -0
  1797. cuda/cccl/headers/include/thrust/system/omp/detail/memory.inl +93 -0
  1798. cuda/cccl/headers/include/thrust/system/omp/detail/merge.h +30 -0
  1799. cuda/cccl/headers/include/thrust/system/omp/detail/mismatch.h +30 -0
  1800. cuda/cccl/headers/include/thrust/system/omp/detail/partition.h +88 -0
  1801. cuda/cccl/headers/include/thrust/system/omp/detail/partition.inl +102 -0
  1802. cuda/cccl/headers/include/thrust/system/omp/detail/per_device_resource.h +29 -0
  1803. cuda/cccl/headers/include/thrust/system/omp/detail/pragma_omp.h +54 -0
  1804. cuda/cccl/headers/include/thrust/system/omp/detail/reduce.h +54 -0
  1805. cuda/cccl/headers/include/thrust/system/omp/detail/reduce.inl +78 -0
  1806. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.h +64 -0
  1807. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.inl +65 -0
  1808. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.h +59 -0
  1809. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.inl +103 -0
  1810. cuda/cccl/headers/include/thrust/system/omp/detail/remove.h +72 -0
  1811. cuda/cccl/headers/include/thrust/system/omp/detail/remove.inl +87 -0
  1812. cuda/cccl/headers/include/thrust/system/omp/detail/replace.h +30 -0
  1813. cuda/cccl/headers/include/thrust/system/omp/detail/reverse.h +30 -0
  1814. cuda/cccl/headers/include/thrust/system/omp/detail/scan.h +30 -0
  1815. cuda/cccl/headers/include/thrust/system/omp/detail/scan_by_key.h +30 -0
  1816. cuda/cccl/headers/include/thrust/system/omp/detail/scatter.h +30 -0
  1817. cuda/cccl/headers/include/thrust/system/omp/detail/sequence.h +30 -0
  1818. cuda/cccl/headers/include/thrust/system/omp/detail/set_operations.h +30 -0
  1819. cuda/cccl/headers/include/thrust/system/omp/detail/sort.h +60 -0
  1820. cuda/cccl/headers/include/thrust/system/omp/detail/sort.inl +265 -0
  1821. cuda/cccl/headers/include/thrust/system/omp/detail/swap_ranges.h +30 -0
  1822. cuda/cccl/headers/include/thrust/system/omp/detail/tabulate.h +30 -0
  1823. cuda/cccl/headers/include/thrust/system/omp/detail/temporary_buffer.h +29 -0
  1824. cuda/cccl/headers/include/thrust/system/omp/detail/transform.h +30 -0
  1825. cuda/cccl/headers/include/thrust/system/omp/detail/transform_reduce.h +30 -0
  1826. cuda/cccl/headers/include/thrust/system/omp/detail/transform_scan.h +30 -0
  1827. cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_copy.h +30 -0
  1828. cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_fill.h +30 -0
  1829. cuda/cccl/headers/include/thrust/system/omp/detail/unique.h +60 -0
  1830. cuda/cccl/headers/include/thrust/system/omp/detail/unique.inl +71 -0
  1831. cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.h +67 -0
  1832. cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.inl +75 -0
  1833. cuda/cccl/headers/include/thrust/system/omp/execution_policy.h +62 -0
  1834. cuda/cccl/headers/include/thrust/system/omp/memory.h +111 -0
  1835. cuda/cccl/headers/include/thrust/system/omp/memory_resource.h +75 -0
  1836. cuda/cccl/headers/include/thrust/system/omp/pointer.h +124 -0
  1837. cuda/cccl/headers/include/thrust/system/omp/vector.h +99 -0
  1838. cuda/cccl/headers/include/thrust/system/system_error.h +185 -0
  1839. cuda/cccl/headers/include/thrust/system/tbb/detail/adjacent_difference.h +54 -0
  1840. cuda/cccl/headers/include/thrust/system/tbb/detail/assign_value.h +30 -0
  1841. cuda/cccl/headers/include/thrust/system/tbb/detail/binary_search.h +30 -0
  1842. cuda/cccl/headers/include/thrust/system/tbb/detail/copy.h +50 -0
  1843. cuda/cccl/headers/include/thrust/system/tbb/detail/copy.inl +73 -0
  1844. cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.h +47 -0
  1845. cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.inl +136 -0
  1846. cuda/cccl/headers/include/thrust/system/tbb/detail/count.h +30 -0
  1847. cuda/cccl/headers/include/thrust/system/tbb/detail/equal.h +30 -0
  1848. cuda/cccl/headers/include/thrust/system/tbb/detail/execution_policy.h +109 -0
  1849. cuda/cccl/headers/include/thrust/system/tbb/detail/extrema.h +66 -0
  1850. cuda/cccl/headers/include/thrust/system/tbb/detail/fill.h +30 -0
  1851. cuda/cccl/headers/include/thrust/system/tbb/detail/find.h +49 -0
  1852. cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.h +51 -0
  1853. cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.inl +91 -0
  1854. cuda/cccl/headers/include/thrust/system/tbb/detail/gather.h +30 -0
  1855. cuda/cccl/headers/include/thrust/system/tbb/detail/generate.h +30 -0
  1856. cuda/cccl/headers/include/thrust/system/tbb/detail/get_value.h +30 -0
  1857. cuda/cccl/headers/include/thrust/system/tbb/detail/inner_product.h +30 -0
  1858. cuda/cccl/headers/include/thrust/system/tbb/detail/iter_swap.h +30 -0
  1859. cuda/cccl/headers/include/thrust/system/tbb/detail/logical.h +30 -0
  1860. cuda/cccl/headers/include/thrust/system/tbb/detail/malloc_and_free.h +30 -0
  1861. cuda/cccl/headers/include/thrust/system/tbb/detail/memory.inl +94 -0
  1862. cuda/cccl/headers/include/thrust/system/tbb/detail/merge.h +77 -0
  1863. cuda/cccl/headers/include/thrust/system/tbb/detail/merge.inl +327 -0
  1864. cuda/cccl/headers/include/thrust/system/tbb/detail/mismatch.h +30 -0
  1865. cuda/cccl/headers/include/thrust/system/tbb/detail/partition.h +84 -0
  1866. cuda/cccl/headers/include/thrust/system/tbb/detail/partition.inl +98 -0
  1867. cuda/cccl/headers/include/thrust/system/tbb/detail/per_device_resource.h +29 -0
  1868. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.h +54 -0
  1869. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.inl +137 -0
  1870. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.h +61 -0
  1871. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.inl +400 -0
  1872. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_intervals.h +140 -0
  1873. cuda/cccl/headers/include/thrust/system/tbb/detail/remove.h +76 -0
  1874. cuda/cccl/headers/include/thrust/system/tbb/detail/remove.inl +87 -0
  1875. cuda/cccl/headers/include/thrust/system/tbb/detail/replace.h +30 -0
  1876. cuda/cccl/headers/include/thrust/system/tbb/detail/reverse.h +30 -0
  1877. cuda/cccl/headers/include/thrust/system/tbb/detail/scan.h +59 -0
  1878. cuda/cccl/headers/include/thrust/system/tbb/detail/scan.inl +312 -0
  1879. cuda/cccl/headers/include/thrust/system/tbb/detail/scan_by_key.h +30 -0
  1880. cuda/cccl/headers/include/thrust/system/tbb/detail/scatter.h +30 -0
  1881. cuda/cccl/headers/include/thrust/system/tbb/detail/sequence.h +30 -0
  1882. cuda/cccl/headers/include/thrust/system/tbb/detail/set_operations.h +30 -0
  1883. cuda/cccl/headers/include/thrust/system/tbb/detail/sort.h +60 -0
  1884. cuda/cccl/headers/include/thrust/system/tbb/detail/sort.inl +295 -0
  1885. cuda/cccl/headers/include/thrust/system/tbb/detail/swap_ranges.h +30 -0
  1886. cuda/cccl/headers/include/thrust/system/tbb/detail/tabulate.h +30 -0
  1887. cuda/cccl/headers/include/thrust/system/tbb/detail/temporary_buffer.h +29 -0
  1888. cuda/cccl/headers/include/thrust/system/tbb/detail/transform.h +30 -0
  1889. cuda/cccl/headers/include/thrust/system/tbb/detail/transform_reduce.h +30 -0
  1890. cuda/cccl/headers/include/thrust/system/tbb/detail/transform_scan.h +30 -0
  1891. cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_copy.h +30 -0
  1892. cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_fill.h +30 -0
  1893. cuda/cccl/headers/include/thrust/system/tbb/detail/unique.h +60 -0
  1894. cuda/cccl/headers/include/thrust/system/tbb/detail/unique.inl +71 -0
  1895. cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.h +67 -0
  1896. cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.inl +75 -0
  1897. cuda/cccl/headers/include/thrust/system/tbb/execution_policy.h +62 -0
  1898. cuda/cccl/headers/include/thrust/system/tbb/memory.h +111 -0
  1899. cuda/cccl/headers/include/thrust/system/tbb/memory_resource.h +75 -0
  1900. cuda/cccl/headers/include/thrust/system/tbb/pointer.h +124 -0
  1901. cuda/cccl/headers/include/thrust/system/tbb/vector.h +99 -0
  1902. cuda/cccl/headers/include/thrust/system_error.h +57 -0
  1903. cuda/cccl/headers/include/thrust/tabulate.h +125 -0
  1904. cuda/cccl/headers/include/thrust/transform.h +1045 -0
  1905. cuda/cccl/headers/include/thrust/transform_reduce.h +190 -0
  1906. cuda/cccl/headers/include/thrust/transform_scan.h +442 -0
  1907. cuda/cccl/headers/include/thrust/tuple.h +139 -0
  1908. cuda/cccl/headers/include/thrust/type_traits/integer_sequence.h +261 -0
  1909. cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +154 -0
  1910. cuda/cccl/headers/include/thrust/type_traits/is_execution_policy.h +65 -0
  1911. cuda/cccl/headers/include/thrust/type_traits/is_operator_less_or_greater_function_object.h +184 -0
  1912. cuda/cccl/headers/include/thrust/type_traits/is_operator_plus_function_object.h +116 -0
  1913. cuda/cccl/headers/include/thrust/type_traits/is_trivially_relocatable.h +336 -0
  1914. cuda/cccl/headers/include/thrust/type_traits/logical_metafunctions.h +42 -0
  1915. cuda/cccl/headers/include/thrust/type_traits/unwrap_contiguous_iterator.h +96 -0
  1916. cuda/cccl/headers/include/thrust/uninitialized_copy.h +300 -0
  1917. cuda/cccl/headers/include/thrust/uninitialized_fill.h +268 -0
  1918. cuda/cccl/headers/include/thrust/unique.h +1088 -0
  1919. cuda/cccl/headers/include/thrust/universal_allocator.h +93 -0
  1920. cuda/cccl/headers/include/thrust/universal_ptr.h +34 -0
  1921. cuda/cccl/headers/include/thrust/universal_vector.h +71 -0
  1922. cuda/cccl/headers/include/thrust/version.h +93 -0
  1923. cuda/cccl/headers/include/thrust/zip_function.h +176 -0
  1924. cuda/cccl/headers/include_paths.py +51 -0
  1925. cuda/cccl/parallel/__init__.py +9 -0
  1926. cuda/cccl/parallel/experimental/.gitignore +4 -0
  1927. cuda/cccl/parallel/experimental/__init__.py +73 -0
  1928. cuda/cccl/parallel/experimental/_bindings.py +79 -0
  1929. cuda/cccl/parallel/experimental/_bindings.pyi +405 -0
  1930. cuda/cccl/parallel/experimental/_bindings_impl.pyx +1984 -0
  1931. cuda/cccl/parallel/experimental/_caching.py +71 -0
  1932. cuda/cccl/parallel/experimental/_cccl_interop.py +422 -0
  1933. cuda/cccl/parallel/experimental/_utils/__init__.py +0 -0
  1934. cuda/cccl/parallel/experimental/_utils/protocols.py +132 -0
  1935. cuda/cccl/parallel/experimental/_utils/temp_storage_buffer.py +86 -0
  1936. cuda/cccl/parallel/experimental/algorithms/__init__.py +50 -0
  1937. cuda/cccl/parallel/experimental/algorithms/_histogram.py +243 -0
  1938. cuda/cccl/parallel/experimental/algorithms/_merge_sort.py +225 -0
  1939. cuda/cccl/parallel/experimental/algorithms/_radix_sort.py +312 -0
  1940. cuda/cccl/parallel/experimental/algorithms/_reduce.py +184 -0
  1941. cuda/cccl/parallel/experimental/algorithms/_scan.py +261 -0
  1942. cuda/cccl/parallel/experimental/algorithms/_segmented_reduce.py +257 -0
  1943. cuda/cccl/parallel/experimental/algorithms/_transform.py +308 -0
  1944. cuda/cccl/parallel/experimental/algorithms/_unique_by_key.py +252 -0
  1945. cuda/cccl/parallel/experimental/cccl/.gitkeep +0 -0
  1946. cuda/cccl/parallel/experimental/cu12/_bindings_impl.cpython-311-aarch64-linux-gnu.so +0 -0
  1947. cuda/cccl/parallel/experimental/cu12/cccl/libcccl.c.parallel.so +0 -0
  1948. cuda/cccl/parallel/experimental/cu13/_bindings_impl.cpython-311-aarch64-linux-gnu.so +0 -0
  1949. cuda/cccl/parallel/experimental/cu13/cccl/libcccl.c.parallel.so +0 -0
  1950. cuda/cccl/parallel/experimental/iterators/__init__.py +19 -0
  1951. cuda/cccl/parallel/experimental/iterators/_factories.py +191 -0
  1952. cuda/cccl/parallel/experimental/iterators/_iterators.py +612 -0
  1953. cuda/cccl/parallel/experimental/iterators/_zip_iterator.py +199 -0
  1954. cuda/cccl/parallel/experimental/numba_utils.py +53 -0
  1955. cuda/cccl/parallel/experimental/op.py +3 -0
  1956. cuda/cccl/parallel/experimental/struct.py +272 -0
  1957. cuda/cccl/parallel/experimental/typing.py +35 -0
  1958. cuda/cccl/py.typed +0 -0
  1959. cuda_cccl-0.1.3.2.0.dev438.dist-info/METADATA +42 -0
  1960. cuda_cccl-0.1.3.2.0.dev438.dist-info/RECORD +1962 -0
  1961. cuda_cccl-0.1.3.2.0.dev438.dist-info/WHEEL +5 -0
  1962. cuda_cccl-0.1.3.2.0.dev438.dist-info/licenses/LICENSE +1 -0
@@ -0,0 +1,1217 @@
1
+ /******************************************************************************
2
+ * Copyright (c) 2011, Duane Merrill. All rights reserved.
3
+ * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4
+ *
5
+ * Redistribution and use in source and binary forms, with or without
6
+ * modification, are permitted provided that the following conditions are met:
7
+ * * Redistributions of source code must retain the above copyright
8
+ * notice, this list of conditions and the following disclaimer.
9
+ * * Redistributions in binary form must reproduce the above copyright
10
+ * notice, this list of conditions and the following disclaimer in the
11
+ * documentation and/or other materials provided with the distribution.
12
+ * * Neither the name of the NVIDIA CORPORATION nor the
13
+ * names of its contributors may be used to endorse or promote products
14
+ * derived from this software without specific prior written permission.
15
+ *
16
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
+ * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
+ *
27
+ ******************************************************************************/
28
+
29
+ /**
30
+ * @file
31
+ * The cub::BlockDiscontinuity class provides [<em>collective</em>](../index.html#sec0) methods for
32
+ * flagging discontinuities within an ordered set of items partitioned across a CUDA thread block.
33
+ */
34
+
35
+ #pragma once
36
+
37
+ #include <cub/config.cuh>
38
+
39
+ #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
40
+ # pragma GCC system_header
41
+ #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
42
+ # pragma clang system_header
43
+ #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
44
+ # pragma system_header
45
+ #endif // no system header
46
+
47
+ #include <cub/util_ptx.cuh>
48
+ #include <cub/util_type.cuh>
49
+
50
+ CUB_NAMESPACE_BEGIN
51
+
52
+ //! @rst
53
+ //! The BlockDiscontinuity class provides :ref:`collective <collective-primitives>` methods for
54
+ //! flagging discontinuities within an ordered set of items partitioned across a CUDA thread
55
+ //! block.
56
+ //!
57
+ //! Overview
58
+ //! +++++++++++++++++++++++++++++++++++++++++++++
59
+ //!
60
+ //! - A set of "head flags" (or "tail flags") is often used to indicate corresponding items
61
+ //! that differ from their predecessors (or successors). For example, head flags are convenient
62
+ //! for demarcating disjoint data segments as part of a segmented scan or reduction.
63
+ //! - @blocked
64
+ //!
65
+ //! Performance Considerations
66
+ //! +++++++++++++++++++++++++++++++++++++++++++++
67
+ //!
68
+ //! - @granularity
69
+ //! - Incurs zero bank conflicts for most types
70
+ //!
71
+ //! A Simple Example
72
+ //! +++++++++++++++++++++++++++++++++++++++++++++
73
+ //!
74
+ //! @blockcollective{BlockDiscontinuity}
75
+ //!
76
+ //! The code snippet below illustrates the head flagging of 512 integer items that
77
+ //! are partitioned in a :ref:`blocked arrangement <flexible-data-arrangement>` across 128 threads
78
+ //! where each thread owns 4 consecutive items.
79
+ //!
80
+ //! .. code-block:: c++
81
+ //!
82
+ //! #include <cub/cub.cuh> // or equivalently <cub/block/block_discontinuity.cuh>
83
+ //!
84
+ //! __global__ void ExampleKernel(...)
85
+ //! {
86
+ //! // Specialize BlockDiscontinuity for a 1D block of 128 threads of type int
87
+ //! using BlockDiscontinuity = cub::BlockDiscontinuity<int, 128>;
88
+ //!
89
+ //! // Allocate shared memory for BlockDiscontinuity
90
+ //! __shared__ typename BlockDiscontinuity::TempStorage temp_storage;
91
+ //!
92
+ //! // Obtain a segment of consecutive items that are blocked across threads
93
+ //! int thread_data[4];
94
+ //! ...
95
+ //!
96
+ //! // Collectively compute head flags for discontinuities in the segment
97
+ //! int head_flags[4];
98
+ //! BlockDiscontinuity(temp_storage).FlagHeads(head_flags, thread_data, cub::Inequality());
99
+ //!
100
+ //! Suppose the set of input ``thread_data`` across the block of threads is
101
+ //! ``{ [0,0,1,1], [1,1,1,1], [2,3,3,3], [3,4,4,4], ... }``.
102
+ //! The corresponding output ``head_flags`` in those threads will be
103
+ //! ``{ [1,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }``.
104
+ //!
105
+ //! Re-using dynamically allocating shared memory
106
+ //! +++++++++++++++++++++++++++++++++++++++++++++
107
+ //!
108
+ //! The ``examples/block/example_block_reduce_dyn_smem.cu`` example illustrates usage of
109
+ //! dynamically shared memory with BlockReduce and how to re-purpose the same memory region.
110
+ //! This example can be easily adapted to the storage required by BlockDiscontinuity.
111
+ //! @endrst
112
+ //!
113
+ //! @tparam T
114
+ //! The data type to be flagged.
115
+ //!
116
+ //! @tparam BLOCK_DIM_X
117
+ //! The thread block length in threads along the X dimension
118
+ //!
119
+ //! @tparam BLOCK_DIM_Y
120
+ //! **[optional]** The thread block length in threads along the Y dimension (default: 1)
121
+ //!
122
+ //! @tparam BLOCK_DIM_Z
123
+ //! **[optional]** The thread block length in threads along the Z dimension (default: 1)
124
+ //!
125
+ template <typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1>
126
+ class BlockDiscontinuity
127
+ {
128
+ private:
129
+ enum
130
+ {
131
+ /// The thread block size in threads
132
+ BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z,
133
+ };
134
+
135
+ /// Shared memory storage layout type (last element from each thread's input)
136
+ struct _TempStorage
137
+ {
138
+ T first_items[BLOCK_THREADS];
139
+ T last_items[BLOCK_THREADS];
140
+ };
141
+
142
+ /// Internal storage allocator
143
+ _CCCL_DEVICE _CCCL_FORCEINLINE _TempStorage& PrivateStorage()
144
+ {
145
+ __shared__ _TempStorage private_storage;
146
+ return private_storage;
147
+ }
148
+
149
+ /// Specialization for when FlagOp has third index param
150
+ template <typename FlagOp, bool HAS_PARAM = BinaryOpHasIdxParam<T, FlagOp>::value>
151
+ struct ApplyOp
152
+ {
153
+ // Apply flag operator
154
+ static _CCCL_DEVICE _CCCL_FORCEINLINE bool FlagT(FlagOp flag_op, const T& a, const T& b, int idx)
155
+ {
156
+ return flag_op(a, b, idx);
157
+ }
158
+ };
159
+
160
+ /// Specialization for when FlagOp does not have a third index param
161
+ template <typename FlagOp>
162
+ struct ApplyOp<FlagOp, false>
163
+ {
164
+ // Apply flag operator
165
+ static _CCCL_DEVICE _CCCL_FORCEINLINE bool FlagT(FlagOp flag_op, const T& a, const T& b, int /*idx*/)
166
+ {
167
+ return flag_op(a, b);
168
+ }
169
+ };
170
+
171
+ /// Templated unrolling of item comparison (inductive case)
172
+ struct Iterate
173
+ {
174
+ /**
175
+ * @brief Head flags
176
+ *
177
+ * @param[out] flags
178
+ * Calling thread's discontinuity head_flags
179
+ *
180
+ * @param[in] input
181
+ * Calling thread's input items
182
+ *
183
+ * @param[out] preds
184
+ * Calling thread's predecessor items
185
+ *
186
+ * @param[in] flag_op
187
+ * Binary boolean flag predicate
188
+ */
189
+ template <int ITEMS_PER_THREAD, typename FlagT, typename FlagOp>
190
+ static _CCCL_DEVICE _CCCL_FORCEINLINE void FlagHeads(
191
+ int linear_tid,
192
+ FlagT (&flags)[ITEMS_PER_THREAD],
193
+ T (&input)[ITEMS_PER_THREAD],
194
+ T (&preds)[ITEMS_PER_THREAD],
195
+ FlagOp flag_op)
196
+ {
197
+ _CCCL_PRAGMA_UNROLL_FULL()
198
+ for (int i = 1; i < ITEMS_PER_THREAD; ++i)
199
+ {
200
+ preds[i] = input[i - 1];
201
+ flags[i] = ApplyOp<FlagOp>::FlagT(flag_op, preds[i], input[i], (linear_tid * ITEMS_PER_THREAD) + i);
202
+ }
203
+ }
204
+
205
+ /**
206
+ * @brief Tail flags
207
+ *
208
+ * @param[out] flags
209
+ * Calling thread's discontinuity head_flags
210
+ *
211
+ * @param[in] input
212
+ * Calling thread's input items
213
+ *
214
+ * @param[in] flag_op
215
+ * Binary boolean flag predicate
216
+ */
217
+ template <int ITEMS_PER_THREAD, typename FlagT, typename FlagOp>
218
+ static _CCCL_DEVICE _CCCL_FORCEINLINE void
219
+ FlagTails(int linear_tid, FlagT (&flags)[ITEMS_PER_THREAD], T (&input)[ITEMS_PER_THREAD], FlagOp flag_op)
220
+ {
221
+ _CCCL_PRAGMA_UNROLL_FULL()
222
+ for (int i = 0; i < ITEMS_PER_THREAD - 1; ++i)
223
+ {
224
+ flags[i] = ApplyOp<FlagOp>::FlagT(flag_op, input[i], input[i + 1], (linear_tid * ITEMS_PER_THREAD) + i + 1);
225
+ }
226
+ }
227
+ };
228
+
229
+ /******************************************************************************
230
+ * Thread fields
231
+ ******************************************************************************/
232
+
233
+ /// Shared storage reference
234
+ _TempStorage& temp_storage;
235
+
236
+ /// Linear thread-id
237
+ unsigned int linear_tid;
238
+
239
+ public:
240
+ /// @smemstorage{BlockDiscontinuity}
241
+ struct TempStorage : Uninitialized<_TempStorage>
242
+ {};
243
+
244
+ //! @name Collective constructors
245
+ //! @{
246
+
247
+ /**
248
+ * @brief Collective constructor using a private static allocation of shared memory as temporary
249
+ * storage.
250
+ */
251
+ _CCCL_DEVICE _CCCL_FORCEINLINE BlockDiscontinuity()
252
+ : temp_storage(PrivateStorage())
253
+ , linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z))
254
+ {}
255
+
256
+ /**
257
+ * @brief Collective constructor using the specified memory allocation as temporary storage.
258
+ *
259
+ * @param[in] temp_storage
260
+ * Reference to memory allocation having layout type TempStorage
261
+ */
262
+ _CCCL_DEVICE _CCCL_FORCEINLINE BlockDiscontinuity(TempStorage& temp_storage)
263
+ : temp_storage(temp_storage.Alias())
264
+ , linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z))
265
+ {}
266
+
267
+ //! @} end member group
268
+ //! @name Head flag operations
269
+ //! @{
270
+
271
+ #ifndef _CCCL_DOXYGEN_INVOKED // Do not document
272
+
273
+ /**
274
+ * @param[out] head_flags
275
+ * Calling thread's discontinuity head_flags
276
+ *
277
+ * @param[in] input
278
+ * Calling thread's input items
279
+ *
280
+ * @param[out] preds
281
+ * Calling thread's predecessor items
282
+ *
283
+ * @param[in] flag_op
284
+ * Binary boolean flag predicate
285
+ */
286
+ template <int ITEMS_PER_THREAD, typename FlagT, typename FlagOp>
287
+ _CCCL_DEVICE _CCCL_FORCEINLINE void FlagHeads(
288
+ FlagT (&head_flags)[ITEMS_PER_THREAD], T (&input)[ITEMS_PER_THREAD], T (&preds)[ITEMS_PER_THREAD], FlagOp flag_op)
289
+ {
290
+ // Share last item
291
+ temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1];
292
+
293
+ __syncthreads();
294
+
295
+ if (linear_tid == 0)
296
+ {
297
+ // Set flag for first thread-item (preds[0] is undefined)
298
+ head_flags[0] = 1;
299
+ }
300
+ else
301
+ {
302
+ preds[0] = temp_storage.last_items[linear_tid - 1];
303
+ head_flags[0] = ApplyOp<FlagOp>::FlagT(flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD);
304
+ }
305
+
306
+ // Set head_flags for remaining items
307
+ Iterate::FlagHeads(linear_tid, head_flags, input, preds, flag_op);
308
+ }
309
+
310
+ /**
311
+ * @param[out] head_flags
312
+ * Calling thread's discontinuity head_flags
313
+ *
314
+ * @param[in] input
315
+ * Calling thread's input items
316
+ *
317
+ * @param[out] preds
318
+ * Calling thread's predecessor items
319
+ *
320
+ * @param[in] flag_op
321
+ * Binary boolean flag predicate
322
+ *
323
+ * @param[in] tile_predecessor_item
324
+ * <b>[<em>thread</em><sub>0</sub> only]</b> Item with which to compare the first tile item
325
+ * (<tt>input<sub>0</sub></tt> from <em>thread</em><sub>0</sub>).
326
+ */
327
+ template <int ITEMS_PER_THREAD, typename FlagT, typename FlagOp>
328
+ _CCCL_DEVICE _CCCL_FORCEINLINE void FlagHeads(
329
+ FlagT (&head_flags)[ITEMS_PER_THREAD],
330
+ T (&input)[ITEMS_PER_THREAD],
331
+ T (&preds)[ITEMS_PER_THREAD],
332
+ FlagOp flag_op,
333
+ T tile_predecessor_item)
334
+ {
335
+ // Share last item
336
+ temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1];
337
+
338
+ __syncthreads();
339
+
340
+ // Set flag for first thread-item
341
+ preds[0] = (linear_tid == 0) ? tile_predecessor_item : // First thread
342
+ temp_storage.last_items[linear_tid - 1];
343
+
344
+ head_flags[0] = ApplyOp<FlagOp>::FlagT(flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD);
345
+
346
+ // Set head_flags for remaining items
347
+ Iterate::FlagHeads(linear_tid, head_flags, input, preds, flag_op);
348
+ }
349
+
350
+ #endif // _CCCL_DOXYGEN_INVOKED
351
+
352
+ //! @rst
353
+ //! Sets head flags indicating discontinuities between items partitioned across the thread
354
+ //! block, for which the first item has no reference and is always flagged.
355
+ //!
356
+ //! - The flag ``head_flags[i]`` is set for item ``input[i]`` when ``flag_op(previous-item, input[i])`` returns
357
+ //! ``true`` (where ``previous-item`` is either the preceding item in the same thread or the last item in
358
+ //! the previous thread).
359
+ //! - For *thread*\ :sub:`0`, item ``input[0]`` is always flagged.
360
+ //! - @blocked
361
+ //! - @granularity
362
+ //! - @smemreuse
363
+ //!
364
+ //! Snippet
365
+ //! +++++++
366
+ //!
367
+ //! The code snippet below illustrates the head-flagging of 512 integer items that
368
+ //! are partitioned in a :ref:`blocked arrangement <flexible-data-arrangement>` across 128 threads
369
+ //! where each thread owns 4 consecutive items.
370
+ //!
371
+ //! .. code-block:: c++
372
+ //!
373
+ //! #include <cub/cub.cuh> // or equivalently <cub/block/block_discontinuity.cuh>
374
+ //!
375
+ //! __global__ void ExampleKernel(...)
376
+ //! {
377
+ //! // Specialize BlockDiscontinuity for a 1D block of 128 threads of type int
378
+ //! using BlockDiscontinuity = cub::BlockDiscontinuity<int, 128>;
379
+ //!
380
+ //! // Allocate shared memory for BlockDiscontinuity
381
+ //! __shared__ typename BlockDiscontinuity::TempStorage temp_storage;
382
+ //!
383
+ //! // Obtain a segment of consecutive items that are blocked across threads
384
+ //! int thread_data[4];
385
+ //! ...
386
+ //!
387
+ //! // Collectively compute head flags for discontinuities in the segment
388
+ //! int head_flags[4];
389
+ //! BlockDiscontinuity(temp_storage).FlagHeads(head_flags, thread_data, cub::Inequality());
390
+ //!
391
+ //! Suppose the set of input ``thread_data`` across the block of threads is
392
+ //! ``{ [0,0,1,1], [1,1,1,1], [2,3,3,3], [3,4,4,4], ... }``.
393
+ //! The corresponding output ``head_flags`` in those threads will be
394
+ //! ``{ [1,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }``.
395
+ //! @endrst
396
+ //!
397
+ //! @tparam ITEMS_PER_THREAD
398
+ //! **[inferred]** The number of consecutive items partitioned onto each thread
399
+ //!
400
+ //! @tparam FlagT
401
+ //! **[inferred]** The flag type (must be an integer type)
402
+ //!
403
+ //! @tparam FlagOp
404
+ //! **[inferred]** Binary predicate functor type having member
405
+ //! `T operator()(const T &a, const T &b)` or member
406
+ //! `T operator()(const T &a, const T &b, unsigned int b_index)`, and returning `true`
407
+ //! if a discontinuity exists between `a` and `b`, otherwise `false`.
408
+ //! `b_index` is the rank of b in the aggregate tile of data.
409
+ //!
410
+ //! @param[out] head_flags
411
+ //! Calling thread's discontinuity head_flags
412
+ //!
413
+ //! @param[in] input
414
+ //! Calling thread's input items
415
+ //!
416
+ //! @param[in] flag_op
417
+ //! Binary boolean flag predicate
418
+ template <int ITEMS_PER_THREAD, typename FlagT, typename FlagOp>
419
+ _CCCL_DEVICE _CCCL_FORCEINLINE void
420
+ FlagHeads(FlagT (&head_flags)[ITEMS_PER_THREAD], T (&input)[ITEMS_PER_THREAD], FlagOp flag_op)
421
+ {
422
+ T preds[ITEMS_PER_THREAD];
423
+ FlagHeads(head_flags, input, preds, flag_op);
424
+ }
425
+
426
+ //! @rst
427
+ //! Sets head flags indicating discontinuities between items partitioned across the thread block.
428
+ //!
429
+ //! - The flag ``head_flags[i]`` is set for item ``input[i]`` when ``flag_op(previous-item, input[i])``
430
+ //! returns ``true`` (where ``previous-item`` is either the preceding item in the same thread or the last item
431
+ //! in the previous thread).
432
+ //! - For *thread*\ :sub:`0`, item ``input[0]`` is compared against ``tile_predecessor_item``.
433
+ //! - @blocked
434
+ //! - @granularity
435
+ //! - @smemreuse
436
+ //!
437
+ //! Snippet
438
+ //! +++++++
439
+ //!
440
+ //! The code snippet below illustrates the head-flagging of 512 integer items that
441
+ //! are partitioned in a :ref:`blocked arrangement <flexible-data-arrangement>` across 128 threads
442
+ //! where each thread owns 4 consecutive items.
443
+ //!
444
+ //! .. code-block:: c++
445
+ //!
446
+ //! #include <cub/cub.cuh> // or equivalently <cub/block/block_discontinuity.cuh>
447
+ //!
448
+ //! __global__ void ExampleKernel(...)
449
+ //! {
450
+ //! // Specialize BlockDiscontinuity for a 1D block of 128 threads of type int
451
+ //! using BlockDiscontinuity = cub::BlockDiscontinuity<int, 128>;
452
+ //!
453
+ //! // Allocate shared memory for BlockDiscontinuity
454
+ //! __shared__ typename BlockDiscontinuity::TempStorage temp_storage;
455
+ //!
456
+ //! // Obtain a segment of consecutive items that are blocked across threads
457
+ //! int thread_data[4];
458
+ //! ...
459
+ //!
460
+ //! // Have thread0 obtain the predecessor item for the entire tile
461
+ //! int tile_predecessor_item;
462
+ //! if (threadIdx.x == 0) tile_predecessor_item == ...
463
+ //!
464
+ //! // Collectively compute head flags for discontinuities in the segment
465
+ //! int head_flags[4];
466
+ //! BlockDiscontinuity(temp_storage).FlagHeads(
467
+ //! head_flags, thread_data, cub::Inequality(), tile_predecessor_item);
468
+ //!
469
+ //! Suppose the set of input ``thread_data`` across the block of threads is
470
+ //! ``{ [0,0,1,1], [1,1,1,1], [2,3,3,3], [3,4,4,4], ... }``,
471
+ //! and that ``tile_predecessor_item`` is ``0``. The corresponding output ``head_flags`` in those
472
+ //! threads will be ``{ [0,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }``.
473
+ //! @endrst
474
+ //!
475
+ //! @tparam ITEMS_PER_THREAD
476
+ //! **[inferred]** The number of consecutive items partitioned onto each thread.
477
+ //!
478
+ //! @tparam FlagT
479
+ //! **[inferred]** The flag type (must be an integer type)
480
+ //!
481
+ //! @tparam FlagOp
482
+ //! **[inferred]** Binary predicate functor type having member
483
+ //! `T operator()(const T &a, const T &b)` or member
484
+ //! `T operator()(const T &a, const T &b, unsigned int b_index)`,
485
+ //! and returning `true` if a discontinuity exists between `a` and `b`,
486
+ //! otherwise `false`. `b_index` is the rank of b in the aggregate tile of data.
487
+ //!
488
+ //! @param[out] head_flags
489
+ //! Calling thread's discontinuity `head_flags`
490
+ //!
491
+ //! @param[in] input
492
+ //! Calling thread's input items
493
+ //!
494
+ //! @param[in] flag_op
495
+ //! Binary boolean flag predicate
496
+ //!
497
+ //! @param[in] tile_predecessor_item
498
+ //! @rst
499
+ //! *thread*\ :sub:`0` only item with which to compare the first tile item (``input[0]`` from *thread*\ :sub:`0`).
500
+ //! @endrst
501
+ template <int ITEMS_PER_THREAD, typename FlagT, typename FlagOp>
502
+ _CCCL_DEVICE _CCCL_FORCEINLINE void FlagHeads(
503
+ FlagT (&head_flags)[ITEMS_PER_THREAD], T (&input)[ITEMS_PER_THREAD], FlagOp flag_op, T tile_predecessor_item)
504
+ {
505
+ T preds[ITEMS_PER_THREAD];
506
+ FlagHeads(head_flags, input, preds, flag_op, tile_predecessor_item);
507
+ }
508
+
509
+ //! @} end member group
510
+ //! @name Tail flag operations
511
+ //! @{
512
+
513
+ //! @rst
514
+ //! Sets tail flags indicating discontinuities between items partitioned across the thread
515
+ //! block, for which the last item has no reference and is always flagged.
516
+ //!
517
+ //! - The flag ``tail_flags[i]`` is set for item ``input[i]`` when
518
+ //! ``flag_op(input[i], next-item)``
519
+ //! returns ``true`` (where `next-item` is either the next item
520
+ //! in the same thread or the first item in the next thread).
521
+ //! - For *thread*\ :sub:`BLOCK_THREADS - 1`, item ``input[ITEMS_PER_THREAD - 1]`` is always flagged.
522
+ //! - @blocked
523
+ //! - @granularity
524
+ //! - @smemreuse
525
+ //!
526
+ //! Snippet
527
+ //! +++++++
528
+ //!
529
+ //! The code snippet below illustrates the tail-flagging of 512 integer items that
530
+ //! are partitioned in a :ref:`blocked arrangement <flexible-data-arrangement>` across 128 threads
531
+ //! where each thread owns 4 consecutive items.
532
+ //!
533
+ //! .. code-block:: c++
534
+ //!
535
+ //! #include <cub/cub.cuh> // or equivalently <cub/block/block_discontinuity.cuh>
536
+ //!
537
+ //! __global__ void ExampleKernel(...)
538
+ //! {
539
+ //! // Specialize BlockDiscontinuity for a 1D block of 128 threads of type int
540
+ //! using BlockDiscontinuity = cub::BlockDiscontinuity<int, 128>;
541
+ //!
542
+ //! // Allocate shared memory for BlockDiscontinuity
543
+ //! __shared__ typename BlockDiscontinuity::TempStorage temp_storage;
544
+ //!
545
+ //! // Obtain a segment of consecutive items that are blocked across threads
546
+ //! int thread_data[4];
547
+ //! ...
548
+ //!
549
+ //! // Collectively compute tail flags for discontinuities in the segment
550
+ //! int tail_flags[4];
551
+ //! BlockDiscontinuity(temp_storage).FlagTails(tail_flags, thread_data, cub::Inequality());
552
+ //!
553
+ //! Suppose the set of input ``thread_data`` across the block of threads is
554
+ //! ``{ [0,0,1,1], [1,1,1,1], [2,3,3,3], ..., [124,125,125,125] }``.
555
+ //! The corresponding output ``tail_flags`` in those threads will be
556
+ //! ``{ [0,1,0,0], [0,0,0,1], [1,0,0,...], ..., [1,0,0,1] }``.
557
+ //! @endrst
558
+ //!
559
+ //! @tparam ITEMS_PER_THREAD
560
+ //! **[inferred]** The number of consecutive items partitioned onto each thread.
561
+ //!
562
+ //! @tparam FlagT
563
+ //! **[inferred]** The flag type (must be an integer type)
564
+ //!
565
+ //! @tparam FlagOp
566
+ //! **[inferred]** Binary predicate functor type having member
567
+ //! `T operator()(const T &a, const T &b)` or member
568
+ //! `T operator()(const T &a, const T &b, unsigned int b_index)`, and returning `true`
569
+ //! if a discontinuity exists between `a` and `b`, otherwise `false`. `b_index` is the
570
+ //! rank of `b` in the aggregate tile of data.
571
+ //!
572
+ //! @param[out] tail_flags
573
+ //! Calling thread's discontinuity tail_flags
574
+ //!
575
+ //! @param[in] input
576
+ //! Calling thread's input items
577
+ //!
578
+ //! @param[in] flag_op
579
+ //! Binary boolean flag predicate
580
+ template <int ITEMS_PER_THREAD, typename FlagT, typename FlagOp>
581
+ _CCCL_DEVICE _CCCL_FORCEINLINE void
582
+ FlagTails(FlagT (&tail_flags)[ITEMS_PER_THREAD], T (&input)[ITEMS_PER_THREAD], FlagOp flag_op)
583
+ {
584
+ // Share first item
585
+ temp_storage.first_items[linear_tid] = input[0];
586
+
587
+ __syncthreads();
588
+
589
+ // Set flag for last thread-item
590
+ tail_flags[ITEMS_PER_THREAD - 1] =
591
+ (linear_tid == BLOCK_THREADS - 1) ? 1 : // Last thread
592
+ ApplyOp<FlagOp>::FlagT(
593
+ flag_op,
594
+ input[ITEMS_PER_THREAD - 1],
595
+ temp_storage.first_items[linear_tid + 1],
596
+ (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD);
597
+
598
+ // Set tail_flags for remaining items
599
+ Iterate::FlagTails(linear_tid, tail_flags, input, flag_op);
600
+ }
601
+
602
+ //! @rst
603
+ //! Sets tail flags indicating discontinuities between items partitioned across the thread block.
604
+ //!
605
+ //! - The flag ``tail_flags[i]`` is set for item ``input[i]`` when ``flag_op(input[i], next-item)``
606
+ //! returns ``true`` (where ``next-item`` is either the next item in the same thread or the first item in
607
+ //! the next thread).
608
+ //! - For *thread*\ :sub:`BLOCK_THREADS - 1`, item ``input[ITEMS_PER_THREAD - 1]`` is compared against
609
+ //! ``tile_successor_item``.
610
+ //! - @blocked
611
+ //! - @granularity
612
+ //! - @smemreuse
613
+ //!
614
+ //! Snippet
615
+ //! +++++++
616
+ //!
617
+ //! The code snippet below illustrates the tail-flagging of 512 integer items that
618
+ //! are partitioned in a :ref:`blocked arrangement <flexible-data-arrangement>` across 128 threads
619
+ //! where each thread owns 4 consecutive items.
620
+ //!
621
+ //! .. code-block:: c++
622
+ //!
623
+ //! #include <cub/cub.cuh> // or equivalently <cub/block/block_discontinuity.cuh>
624
+ //!
625
+ //! __global__ void ExampleKernel(...)
626
+ //! {
627
+ //! // Specialize BlockDiscontinuity for a 1D block of 128 threads of type int
628
+ //! using BlockDiscontinuity = cub::BlockDiscontinuity<int, 128>;
629
+ //!
630
+ //! // Allocate shared memory for BlockDiscontinuity
631
+ //! __shared__ typename BlockDiscontinuity::TempStorage temp_storage;
632
+ //!
633
+ //! // Obtain a segment of consecutive items that are blocked across threads
634
+ //! int thread_data[4];
635
+ //! ...
636
+ //!
637
+ //! // Have thread127 obtain the successor item for the entire tile
638
+ //! int tile_successor_item;
639
+ //! if (threadIdx.x == 127) tile_successor_item == ...
640
+ //!
641
+ //! // Collectively compute tail flags for discontinuities in the segment
642
+ //! int tail_flags[4];
643
+ //! BlockDiscontinuity(temp_storage).FlagTails(
644
+ //! tail_flags, thread_data, cub::Inequality(), tile_successor_item);
645
+ //!
646
+ //! Suppose the set of input ``thread_data`` across the block of threads is
647
+ //! ``{ [0,0,1,1], [1,1,1,1], [2,3,3,3], ..., [124,125,125,125] }``
648
+ //! and that ``tile_successor_item`` is ``125``. The corresponding output ``tail_flags`` in those
649
+ //! threads will be ``{ [0,1,0,0], [0,0,0,1], [1,0,0,...], ..., [1,0,0,0] }``.
650
+ //! @endrst
651
+ //!
652
+ //! @tparam ITEMS_PER_THREAD
653
+ //! **[inferred]** The number of consecutive items partitioned onto each thread.
654
+ //!
655
+ //! @tparam FlagT
656
+ //! **[inferred]** The flag type (must be an integer type)
657
+ //!
658
+ //! @tparam FlagOp
659
+ //! **[inferred]** Binary predicate functor type having member
660
+ //! `T operator()(const T &a, const T &b)` or member
661
+ //! `T operator()(const T &a, const T &b, unsigned int b_index)`, and returning `true`
662
+ //! if a discontinuity exists between `a` and `b`, otherwise `false`. `b_index` is the
663
+ //! rank of `b` in the aggregate tile of data.
664
+ //!
665
+ //! @param[out] tail_flags
666
+ //! Calling thread's discontinuity tail_flags
667
+ //!
668
+ //! @param[in] input
669
+ //! Calling thread's input items
670
+ //!
671
+ //! @param[in] flag_op
672
+ //! Binary boolean flag predicate
673
+ //!
674
+ //! @param[in] tile_successor_item
675
+ //! @rst
676
+ //! *thread*\ :sub:`BLOCK_THREADS - 1` only item with which to
677
+ //! compare the last tile item (``input[ITEMS_PER_THREAD - 1]`` from
678
+ //! *thread*\ :sub:`BLOCK_THREADS - 1`).
679
+ //! @endrst
680
+ template <int ITEMS_PER_THREAD, typename FlagT, typename FlagOp>
681
+ _CCCL_DEVICE _CCCL_FORCEINLINE void
682
+ FlagTails(FlagT (&tail_flags)[ITEMS_PER_THREAD], T (&input)[ITEMS_PER_THREAD], FlagOp flag_op, T tile_successor_item)
683
+ {
684
+ // Share first item
685
+ temp_storage.first_items[linear_tid] = input[0];
686
+
687
+ __syncthreads();
688
+
689
+ // Set flag for last thread-item
690
+ T successor_item = (linear_tid == BLOCK_THREADS - 1) ? tile_successor_item : // Last thread
691
+ temp_storage.first_items[linear_tid + 1];
692
+
693
+ tail_flags[ITEMS_PER_THREAD - 1] = ApplyOp<FlagOp>::FlagT(
694
+ flag_op, input[ITEMS_PER_THREAD - 1], successor_item, (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD);
695
+
696
+ // Set tail_flags for remaining items
697
+ Iterate::FlagTails(linear_tid, tail_flags, input, flag_op);
698
+ }
699
+
700
+ //! @} end member group
701
+ //! @name Head & tail flag operations
702
+ //! @{
703
+
704
+ //! @rst
705
+ //! Sets both head and tail flags indicating discontinuities between items partitioned across the thread block.
706
+ //!
707
+ //! - The flag ``head_flags[i]`` is set for item ``input[i]`` when ``flag_op(previous-item, input[i])`` returns
708
+ //! ``true`` (where ``previous-item`` is either the preceding item in the same thread or the last item in
709
+ //! the previous thread).
710
+ //! - For *thread*\ :sub:`0`, item ``input[0]`` is always flagged.
711
+ //! - The flag ``tail_flags[i]`` is set for item ``input[i]`` when ``flag_op(input[i], next-item)``
712
+ //! returns ``true`` (where next-item is either the next item in the same thread or the first item in
713
+ //! the next thread).
714
+ //! - For *thread*\ :sub:`BLOCK_THREADS - 1`, item ``input[ITEMS_PER_THREAD - 1]`` is always flagged.
715
+ //! - @blocked
716
+ //! - @granularity
717
+ //! - @smemreuse
718
+ //!
719
+ //! Snippet
720
+ //! +++++++
721
+ //!
722
+ //! The code snippet below illustrates the head- and tail-flagging of 512 integer items that
723
+ //! are partitioned in a :ref:`blocked arrangement <flexible-data-arrangement>` across 128 threads
724
+ //! where each thread owns 4 consecutive items.
725
+ //!
726
+ //! .. code-block:: c++
727
+ //!
728
+ //! #include <cub/cub.cuh> // or equivalently <cub/block/block_discontinuity.cuh>
729
+ //!
730
+ //! __global__ void ExampleKernel(...)
731
+ //! {
732
+ //! // Specialize BlockDiscontinuity for a 1D block of 128 threads of type int
733
+ //! using BlockDiscontinuity = cub::BlockDiscontinuity<int, 128>;
734
+ //!
735
+ //! // Allocate shared memory for BlockDiscontinuity
736
+ //! __shared__ typename BlockDiscontinuity::TempStorage temp_storage;
737
+ //!
738
+ //! // Obtain a segment of consecutive items that are blocked across threads
739
+ //! int thread_data[4];
740
+ //! ...
741
+ //!
742
+ //! // Collectively compute head and flags for discontinuities in the segment
743
+ //! int head_flags[4];
744
+ //! int tail_flags[4];
745
+ //! BlockDiscontinuity(temp_storage).FlagHeadsAndTails(
746
+ //! head_flags, tail_flags, thread_data, cub::Inequality());
747
+ //!
748
+ //! Suppose the set of input ``thread_data`` across the block of threads is
749
+ //! ``{ [0,0,1,1], [1,1,1,1], [2,3,3,3], ..., [124,125,125,125] }``
750
+ //! and that the tile_successor_item is ``125``. The corresponding output ``head_flags``
751
+ //! in those threads will be ``{ [1,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }``.
752
+ //! and the corresponding output ``tail_flags`` in those threads will be
753
+ //! ``{ [0,1,0,0], [0,0,0,1], [1,0,0,...], ..., [1,0,0,1] }``.
754
+ //! @endrst
755
+ //!
756
+ //! @tparam ITEMS_PER_THREAD
757
+ //! **[inferred]** The number of consecutive items partitioned onto each thread.
758
+ //!
759
+ //! @tparam FlagT
760
+ //! **[inferred]** The flag type (must be an integer type)
761
+ //!
762
+ //! @tparam FlagOp
763
+ //! **[inferred]** Binary predicate functor type having member
764
+ //! `T operator()(const T &a, const T &b)` or member
765
+ //! `T operator()(const T &a, const T &b, unsigned int b_index)`, and returning `true`
766
+ //! if a discontinuity exists between `a` and `b`, otherwise `false`. `b_index` is the
767
+ //! rank of `b` in the aggregate tile of data.
768
+ //!
769
+ //! @param[out] head_flags
770
+ //! Calling thread's discontinuity head_flags
771
+ //!
772
+ //! @param[out] tail_flags
773
+ //! Calling thread's discontinuity tail_flags
774
+ //!
775
+ //! @param[in] input
776
+ //! Calling thread's input items
777
+ //!
778
+ //! @param[in] flag_op
779
+ //! Binary boolean flag predicate
780
+ template <int ITEMS_PER_THREAD, typename FlagT, typename FlagOp>
781
+ _CCCL_DEVICE _CCCL_FORCEINLINE void FlagHeadsAndTails(
782
+ FlagT (&head_flags)[ITEMS_PER_THREAD],
783
+ FlagT (&tail_flags)[ITEMS_PER_THREAD],
784
+ T (&input)[ITEMS_PER_THREAD],
785
+ FlagOp flag_op)
786
+ {
787
+ // Share first and last items
788
+ temp_storage.first_items[linear_tid] = input[0];
789
+ temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1];
790
+
791
+ __syncthreads();
792
+
793
+ T preds[ITEMS_PER_THREAD];
794
+
795
+ // Set flag for first thread-item
796
+ if (linear_tid == 0)
797
+ {
798
+ head_flags[0] = 1;
799
+ }
800
+ else
801
+ {
802
+ preds[0] = temp_storage.last_items[linear_tid - 1];
803
+ head_flags[0] = ApplyOp<FlagOp>::FlagT(flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD);
804
+ }
805
+
806
+ // Set flag for last thread-item
807
+ tail_flags[ITEMS_PER_THREAD - 1] =
808
+ (linear_tid == BLOCK_THREADS - 1) ? 1 : // Last thread
809
+ ApplyOp<FlagOp>::FlagT(
810
+ flag_op,
811
+ input[ITEMS_PER_THREAD - 1],
812
+ temp_storage.first_items[linear_tid + 1],
813
+ (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD);
814
+
815
+ // Set head_flags for remaining items
816
+ Iterate::FlagHeads(linear_tid, head_flags, input, preds, flag_op);
817
+
818
+ // Set tail_flags for remaining items
819
+ Iterate::FlagTails(linear_tid, tail_flags, input, flag_op);
820
+ }
821
+
822
+ //! @rst
823
+ //! Sets both head and tail flags indicating discontinuities between items partitioned across the thread block.
824
+ //!
825
+ //! - The flag ``head_flags[i]`` is set for item ``input[i]`` when
826
+ //! ``flag_op(previous-item, input[i])`` returns ``true`` (where ``previous-item`` is either the preceding item
827
+ //! in the same thread or the last item in the previous thread).
828
+ //! - For *thread*\ :sub:`0`, item ``input[0]`` is always flagged.
829
+ //! - The flag ``tail_flags[i]`` is set for item ``input[i]`` when ``flag_op(input[i], next-item)`` returns ``true``
830
+ //! (where ``next-item`` is either the next item in the same thread or the first item in the next thread).
831
+ //! - For *thread*\ :sub:`BLOCK_THREADS - 1`, item ``input[ITEMS_PER_THREAD - 1]`` is compared
832
+ //! against ``tile_predecessor_item``.
833
+ //! - @blocked
834
+ //! - @granularity
835
+ //! - @smemreuse
836
+ //!
837
+ //! Snippet
838
+ //! +++++++
839
+ //!
840
+ //! The code snippet below illustrates the head- and tail-flagging of 512 integer items that
841
+ //! are partitioned in a :ref:`blocked arrangement <flexible-data-arrangement>` across 128 threads
842
+ //! where each thread owns 4 consecutive items.
843
+ //!
844
+ //! .. code-block:: c++
845
+ //!
846
+ //! #include <cub/cub.cuh> // or equivalently <cub/block/block_discontinuity.cuh>
847
+ //!
848
+ //! __global__ void ExampleKernel(...)
849
+ //! {
850
+ //! // Specialize BlockDiscontinuity for a 1D block of 128 threads of type int
851
+ //! using BlockDiscontinuity = cub::BlockDiscontinuity<int, 128>;
852
+ //!
853
+ //! // Allocate shared memory for BlockDiscontinuity
854
+ //! __shared__ typename BlockDiscontinuity::TempStorage temp_storage;
855
+ //!
856
+ //! // Obtain a segment of consecutive items that are blocked across threads
857
+ //! int thread_data[4];
858
+ //! ...
859
+ //!
860
+ //! // Have thread127 obtain the successor item for the entire tile
861
+ //! int tile_successor_item;
862
+ //! if (threadIdx.x == 127) tile_successor_item == ...
863
+ //!
864
+ //! // Collectively compute head and flags for discontinuities in the segment
865
+ //! int head_flags[4];
866
+ //! int tail_flags[4];
867
+ //! BlockDiscontinuity(temp_storage).FlagHeadsAndTails(
868
+ //! head_flags, tail_flags, tile_successor_item, thread_data, cub::Inequality());
869
+ //!
870
+ //! Suppose the set of input ``thread_data`` across the block of threads is
871
+ //! ``{ [0,0,1,1], [1,1,1,1], [2,3,3,3], ..., [124,125,125,125] }``
872
+ //! and that the tile_successor_item is ``125``. The corresponding output ``head_flags``
873
+ //! in those threads will be ``{ [1,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }``.
874
+ //! and the corresponding output ``tail_flags`` in those threads will be
875
+ //! ``{ [0,1,0,0], [0,0,0,1], [1,0,0,...], ..., [1,0,0,0] }``.
876
+ //! @endrst
877
+ //!
878
+ //! @tparam ITEMS_PER_THREAD
879
+ //! **[inferred]** The number of consecutive items partitioned onto each thread.
880
+ //!
881
+ //! @tparam FlagT
882
+ //! **[inferred]** The flag type (must be an integer type)
883
+ //!
884
+ //! @tparam FlagOp
885
+ //! **[inferred]** Binary predicate functor type having member
886
+ //! `T operator()(const T &a, const T &b)` or member
887
+ //! `T operator()(const T &a, const T &b, unsigned int b_index)`, and returning `true`
888
+ //! if a discontinuity exists between `a` and `b`, otherwise `false`. `b_index` is the
889
+ //! rank of b in the aggregate tile of data.
890
+ //!
891
+ //! @param[out] head_flags
892
+ //! Calling thread's discontinuity head_flags
893
+ //!
894
+ //! @param[out] tail_flags
895
+ //! Calling thread's discontinuity tail_flags
896
+ //!
897
+ //! @param[in] tile_successor_item
898
+ //! @rst
899
+ //! *thread*\ :sub:`BLOCK_THREADS - 1` only item with which to compare
900
+ //! the last tile item (``input[ITEMS_PER_THREAD - 1]`` from
901
+ //! *thread*\ :sub:`BLOCK_THREADS - 1`).
902
+ //! @endrst
903
+ //!
904
+ //! @param[in] input
905
+ //! Calling thread's input items
906
+ //!
907
+ //! @param[in] flag_op
908
+ //! Binary boolean flag predicate
909
+ template <int ITEMS_PER_THREAD, typename FlagT, typename FlagOp>
910
+ _CCCL_DEVICE _CCCL_FORCEINLINE void FlagHeadsAndTails(
911
+ FlagT (&head_flags)[ITEMS_PER_THREAD],
912
+ FlagT (&tail_flags)[ITEMS_PER_THREAD],
913
+ T tile_successor_item,
914
+ T (&input)[ITEMS_PER_THREAD],
915
+ FlagOp flag_op)
916
+ {
917
+ // Share first and last items
918
+ temp_storage.first_items[linear_tid] = input[0];
919
+ temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1];
920
+
921
+ __syncthreads();
922
+
923
+ T preds[ITEMS_PER_THREAD];
924
+
925
+ // Set flag for first thread-item
926
+ if (linear_tid == 0)
927
+ {
928
+ head_flags[0] = 1;
929
+ }
930
+ else
931
+ {
932
+ preds[0] = temp_storage.last_items[linear_tid - 1];
933
+ head_flags[0] = ApplyOp<FlagOp>::FlagT(flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD);
934
+ }
935
+
936
+ // Set flag for last thread-item
937
+ T successor_item = (linear_tid == BLOCK_THREADS - 1) ? tile_successor_item : // Last thread
938
+ temp_storage.first_items[linear_tid + 1];
939
+
940
+ tail_flags[ITEMS_PER_THREAD - 1] = ApplyOp<FlagOp>::FlagT(
941
+ flag_op, input[ITEMS_PER_THREAD - 1], successor_item, (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD);
942
+
943
+ // Set head_flags for remaining items
944
+ Iterate::FlagHeads(linear_tid, head_flags, input, preds, flag_op);
945
+
946
+ // Set tail_flags for remaining items
947
+ Iterate::FlagTails(linear_tid, tail_flags, input, flag_op);
948
+ }
949
+
950
+ //! @rst
951
+ //! Sets both head and tail flags indicating discontinuities between items partitioned across the thread block.
952
+ //!
953
+ //! - The flag ``head_flags[i]`` is set for item ``input[i]`` when ``flag_op(previous-item, input[i])``
954
+ //! returns ``true`` (where ``previous-item`` is either the preceding item in the same thread or the last item
955
+ //! in the previous thread).
956
+ //! - For *thread*\ :sub:`0`, item ``input[0]`` is compared against ``tile_predecessor_item``.
957
+ //! - The flag ``tail_flags[i]`` is set for item ``input[i]`` when
958
+ //! ``flag_op(input[i], next-item)`` returns ``true`` (where ``next-item`` is either the next item
959
+ //! in the same thread or the first item in the next thread).
960
+ //! - For *thread*\ :sub:`BLOCK_THREADS - 1`, item
961
+ //! ``input[ITEMS_PER_THREAD - 1]`` is always flagged.
962
+ //! - @blocked
963
+ //! - @granularity
964
+ //! - @smemreuse
965
+ //!
966
+ //! Snippet
967
+ //! +++++++
968
+ //!
969
+ //! The code snippet below illustrates the head- and tail-flagging of 512 integer items that
970
+ //! are partitioned in a :ref:`blocked arrangement <flexible-data-arrangement>` across 128 threads
971
+ //! where each thread owns 4 consecutive items.
972
+ //!
973
+ //! .. code-block:: c++
974
+ //!
975
+ //! #include <cub/cub.cuh> // or equivalently <cub/block/block_discontinuity.cuh>
976
+ //!
977
+ //! __global__ void ExampleKernel(...)
978
+ //! {
979
+ //! // Specialize BlockDiscontinuity for a 1D block of 128 threads of type int
980
+ //! using BlockDiscontinuity = cub::BlockDiscontinuity<int, 128>;
981
+ //!
982
+ //! // Allocate shared memory for BlockDiscontinuity
983
+ //! __shared__ typename BlockDiscontinuity::TempStorage temp_storage;
984
+ //!
985
+ //! // Obtain a segment of consecutive items that are blocked across threads
986
+ //! int thread_data[4];
987
+ //! ...
988
+ //!
989
+ //! // Have thread0 obtain the predecessor item for the entire tile
990
+ //! int tile_predecessor_item;
991
+ //! if (threadIdx.x == 0) tile_predecessor_item == ...
992
+ //!
993
+ //! // Have thread127 obtain the successor item for the entire tile
994
+ //! int tile_successor_item;
995
+ //! if (threadIdx.x == 127) tile_successor_item == ...
996
+ //!
997
+ //! // Collectively compute head and flags for discontinuities in the segment
998
+ //! int head_flags[4];
999
+ //! int tail_flags[4];
1000
+ //! BlockDiscontinuity(temp_storage).FlagHeadsAndTails(
1001
+ //! head_flags, tile_predecessor_item, tail_flags, tile_successor_item,
1002
+ //! thread_data, cub::Inequality());
1003
+ //!
1004
+ //! Suppose the set of input ``thread_data`` across the block of threads is
1005
+ //! ``{ [0,0,1,1], [1,1,1,1], [2,3,3,3], ..., [124,125,125,125] }``,
1006
+ //! that the ``tile_predecessor_item`` is ``0``, and that the ``tile_successor_item`` is ``125``.
1007
+ //! The corresponding output ``head_flags`` in those threads will be
1008
+ //! ``{ [0,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }``, and the corresponding output ``tail_flags``
1009
+ //! in those threads will be ``{ [0,1,0,0], [0,0,0,1], [1,0,0,...], ..., [1,0,0,1] }``.
1010
+ //! @endrst
1011
+ //!
1012
+ //! @tparam ITEMS_PER_THREAD
1013
+ //! **[inferred]** The number of consecutive items partitioned onto each thread.
1014
+ //!
1015
+ //! @tparam FlagT
1016
+ //! **[inferred]** The flag type (must be an integer type)
1017
+ //!
1018
+ //! @tparam FlagOp
1019
+ //! **[inferred]** Binary predicate functor type having member
1020
+ //! `T operator()(const T &a, const T &b)` or member
1021
+ //! `T operator()(const T &a, const T &b, unsigned int b_index)`, and returning `true`
1022
+ //! if a discontinuity exists between `a` and `b`, otherwise `false`. `b_index` is the rank
1023
+ //! of b in the aggregate tile of data.
1024
+ //!
1025
+ //! @param[out] head_flags
1026
+ //! Calling thread's discontinuity head_flags
1027
+ //!
1028
+ //! @param[in] tile_predecessor_item
1029
+ //! @rst
1030
+ //! *thread*\ :sub:`0` only item with which to compare the first tile item (``input[0]`` from *thread*\ :sub:`0`).
1031
+ //! @endrst
1032
+ //!
1033
+ //! @param[out] tail_flags
1034
+ //! Calling thread's discontinuity tail_flags
1035
+ //!
1036
+ //! @param[in] input
1037
+ //! Calling thread's input items
1038
+ //!
1039
+ //! @param[in] flag_op
1040
+ //! Binary boolean flag predicate
1041
+ template <int ITEMS_PER_THREAD, typename FlagT, typename FlagOp>
1042
+ _CCCL_DEVICE _CCCL_FORCEINLINE void FlagHeadsAndTails(
1043
+ FlagT (&head_flags)[ITEMS_PER_THREAD],
1044
+ T tile_predecessor_item,
1045
+ FlagT (&tail_flags)[ITEMS_PER_THREAD],
1046
+ T (&input)[ITEMS_PER_THREAD],
1047
+ FlagOp flag_op)
1048
+ {
1049
+ // Share first and last items
1050
+ temp_storage.first_items[linear_tid] = input[0];
1051
+ temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1];
1052
+
1053
+ __syncthreads();
1054
+
1055
+ T preds[ITEMS_PER_THREAD];
1056
+
1057
+ // Set flag for first thread-item
1058
+ preds[0] = (linear_tid == 0) ? tile_predecessor_item : // First thread
1059
+ temp_storage.last_items[linear_tid - 1];
1060
+
1061
+ head_flags[0] = ApplyOp<FlagOp>::FlagT(flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD);
1062
+
1063
+ // Set flag for last thread-item
1064
+ tail_flags[ITEMS_PER_THREAD - 1] =
1065
+ (linear_tid == BLOCK_THREADS - 1) ? 1 : // Last thread
1066
+ ApplyOp<FlagOp>::FlagT(
1067
+ flag_op,
1068
+ input[ITEMS_PER_THREAD - 1],
1069
+ temp_storage.first_items[linear_tid + 1],
1070
+ (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD);
1071
+
1072
+ // Set head_flags for remaining items
1073
+ Iterate::FlagHeads(linear_tid, head_flags, input, preds, flag_op);
1074
+
1075
+ // Set tail_flags for remaining items
1076
+ Iterate::FlagTails(linear_tid, tail_flags, input, flag_op);
1077
+ }
1078
+
1079
+ //! @rst
1080
+ //! Sets both head and tail flags indicating discontinuities between items partitioned across the thread block.
1081
+ //!
1082
+ //! - The flag ``head_flags[i]`` is set for item ``input[i]`` when ``flag_op(previous-item, input[i])``
1083
+ //! returns ``true`` (where ``previous-item`` is either the preceding item in the same thread or the last item in
1084
+ //! the previous thread).
1085
+ //! - For *thread*\ :sub:`0`, item ``input[0]`` is compared against ``tile_predecessor_item``.
1086
+ //! - The flag ``tail_flags[i]`` is set for item ``input[i]`` when ``flag_op(input[i], next-item)``
1087
+ //! returns ``true`` (where ``next-item`` is either the next item in the same thread or the first item in
1088
+ //! the next thread).
1089
+ //! - For *thread*\ :sub:`BLOCK_THREADS - 1`, item ``input[ITEMS_PER_THREAD - 1]`` is compared
1090
+ //! against ``tile_successor_item``.
1091
+ //! - @blocked
1092
+ //! - @granularity
1093
+ //! - @smemreuse
1094
+ //!
1095
+ //! Snippet
1096
+ //! +++++++
1097
+ //!
1098
+ //! The code snippet below illustrates the head- and tail-flagging of 512 integer items that
1099
+ //! are partitioned in a :ref:`blocked arrangement <flexible-data-arrangement>` across 128 threads
1100
+ //! where each thread owns 4 consecutive items.
1101
+ //!
1102
+ //! .. code-block:: c++
1103
+ //!
1104
+ //! #include <cub/cub.cuh> // or equivalently <cub/block/block_discontinuity.cuh>
1105
+ //!
1106
+ //! __global__ void ExampleKernel(...)
1107
+ //! {
1108
+ //! // Specialize BlockDiscontinuity for a 1D block of 128 threads of type int
1109
+ //! using BlockDiscontinuity = cub::BlockDiscontinuity<int, 128>;
1110
+ //!
1111
+ //! // Allocate shared memory for BlockDiscontinuity
1112
+ //! __shared__ typename BlockDiscontinuity::TempStorage temp_storage;
1113
+ //!
1114
+ //! // Obtain a segment of consecutive items that are blocked across threads
1115
+ //! int thread_data[4];
1116
+ //! ...
1117
+ //!
1118
+ //! // Have thread0 obtain the predecessor item for the entire tile
1119
+ //! int tile_predecessor_item;
1120
+ //! if (threadIdx.x == 0) tile_predecessor_item == ...
1121
+ //!
1122
+ //! // Have thread127 obtain the successor item for the entire tile
1123
+ //! int tile_successor_item;
1124
+ //! if (threadIdx.x == 127) tile_successor_item == ...
1125
+ //!
1126
+ //! // Collectively compute head and flags for discontinuities in the segment
1127
+ //! int head_flags[4];
1128
+ //! int tail_flags[4];
1129
+ //! BlockDiscontinuity(temp_storage).FlagHeadsAndTails(
1130
+ //! head_flags, tile_predecessor_item, tail_flags, tile_successor_item,
1131
+ //! thread_data, cub::Inequality());
1132
+ //!
1133
+ //! Suppose the set of input ``thread_data`` across the block of threads is
1134
+ //! ``{ [0,0,1,1], [1,1,1,1], [2,3,3,3], ..., [124,125,125,125] }``,
1135
+ //! that the ``tile_predecessor_item`` is ``0``, and that the
1136
+ //! ``tile_successor_item`` is ``125``. The corresponding output ``head_flags``
1137
+ //! in those threads will be ``{ [0,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }``.
1138
+ //! and the corresponding output ``tail_flags`` in those threads will be
1139
+ //! ``{ [0,1,0,0], [0,0,0,1], [1,0,0,...], ..., [1,0,0,0] }``.
1140
+ //! @endrst
1141
+ //!
1142
+ //! @tparam ITEMS_PER_THREAD
1143
+ //! **[inferred]** The number of consecutive items partitioned onto each thread.
1144
+ //!
1145
+ //! @tparam FlagT
1146
+ //! **[inferred]** The flag type (must be an integer type)
1147
+ //!
1148
+ //! @tparam FlagOp
1149
+ //! **[inferred]** Binary predicate functor type having member
1150
+ //! `T operator()(const T &a, const T &b)` or member
1151
+ //! `T operator()(const T &a, const T &b, unsigned int b_index)`, and returning `true`
1152
+ //! if a discontinuity exists between `a` and `b`, otherwise `false`. `b_index` is the rank
1153
+ //! of `b` in the aggregate tile of data.
1154
+ //!
1155
+ //! @param[out] head_flags
1156
+ //! Calling thread's discontinuity head_flags
1157
+ //!
1158
+ //! @param[in] tile_predecessor_item
1159
+ //! @rst
1160
+ //! *thread*\ :sub:`0` only item with which to compare the first tile item (``input[0]`` from *thread*\ :sub:`0`).
1161
+ //! @endrst
1162
+ //!
1163
+ //! @param[out] tail_flags
1164
+ //! Calling thread's discontinuity tail_flags
1165
+ //!
1166
+ //! @param[in] tile_successor_item
1167
+ //! @rst
1168
+ //! *thread*\ :sub:`BLOCK_THREADS - 1` only item with which to compare the last tile item
1169
+ //! (``input[ITEMS_PER_THREAD - 1]`` from *thread*\ :sub:`BLOCK_THREADS - 1`).
1170
+ //! @endrst
1171
+ //!
1172
+ //! @param[in] input
1173
+ //! Calling thread's input items
1174
+ //!
1175
+ //! @param[in] flag_op
1176
+ //! Binary boolean flag predicate
1177
+ template <int ITEMS_PER_THREAD, typename FlagT, typename FlagOp>
1178
+ _CCCL_DEVICE _CCCL_FORCEINLINE void FlagHeadsAndTails(
1179
+ FlagT (&head_flags)[ITEMS_PER_THREAD],
1180
+ T tile_predecessor_item,
1181
+ FlagT (&tail_flags)[ITEMS_PER_THREAD],
1182
+ T tile_successor_item,
1183
+ T (&input)[ITEMS_PER_THREAD],
1184
+ FlagOp flag_op)
1185
+ {
1186
+ // Share first and last items
1187
+ temp_storage.first_items[linear_tid] = input[0];
1188
+ temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1];
1189
+
1190
+ __syncthreads();
1191
+
1192
+ T preds[ITEMS_PER_THREAD];
1193
+
1194
+ // Set flag for first thread-item
1195
+ preds[0] = (linear_tid == 0) ? tile_predecessor_item : // First thread
1196
+ temp_storage.last_items[linear_tid - 1];
1197
+
1198
+ head_flags[0] = ApplyOp<FlagOp>::FlagT(flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD);
1199
+
1200
+ // Set flag for last thread-item
1201
+ T successor_item = (linear_tid == BLOCK_THREADS - 1) ? tile_successor_item : // Last thread
1202
+ temp_storage.first_items[linear_tid + 1];
1203
+
1204
+ tail_flags[ITEMS_PER_THREAD - 1] = ApplyOp<FlagOp>::FlagT(
1205
+ flag_op, input[ITEMS_PER_THREAD - 1], successor_item, (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD);
1206
+
1207
+ // Set head_flags for remaining items
1208
+ Iterate::FlagHeads(linear_tid, head_flags, input, preds, flag_op);
1209
+
1210
+ // Set tail_flags for remaining items
1211
+ Iterate::FlagTails(linear_tid, tail_flags, input, flag_op);
1212
+ }
1213
+
1214
+ //! @} end member group
1215
+ };
1216
+
1217
+ CUB_NAMESPACE_END