cuda-cccl 0.3.3__cp313-cp313-win_amd64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cuda-cccl might be problematic. Click here for more details.

Files changed (1968)
  1. cuda/cccl/__init__.py +27 -0
  2. cuda/cccl/_cuda_version_utils.py +24 -0
  3. cuda/cccl/cooperative/__init__.py +9 -0
  4. cuda/cccl/cooperative/experimental/__init__.py +24 -0
  5. cuda/cccl/headers/__init__.py +7 -0
  6. cuda/cccl/headers/include/__init__.py +1 -0
  7. cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +259 -0
  8. cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +1182 -0
  9. cuda/cccl/headers/include/cub/agent/agent_for.cuh +81 -0
  10. cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +709 -0
  11. cuda/cccl/headers/include/cub/agent/agent_merge.cuh +234 -0
  12. cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +748 -0
  13. cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +786 -0
  14. cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +286 -0
  15. cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +703 -0
  16. cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +555 -0
  17. cuda/cccl/headers/include/cub/agent/agent_reduce.cuh +619 -0
  18. cuda/cccl/headers/include/cub/agent/agent_reduce_by_key.cuh +806 -0
  19. cuda/cccl/headers/include/cub/agent/agent_rle.cuh +1124 -0
  20. cuda/cccl/headers/include/cub/agent/agent_scan.cuh +589 -0
  21. cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +474 -0
  22. cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +289 -0
  23. cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +1117 -0
  24. cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +346 -0
  25. cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +606 -0
  26. cuda/cccl/headers/include/cub/agent/agent_topk.cuh +764 -0
  27. cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +631 -0
  28. cuda/cccl/headers/include/cub/agent/single_pass_scan_operators.cuh +1424 -0
  29. cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +963 -0
  30. cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +1227 -0
  31. cuda/cccl/headers/include/cub/block/block_exchange.cuh +1313 -0
  32. cuda/cccl/headers/include/cub/block/block_histogram.cuh +424 -0
  33. cuda/cccl/headers/include/cub/block/block_load.cuh +1264 -0
  34. cuda/cccl/headers/include/cub/block/block_load_to_shared.cuh +432 -0
  35. cuda/cccl/headers/include/cub/block/block_merge_sort.cuh +800 -0
  36. cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +1225 -0
  37. cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +2196 -0
  38. cuda/cccl/headers/include/cub/block/block_raking_layout.cuh +150 -0
  39. cuda/cccl/headers/include/cub/block/block_reduce.cuh +667 -0
  40. cuda/cccl/headers/include/cub/block/block_run_length_decode.cuh +434 -0
  41. cuda/cccl/headers/include/cub/block/block_scan.cuh +2315 -0
  42. cuda/cccl/headers/include/cub/block/block_shuffle.cuh +346 -0
  43. cuda/cccl/headers/include/cub/block/block_store.cuh +1247 -0
  44. cuda/cccl/headers/include/cub/block/radix_rank_sort_operations.cuh +624 -0
  45. cuda/cccl/headers/include/cub/block/specializations/block_histogram_atomic.cuh +86 -0
  46. cuda/cccl/headers/include/cub/block/specializations/block_histogram_sort.cuh +240 -0
  47. cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking.cuh +252 -0
  48. cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking_commutative_only.cuh +238 -0
  49. cuda/cccl/headers/include/cub/block/specializations/block_reduce_warp_reductions.cuh +281 -0
  50. cuda/cccl/headers/include/cub/block/specializations/block_scan_raking.cuh +790 -0
  51. cuda/cccl/headers/include/cub/block/specializations/block_scan_warp_scans.cuh +538 -0
  52. cuda/cccl/headers/include/cub/config.cuh +53 -0
  53. cuda/cccl/headers/include/cub/cub.cuh +120 -0
  54. cuda/cccl/headers/include/cub/detail/array_utils.cuh +78 -0
  55. cuda/cccl/headers/include/cub/detail/choose_offset.cuh +161 -0
  56. cuda/cccl/headers/include/cub/detail/detect_cuda_runtime.cuh +74 -0
  57. cuda/cccl/headers/include/cub/detail/device_double_buffer.cuh +96 -0
  58. cuda/cccl/headers/include/cub/detail/device_memory_resource.cuh +62 -0
  59. cuda/cccl/headers/include/cub/detail/fast_modulo_division.cuh +253 -0
  60. cuda/cccl/headers/include/cub/detail/integer_utils.cuh +88 -0
  61. cuda/cccl/headers/include/cub/detail/launcher/cuda_driver.cuh +142 -0
  62. cuda/cccl/headers/include/cub/detail/launcher/cuda_runtime.cuh +100 -0
  63. cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +114 -0
  64. cuda/cccl/headers/include/cub/detail/ptx-json/README.md +71 -0
  65. cuda/cccl/headers/include/cub/detail/ptx-json/array.h +68 -0
  66. cuda/cccl/headers/include/cub/detail/ptx-json/json.h +62 -0
  67. cuda/cccl/headers/include/cub/detail/ptx-json/object.h +100 -0
  68. cuda/cccl/headers/include/cub/detail/ptx-json/string.h +53 -0
  69. cuda/cccl/headers/include/cub/detail/ptx-json/value.h +95 -0
  70. cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +63 -0
  71. cuda/cccl/headers/include/cub/detail/rfa.cuh +731 -0
  72. cuda/cccl/headers/include/cub/detail/strong_load.cuh +189 -0
  73. cuda/cccl/headers/include/cub/detail/strong_store.cuh +220 -0
  74. cuda/cccl/headers/include/cub/detail/temporary_storage.cuh +384 -0
  75. cuda/cccl/headers/include/cub/detail/type_traits.cuh +187 -0
  76. cuda/cccl/headers/include/cub/detail/uninitialized_copy.cuh +73 -0
  77. cuda/cccl/headers/include/cub/detail/unsafe_bitcast.cuh +56 -0
  78. cuda/cccl/headers/include/cub/device/device_adjacent_difference.cuh +596 -0
  79. cuda/cccl/headers/include/cub/device/device_copy.cuh +276 -0
  80. cuda/cccl/headers/include/cub/device/device_for.cuh +1063 -0
  81. cuda/cccl/headers/include/cub/device/device_histogram.cuh +1509 -0
  82. cuda/cccl/headers/include/cub/device/device_memcpy.cuh +195 -0
  83. cuda/cccl/headers/include/cub/device/device_merge.cuh +203 -0
  84. cuda/cccl/headers/include/cub/device/device_merge_sort.cuh +979 -0
  85. cuda/cccl/headers/include/cub/device/device_partition.cuh +668 -0
  86. cuda/cccl/headers/include/cub/device/device_radix_sort.cuh +3437 -0
  87. cuda/cccl/headers/include/cub/device/device_reduce.cuh +2518 -0
  88. cuda/cccl/headers/include/cub/device/device_run_length_encode.cuh +370 -0
  89. cuda/cccl/headers/include/cub/device/device_scan.cuh +2212 -0
  90. cuda/cccl/headers/include/cub/device/device_segmented_radix_sort.cuh +1496 -0
  91. cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +1430 -0
  92. cuda/cccl/headers/include/cub/device/device_segmented_sort.cuh +2811 -0
  93. cuda/cccl/headers/include/cub/device/device_select.cuh +1228 -0
  94. cuda/cccl/headers/include/cub/device/device_topk.cuh +511 -0
  95. cuda/cccl/headers/include/cub/device/device_transform.cuh +668 -0
  96. cuda/cccl/headers/include/cub/device/dispatch/dispatch_adjacent_difference.cuh +315 -0
  97. cuda/cccl/headers/include/cub/device/dispatch/dispatch_batch_memcpy.cuh +719 -0
  98. cuda/cccl/headers/include/cub/device/dispatch/dispatch_common.cuh +43 -0
  99. cuda/cccl/headers/include/cub/device/dispatch/dispatch_copy_mdspan.cuh +79 -0
  100. cuda/cccl/headers/include/cub/device/dispatch/dispatch_for.cuh +198 -0
  101. cuda/cccl/headers/include/cub/device/dispatch/dispatch_histogram.cuh +1046 -0
  102. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +303 -0
  103. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge_sort.cuh +473 -0
  104. cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +1744 -0
  105. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +1310 -0
  106. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_by_key.cuh +655 -0
  107. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +531 -0
  108. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh +313 -0
  109. cuda/cccl/headers/include/cub/device/dispatch/dispatch_rle.cuh +615 -0
  110. cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan.cuh +517 -0
  111. cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan_by_key.cuh +602 -0
  112. cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +975 -0
  113. cuda/cccl/headers/include/cub/device/dispatch/dispatch_select_if.cuh +842 -0
  114. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +341 -0
  115. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +440 -0
  116. cuda/cccl/headers/include/cub/device/dispatch/dispatch_three_way_partition.cuh +389 -0
  117. cuda/cccl/headers/include/cub/device/dispatch/dispatch_topk.cuh +627 -0
  118. cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +569 -0
  119. cuda/cccl/headers/include/cub/device/dispatch/dispatch_unique_by_key.cuh +545 -0
  120. cuda/cccl/headers/include/cub/device/dispatch/kernels/for_each.cuh +261 -0
  121. cuda/cccl/headers/include/cub/device/dispatch/kernels/histogram.cuh +505 -0
  122. cuda/cccl/headers/include/cub/device/dispatch/kernels/merge_sort.cuh +334 -0
  123. cuda/cccl/headers/include/cub/device/dispatch/kernels/radix_sort.cuh +803 -0
  124. cuda/cccl/headers/include/cub/device/dispatch/kernels/reduce.cuh +583 -0
  125. cuda/cccl/headers/include/cub/device/dispatch/kernels/scan.cuh +189 -0
  126. cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_reduce.cuh +321 -0
  127. cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_sort.cuh +522 -0
  128. cuda/cccl/headers/include/cub/device/dispatch/kernels/three_way_partition.cuh +201 -0
  129. cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +1028 -0
  130. cuda/cccl/headers/include/cub/device/dispatch/kernels/unique_by_key.cuh +176 -0
  131. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +67 -0
  132. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +118 -0
  133. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +60 -0
  134. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +275 -0
  135. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +76 -0
  136. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +126 -0
  137. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +1065 -0
  138. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce.cuh +493 -0
  139. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +942 -0
  140. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +673 -0
  141. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +618 -0
  142. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +1010 -0
  143. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +398 -0
  144. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_select_if.cuh +1588 -0
  145. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +440 -0
  146. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_topk.cuh +85 -0
  147. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +481 -0
  148. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +884 -0
  149. cuda/cccl/headers/include/cub/grid/grid_even_share.cuh +227 -0
  150. cuda/cccl/headers/include/cub/grid/grid_mapping.cuh +106 -0
  151. cuda/cccl/headers/include/cub/grid/grid_queue.cuh +202 -0
  152. cuda/cccl/headers/include/cub/iterator/arg_index_input_iterator.cuh +254 -0
  153. cuda/cccl/headers/include/cub/iterator/cache_modified_input_iterator.cuh +259 -0
  154. cuda/cccl/headers/include/cub/iterator/cache_modified_output_iterator.cuh +250 -0
  155. cuda/cccl/headers/include/cub/iterator/tex_obj_input_iterator.cuh +320 -0
  156. cuda/cccl/headers/include/cub/thread/thread_load.cuh +349 -0
  157. cuda/cccl/headers/include/cub/thread/thread_operators.cuh +688 -0
  158. cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +548 -0
  159. cuda/cccl/headers/include/cub/thread/thread_scan.cuh +498 -0
  160. cuda/cccl/headers/include/cub/thread/thread_search.cuh +199 -0
  161. cuda/cccl/headers/include/cub/thread/thread_simd.cuh +458 -0
  162. cuda/cccl/headers/include/cub/thread/thread_sort.cuh +102 -0
  163. cuda/cccl/headers/include/cub/thread/thread_store.cuh +365 -0
  164. cuda/cccl/headers/include/cub/util_allocator.cuh +921 -0
  165. cuda/cccl/headers/include/cub/util_arch.cuh +167 -0
  166. cuda/cccl/headers/include/cub/util_cpp_dialect.cuh +95 -0
  167. cuda/cccl/headers/include/cub/util_debug.cuh +207 -0
  168. cuda/cccl/headers/include/cub/util_device.cuh +800 -0
  169. cuda/cccl/headers/include/cub/util_macro.cuh +97 -0
  170. cuda/cccl/headers/include/cub/util_math.cuh +118 -0
  171. cuda/cccl/headers/include/cub/util_namespace.cuh +176 -0
  172. cuda/cccl/headers/include/cub/util_policy_wrapper_t.cuh +55 -0
  173. cuda/cccl/headers/include/cub/util_ptx.cuh +513 -0
  174. cuda/cccl/headers/include/cub/util_temporary_storage.cuh +122 -0
  175. cuda/cccl/headers/include/cub/util_type.cuh +1120 -0
  176. cuda/cccl/headers/include/cub/util_vsmem.cuh +253 -0
  177. cuda/cccl/headers/include/cub/version.cuh +89 -0
  178. cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_shfl.cuh +329 -0
  179. cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_smem.cuh +177 -0
  180. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +737 -0
  181. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +408 -0
  182. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +952 -0
  183. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_smem.cuh +715 -0
  184. cuda/cccl/headers/include/cub/warp/warp_exchange.cuh +405 -0
  185. cuda/cccl/headers/include/cub/warp/warp_load.cuh +614 -0
  186. cuda/cccl/headers/include/cub/warp/warp_merge_sort.cuh +169 -0
  187. cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +829 -0
  188. cuda/cccl/headers/include/cub/warp/warp_scan.cuh +1890 -0
  189. cuda/cccl/headers/include/cub/warp/warp_store.cuh +521 -0
  190. cuda/cccl/headers/include/cub/warp/warp_utils.cuh +61 -0
  191. cuda/cccl/headers/include/cuda/__algorithm/common.h +68 -0
  192. cuda/cccl/headers/include/cuda/__algorithm/copy.h +196 -0
  193. cuda/cccl/headers/include/cuda/__algorithm/fill.h +107 -0
  194. cuda/cccl/headers/include/cuda/__annotated_ptr/access_property.h +165 -0
  195. cuda/cccl/headers/include/cuda/__annotated_ptr/access_property_encoding.h +172 -0
  196. cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr.h +217 -0
  197. cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr_base.h +100 -0
  198. cuda/cccl/headers/include/cuda/__annotated_ptr/apply_access_property.h +83 -0
  199. cuda/cccl/headers/include/cuda/__annotated_ptr/associate_access_property.h +128 -0
  200. cuda/cccl/headers/include/cuda/__annotated_ptr/createpolicy.h +210 -0
  201. cuda/cccl/headers/include/cuda/__atomic/atomic.h +145 -0
  202. cuda/cccl/headers/include/cuda/__barrier/async_contract_fulfillment.h +39 -0
  203. cuda/cccl/headers/include/cuda/__barrier/barrier.h +65 -0
  204. cuda/cccl/headers/include/cuda/__barrier/barrier_arrive_tx.h +102 -0
  205. cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +487 -0
  206. cuda/cccl/headers/include/cuda/__barrier/barrier_expect_tx.h +74 -0
  207. cuda/cccl/headers/include/cuda/__barrier/barrier_native_handle.h +45 -0
  208. cuda/cccl/headers/include/cuda/__barrier/barrier_thread_scope.h +60 -0
  209. cuda/cccl/headers/include/cuda/__bit/bit_reverse.h +171 -0
  210. cuda/cccl/headers/include/cuda/__bit/bitfield.h +122 -0
  211. cuda/cccl/headers/include/cuda/__bit/bitmask.h +90 -0
  212. cuda/cccl/headers/include/cuda/__cccl_config +37 -0
  213. cuda/cccl/headers/include/cuda/__cmath/ceil_div.h +124 -0
  214. cuda/cccl/headers/include/cuda/__cmath/fast_modulo_division.h +178 -0
  215. cuda/cccl/headers/include/cuda/__cmath/ilog.h +195 -0
  216. cuda/cccl/headers/include/cuda/__cmath/ipow.h +107 -0
  217. cuda/cccl/headers/include/cuda/__cmath/isqrt.h +80 -0
  218. cuda/cccl/headers/include/cuda/__cmath/mul_hi.h +146 -0
  219. cuda/cccl/headers/include/cuda/__cmath/neg.h +47 -0
  220. cuda/cccl/headers/include/cuda/__cmath/pow2.h +74 -0
  221. cuda/cccl/headers/include/cuda/__cmath/round_down.h +102 -0
  222. cuda/cccl/headers/include/cuda/__cmath/round_up.h +104 -0
  223. cuda/cccl/headers/include/cuda/__cmath/uabs.h +57 -0
  224. cuda/cccl/headers/include/cuda/__complex/complex.h +238 -0
  225. cuda/cccl/headers/include/cuda/__complex/get_real_imag.h +89 -0
  226. cuda/cccl/headers/include/cuda/__complex/traits.h +64 -0
  227. cuda/cccl/headers/include/cuda/__complex_ +28 -0
  228. cuda/cccl/headers/include/cuda/__device/all_devices.h +140 -0
  229. cuda/cccl/headers/include/cuda/__device/arch_id.h +176 -0
  230. cuda/cccl/headers/include/cuda/__device/arch_traits.h +537 -0
  231. cuda/cccl/headers/include/cuda/__device/attributes.h +772 -0
  232. cuda/cccl/headers/include/cuda/__device/compute_capability.h +171 -0
  233. cuda/cccl/headers/include/cuda/__device/device_ref.h +156 -0
  234. cuda/cccl/headers/include/cuda/__device/physical_device.h +172 -0
  235. cuda/cccl/headers/include/cuda/__driver/driver_api.h +835 -0
  236. cuda/cccl/headers/include/cuda/__event/event.h +171 -0
  237. cuda/cccl/headers/include/cuda/__event/event_ref.h +157 -0
  238. cuda/cccl/headers/include/cuda/__event/timed_event.h +120 -0
  239. cuda/cccl/headers/include/cuda/__execution/determinism.h +91 -0
  240. cuda/cccl/headers/include/cuda/__execution/output_ordering.h +89 -0
  241. cuda/cccl/headers/include/cuda/__execution/require.h +75 -0
  242. cuda/cccl/headers/include/cuda/__execution/tune.h +70 -0
  243. cuda/cccl/headers/include/cuda/__functional/address_stability.h +131 -0
  244. cuda/cccl/headers/include/cuda/__functional/for_each_canceled.h +321 -0
  245. cuda/cccl/headers/include/cuda/__functional/maximum.h +58 -0
  246. cuda/cccl/headers/include/cuda/__functional/minimum.h +58 -0
  247. cuda/cccl/headers/include/cuda/__functional/proclaim_return_type.h +108 -0
  248. cuda/cccl/headers/include/cuda/__fwd/barrier.h +38 -0
  249. cuda/cccl/headers/include/cuda/__fwd/barrier_native_handle.h +42 -0
  250. cuda/cccl/headers/include/cuda/__fwd/complex.h +48 -0
  251. cuda/cccl/headers/include/cuda/__fwd/devices.h +44 -0
  252. cuda/cccl/headers/include/cuda/__fwd/get_stream.h +38 -0
  253. cuda/cccl/headers/include/cuda/__fwd/pipeline.h +37 -0
  254. cuda/cccl/headers/include/cuda/__fwd/zip_iterator.h +58 -0
  255. cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +315 -0
  256. cuda/cccl/headers/include/cuda/__iterator/counting_iterator.h +483 -0
  257. cuda/cccl/headers/include/cuda/__iterator/discard_iterator.h +324 -0
  258. cuda/cccl/headers/include/cuda/__iterator/permutation_iterator.h +456 -0
  259. cuda/cccl/headers/include/cuda/__iterator/shuffle_iterator.h +334 -0
  260. cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +418 -0
  261. cuda/cccl/headers/include/cuda/__iterator/tabulate_output_iterator.h +367 -0
  262. cuda/cccl/headers/include/cuda/__iterator/transform_input_output_iterator.h +528 -0
  263. cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +527 -0
  264. cuda/cccl/headers/include/cuda/__iterator/transform_output_iterator.h +486 -0
  265. cuda/cccl/headers/include/cuda/__iterator/zip_common.h +148 -0
  266. cuda/cccl/headers/include/cuda/__iterator/zip_function.h +112 -0
  267. cuda/cccl/headers/include/cuda/__iterator/zip_iterator.h +557 -0
  268. cuda/cccl/headers/include/cuda/__iterator/zip_transform_iterator.h +592 -0
  269. cuda/cccl/headers/include/cuda/__latch/latch.h +44 -0
  270. cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +533 -0
  271. cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +238 -0
  272. cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +152 -0
  273. cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +117 -0
  274. cuda/cccl/headers/include/cuda/__memcpy_async/check_preconditions.h +79 -0
  275. cuda/cccl/headers/include/cuda/__memcpy_async/completion_mechanism.h +47 -0
  276. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_bulk_shared_global.h +60 -0
  277. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_fallback.h +72 -0
  278. cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_shared_global.h +148 -0
  279. cuda/cccl/headers/include/cuda/__memcpy_async/dispatch_memcpy_async.h +165 -0
  280. cuda/cccl/headers/include/cuda/__memcpy_async/is_local_smem_barrier.h +53 -0
  281. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async.h +179 -0
  282. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_barrier.h +99 -0
  283. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_tx.h +104 -0
  284. cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_completion.h +170 -0
  285. cuda/cccl/headers/include/cuda/__memcpy_async/try_get_barrier_handle.h +59 -0
  286. cuda/cccl/headers/include/cuda/__memory/address_space.h +227 -0
  287. cuda/cccl/headers/include/cuda/__memory/align_down.h +56 -0
  288. cuda/cccl/headers/include/cuda/__memory/align_up.h +56 -0
  289. cuda/cccl/headers/include/cuda/__memory/aligned_size.h +61 -0
  290. cuda/cccl/headers/include/cuda/__memory/check_address.h +111 -0
  291. cuda/cccl/headers/include/cuda/__memory/discard_memory.h +64 -0
  292. cuda/cccl/headers/include/cuda/__memory/get_device_address.h +58 -0
  293. cuda/cccl/headers/include/cuda/__memory/is_aligned.h +47 -0
  294. cuda/cccl/headers/include/cuda/__memory/ptr_in_range.h +93 -0
  295. cuda/cccl/headers/include/cuda/__memory/ptr_rebind.h +75 -0
  296. cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +82 -0
  297. cuda/cccl/headers/include/cuda/__memory_resource/get_property.h +153 -0
  298. cuda/cccl/headers/include/cuda/__memory_resource/properties.h +113 -0
  299. cuda/cccl/headers/include/cuda/__memory_resource/resource.h +125 -0
  300. cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +652 -0
  301. cuda/cccl/headers/include/cuda/__numeric/add_overflow.h +306 -0
  302. cuda/cccl/headers/include/cuda/__numeric/narrow.h +108 -0
  303. cuda/cccl/headers/include/cuda/__numeric/overflow_cast.h +59 -0
  304. cuda/cccl/headers/include/cuda/__numeric/overflow_result.h +43 -0
  305. cuda/cccl/headers/include/cuda/__nvtx/nvtx.h +120 -0
  306. cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2983 -0
  307. cuda/cccl/headers/include/cuda/__ptx/instructions/barrier_cluster.h +43 -0
  308. cuda/cccl/headers/include/cuda/__ptx/instructions/bfind.h +41 -0
  309. cuda/cccl/headers/include/cuda/__ptx/instructions/bmsk.h +41 -0
  310. cuda/cccl/headers/include/cuda/__ptx/instructions/clusterlaunchcontrol.h +41 -0
  311. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk.h +44 -0
  312. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_commit_group.h +43 -0
  313. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_tensor.h +45 -0
  314. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_wait_group.h +43 -0
  315. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_mbarrier_arrive.h +42 -0
  316. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk.h +60 -0
  317. cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk_tensor.h +43 -0
  318. cuda/cccl/headers/include/cuda/__ptx/instructions/elect_sync.h +41 -0
  319. cuda/cccl/headers/include/cuda/__ptx/instructions/exit.h +41 -0
  320. cuda/cccl/headers/include/cuda/__ptx/instructions/fence.h +49 -0
  321. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/barrier_cluster.h +115 -0
  322. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bfind.h +190 -0
  323. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bmsk.h +54 -0
  324. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/clusterlaunchcontrol.h +242 -0
  325. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk.h +197 -0
  326. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_commit_group.h +25 -0
  327. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_multicast.h +54 -0
  328. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor.h +997 -0
  329. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_gather_scatter.h +318 -0
  330. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_multicast.h +671 -0
  331. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_wait_group.h +46 -0
  332. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive.h +26 -0
  333. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive_noinc.h +26 -0
  334. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk.h +1470 -0
  335. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_bf16.h +132 -0
  336. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_f16.h +132 -0
  337. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_tensor.h +601 -0
  338. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/elect_sync.h +36 -0
  339. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/exit.h +25 -0
  340. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence.h +208 -0
  341. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_mbarrier_init.h +31 -0
  342. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_alias.h +25 -0
  343. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async.h +58 -0
  344. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async_generic_sync_restrict.h +64 -0
  345. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_tensormap_generic.h +102 -0
  346. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_sync_restrict.h +64 -0
  347. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/get_sreg.h +949 -0
  348. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/getctarank.h +32 -0
  349. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/ld.h +5542 -0
  350. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive.h +399 -0
  351. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_expect_tx.h +184 -0
  352. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_no_complete.h +34 -0
  353. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_expect_tx.h +102 -0
  354. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_init.h +27 -0
  355. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait.h +143 -0
  356. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait_parity.h +144 -0
  357. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait.h +286 -0
  358. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait_parity.h +290 -0
  359. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_ld_reduce.h +2202 -0
  360. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_red.h +1362 -0
  361. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_st.h +236 -0
  362. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/prmt.h +230 -0
  363. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/red_async.h +460 -0
  364. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shl.h +96 -0
  365. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shr.h +168 -0
  366. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st.h +1490 -0
  367. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_async.h +123 -0
  368. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_bulk.h +31 -0
  369. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_alloc.h +132 -0
  370. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_commit.h +99 -0
  371. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_cp.h +765 -0
  372. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_fence.h +58 -0
  373. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_ld.h +4927 -0
  374. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma.h +4291 -0
  375. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma_ws.h +7110 -0
  376. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_shift.h +42 -0
  377. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_st.h +5063 -0
  378. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_wait.h +56 -0
  379. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_cp_fenceproxy.h +71 -0
  380. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_replace.h +1030 -0
  381. cuda/cccl/headers/include/cuda/__ptx/instructions/generated/trap.h +25 -0
  382. cuda/cccl/headers/include/cuda/__ptx/instructions/get_sreg.h +43 -0
  383. cuda/cccl/headers/include/cuda/__ptx/instructions/getctarank.h +43 -0
  384. cuda/cccl/headers/include/cuda/__ptx/instructions/ld.h +41 -0
  385. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_arrive.h +45 -0
  386. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_expect_tx.h +41 -0
  387. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_init.h +43 -0
  388. cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_wait.h +46 -0
  389. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_ld_reduce.h +41 -0
  390. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_red.h +41 -0
  391. cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_st.h +41 -0
  392. cuda/cccl/headers/include/cuda/__ptx/instructions/prmt.h +41 -0
  393. cuda/cccl/headers/include/cuda/__ptx/instructions/red_async.h +43 -0
  394. cuda/cccl/headers/include/cuda/__ptx/instructions/shfl_sync.h +244 -0
  395. cuda/cccl/headers/include/cuda/__ptx/instructions/shl.h +41 -0
  396. cuda/cccl/headers/include/cuda/__ptx/instructions/shr.h +41 -0
  397. cuda/cccl/headers/include/cuda/__ptx/instructions/st.h +41 -0
  398. cuda/cccl/headers/include/cuda/__ptx/instructions/st_async.h +43 -0
  399. cuda/cccl/headers/include/cuda/__ptx/instructions/st_bulk.h +41 -0
  400. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_alloc.h +41 -0
  401. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_commit.h +41 -0
  402. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_cp.h +41 -0
  403. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_fence.h +41 -0
  404. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_ld.h +41 -0
  405. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma.h +41 -0
  406. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma_ws.h +41 -0
  407. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_shift.h +41 -0
  408. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_st.h +41 -0
  409. cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_wait.h +41 -0
  410. cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_cp_fenceproxy.h +43 -0
  411. cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_replace.h +43 -0
  412. cuda/cccl/headers/include/cuda/__ptx/instructions/trap.h +41 -0
  413. cuda/cccl/headers/include/cuda/__ptx/pragmas/enable_smem_spilling.h +47 -0
  414. cuda/cccl/headers/include/cuda/__ptx/ptx_dot_variants.h +230 -0
  415. cuda/cccl/headers/include/cuda/__ptx/ptx_helper_functions.h +176 -0
  416. cuda/cccl/headers/include/cuda/__random/feistel_bijection.h +105 -0
  417. cuda/cccl/headers/include/cuda/__random/random_bijection.h +88 -0
  418. cuda/cccl/headers/include/cuda/__runtime/ensure_current_context.h +99 -0
  419. cuda/cccl/headers/include/cuda/__runtime/types.h +41 -0
  420. cuda/cccl/headers/include/cuda/__semaphore/counting_semaphore.h +53 -0
  421. cuda/cccl/headers/include/cuda/__stream/get_stream.h +110 -0
  422. cuda/cccl/headers/include/cuda/__stream/stream.h +141 -0
  423. cuda/cccl/headers/include/cuda/__stream/stream_ref.h +303 -0
  424. cuda/cccl/headers/include/cuda/__type_traits/is_floating_point.h +47 -0
  425. cuda/cccl/headers/include/cuda/__type_traits/is_specialization_of.h +37 -0
  426. cuda/cccl/headers/include/cuda/__utility/__basic_any/access.h +88 -0
  427. cuda/cccl/headers/include/cuda/__utility/__basic_any/any_cast.h +83 -0
  428. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_base.h +148 -0
  429. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_from.h +96 -0
  430. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_fwd.h +128 -0
  431. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_ptr.h +304 -0
  432. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_ref.h +337 -0
  433. cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_value.h +590 -0
  434. cuda/cccl/headers/include/cuda/__utility/__basic_any/conversions.h +169 -0
  435. cuda/cccl/headers/include/cuda/__utility/__basic_any/dynamic_any_cast.h +107 -0
  436. cuda/cccl/headers/include/cuda/__utility/__basic_any/interfaces.h +359 -0
  437. cuda/cccl/headers/include/cuda/__utility/__basic_any/iset.h +142 -0
  438. cuda/cccl/headers/include/cuda/__utility/__basic_any/overrides.h +64 -0
  439. cuda/cccl/headers/include/cuda/__utility/__basic_any/rtti.h +257 -0
  440. cuda/cccl/headers/include/cuda/__utility/__basic_any/semiregular.h +322 -0
  441. cuda/cccl/headers/include/cuda/__utility/__basic_any/storage.h +79 -0
  442. cuda/cccl/headers/include/cuda/__utility/__basic_any/tagged_ptr.h +58 -0
  443. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtcall.h +162 -0
  444. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_functions.h +184 -0
  445. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_ptrs.h +80 -0
  446. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_tables.h +155 -0
  447. cuda/cccl/headers/include/cuda/__utility/basic_any.h +507 -0
  448. cuda/cccl/headers/include/cuda/__utility/immovable.h +50 -0
  449. cuda/cccl/headers/include/cuda/__utility/in_range.h +65 -0
  450. cuda/cccl/headers/include/cuda/__utility/inherit.h +36 -0
  451. cuda/cccl/headers/include/cuda/__utility/no_init.h +29 -0
  452. cuda/cccl/headers/include/cuda/__utility/static_for.h +79 -0
  453. cuda/cccl/headers/include/cuda/__warp/lane_mask.h +326 -0
  454. cuda/cccl/headers/include/cuda/__warp/warp_match_all.h +65 -0
  455. cuda/cccl/headers/include/cuda/__warp/warp_shuffle.h +251 -0
  456. cuda/cccl/headers/include/cuda/access_property +26 -0
  457. cuda/cccl/headers/include/cuda/algorithm +27 -0
  458. cuda/cccl/headers/include/cuda/annotated_ptr +29 -0
  459. cuda/cccl/headers/include/cuda/atomic +27 -0
  460. cuda/cccl/headers/include/cuda/barrier +267 -0
  461. cuda/cccl/headers/include/cuda/bit +29 -0
  462. cuda/cccl/headers/include/cuda/cmath +37 -0
  463. cuda/cccl/headers/include/cuda/devices +33 -0
  464. cuda/cccl/headers/include/cuda/discard_memory +32 -0
  465. cuda/cccl/headers/include/cuda/functional +32 -0
  466. cuda/cccl/headers/include/cuda/iterator +39 -0
  467. cuda/cccl/headers/include/cuda/latch +27 -0
  468. cuda/cccl/headers/include/cuda/mdspan +28 -0
  469. cuda/cccl/headers/include/cuda/memory +35 -0
  470. cuda/cccl/headers/include/cuda/memory_resource +35 -0
  471. cuda/cccl/headers/include/cuda/numeric +29 -0
  472. cuda/cccl/headers/include/cuda/pipeline +579 -0
  473. cuda/cccl/headers/include/cuda/ptx +129 -0
  474. cuda/cccl/headers/include/cuda/semaphore +31 -0
  475. cuda/cccl/headers/include/cuda/std/__algorithm/adjacent_find.h +59 -0
  476. cuda/cccl/headers/include/cuda/std/__algorithm/all_of.h +45 -0
  477. cuda/cccl/headers/include/cuda/std/__algorithm/any_of.h +45 -0
  478. cuda/cccl/headers/include/cuda/std/__algorithm/binary_search.h +53 -0
  479. cuda/cccl/headers/include/cuda/std/__algorithm/clamp.h +48 -0
  480. cuda/cccl/headers/include/cuda/std/__algorithm/comp.h +58 -0
  481. cuda/cccl/headers/include/cuda/std/__algorithm/comp_ref_type.h +85 -0
  482. cuda/cccl/headers/include/cuda/std/__algorithm/copy.h +142 -0
  483. cuda/cccl/headers/include/cuda/std/__algorithm/copy_backward.h +80 -0
  484. cuda/cccl/headers/include/cuda/std/__algorithm/copy_if.h +47 -0
  485. cuda/cccl/headers/include/cuda/std/__algorithm/copy_n.h +73 -0
  486. cuda/cccl/headers/include/cuda/std/__algorithm/count.h +49 -0
  487. cuda/cccl/headers/include/cuda/std/__algorithm/count_if.h +49 -0
  488. cuda/cccl/headers/include/cuda/std/__algorithm/equal.h +128 -0
  489. cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +101 -0
  490. cuda/cccl/headers/include/cuda/std/__algorithm/fill.h +58 -0
  491. cuda/cccl/headers/include/cuda/std/__algorithm/fill_n.h +51 -0
  492. cuda/cccl/headers/include/cuda/std/__algorithm/find.h +62 -0
  493. cuda/cccl/headers/include/cuda/std/__algorithm/find_end.h +225 -0
  494. cuda/cccl/headers/include/cuda/std/__algorithm/find_first_of.h +73 -0
  495. cuda/cccl/headers/include/cuda/std/__algorithm/find_if.h +46 -0
  496. cuda/cccl/headers/include/cuda/std/__algorithm/find_if_not.h +46 -0
  497. cuda/cccl/headers/include/cuda/std/__algorithm/for_each.h +42 -0
  498. cuda/cccl/headers/include/cuda/std/__algorithm/for_each_n.h +48 -0
  499. cuda/cccl/headers/include/cuda/std/__algorithm/generate.h +41 -0
  500. cuda/cccl/headers/include/cuda/std/__algorithm/generate_n.h +46 -0
  501. cuda/cccl/headers/include/cuda/std/__algorithm/half_positive.h +49 -0
  502. cuda/cccl/headers/include/cuda/std/__algorithm/in_fun_result.h +55 -0
  503. cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +90 -0
  504. cuda/cccl/headers/include/cuda/std/__algorithm/is_heap.h +50 -0
  505. cuda/cccl/headers/include/cuda/std/__algorithm/is_heap_until.h +83 -0
  506. cuda/cccl/headers/include/cuda/std/__algorithm/is_partitioned.h +57 -0
  507. cuda/cccl/headers/include/cuda/std/__algorithm/is_permutation.h +252 -0
  508. cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted.h +49 -0
  509. cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted_until.h +68 -0
  510. cuda/cccl/headers/include/cuda/std/__algorithm/iter_swap.h +82 -0
  511. cuda/cccl/headers/include/cuda/std/__algorithm/iterator_operations.h +185 -0
  512. cuda/cccl/headers/include/cuda/std/__algorithm/lexicographical_compare.h +68 -0
  513. cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +82 -0
  514. cuda/cccl/headers/include/cuda/std/__algorithm/make_heap.h +70 -0
  515. cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +88 -0
  516. cuda/cccl/headers/include/cuda/std/__algorithm/max.h +62 -0
  517. cuda/cccl/headers/include/cuda/std/__algorithm/max_element.h +67 -0
  518. cuda/cccl/headers/include/cuda/std/__algorithm/merge.h +89 -0
  519. cuda/cccl/headers/include/cuda/std/__algorithm/min.h +62 -0
  520. cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +87 -0
  521. cuda/cccl/headers/include/cuda/std/__algorithm/minmax.h +66 -0
  522. cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +139 -0
  523. cuda/cccl/headers/include/cuda/std/__algorithm/mismatch.h +83 -0
  524. cuda/cccl/headers/include/cuda/std/__algorithm/move.h +86 -0
  525. cuda/cccl/headers/include/cuda/std/__algorithm/move_backward.h +84 -0
  526. cuda/cccl/headers/include/cuda/std/__algorithm/next_permutation.h +88 -0
  527. cuda/cccl/headers/include/cuda/std/__algorithm/none_of.h +45 -0
  528. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort.h +102 -0
  529. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +122 -0
  530. cuda/cccl/headers/include/cuda/std/__algorithm/partition.h +120 -0
  531. cuda/cccl/headers/include/cuda/std/__algorithm/partition_copy.h +59 -0
  532. cuda/cccl/headers/include/cuda/std/__algorithm/partition_point.h +61 -0
  533. cuda/cccl/headers/include/cuda/std/__algorithm/pop_heap.h +93 -0
  534. cuda/cccl/headers/include/cuda/std/__algorithm/prev_permutation.h +88 -0
  535. cuda/cccl/headers/include/cuda/std/__algorithm/push_heap.h +100 -0
  536. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_for_each.h +84 -0
  537. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_for_each_n.h +68 -0
  538. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_iterator_concept.h +65 -0
  539. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min.h +98 -0
  540. cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min_element.h +68 -0
  541. cuda/cccl/headers/include/cuda/std/__algorithm/remove.h +55 -0
  542. cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy.h +47 -0
  543. cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy_if.h +47 -0
  544. cuda/cccl/headers/include/cuda/std/__algorithm/remove_if.h +56 -0
  545. cuda/cccl/headers/include/cuda/std/__algorithm/replace.h +45 -0
  546. cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy.h +54 -0
  547. cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy_if.h +50 -0
  548. cuda/cccl/headers/include/cuda/std/__algorithm/replace_if.h +45 -0
  549. cuda/cccl/headers/include/cuda/std/__algorithm/reverse.h +81 -0
  550. cuda/cccl/headers/include/cuda/std/__algorithm/reverse_copy.h +43 -0
  551. cuda/cccl/headers/include/cuda/std/__algorithm/rotate.h +261 -0
  552. cuda/cccl/headers/include/cuda/std/__algorithm/rotate_copy.h +40 -0
  553. cuda/cccl/headers/include/cuda/std/__algorithm/search.h +185 -0
  554. cuda/cccl/headers/include/cuda/std/__algorithm/search_n.h +163 -0
  555. cuda/cccl/headers/include/cuda/std/__algorithm/set_difference.h +95 -0
  556. cuda/cccl/headers/include/cuda/std/__algorithm/set_intersection.h +122 -0
  557. cuda/cccl/headers/include/cuda/std/__algorithm/set_symmetric_difference.h +134 -0
  558. cuda/cccl/headers/include/cuda/std/__algorithm/set_union.h +128 -0
  559. cuda/cccl/headers/include/cuda/std/__algorithm/shift_left.h +84 -0
  560. cuda/cccl/headers/include/cuda/std/__algorithm/shift_right.h +144 -0
  561. cuda/cccl/headers/include/cuda/std/__algorithm/sift_down.h +139 -0
  562. cuda/cccl/headers/include/cuda/std/__algorithm/sort_heap.h +70 -0
  563. cuda/cccl/headers/include/cuda/std/__algorithm/swap_ranges.h +78 -0
  564. cuda/cccl/headers/include/cuda/std/__algorithm/transform.h +59 -0
  565. cuda/cccl/headers/include/cuda/std/__algorithm/unique.h +76 -0
  566. cuda/cccl/headers/include/cuda/std/__algorithm/unique_copy.h +155 -0
  567. cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_iter.h +95 -0
  568. cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_range.h +126 -0
  569. cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +83 -0
  570. cuda/cccl/headers/include/cuda/std/__algorithm_ +26 -0
  571. cuda/cccl/headers/include/cuda/std/__atomic/api/common.h +192 -0
  572. cuda/cccl/headers/include/cuda/std/__atomic/api/owned.h +136 -0
  573. cuda/cccl/headers/include/cuda/std/__atomic/api/reference.h +118 -0
  574. cuda/cccl/headers/include/cuda/std/__atomic/functions/common.h +58 -0
  575. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_local.h +208 -0
  576. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_derived.h +401 -0
  577. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated.h +3971 -0
  578. cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated_helper.h +177 -0
  579. cuda/cccl/headers/include/cuda/std/__atomic/functions/host.h +211 -0
  580. cuda/cccl/headers/include/cuda/std/__atomic/functions.h +33 -0
  581. cuda/cccl/headers/include/cuda/std/__atomic/order.h +159 -0
  582. cuda/cccl/headers/include/cuda/std/__atomic/platform/msvc_to_builtins.h +654 -0
  583. cuda/cccl/headers/include/cuda/std/__atomic/platform.h +93 -0
  584. cuda/cccl/headers/include/cuda/std/__atomic/scopes.h +105 -0
  585. cuda/cccl/headers/include/cuda/std/__atomic/types/base.h +249 -0
  586. cuda/cccl/headers/include/cuda/std/__atomic/types/common.h +104 -0
  587. cuda/cccl/headers/include/cuda/std/__atomic/types/locked.h +225 -0
  588. cuda/cccl/headers/include/cuda/std/__atomic/types/reference.h +72 -0
  589. cuda/cccl/headers/include/cuda/std/__atomic/types/small.h +228 -0
  590. cuda/cccl/headers/include/cuda/std/__atomic/types.h +52 -0
  591. cuda/cccl/headers/include/cuda/std/__atomic/wait/notify_wait.h +95 -0
  592. cuda/cccl/headers/include/cuda/std/__atomic/wait/polling.h +65 -0
  593. cuda/cccl/headers/include/cuda/std/__barrier/barrier.h +227 -0
  594. cuda/cccl/headers/include/cuda/std/__barrier/empty_completion.h +37 -0
  595. cuda/cccl/headers/include/cuda/std/__barrier/poll_tester.h +82 -0
  596. cuda/cccl/headers/include/cuda/std/__bit/bit_cast.h +76 -0
  597. cuda/cccl/headers/include/cuda/std/__bit/byteswap.h +185 -0
  598. cuda/cccl/headers/include/cuda/std/__bit/countl.h +174 -0
  599. cuda/cccl/headers/include/cuda/std/__bit/countr.h +185 -0
  600. cuda/cccl/headers/include/cuda/std/__bit/endian.h +39 -0
  601. cuda/cccl/headers/include/cuda/std/__bit/has_single_bit.h +43 -0
  602. cuda/cccl/headers/include/cuda/std/__bit/integral.h +126 -0
  603. cuda/cccl/headers/include/cuda/std/__bit/popcount.h +154 -0
  604. cuda/cccl/headers/include/cuda/std/__bit/reference.h +1272 -0
  605. cuda/cccl/headers/include/cuda/std/__bit/rotate.h +94 -0
  606. cuda/cccl/headers/include/cuda/std/__cccl/algorithm_wrapper.h +36 -0
  607. cuda/cccl/headers/include/cuda/std/__cccl/architecture.h +78 -0
  608. cuda/cccl/headers/include/cuda/std/__cccl/assert.h +161 -0
  609. cuda/cccl/headers/include/cuda/std/__cccl/attributes.h +206 -0
  610. cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +673 -0
  611. cuda/cccl/headers/include/cuda/std/__cccl/compiler.h +217 -0
  612. cuda/cccl/headers/include/cuda/std/__cccl/cuda_capabilities.h +51 -0
  613. cuda/cccl/headers/include/cuda/std/__cccl/cuda_toolkit.h +56 -0
  614. cuda/cccl/headers/include/cuda/std/__cccl/deprecated.h +88 -0
  615. cuda/cccl/headers/include/cuda/std/__cccl/diagnostic.h +131 -0
  616. cuda/cccl/headers/include/cuda/std/__cccl/dialect.h +123 -0
  617. cuda/cccl/headers/include/cuda/std/__cccl/epilogue.h +344 -0
  618. cuda/cccl/headers/include/cuda/std/__cccl/exceptions.h +91 -0
  619. cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +74 -0
  620. cuda/cccl/headers/include/cuda/std/__cccl/extended_data_types.h +160 -0
  621. cuda/cccl/headers/include/cuda/std/__cccl/host_std_lib.h +52 -0
  622. cuda/cccl/headers/include/cuda/std/__cccl/is_non_narrowing_convertible.h +73 -0
  623. cuda/cccl/headers/include/cuda/std/__cccl/memory_wrapper.h +36 -0
  624. cuda/cccl/headers/include/cuda/std/__cccl/numeric_wrapper.h +36 -0
  625. cuda/cccl/headers/include/cuda/std/__cccl/os.h +54 -0
  626. cuda/cccl/headers/include/cuda/std/__cccl/preprocessor.h +1286 -0
  627. cuda/cccl/headers/include/cuda/std/__cccl/prologue.h +281 -0
  628. cuda/cccl/headers/include/cuda/std/__cccl/ptx_isa.h +253 -0
  629. cuda/cccl/headers/include/cuda/std/__cccl/rtti.h +72 -0
  630. cuda/cccl/headers/include/cuda/std/__cccl/sequence_access.h +87 -0
  631. cuda/cccl/headers/include/cuda/std/__cccl/system_header.h +38 -0
  632. cuda/cccl/headers/include/cuda/std/__cccl/unreachable.h +31 -0
  633. cuda/cccl/headers/include/cuda/std/__cccl/version.h +26 -0
  634. cuda/cccl/headers/include/cuda/std/__cccl/visibility.h +171 -0
  635. cuda/cccl/headers/include/cuda/std/__charconv/chars_format.h +81 -0
  636. cuda/cccl/headers/include/cuda/std/__charconv/from_chars.h +154 -0
  637. cuda/cccl/headers/include/cuda/std/__charconv/from_chars_result.h +56 -0
  638. cuda/cccl/headers/include/cuda/std/__charconv/to_chars.h +148 -0
  639. cuda/cccl/headers/include/cuda/std/__charconv/to_chars_result.h +56 -0
  640. cuda/cccl/headers/include/cuda/std/__charconv_ +31 -0
  641. cuda/cccl/headers/include/cuda/std/__chrono/calendar.h +54 -0
  642. cuda/cccl/headers/include/cuda/std/__chrono/day.h +162 -0
  643. cuda/cccl/headers/include/cuda/std/__chrono/duration.h +503 -0
  644. cuda/cccl/headers/include/cuda/std/__chrono/file_clock.h +55 -0
  645. cuda/cccl/headers/include/cuda/std/__chrono/high_resolution_clock.h +46 -0
  646. cuda/cccl/headers/include/cuda/std/__chrono/month.h +187 -0
  647. cuda/cccl/headers/include/cuda/std/__chrono/steady_clock.h +60 -0
  648. cuda/cccl/headers/include/cuda/std/__chrono/system_clock.h +80 -0
  649. cuda/cccl/headers/include/cuda/std/__chrono/time_point.h +259 -0
  650. cuda/cccl/headers/include/cuda/std/__chrono/year.h +186 -0
  651. cuda/cccl/headers/include/cuda/std/__cmath/abs.h +127 -0
  652. cuda/cccl/headers/include/cuda/std/__cmath/copysign.h +88 -0
  653. cuda/cccl/headers/include/cuda/std/__cmath/error_functions.h +200 -0
  654. cuda/cccl/headers/include/cuda/std/__cmath/exponential_functions.h +784 -0
  655. cuda/cccl/headers/include/cuda/std/__cmath/fdim.h +118 -0
  656. cuda/cccl/headers/include/cuda/std/__cmath/fma.h +125 -0
  657. cuda/cccl/headers/include/cuda/std/__cmath/fpclassify.h +231 -0
  658. cuda/cccl/headers/include/cuda/std/__cmath/gamma.h +205 -0
  659. cuda/cccl/headers/include/cuda/std/__cmath/hyperbolic_functions.h +286 -0
  660. cuda/cccl/headers/include/cuda/std/__cmath/hypot.h +221 -0
  661. cuda/cccl/headers/include/cuda/std/__cmath/inverse_hyperbolic_functions.h +286 -0
  662. cuda/cccl/headers/include/cuda/std/__cmath/inverse_trigonometric_functions.h +371 -0
  663. cuda/cccl/headers/include/cuda/std/__cmath/isfinite.h +167 -0
  664. cuda/cccl/headers/include/cuda/std/__cmath/isinf.h +205 -0
  665. cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +186 -0
  666. cuda/cccl/headers/include/cuda/std/__cmath/isnormal.h +138 -0
  667. cuda/cccl/headers/include/cuda/std/__cmath/lerp.h +101 -0
  668. cuda/cccl/headers/include/cuda/std/__cmath/logarithms.h +534 -0
  669. cuda/cccl/headers/include/cuda/std/__cmath/min_max.h +287 -0
  670. cuda/cccl/headers/include/cuda/std/__cmath/modulo.h +208 -0
  671. cuda/cccl/headers/include/cuda/std/__cmath/nan.h +54 -0
  672. cuda/cccl/headers/include/cuda/std/__cmath/remainder.h +206 -0
  673. cuda/cccl/headers/include/cuda/std/__cmath/roots.h +199 -0
  674. cuda/cccl/headers/include/cuda/std/__cmath/rounding_functions.h +984 -0
  675. cuda/cccl/headers/include/cuda/std/__cmath/signbit.h +56 -0
  676. cuda/cccl/headers/include/cuda/std/__cmath/traits.h +238 -0
  677. cuda/cccl/headers/include/cuda/std/__cmath/trigonometric_functions.h +328 -0
  678. cuda/cccl/headers/include/cuda/std/__complex/arg.h +84 -0
  679. cuda/cccl/headers/include/cuda/std/__complex/complex.h +669 -0
  680. cuda/cccl/headers/include/cuda/std/__complex/exponential_functions.h +411 -0
  681. cuda/cccl/headers/include/cuda/std/__complex/hyperbolic_functions.h +117 -0
  682. cuda/cccl/headers/include/cuda/std/__complex/inverse_hyperbolic_functions.h +216 -0
  683. cuda/cccl/headers/include/cuda/std/__complex/inverse_trigonometric_functions.h +131 -0
  684. cuda/cccl/headers/include/cuda/std/__complex/literals.h +86 -0
  685. cuda/cccl/headers/include/cuda/std/__complex/logarithms.h +303 -0
  686. cuda/cccl/headers/include/cuda/std/__complex/math.h +159 -0
  687. cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +323 -0
  688. cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +322 -0
  689. cuda/cccl/headers/include/cuda/std/__complex/roots.h +214 -0
  690. cuda/cccl/headers/include/cuda/std/__complex/trigonometric_functions.h +61 -0
  691. cuda/cccl/headers/include/cuda/std/__complex/tuple.h +107 -0
  692. cuda/cccl/headers/include/cuda/std/__complex/vector_support.h +130 -0
  693. cuda/cccl/headers/include/cuda/std/__concepts/arithmetic.h +56 -0
  694. cuda/cccl/headers/include/cuda/std/__concepts/assignable.h +64 -0
  695. cuda/cccl/headers/include/cuda/std/__concepts/boolean_testable.h +63 -0
  696. cuda/cccl/headers/include/cuda/std/__concepts/class_or_enum.h +45 -0
  697. cuda/cccl/headers/include/cuda/std/__concepts/common_reference_with.h +69 -0
  698. cuda/cccl/headers/include/cuda/std/__concepts/common_with.h +82 -0
  699. cuda/cccl/headers/include/cuda/std/__concepts/concept_macros.h +341 -0
  700. cuda/cccl/headers/include/cuda/std/__concepts/constructible.h +174 -0
  701. cuda/cccl/headers/include/cuda/std/__concepts/convertible_to.h +70 -0
  702. cuda/cccl/headers/include/cuda/std/__concepts/copyable.h +60 -0
  703. cuda/cccl/headers/include/cuda/std/__concepts/derived_from.h +56 -0
  704. cuda/cccl/headers/include/cuda/std/__concepts/destructible.h +76 -0
  705. cuda/cccl/headers/include/cuda/std/__concepts/different_from.h +38 -0
  706. cuda/cccl/headers/include/cuda/std/__concepts/equality_comparable.h +100 -0
  707. cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +80 -0
  708. cuda/cccl/headers/include/cuda/std/__concepts/movable.h +58 -0
  709. cuda/cccl/headers/include/cuda/std/__concepts/predicate.h +54 -0
  710. cuda/cccl/headers/include/cuda/std/__concepts/regular.h +54 -0
  711. cuda/cccl/headers/include/cuda/std/__concepts/relation.h +77 -0
  712. cuda/cccl/headers/include/cuda/std/__concepts/same_as.h +39 -0
  713. cuda/cccl/headers/include/cuda/std/__concepts/semiregular.h +54 -0
  714. cuda/cccl/headers/include/cuda/std/__concepts/swappable.h +206 -0
  715. cuda/cccl/headers/include/cuda/std/__concepts/totally_ordered.h +101 -0
  716. cuda/cccl/headers/include/cuda/std/__cstddef/byte.h +113 -0
  717. cuda/cccl/headers/include/cuda/std/__cstddef/types.h +52 -0
  718. cuda/cccl/headers/include/cuda/std/__cstdlib/abs.h +57 -0
  719. cuda/cccl/headers/include/cuda/std/__cstdlib/aligned_alloc.h +66 -0
  720. cuda/cccl/headers/include/cuda/std/__cstdlib/div.h +96 -0
  721. cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +70 -0
  722. cuda/cccl/headers/include/cuda/std/__cstring/memcpy.h +61 -0
  723. cuda/cccl/headers/include/cuda/std/__cstring/memset.h +46 -0
  724. cuda/cccl/headers/include/cuda/std/__cuda/api_wrapper.h +62 -0
  725. cuda/cccl/headers/include/cuda/std/__exception/cuda_error.h +139 -0
  726. cuda/cccl/headers/include/cuda/std/__exception/terminate.h +73 -0
  727. cuda/cccl/headers/include/cuda/std/__execution/env.h +455 -0
  728. cuda/cccl/headers/include/cuda/std/__execution/policy.h +88 -0
  729. cuda/cccl/headers/include/cuda/std/__expected/bad_expected_access.h +127 -0
  730. cuda/cccl/headers/include/cuda/std/__expected/expected.h +1941 -0
  731. cuda/cccl/headers/include/cuda/std/__expected/expected_base.h +1050 -0
  732. cuda/cccl/headers/include/cuda/std/__expected/unexpect.h +37 -0
  733. cuda/cccl/headers/include/cuda/std/__expected/unexpected.h +165 -0
  734. cuda/cccl/headers/include/cuda/std/__floating_point/arithmetic.h +56 -0
  735. cuda/cccl/headers/include/cuda/std/__floating_point/cast.h +812 -0
  736. cuda/cccl/headers/include/cuda/std/__floating_point/cccl_fp.h +125 -0
  737. cuda/cccl/headers/include/cuda/std/__floating_point/common_type.h +48 -0
  738. cuda/cccl/headers/include/cuda/std/__floating_point/constants.h +376 -0
  739. cuda/cccl/headers/include/cuda/std/__floating_point/conversion_rank_order.h +124 -0
  740. cuda/cccl/headers/include/cuda/std/__floating_point/cuda_fp_types.h +116 -0
  741. cuda/cccl/headers/include/cuda/std/__floating_point/decompose.h +69 -0
  742. cuda/cccl/headers/include/cuda/std/__floating_point/format.h +162 -0
  743. cuda/cccl/headers/include/cuda/std/__floating_point/fp.h +40 -0
  744. cuda/cccl/headers/include/cuda/std/__floating_point/mask.h +78 -0
  745. cuda/cccl/headers/include/cuda/std/__floating_point/native_type.h +81 -0
  746. cuda/cccl/headers/include/cuda/std/__floating_point/overflow_handler.h +139 -0
  747. cuda/cccl/headers/include/cuda/std/__floating_point/properties.h +229 -0
  748. cuda/cccl/headers/include/cuda/std/__floating_point/storage.h +248 -0
  749. cuda/cccl/headers/include/cuda/std/__floating_point/traits.h +172 -0
  750. cuda/cccl/headers/include/cuda/std/__format/buffer.h +48 -0
  751. cuda/cccl/headers/include/cuda/std/__format/concepts.h +69 -0
  752. cuda/cccl/headers/include/cuda/std/__format/format_arg.h +282 -0
  753. cuda/cccl/headers/include/cuda/std/__format/format_arg_store.h +279 -0
  754. cuda/cccl/headers/include/cuda/std/__format/format_args.h +122 -0
  755. cuda/cccl/headers/include/cuda/std/__format/format_context.h +92 -0
  756. cuda/cccl/headers/include/cuda/std/__format/format_error.h +76 -0
  757. cuda/cccl/headers/include/cuda/std/__format/format_integral.h +237 -0
  758. cuda/cccl/headers/include/cuda/std/__format/format_parse_context.h +124 -0
  759. cuda/cccl/headers/include/cuda/std/__format/format_spec_parser.h +1230 -0
  760. cuda/cccl/headers/include/cuda/std/__format/formatter.h +59 -0
  761. cuda/cccl/headers/include/cuda/std/__format/formatters/bool.h +101 -0
  762. cuda/cccl/headers/include/cuda/std/__format/formatters/char.h +124 -0
  763. cuda/cccl/headers/include/cuda/std/__format/formatters/fp.h +101 -0
  764. cuda/cccl/headers/include/cuda/std/__format/formatters/int.h +174 -0
  765. cuda/cccl/headers/include/cuda/std/__format/formatters/ptr.h +104 -0
  766. cuda/cccl/headers/include/cuda/std/__format/formatters/str.h +178 -0
  767. cuda/cccl/headers/include/cuda/std/__format/output_utils.h +272 -0
  768. cuda/cccl/headers/include/cuda/std/__format/parse_arg_id.h +138 -0
  769. cuda/cccl/headers/include/cuda/std/__format_ +45 -0
  770. cuda/cccl/headers/include/cuda/std/__functional/binary_function.h +63 -0
  771. cuda/cccl/headers/include/cuda/std/__functional/binary_negate.h +65 -0
  772. cuda/cccl/headers/include/cuda/std/__functional/bind.h +334 -0
  773. cuda/cccl/headers/include/cuda/std/__functional/bind_back.h +80 -0
  774. cuda/cccl/headers/include/cuda/std/__functional/bind_front.h +73 -0
  775. cuda/cccl/headers/include/cuda/std/__functional/binder1st.h +74 -0
  776. cuda/cccl/headers/include/cuda/std/__functional/binder2nd.h +74 -0
  777. cuda/cccl/headers/include/cuda/std/__functional/compose.h +68 -0
  778. cuda/cccl/headers/include/cuda/std/__functional/default_searcher.h +75 -0
  779. cuda/cccl/headers/include/cuda/std/__functional/function.h +1275 -0
  780. cuda/cccl/headers/include/cuda/std/__functional/hash.h +649 -0
  781. cuda/cccl/headers/include/cuda/std/__functional/identity.h +57 -0
  782. cuda/cccl/headers/include/cuda/std/__functional/invoke.h +296 -0
  783. cuda/cccl/headers/include/cuda/std/__functional/is_transparent.h +41 -0
  784. cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +66 -0
  785. cuda/cccl/headers/include/cuda/std/__functional/mem_fun_ref.h +211 -0
  786. cuda/cccl/headers/include/cuda/std/__functional/not_fn.h +120 -0
  787. cuda/cccl/headers/include/cuda/std/__functional/operations.h +534 -0
  788. cuda/cccl/headers/include/cuda/std/__functional/perfect_forward.h +128 -0
  789. cuda/cccl/headers/include/cuda/std/__functional/pointer_to_binary_function.h +64 -0
  790. cuda/cccl/headers/include/cuda/std/__functional/pointer_to_unary_function.h +63 -0
  791. cuda/cccl/headers/include/cuda/std/__functional/ranges_operations.h +113 -0
  792. cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +113 -0
  793. cuda/cccl/headers/include/cuda/std/__functional/unary_function.h +62 -0
  794. cuda/cccl/headers/include/cuda/std/__functional/unary_negate.h +65 -0
  795. cuda/cccl/headers/include/cuda/std/__functional/unwrap_ref.h +56 -0
  796. cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +262 -0
  797. cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +53 -0
  798. cuda/cccl/headers/include/cuda/std/__fwd/array.h +42 -0
  799. cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +74 -0
  800. cuda/cccl/headers/include/cuda/std/__fwd/complex.h +75 -0
  801. cuda/cccl/headers/include/cuda/std/__fwd/expected.h +46 -0
  802. cuda/cccl/headers/include/cuda/std/__fwd/format.h +84 -0
  803. cuda/cccl/headers/include/cuda/std/__fwd/fp.h +37 -0
  804. cuda/cccl/headers/include/cuda/std/__fwd/get.h +123 -0
  805. cuda/cccl/headers/include/cuda/std/__fwd/hash.h +34 -0
  806. cuda/cccl/headers/include/cuda/std/__fwd/iterator.h +43 -0
  807. cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +122 -0
  808. cuda/cccl/headers/include/cuda/std/__fwd/memory_resource.h +37 -0
  809. cuda/cccl/headers/include/cuda/std/__fwd/optional.h +39 -0
  810. cuda/cccl/headers/include/cuda/std/__fwd/pair.h +47 -0
  811. cuda/cccl/headers/include/cuda/std/__fwd/reference_wrapper.h +34 -0
  812. cuda/cccl/headers/include/cuda/std/__fwd/span.h +45 -0
  813. cuda/cccl/headers/include/cuda/std/__fwd/string.h +112 -0
  814. cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +91 -0
  815. cuda/cccl/headers/include/cuda/std/__fwd/subrange.h +55 -0
  816. cuda/cccl/headers/include/cuda/std/__fwd/tuple.h +34 -0
  817. cuda/cccl/headers/include/cuda/std/__fwd/unexpected.h +40 -0
  818. cuda/cccl/headers/include/cuda/std/__internal/cpp_dialect.h +44 -0
  819. cuda/cccl/headers/include/cuda/std/__internal/features.h +72 -0
  820. cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +143 -0
  821. cuda/cccl/headers/include/cuda/std/__iterator/access.h +128 -0
  822. cuda/cccl/headers/include/cuda/std/__iterator/advance.h +228 -0
  823. cuda/cccl/headers/include/cuda/std/__iterator/back_insert_iterator.h +163 -0
  824. cuda/cccl/headers/include/cuda/std/__iterator/bounded_iter.h +253 -0
  825. cuda/cccl/headers/include/cuda/std/__iterator/concepts.h +645 -0
  826. cuda/cccl/headers/include/cuda/std/__iterator/counted_iterator.h +464 -0
  827. cuda/cccl/headers/include/cuda/std/__iterator/data.h +61 -0
  828. cuda/cccl/headers/include/cuda/std/__iterator/default_sentinel.h +36 -0
  829. cuda/cccl/headers/include/cuda/std/__iterator/distance.h +126 -0
  830. cuda/cccl/headers/include/cuda/std/__iterator/empty.h +53 -0
  831. cuda/cccl/headers/include/cuda/std/__iterator/erase_if_container.h +53 -0
  832. cuda/cccl/headers/include/cuda/std/__iterator/front_insert_iterator.h +99 -0
  833. cuda/cccl/headers/include/cuda/std/__iterator/incrementable_traits.h +143 -0
  834. cuda/cccl/headers/include/cuda/std/__iterator/indirectly_comparable.h +55 -0
  835. cuda/cccl/headers/include/cuda/std/__iterator/insert_iterator.h +107 -0
  836. cuda/cccl/headers/include/cuda/std/__iterator/istream_iterator.h +146 -0
  837. cuda/cccl/headers/include/cuda/std/__iterator/istreambuf_iterator.h +161 -0
  838. cuda/cccl/headers/include/cuda/std/__iterator/iter_move.h +161 -0
  839. cuda/cccl/headers/include/cuda/std/__iterator/iter_swap.h +163 -0
  840. cuda/cccl/headers/include/cuda/std/__iterator/iterator.h +44 -0
  841. cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +847 -0
  842. cuda/cccl/headers/include/cuda/std/__iterator/mergeable.h +72 -0
  843. cuda/cccl/headers/include/cuda/std/__iterator/move_iterator.h +432 -0
  844. cuda/cccl/headers/include/cuda/std/__iterator/move_sentinel.h +73 -0
  845. cuda/cccl/headers/include/cuda/std/__iterator/next.h +101 -0
  846. cuda/cccl/headers/include/cuda/std/__iterator/ostream_iterator.h +95 -0
  847. cuda/cccl/headers/include/cuda/std/__iterator/ostreambuf_iterator.h +100 -0
  848. cuda/cccl/headers/include/cuda/std/__iterator/permutable.h +54 -0
  849. cuda/cccl/headers/include/cuda/std/__iterator/prev.h +90 -0
  850. cuda/cccl/headers/include/cuda/std/__iterator/projected.h +61 -0
  851. cuda/cccl/headers/include/cuda/std/__iterator/readable_traits.h +156 -0
  852. cuda/cccl/headers/include/cuda/std/__iterator/reverse_access.h +142 -0
  853. cuda/cccl/headers/include/cuda/std/__iterator/reverse_iterator.h +371 -0
  854. cuda/cccl/headers/include/cuda/std/__iterator/size.h +69 -0
  855. cuda/cccl/headers/include/cuda/std/__iterator/sortable.h +55 -0
  856. cuda/cccl/headers/include/cuda/std/__iterator/unreachable_sentinel.h +84 -0
  857. cuda/cccl/headers/include/cuda/std/__iterator/wrap_iter.h +245 -0
  858. cuda/cccl/headers/include/cuda/std/__latch/latch.h +88 -0
  859. cuda/cccl/headers/include/cuda/std/__limits/numeric_limits.h +617 -0
  860. cuda/cccl/headers/include/cuda/std/__limits/numeric_limits_ext.h +753 -0
  861. cuda/cccl/headers/include/cuda/std/__linalg/conj_if_needed.h +78 -0
  862. cuda/cccl/headers/include/cuda/std/__linalg/conjugate_transposed.h +54 -0
  863. cuda/cccl/headers/include/cuda/std/__linalg/conjugated.h +139 -0
  864. cuda/cccl/headers/include/cuda/std/__linalg/scaled.h +132 -0
  865. cuda/cccl/headers/include/cuda/std/__linalg/transposed.h +321 -0
  866. cuda/cccl/headers/include/cuda/std/__mdspan/aligned_accessor.h +97 -0
  867. cuda/cccl/headers/include/cuda/std/__mdspan/concepts.h +139 -0
  868. cuda/cccl/headers/include/cuda/std/__mdspan/default_accessor.h +73 -0
  869. cuda/cccl/headers/include/cuda/std/__mdspan/empty_base.h +352 -0
  870. cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +759 -0
  871. cuda/cccl/headers/include/cuda/std/__mdspan/layout_left.h +314 -0
  872. cuda/cccl/headers/include/cuda/std/__mdspan/layout_right.h +307 -0
  873. cuda/cccl/headers/include/cuda/std/__mdspan/layout_stride.h +605 -0
  874. cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +512 -0
  875. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_extents.h +193 -0
  876. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_helper.h +189 -0
  877. cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_mapping.h +344 -0
  878. cuda/cccl/headers/include/cuda/std/__memory/addressof.h +67 -0
  879. cuda/cccl/headers/include/cuda/std/__memory/align.h +67 -0
  880. cuda/cccl/headers/include/cuda/std/__memory/allocate_at_least.h +81 -0
  881. cuda/cccl/headers/include/cuda/std/__memory/allocation_guard.h +100 -0
  882. cuda/cccl/headers/include/cuda/std/__memory/allocator.h +320 -0
  883. cuda/cccl/headers/include/cuda/std/__memory/allocator_arg_t.h +84 -0
  884. cuda/cccl/headers/include/cuda/std/__memory/allocator_destructor.h +59 -0
  885. cuda/cccl/headers/include/cuda/std/__memory/allocator_traits.h +525 -0
  886. cuda/cccl/headers/include/cuda/std/__memory/assume_aligned.h +60 -0
  887. cuda/cccl/headers/include/cuda/std/__memory/builtin_new_allocator.h +87 -0
  888. cuda/cccl/headers/include/cuda/std/__memory/compressed_pair.h +225 -0
  889. cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +246 -0
  890. cuda/cccl/headers/include/cuda/std/__memory/destruct_n.h +91 -0
  891. cuda/cccl/headers/include/cuda/std/__memory/is_sufficiently_aligned.h +46 -0
  892. cuda/cccl/headers/include/cuda/std/__memory/pointer_traits.h +246 -0
  893. cuda/cccl/headers/include/cuda/std/__memory/runtime_assume_aligned.h +62 -0
  894. cuda/cccl/headers/include/cuda/std/__memory/temporary_buffer.h +92 -0
  895. cuda/cccl/headers/include/cuda/std/__memory/uninitialized_algorithms.h +678 -0
  896. cuda/cccl/headers/include/cuda/std/__memory/unique_ptr.h +765 -0
  897. cuda/cccl/headers/include/cuda/std/__memory/uses_allocator.h +54 -0
  898. cuda/cccl/headers/include/cuda/std/__memory/voidify.h +41 -0
  899. cuda/cccl/headers/include/cuda/std/__memory_ +34 -0
  900. cuda/cccl/headers/include/cuda/std/__new/allocate.h +126 -0
  901. cuda/cccl/headers/include/cuda/std/__new/bad_alloc.h +57 -0
  902. cuda/cccl/headers/include/cuda/std/__new/launder.h +53 -0
  903. cuda/cccl/headers/include/cuda/std/__new_ +29 -0
  904. cuda/cccl/headers/include/cuda/std/__numeric/accumulate.h +56 -0
  905. cuda/cccl/headers/include/cuda/std/__numeric/adjacent_difference.h +72 -0
  906. cuda/cccl/headers/include/cuda/std/__numeric/exclusive_scan.h +66 -0
  907. cuda/cccl/headers/include/cuda/std/__numeric/gcd_lcm.h +78 -0
  908. cuda/cccl/headers/include/cuda/std/__numeric/inclusive_scan.h +73 -0
  909. cuda/cccl/headers/include/cuda/std/__numeric/inner_product.h +62 -0
  910. cuda/cccl/headers/include/cuda/std/__numeric/iota.h +42 -0
  911. cuda/cccl/headers/include/cuda/std/__numeric/midpoint.h +97 -0
  912. cuda/cccl/headers/include/cuda/std/__numeric/partial_sum.h +69 -0
  913. cuda/cccl/headers/include/cuda/std/__numeric/reduce.h +60 -0
  914. cuda/cccl/headers/include/cuda/std/__numeric/transform_exclusive_scan.h +51 -0
  915. cuda/cccl/headers/include/cuda/std/__numeric/transform_inclusive_scan.h +65 -0
  916. cuda/cccl/headers/include/cuda/std/__numeric/transform_reduce.h +72 -0
  917. cuda/cccl/headers/include/cuda/std/__optional/bad_optional_access.h +74 -0
  918. cuda/cccl/headers/include/cuda/std/__optional/hash.h +53 -0
  919. cuda/cccl/headers/include/cuda/std/__optional/make_optional.h +61 -0
  920. cuda/cccl/headers/include/cuda/std/__optional/nullopt.h +43 -0
  921. cuda/cccl/headers/include/cuda/std/__optional/optional.h +859 -0
  922. cuda/cccl/headers/include/cuda/std/__optional/optional_base.h +433 -0
  923. cuda/cccl/headers/include/cuda/std/__optional/optional_ref.h +324 -0
  924. cuda/cccl/headers/include/cuda/std/__random/generate_canonical.h +56 -0
  925. cuda/cccl/headers/include/cuda/std/__random/is_seed_sequence.h +39 -0
  926. cuda/cccl/headers/include/cuda/std/__random/is_valid.h +106 -0
  927. cuda/cccl/headers/include/cuda/std/__random/linear_congruential_engine.h +398 -0
  928. cuda/cccl/headers/include/cuda/std/__random/uniform_int_distribution.h +335 -0
  929. cuda/cccl/headers/include/cuda/std/__random/uniform_real_distribution.h +183 -0
  930. cuda/cccl/headers/include/cuda/std/__random_ +29 -0
  931. cuda/cccl/headers/include/cuda/std/__ranges/access.h +303 -0
  932. cuda/cccl/headers/include/cuda/std/__ranges/all.h +98 -0
  933. cuda/cccl/headers/include/cuda/std/__ranges/compressed_movable_box.h +892 -0
  934. cuda/cccl/headers/include/cuda/std/__ranges/concepts.h +302 -0
  935. cuda/cccl/headers/include/cuda/std/__ranges/counted.h +90 -0
  936. cuda/cccl/headers/include/cuda/std/__ranges/dangling.h +54 -0
  937. cuda/cccl/headers/include/cuda/std/__ranges/data.h +136 -0
  938. cuda/cccl/headers/include/cuda/std/__ranges/empty.h +109 -0
  939. cuda/cccl/headers/include/cuda/std/__ranges/empty_view.h +77 -0
  940. cuda/cccl/headers/include/cuda/std/__ranges/enable_borrowed_range.h +41 -0
  941. cuda/cccl/headers/include/cuda/std/__ranges/enable_view.h +78 -0
  942. cuda/cccl/headers/include/cuda/std/__ranges/from_range.h +36 -0
  943. cuda/cccl/headers/include/cuda/std/__ranges/iota_view.h +266 -0
  944. cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +410 -0
  945. cuda/cccl/headers/include/cuda/std/__ranges/owning_view.h +162 -0
  946. cuda/cccl/headers/include/cuda/std/__ranges/range_adaptor.h +110 -0
  947. cuda/cccl/headers/include/cuda/std/__ranges/rbegin.h +175 -0
  948. cuda/cccl/headers/include/cuda/std/__ranges/ref_view.h +121 -0
  949. cuda/cccl/headers/include/cuda/std/__ranges/rend.h +182 -0
  950. cuda/cccl/headers/include/cuda/std/__ranges/repeat_view.h +345 -0
  951. cuda/cccl/headers/include/cuda/std/__ranges/single_view.h +155 -0
  952. cuda/cccl/headers/include/cuda/std/__ranges/size.h +201 -0
  953. cuda/cccl/headers/include/cuda/std/__ranges/subrange.h +513 -0
  954. cuda/cccl/headers/include/cuda/std/__ranges/take_view.h +476 -0
  955. cuda/cccl/headers/include/cuda/std/__ranges/take_while_view.h +259 -0
  956. cuda/cccl/headers/include/cuda/std/__ranges/transform_view.h +522 -0
  957. cuda/cccl/headers/include/cuda/std/__ranges/unwrap_end.h +53 -0
  958. cuda/cccl/headers/include/cuda/std/__ranges/view_interface.h +183 -0
  959. cuda/cccl/headers/include/cuda/std/__ranges/views.h +38 -0
  960. cuda/cccl/headers/include/cuda/std/__semaphore/atomic_semaphore.h +234 -0
  961. cuda/cccl/headers/include/cuda/std/__semaphore/counting_semaphore.h +51 -0
  962. cuda/cccl/headers/include/cuda/std/__string/char_traits.h +191 -0
  963. cuda/cccl/headers/include/cuda/std/__string/constexpr_c_functions.h +581 -0
  964. cuda/cccl/headers/include/cuda/std/__string/helper_functions.h +296 -0
  965. cuda/cccl/headers/include/cuda/std/__string/string_view.h +244 -0
  966. cuda/cccl/headers/include/cuda/std/__string_ +29 -0
  967. cuda/cccl/headers/include/cuda/std/__system_error/errc.h +51 -0
  968. cuda/cccl/headers/include/cuda/std/__system_error_ +26 -0
  969. cuda/cccl/headers/include/cuda/std/__thread/threading_support.h +106 -0
  970. cuda/cccl/headers/include/cuda/std/__thread/threading_support_cuda.h +47 -0
  971. cuda/cccl/headers/include/cuda/std/__thread/threading_support_external.h +41 -0
  972. cuda/cccl/headers/include/cuda/std/__thread/threading_support_pthread.h +143 -0
  973. cuda/cccl/headers/include/cuda/std/__thread/threading_support_win32.h +87 -0
  974. cuda/cccl/headers/include/cuda/std/__tuple_dir/ignore.h +51 -0
  975. cuda/cccl/headers/include/cuda/std/__tuple_dir/make_tuple_types.h +120 -0
  976. cuda/cccl/headers/include/cuda/std/__tuple_dir/sfinae_helpers.h +260 -0
  977. cuda/cccl/headers/include/cuda/std/__tuple_dir/structured_bindings.h +212 -0
  978. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_element.h +70 -0
  979. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_indices.h +44 -0
  980. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +84 -0
  981. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like_ext.h +68 -0
  982. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_size.h +79 -0
  983. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_types.h +35 -0
  984. cuda/cccl/headers/include/cuda/std/__tuple_dir/vector_types.h +290 -0
  985. cuda/cccl/headers/include/cuda/std/__type_traits/add_const.h +40 -0
  986. cuda/cccl/headers/include/cuda/std/__type_traits/add_cv.h +40 -0
  987. cuda/cccl/headers/include/cuda/std/__type_traits/add_lvalue_reference.h +62 -0
  988. cuda/cccl/headers/include/cuda/std/__type_traits/add_pointer.h +65 -0
  989. cuda/cccl/headers/include/cuda/std/__type_traits/add_rvalue_reference.h +62 -0
  990. cuda/cccl/headers/include/cuda/std/__type_traits/add_volatile.h +40 -0
  991. cuda/cccl/headers/include/cuda/std/__type_traits/aligned_storage.h +149 -0
  992. cuda/cccl/headers/include/cuda/std/__type_traits/aligned_union.h +62 -0
  993. cuda/cccl/headers/include/cuda/std/__type_traits/alignment_of.h +41 -0
  994. cuda/cccl/headers/include/cuda/std/__type_traits/always_false.h +35 -0
  995. cuda/cccl/headers/include/cuda/std/__type_traits/can_extract_key.h +68 -0
  996. cuda/cccl/headers/include/cuda/std/__type_traits/common_reference.h +262 -0
  997. cuda/cccl/headers/include/cuda/std/__type_traits/common_type.h +173 -0
  998. cuda/cccl/headers/include/cuda/std/__type_traits/conditional.h +65 -0
  999. cuda/cccl/headers/include/cuda/std/__type_traits/conjunction.h +67 -0
  1000. cuda/cccl/headers/include/cuda/std/__type_traits/copy_cv.h +50 -0
  1001. cuda/cccl/headers/include/cuda/std/__type_traits/copy_cvref.h +148 -0
  1002. cuda/cccl/headers/include/cuda/std/__type_traits/decay.h +83 -0
  1003. cuda/cccl/headers/include/cuda/std/__type_traits/dependent_type.h +35 -0
  1004. cuda/cccl/headers/include/cuda/std/__type_traits/disjunction.h +77 -0
  1005. cuda/cccl/headers/include/cuda/std/__type_traits/enable_if.h +43 -0
  1006. cuda/cccl/headers/include/cuda/std/__type_traits/extent.h +68 -0
  1007. cuda/cccl/headers/include/cuda/std/__type_traits/fold.h +47 -0
  1008. cuda/cccl/headers/include/cuda/std/__type_traits/has_unique_object_representation.h +46 -0
  1009. cuda/cccl/headers/include/cuda/std/__type_traits/has_virtual_destructor.h +42 -0
  1010. cuda/cccl/headers/include/cuda/std/__type_traits/integral_constant.h +62 -0
  1011. cuda/cccl/headers/include/cuda/std/__type_traits/is_abstract.h +42 -0
  1012. cuda/cccl/headers/include/cuda/std/__type_traits/is_aggregate.h +42 -0
  1013. cuda/cccl/headers/include/cuda/std/__type_traits/is_allocator.h +46 -0
  1014. cuda/cccl/headers/include/cuda/std/__type_traits/is_arithmetic.h +42 -0
  1015. cuda/cccl/headers/include/cuda/std/__type_traits/is_array.h +62 -0
  1016. cuda/cccl/headers/include/cuda/std/__type_traits/is_assignable.h +78 -0
  1017. cuda/cccl/headers/include/cuda/std/__type_traits/is_base_of.h +42 -0
  1018. cuda/cccl/headers/include/cuda/std/__type_traits/is_bounded_array.h +44 -0
  1019. cuda/cccl/headers/include/cuda/std/__type_traits/is_callable.h +60 -0
  1020. cuda/cccl/headers/include/cuda/std/__type_traits/is_char_like_type.h +38 -0
  1021. cuda/cccl/headers/include/cuda/std/__type_traits/is_class.h +42 -0
  1022. cuda/cccl/headers/include/cuda/std/__type_traits/is_compound.h +58 -0
  1023. cuda/cccl/headers/include/cuda/std/__type_traits/is_const.h +56 -0
  1024. cuda/cccl/headers/include/cuda/std/__type_traits/is_constant_evaluated.h +51 -0
  1025. cuda/cccl/headers/include/cuda/std/__type_traits/is_constructible.h +174 -0
  1026. cuda/cccl/headers/include/cuda/std/__type_traits/is_convertible.h +211 -0
  1027. cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_assignable.h +43 -0
  1028. cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_constructible.h +43 -0
  1029. cuda/cccl/headers/include/cuda/std/__type_traits/is_core_convertible.h +47 -0
  1030. cuda/cccl/headers/include/cuda/std/__type_traits/is_corresponding_member.h +42 -0
  1031. cuda/cccl/headers/include/cuda/std/__type_traits/is_default_constructible.h +40 -0
  1032. cuda/cccl/headers/include/cuda/std/__type_traits/is_destructible.h +115 -0
  1033. cuda/cccl/headers/include/cuda/std/__type_traits/is_empty.h +42 -0
  1034. cuda/cccl/headers/include/cuda/std/__type_traits/is_enum.h +42 -0
  1035. cuda/cccl/headers/include/cuda/std/__type_traits/is_execution_policy.h +81 -0
  1036. cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_arithmetic.h +38 -0
  1037. cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_floating_point.h +79 -0
  1038. cuda/cccl/headers/include/cuda/std/__type_traits/is_final.h +42 -0
  1039. cuda/cccl/headers/include/cuda/std/__type_traits/is_floating_point.h +53 -0
  1040. cuda/cccl/headers/include/cuda/std/__type_traits/is_function.h +61 -0
  1041. cuda/cccl/headers/include/cuda/std/__type_traits/is_fundamental.h +56 -0
  1042. cuda/cccl/headers/include/cuda/std/__type_traits/is_implicitly_default_constructible.h +57 -0
  1043. cuda/cccl/headers/include/cuda/std/__type_traits/is_integer.h +45 -0
  1044. cuda/cccl/headers/include/cuda/std/__type_traits/is_integral.h +123 -0
  1045. cuda/cccl/headers/include/cuda/std/__type_traits/is_layout_compatible.h +45 -0
  1046. cuda/cccl/headers/include/cuda/std/__type_traits/is_literal_type.h +42 -0
  1047. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_function_pointer.h +79 -0
  1048. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_object_pointer.h +57 -0
  1049. cuda/cccl/headers/include/cuda/std/__type_traits/is_member_pointer.h +57 -0
  1050. cuda/cccl/headers/include/cuda/std/__type_traits/is_move_assignable.h +43 -0
  1051. cuda/cccl/headers/include/cuda/std/__type_traits/is_move_constructible.h +42 -0
  1052. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_assignable.h +70 -0
  1053. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_constructible.h +84 -0
  1054. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_convertible.h +59 -0
  1055. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_assignable.h +60 -0
  1056. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_constructible.h +43 -0
  1057. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_default_constructible.h +54 -0
  1058. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_destructible.h +82 -0
  1059. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_assignable.h +60 -0
  1060. cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_constructible.h +42 -0
  1061. cuda/cccl/headers/include/cuda/std/__type_traits/is_null_pointer.h +43 -0
  1062. cuda/cccl/headers/include/cuda/std/__type_traits/is_object.h +57 -0
  1063. cuda/cccl/headers/include/cuda/std/__type_traits/is_one_of.h +37 -0
  1064. cuda/cccl/headers/include/cuda/std/__type_traits/is_pod.h +42 -0
  1065. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer.h +60 -0
  1066. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_base_of.h +84 -0
  1067. cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_with_class.h +42 -0
  1068. cuda/cccl/headers/include/cuda/std/__type_traits/is_polymorphic.h +42 -0
  1069. cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +121 -0
  1070. cuda/cccl/headers/include/cuda/std/__type_traits/is_reference.h +95 -0
  1071. cuda/cccl/headers/include/cuda/std/__type_traits/is_reference_wrapper.h +50 -0
  1072. cuda/cccl/headers/include/cuda/std/__type_traits/is_referenceable.h +55 -0
  1073. cuda/cccl/headers/include/cuda/std/__type_traits/is_same.h +88 -0
  1074. cuda/cccl/headers/include/cuda/std/__type_traits/is_scalar.h +60 -0
  1075. cuda/cccl/headers/include/cuda/std/__type_traits/is_scoped_enum.h +49 -0
  1076. cuda/cccl/headers/include/cuda/std/__type_traits/is_signed.h +65 -0
  1077. cuda/cccl/headers/include/cuda/std/__type_traits/is_signed_integer.h +59 -0
  1078. cuda/cccl/headers/include/cuda/std/__type_traits/is_standard_layout.h +42 -0
  1079. cuda/cccl/headers/include/cuda/std/__type_traits/is_swappable.h +202 -0
  1080. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivial.h +42 -0
  1081. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_assignable.h +43 -0
  1082. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_constructible.h +43 -0
  1083. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_assignable.h +46 -0
  1084. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_constructible.h +45 -0
  1085. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copyable.h +42 -0
  1086. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_default_constructible.h +42 -0
  1087. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_destructible.h +58 -0
  1088. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_assignable.h +45 -0
  1089. cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_constructible.h +44 -0
  1090. cuda/cccl/headers/include/cuda/std/__type_traits/is_unbounded_array.h +43 -0
  1091. cuda/cccl/headers/include/cuda/std/__type_traits/is_union.h +42 -0
  1092. cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned.h +66 -0
  1093. cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned_integer.h +59 -0
  1094. cuda/cccl/headers/include/cuda/std/__type_traits/is_valid_expansion.h +41 -0
  1095. cuda/cccl/headers/include/cuda/std/__type_traits/is_void.h +55 -0
  1096. cuda/cccl/headers/include/cuda/std/__type_traits/is_volatile.h +56 -0
  1097. cuda/cccl/headers/include/cuda/std/__type_traits/lazy.h +35 -0
  1098. cuda/cccl/headers/include/cuda/std/__type_traits/make_const_lvalue_ref.h +36 -0
  1099. cuda/cccl/headers/include/cuda/std/__type_traits/make_nbit_int.h +107 -0
  1100. cuda/cccl/headers/include/cuda/std/__type_traits/make_signed.h +140 -0
  1101. cuda/cccl/headers/include/cuda/std/__type_traits/make_unsigned.h +151 -0
  1102. cuda/cccl/headers/include/cuda/std/__type_traits/maybe_const.h +36 -0
  1103. cuda/cccl/headers/include/cuda/std/__type_traits/nat.h +39 -0
  1104. cuda/cccl/headers/include/cuda/std/__type_traits/negation.h +44 -0
  1105. cuda/cccl/headers/include/cuda/std/__type_traits/num_bits.h +122 -0
  1106. cuda/cccl/headers/include/cuda/std/__type_traits/promote.h +163 -0
  1107. cuda/cccl/headers/include/cuda/std/__type_traits/rank.h +60 -0
  1108. cuda/cccl/headers/include/cuda/std/__type_traits/reference_constructs_from_temporary.h +57 -0
  1109. cuda/cccl/headers/include/cuda/std/__type_traits/reference_converts_from_temporary.h +56 -0
  1110. cuda/cccl/headers/include/cuda/std/__type_traits/remove_all_extents.h +66 -0
  1111. cuda/cccl/headers/include/cuda/std/__type_traits/remove_const.h +59 -0
  1112. cuda/cccl/headers/include/cuda/std/__type_traits/remove_const_ref.h +37 -0
  1113. cuda/cccl/headers/include/cuda/std/__type_traits/remove_cv.h +57 -0
  1114. cuda/cccl/headers/include/cuda/std/__type_traits/remove_cvref.h +57 -0
  1115. cuda/cccl/headers/include/cuda/std/__type_traits/remove_extent.h +65 -0
  1116. cuda/cccl/headers/include/cuda/std/__type_traits/remove_pointer.h +73 -0
  1117. cuda/cccl/headers/include/cuda/std/__type_traits/remove_reference.h +72 -0
  1118. cuda/cccl/headers/include/cuda/std/__type_traits/remove_volatile.h +58 -0
  1119. cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +47 -0
  1120. cuda/cccl/headers/include/cuda/std/__type_traits/type_identity.h +40 -0
  1121. cuda/cccl/headers/include/cuda/std/__type_traits/type_list.h +1067 -0
  1122. cuda/cccl/headers/include/cuda/std/__type_traits/type_set.h +131 -0
  1123. cuda/cccl/headers/include/cuda/std/__type_traits/underlying_type.h +52 -0
  1124. cuda/cccl/headers/include/cuda/std/__type_traits/void_t.h +34 -0
  1125. cuda/cccl/headers/include/cuda/std/__utility/as_const.h +52 -0
  1126. cuda/cccl/headers/include/cuda/std/__utility/auto_cast.h +34 -0
  1127. cuda/cccl/headers/include/cuda/std/__utility/cmp.h +116 -0
  1128. cuda/cccl/headers/include/cuda/std/__utility/convert_to_integral.h +101 -0
  1129. cuda/cccl/headers/include/cuda/std/__utility/declval.h +76 -0
  1130. cuda/cccl/headers/include/cuda/std/__utility/exception_guard.h +161 -0
  1131. cuda/cccl/headers/include/cuda/std/__utility/exchange.h +46 -0
  1132. cuda/cccl/headers/include/cuda/std/__utility/forward.h +59 -0
  1133. cuda/cccl/headers/include/cuda/std/__utility/forward_like.h +55 -0
  1134. cuda/cccl/headers/include/cuda/std/__utility/in_place.h +86 -0
  1135. cuda/cccl/headers/include/cuda/std/__utility/integer_sequence.h +251 -0
  1136. cuda/cccl/headers/include/cuda/std/__utility/monostate.h +99 -0
  1137. cuda/cccl/headers/include/cuda/std/__utility/move.h +74 -0
  1138. cuda/cccl/headers/include/cuda/std/__utility/pair.h +791 -0
  1139. cuda/cccl/headers/include/cuda/std/__utility/piecewise_construct.h +37 -0
  1140. cuda/cccl/headers/include/cuda/std/__utility/pod_tuple.h +527 -0
  1141. cuda/cccl/headers/include/cuda/std/__utility/priority_tag.h +40 -0
  1142. cuda/cccl/headers/include/cuda/std/__utility/rel_ops.h +63 -0
  1143. cuda/cccl/headers/include/cuda/std/__utility/swap.h +64 -0
  1144. cuda/cccl/headers/include/cuda/std/__utility/to_underlying.h +40 -0
  1145. cuda/cccl/headers/include/cuda/std/__utility/typeid.h +421 -0
  1146. cuda/cccl/headers/include/cuda/std/__utility/undefined.h +34 -0
  1147. cuda/cccl/headers/include/cuda/std/__utility/unreachable.h +37 -0
  1148. cuda/cccl/headers/include/cuda/std/array +518 -0
  1149. cuda/cccl/headers/include/cuda/std/atomic +810 -0
  1150. cuda/cccl/headers/include/cuda/std/barrier +42 -0
  1151. cuda/cccl/headers/include/cuda/std/bit +35 -0
  1152. cuda/cccl/headers/include/cuda/std/bitset +994 -0
  1153. cuda/cccl/headers/include/cuda/std/cassert +28 -0
  1154. cuda/cccl/headers/include/cuda/std/ccomplex +15 -0
  1155. cuda/cccl/headers/include/cuda/std/cfloat +59 -0
  1156. cuda/cccl/headers/include/cuda/std/chrono +26 -0
  1157. cuda/cccl/headers/include/cuda/std/climits +61 -0
  1158. cuda/cccl/headers/include/cuda/std/cmath +87 -0
  1159. cuda/cccl/headers/include/cuda/std/complex +50 -0
  1160. cuda/cccl/headers/include/cuda/std/concepts +48 -0
  1161. cuda/cccl/headers/include/cuda/std/cstddef +28 -0
  1162. cuda/cccl/headers/include/cuda/std/cstdint +178 -0
  1163. cuda/cccl/headers/include/cuda/std/cstdlib +30 -0
  1164. cuda/cccl/headers/include/cuda/std/cstring +110 -0
  1165. cuda/cccl/headers/include/cuda/std/ctime +154 -0
  1166. cuda/cccl/headers/include/cuda/std/detail/__config +45 -0
  1167. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +207 -0
  1168. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/algorithm +1721 -0
  1169. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/chrono +2509 -0
  1170. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/iosfwd +128 -0
  1171. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/stdexcept +120 -0
  1172. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/tuple +1365 -0
  1173. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +2144 -0
  1174. cuda/cccl/headers/include/cuda/std/execution +29 -0
  1175. cuda/cccl/headers/include/cuda/std/expected +30 -0
  1176. cuda/cccl/headers/include/cuda/std/functional +56 -0
  1177. cuda/cccl/headers/include/cuda/std/initializer_list +44 -0
  1178. cuda/cccl/headers/include/cuda/std/inplace_vector +2170 -0
  1179. cuda/cccl/headers/include/cuda/std/iterator +70 -0
  1180. cuda/cccl/headers/include/cuda/std/latch +34 -0
  1181. cuda/cccl/headers/include/cuda/std/limits +28 -0
  1182. cuda/cccl/headers/include/cuda/std/linalg +30 -0
  1183. cuda/cccl/headers/include/cuda/std/mdspan +38 -0
  1184. cuda/cccl/headers/include/cuda/std/memory +39 -0
  1185. cuda/cccl/headers/include/cuda/std/numbers +346 -0
  1186. cuda/cccl/headers/include/cuda/std/numeric +41 -0
  1187. cuda/cccl/headers/include/cuda/std/optional +31 -0
  1188. cuda/cccl/headers/include/cuda/std/ranges +69 -0
  1189. cuda/cccl/headers/include/cuda/std/ratio +416 -0
  1190. cuda/cccl/headers/include/cuda/std/semaphore +31 -0
  1191. cuda/cccl/headers/include/cuda/std/source_location +83 -0
  1192. cuda/cccl/headers/include/cuda/std/span +628 -0
  1193. cuda/cccl/headers/include/cuda/std/string_view +925 -0
  1194. cuda/cccl/headers/include/cuda/std/tuple +26 -0
  1195. cuda/cccl/headers/include/cuda/std/type_traits +177 -0
  1196. cuda/cccl/headers/include/cuda/std/utility +70 -0
  1197. cuda/cccl/headers/include/cuda/std/variant +25 -0
  1198. cuda/cccl/headers/include/cuda/std/version +240 -0
  1199. cuda/cccl/headers/include/cuda/stream +31 -0
  1200. cuda/cccl/headers/include/cuda/stream_ref +59 -0
  1201. cuda/cccl/headers/include/cuda/type_traits +27 -0
  1202. cuda/cccl/headers/include/cuda/utility +28 -0
  1203. cuda/cccl/headers/include/cuda/version +16 -0
  1204. cuda/cccl/headers/include/cuda/warp +28 -0
  1205. cuda/cccl/headers/include/cuda/work_stealing +26 -0
  1206. cuda/cccl/headers/include/nv/detail/__preprocessor +169 -0
  1207. cuda/cccl/headers/include/nv/detail/__target_macros +718 -0
  1208. cuda/cccl/headers/include/nv/target +240 -0
  1209. cuda/cccl/headers/include/thrust/addressof.h +22 -0
  1210. cuda/cccl/headers/include/thrust/adjacent_difference.h +254 -0
  1211. cuda/cccl/headers/include/thrust/advance.h +57 -0
  1212. cuda/cccl/headers/include/thrust/allocate_unique.h +299 -0
  1213. cuda/cccl/headers/include/thrust/binary_search.h +1910 -0
  1214. cuda/cccl/headers/include/thrust/complex.h +858 -0
  1215. cuda/cccl/headers/include/thrust/copy.h +506 -0
  1216. cuda/cccl/headers/include/thrust/count.h +245 -0
  1217. cuda/cccl/headers/include/thrust/detail/adjacent_difference.inl +95 -0
  1218. cuda/cccl/headers/include/thrust/detail/alignment.h +81 -0
  1219. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +626 -0
  1220. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +192 -0
  1221. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +96 -0
  1222. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +81 -0
  1223. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +78 -0
  1224. cuda/cccl/headers/include/thrust/detail/allocator/no_throw_allocator.h +76 -0
  1225. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +115 -0
  1226. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +116 -0
  1227. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +77 -0
  1228. cuda/cccl/headers/include/thrust/detail/allocator_aware_execution_policy.h +99 -0
  1229. cuda/cccl/headers/include/thrust/detail/binary_search.inl +525 -0
  1230. cuda/cccl/headers/include/thrust/detail/caching_allocator.h +47 -0
  1231. cuda/cccl/headers/include/thrust/detail/complex/arithmetic.h +255 -0
  1232. cuda/cccl/headers/include/thrust/detail/complex/c99math.h +64 -0
  1233. cuda/cccl/headers/include/thrust/detail/complex/catrig.h +875 -0
  1234. cuda/cccl/headers/include/thrust/detail/complex/catrigf.h +589 -0
  1235. cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +233 -0
  1236. cuda/cccl/headers/include/thrust/detail/complex/ccoshf.h +161 -0
  1237. cuda/cccl/headers/include/thrust/detail/complex/cexp.h +195 -0
  1238. cuda/cccl/headers/include/thrust/detail/complex/cexpf.h +173 -0
  1239. cuda/cccl/headers/include/thrust/detail/complex/clog.h +223 -0
  1240. cuda/cccl/headers/include/thrust/detail/complex/clogf.h +210 -0
  1241. cuda/cccl/headers/include/thrust/detail/complex/complex.inl +263 -0
  1242. cuda/cccl/headers/include/thrust/detail/complex/cpow.h +50 -0
  1243. cuda/cccl/headers/include/thrust/detail/complex/cproj.h +81 -0
  1244. cuda/cccl/headers/include/thrust/detail/complex/csinh.h +228 -0
  1245. cuda/cccl/headers/include/thrust/detail/complex/csinhf.h +168 -0
  1246. cuda/cccl/headers/include/thrust/detail/complex/csqrt.h +178 -0
  1247. cuda/cccl/headers/include/thrust/detail/complex/csqrtf.h +174 -0
  1248. cuda/cccl/headers/include/thrust/detail/complex/ctanh.h +208 -0
  1249. cuda/cccl/headers/include/thrust/detail/complex/ctanhf.h +133 -0
  1250. cuda/cccl/headers/include/thrust/detail/complex/math_private.h +138 -0
  1251. cuda/cccl/headers/include/thrust/detail/complex/stream.h +73 -0
  1252. cuda/cccl/headers/include/thrust/detail/config/compiler.h +38 -0
  1253. cuda/cccl/headers/include/thrust/detail/config/config.h +43 -0
  1254. cuda/cccl/headers/include/thrust/detail/config/cpp_dialect.h +78 -0
  1255. cuda/cccl/headers/include/thrust/detail/config/device_system.h +55 -0
  1256. cuda/cccl/headers/include/thrust/detail/config/host_system.h +48 -0
  1257. cuda/cccl/headers/include/thrust/detail/config/memory_resource.h +41 -0
  1258. cuda/cccl/headers/include/thrust/detail/config/namespace.h +162 -0
  1259. cuda/cccl/headers/include/thrust/detail/config/simple_defines.h +48 -0
  1260. cuda/cccl/headers/include/thrust/detail/config.h +36 -0
  1261. cuda/cccl/headers/include/thrust/detail/contiguous_storage.h +228 -0
  1262. cuda/cccl/headers/include/thrust/detail/contiguous_storage.inl +273 -0
  1263. cuda/cccl/headers/include/thrust/detail/copy.h +72 -0
  1264. cuda/cccl/headers/include/thrust/detail/copy.inl +129 -0
  1265. cuda/cccl/headers/include/thrust/detail/copy_if.h +62 -0
  1266. cuda/cccl/headers/include/thrust/detail/copy_if.inl +102 -0
  1267. cuda/cccl/headers/include/thrust/detail/count.h +55 -0
  1268. cuda/cccl/headers/include/thrust/detail/count.inl +89 -0
  1269. cuda/cccl/headers/include/thrust/detail/device_ptr.inl +48 -0
  1270. cuda/cccl/headers/include/thrust/detail/equal.inl +93 -0
  1271. cuda/cccl/headers/include/thrust/detail/event_error.h +160 -0
  1272. cuda/cccl/headers/include/thrust/detail/execute_with_allocator.h +81 -0
  1273. cuda/cccl/headers/include/thrust/detail/execute_with_allocator_fwd.h +61 -0
  1274. cuda/cccl/headers/include/thrust/detail/execution_policy.h +120 -0
  1275. cuda/cccl/headers/include/thrust/detail/extrema.inl +184 -0
  1276. cuda/cccl/headers/include/thrust/detail/fill.inl +86 -0
  1277. cuda/cccl/headers/include/thrust/detail/find.inl +113 -0
  1278. cuda/cccl/headers/include/thrust/detail/for_each.inl +84 -0
  1279. cuda/cccl/headers/include/thrust/detail/function.h +49 -0
  1280. cuda/cccl/headers/include/thrust/detail/functional/actor.h +214 -0
  1281. cuda/cccl/headers/include/thrust/detail/functional/operators.h +386 -0
  1282. cuda/cccl/headers/include/thrust/detail/gather.inl +173 -0
  1283. cuda/cccl/headers/include/thrust/detail/generate.inl +86 -0
  1284. cuda/cccl/headers/include/thrust/detail/get_iterator_value.h +62 -0
  1285. cuda/cccl/headers/include/thrust/detail/inner_product.inl +118 -0
  1286. cuda/cccl/headers/include/thrust/detail/internal_functional.h +328 -0
  1287. cuda/cccl/headers/include/thrust/detail/logical.inl +113 -0
  1288. cuda/cccl/headers/include/thrust/detail/malloc_and_free.h +77 -0
  1289. cuda/cccl/headers/include/thrust/detail/malloc_and_free_fwd.h +45 -0
  1290. cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +209 -0
  1291. cuda/cccl/headers/include/thrust/detail/merge.inl +276 -0
  1292. cuda/cccl/headers/include/thrust/detail/mismatch.inl +94 -0
  1293. cuda/cccl/headers/include/thrust/detail/overlapped_copy.h +124 -0
  1294. cuda/cccl/headers/include/thrust/detail/partition.inl +378 -0
  1295. cuda/cccl/headers/include/thrust/detail/pointer.h +309 -0
  1296. cuda/cccl/headers/include/thrust/detail/preprocessor.h +652 -0
  1297. cuda/cccl/headers/include/thrust/detail/random_bijection.h +177 -0
  1298. cuda/cccl/headers/include/thrust/detail/range/head_flags.h +116 -0
  1299. cuda/cccl/headers/include/thrust/detail/range/tail_flags.h +130 -0
  1300. cuda/cccl/headers/include/thrust/detail/raw_pointer_cast.h +52 -0
  1301. cuda/cccl/headers/include/thrust/detail/raw_reference_cast.h +192 -0
  1302. cuda/cccl/headers/include/thrust/detail/reduce.inl +377 -0
  1303. cuda/cccl/headers/include/thrust/detail/reference.h +494 -0
  1304. cuda/cccl/headers/include/thrust/detail/reference_forward_declaration.h +35 -0
  1305. cuda/cccl/headers/include/thrust/detail/remove.inl +213 -0
  1306. cuda/cccl/headers/include/thrust/detail/replace.inl +231 -0
  1307. cuda/cccl/headers/include/thrust/detail/reverse.inl +88 -0
  1308. cuda/cccl/headers/include/thrust/detail/scan.inl +518 -0
  1309. cuda/cccl/headers/include/thrust/detail/scatter.inl +157 -0
  1310. cuda/cccl/headers/include/thrust/detail/seq.h +66 -0
  1311. cuda/cccl/headers/include/thrust/detail/sequence.inl +109 -0
  1312. cuda/cccl/headers/include/thrust/detail/set_operations.inl +981 -0
  1313. cuda/cccl/headers/include/thrust/detail/shuffle.inl +86 -0
  1314. cuda/cccl/headers/include/thrust/detail/sort.inl +373 -0
  1315. cuda/cccl/headers/include/thrust/detail/static_assert.h +58 -0
  1316. cuda/cccl/headers/include/thrust/detail/static_map.h +167 -0
  1317. cuda/cccl/headers/include/thrust/detail/swap_ranges.inl +65 -0
  1318. cuda/cccl/headers/include/thrust/detail/tabulate.inl +62 -0
  1319. cuda/cccl/headers/include/thrust/detail/temporary_array.h +153 -0
  1320. cuda/cccl/headers/include/thrust/detail/temporary_array.inl +120 -0
  1321. cuda/cccl/headers/include/thrust/detail/temporary_buffer.h +81 -0
  1322. cuda/cccl/headers/include/thrust/detail/transform_reduce.inl +69 -0
  1323. cuda/cccl/headers/include/thrust/detail/transform_scan.inl +161 -0
  1324. cuda/cccl/headers/include/thrust/detail/trivial_sequence.h +130 -0
  1325. cuda/cccl/headers/include/thrust/detail/tuple_meta_transform.h +61 -0
  1326. cuda/cccl/headers/include/thrust/detail/type_deduction.h +62 -0
  1327. cuda/cccl/headers/include/thrust/detail/type_traits/has_member_function.h +47 -0
  1328. cuda/cccl/headers/include/thrust/detail/type_traits/has_nested_type.h +43 -0
  1329. cuda/cccl/headers/include/thrust/detail/type_traits/is_call_possible.h +167 -0
  1330. cuda/cccl/headers/include/thrust/detail/type_traits/is_commutative.h +69 -0
  1331. cuda/cccl/headers/include/thrust/detail/type_traits/is_metafunction_defined.h +39 -0
  1332. cuda/cccl/headers/include/thrust/detail/type_traits/is_thrust_pointer.h +59 -0
  1333. cuda/cccl/headers/include/thrust/detail/type_traits/iterator/is_output_iterator.h +46 -0
  1334. cuda/cccl/headers/include/thrust/detail/type_traits/minimum_type.h +89 -0
  1335. cuda/cccl/headers/include/thrust/detail/type_traits/pointer_traits.h +332 -0
  1336. cuda/cccl/headers/include/thrust/detail/type_traits.h +136 -0
  1337. cuda/cccl/headers/include/thrust/detail/uninitialized_copy.inl +90 -0
  1338. cuda/cccl/headers/include/thrust/detail/uninitialized_fill.inl +86 -0
  1339. cuda/cccl/headers/include/thrust/detail/unique.inl +373 -0
  1340. cuda/cccl/headers/include/thrust/detail/use_default.h +34 -0
  1341. cuda/cccl/headers/include/thrust/detail/vector_base.h +613 -0
  1342. cuda/cccl/headers/include/thrust/detail/vector_base.inl +1210 -0
  1343. cuda/cccl/headers/include/thrust/device_allocator.h +134 -0
  1344. cuda/cccl/headers/include/thrust/device_delete.h +74 -0
  1345. cuda/cccl/headers/include/thrust/device_free.h +85 -0
  1346. cuda/cccl/headers/include/thrust/device_make_unique.h +56 -0
  1347. cuda/cccl/headers/include/thrust/device_malloc.h +84 -0
  1348. cuda/cccl/headers/include/thrust/device_malloc_allocator.h +190 -0
  1349. cuda/cccl/headers/include/thrust/device_new.h +112 -0
  1350. cuda/cccl/headers/include/thrust/device_new_allocator.h +179 -0
  1351. cuda/cccl/headers/include/thrust/device_ptr.h +196 -0
  1352. cuda/cccl/headers/include/thrust/device_reference.h +983 -0
  1353. cuda/cccl/headers/include/thrust/device_vector.h +576 -0
  1354. cuda/cccl/headers/include/thrust/distance.h +43 -0
  1355. cuda/cccl/headers/include/thrust/equal.h +247 -0
  1356. cuda/cccl/headers/include/thrust/execution_policy.h +251 -0
  1357. cuda/cccl/headers/include/thrust/extrema.h +657 -0
  1358. cuda/cccl/headers/include/thrust/fill.h +200 -0
  1359. cuda/cccl/headers/include/thrust/find.h +382 -0
  1360. cuda/cccl/headers/include/thrust/for_each.h +261 -0
  1361. cuda/cccl/headers/include/thrust/functional.h +395 -0
  1362. cuda/cccl/headers/include/thrust/gather.h +464 -0
  1363. cuda/cccl/headers/include/thrust/generate.h +193 -0
  1364. cuda/cccl/headers/include/thrust/host_vector.h +576 -0
  1365. cuda/cccl/headers/include/thrust/inner_product.h +264 -0
  1366. cuda/cccl/headers/include/thrust/iterator/constant_iterator.h +221 -0
  1367. cuda/cccl/headers/include/thrust/iterator/counting_iterator.h +335 -0
  1368. cuda/cccl/headers/include/thrust/iterator/detail/any_assign.h +48 -0
  1369. cuda/cccl/headers/include/thrust/iterator/detail/any_system_tag.h +43 -0
  1370. cuda/cccl/headers/include/thrust/iterator/detail/device_system_tag.h +38 -0
  1371. cuda/cccl/headers/include/thrust/iterator/detail/host_system_tag.h +38 -0
  1372. cuda/cccl/headers/include/thrust/iterator/detail/iterator_adaptor_base.h +81 -0
  1373. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_system.h +60 -0
  1374. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_traversal.h +65 -0
  1375. cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h +57 -0
  1376. cuda/cccl/headers/include/thrust/iterator/detail/iterator_facade_category.h +182 -0
  1377. cuda/cccl/headers/include/thrust/iterator/detail/minimum_system.h +58 -0
  1378. cuda/cccl/headers/include/thrust/iterator/detail/normal_iterator.h +69 -0
  1379. cuda/cccl/headers/include/thrust/iterator/detail/retag.h +104 -0
  1380. cuda/cccl/headers/include/thrust/iterator/detail/tagged_iterator.h +81 -0
  1381. cuda/cccl/headers/include/thrust/iterator/detail/tuple_of_iterator_references.h +174 -0
  1382. cuda/cccl/headers/include/thrust/iterator/discard_iterator.h +163 -0
  1383. cuda/cccl/headers/include/thrust/iterator/iterator_adaptor.h +251 -0
  1384. cuda/cccl/headers/include/thrust/iterator/iterator_categories.h +211 -0
  1385. cuda/cccl/headers/include/thrust/iterator/iterator_facade.h +659 -0
  1386. cuda/cccl/headers/include/thrust/iterator/iterator_traits.h +334 -0
  1387. cuda/cccl/headers/include/thrust/iterator/iterator_traversal_tags.h +64 -0
  1388. cuda/cccl/headers/include/thrust/iterator/offset_iterator.h +194 -0
  1389. cuda/cccl/headers/include/thrust/iterator/permutation_iterator.h +204 -0
  1390. cuda/cccl/headers/include/thrust/iterator/retag.h +72 -0
  1391. cuda/cccl/headers/include/thrust/iterator/reverse_iterator.h +51 -0
  1392. cuda/cccl/headers/include/thrust/iterator/shuffle_iterator.h +185 -0
  1393. cuda/cccl/headers/include/thrust/iterator/strided_iterator.h +152 -0
  1394. cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +152 -0
  1395. cuda/cccl/headers/include/thrust/iterator/transform_input_output_iterator.h +226 -0
  1396. cuda/cccl/headers/include/thrust/iterator/transform_iterator.h +351 -0
  1397. cuda/cccl/headers/include/thrust/iterator/transform_output_iterator.h +190 -0
  1398. cuda/cccl/headers/include/thrust/iterator/zip_iterator.h +359 -0
  1399. cuda/cccl/headers/include/thrust/logical.h +290 -0
  1400. cuda/cccl/headers/include/thrust/memory.h +299 -0
  1401. cuda/cccl/headers/include/thrust/merge.h +725 -0
  1402. cuda/cccl/headers/include/thrust/mismatch.h +261 -0
  1403. cuda/cccl/headers/include/thrust/mr/allocator.h +229 -0
  1404. cuda/cccl/headers/include/thrust/mr/device_memory_resource.h +41 -0
  1405. cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +528 -0
  1406. cuda/cccl/headers/include/thrust/mr/disjoint_sync_pool.h +118 -0
  1407. cuda/cccl/headers/include/thrust/mr/disjoint_tls_pool.h +67 -0
  1408. cuda/cccl/headers/include/thrust/mr/fancy_pointer_resource.h +67 -0
  1409. cuda/cccl/headers/include/thrust/mr/host_memory_resource.h +38 -0
  1410. cuda/cccl/headers/include/thrust/mr/memory_resource.h +217 -0
  1411. cuda/cccl/headers/include/thrust/mr/new.h +100 -0
  1412. cuda/cccl/headers/include/thrust/mr/polymorphic_adaptor.h +63 -0
  1413. cuda/cccl/headers/include/thrust/mr/pool.h +528 -0
  1414. cuda/cccl/headers/include/thrust/mr/pool_options.h +174 -0
  1415. cuda/cccl/headers/include/thrust/mr/sync_pool.h +114 -0
  1416. cuda/cccl/headers/include/thrust/mr/tls_pool.h +64 -0
  1417. cuda/cccl/headers/include/thrust/mr/universal_memory_resource.h +29 -0
  1418. cuda/cccl/headers/include/thrust/mr/validator.h +56 -0
  1419. cuda/cccl/headers/include/thrust/pair.h +99 -0
  1420. cuda/cccl/headers/include/thrust/partition.h +1391 -0
  1421. cuda/cccl/headers/include/thrust/per_device_resource.h +98 -0
  1422. cuda/cccl/headers/include/thrust/random/detail/discard_block_engine.inl +184 -0
  1423. cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine.inl +155 -0
  1424. cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine_discard.h +104 -0
  1425. cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine.inl +151 -0
  1426. cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h +53 -0
  1427. cuda/cccl/headers/include/thrust/random/detail/mod.h +101 -0
  1428. cuda/cccl/headers/include/thrust/random/detail/normal_distribution.inl +187 -0
  1429. cuda/cccl/headers/include/thrust/random/detail/normal_distribution_base.h +160 -0
  1430. cuda/cccl/headers/include/thrust/random/detail/random_core_access.h +63 -0
  1431. cuda/cccl/headers/include/thrust/random/detail/subtract_with_carry_engine.inl +201 -0
  1432. cuda/cccl/headers/include/thrust/random/detail/uniform_int_distribution.inl +198 -0
  1433. cuda/cccl/headers/include/thrust/random/detail/uniform_real_distribution.inl +200 -0
  1434. cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine.inl +183 -0
  1435. cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine_max.h +187 -0
  1436. cuda/cccl/headers/include/thrust/random/discard_block_engine.h +240 -0
  1437. cuda/cccl/headers/include/thrust/random/linear_congruential_engine.h +289 -0
  1438. cuda/cccl/headers/include/thrust/random/linear_feedback_shift_engine.h +217 -0
  1439. cuda/cccl/headers/include/thrust/random/normal_distribution.h +257 -0
  1440. cuda/cccl/headers/include/thrust/random/subtract_with_carry_engine.h +247 -0
  1441. cuda/cccl/headers/include/thrust/random/uniform_int_distribution.h +261 -0
  1442. cuda/cccl/headers/include/thrust/random/uniform_real_distribution.h +258 -0
  1443. cuda/cccl/headers/include/thrust/random/xor_combine_engine.h +255 -0
  1444. cuda/cccl/headers/include/thrust/random.h +120 -0
  1445. cuda/cccl/headers/include/thrust/reduce.h +1113 -0
  1446. cuda/cccl/headers/include/thrust/remove.h +768 -0
  1447. cuda/cccl/headers/include/thrust/replace.h +826 -0
  1448. cuda/cccl/headers/include/thrust/reverse.h +215 -0
  1449. cuda/cccl/headers/include/thrust/scan.h +1671 -0
  1450. cuda/cccl/headers/include/thrust/scatter.h +446 -0
  1451. cuda/cccl/headers/include/thrust/sequence.h +277 -0
  1452. cuda/cccl/headers/include/thrust/set_operations.h +3026 -0
  1453. cuda/cccl/headers/include/thrust/shuffle.h +182 -0
  1454. cuda/cccl/headers/include/thrust/sort.h +1320 -0
  1455. cuda/cccl/headers/include/thrust/swap.h +147 -0
  1456. cuda/cccl/headers/include/thrust/system/cpp/detail/adjacent_difference.h +30 -0
  1457. cuda/cccl/headers/include/thrust/system/cpp/detail/assign_value.h +30 -0
  1458. cuda/cccl/headers/include/thrust/system/cpp/detail/binary_search.h +32 -0
  1459. cuda/cccl/headers/include/thrust/system/cpp/detail/copy.h +30 -0
  1460. cuda/cccl/headers/include/thrust/system/cpp/detail/copy_if.h +30 -0
  1461. cuda/cccl/headers/include/thrust/system/cpp/detail/count.h +29 -0
  1462. cuda/cccl/headers/include/thrust/system/cpp/detail/equal.h +29 -0
  1463. cuda/cccl/headers/include/thrust/system/cpp/detail/execution_policy.h +109 -0
  1464. cuda/cccl/headers/include/thrust/system/cpp/detail/extrema.h +30 -0
  1465. cuda/cccl/headers/include/thrust/system/cpp/detail/fill.h +29 -0
  1466. cuda/cccl/headers/include/thrust/system/cpp/detail/find.h +30 -0
  1467. cuda/cccl/headers/include/thrust/system/cpp/detail/for_each.h +30 -0
  1468. cuda/cccl/headers/include/thrust/system/cpp/detail/gather.h +29 -0
  1469. cuda/cccl/headers/include/thrust/system/cpp/detail/generate.h +29 -0
  1470. cuda/cccl/headers/include/thrust/system/cpp/detail/get_value.h +30 -0
  1471. cuda/cccl/headers/include/thrust/system/cpp/detail/inner_product.h +29 -0
  1472. cuda/cccl/headers/include/thrust/system/cpp/detail/iter_swap.h +30 -0
  1473. cuda/cccl/headers/include/thrust/system/cpp/detail/logical.h +29 -0
  1474. cuda/cccl/headers/include/thrust/system/cpp/detail/malloc_and_free.h +30 -0
  1475. cuda/cccl/headers/include/thrust/system/cpp/detail/memory.inl +60 -0
  1476. cuda/cccl/headers/include/thrust/system/cpp/detail/merge.h +30 -0
  1477. cuda/cccl/headers/include/thrust/system/cpp/detail/mismatch.h +29 -0
  1478. cuda/cccl/headers/include/thrust/system/cpp/detail/partition.h +30 -0
  1479. cuda/cccl/headers/include/thrust/system/cpp/detail/per_device_resource.h +29 -0
  1480. cuda/cccl/headers/include/thrust/system/cpp/detail/reduce.h +30 -0
  1481. cuda/cccl/headers/include/thrust/system/cpp/detail/reduce_by_key.h +30 -0
  1482. cuda/cccl/headers/include/thrust/system/cpp/detail/remove.h +30 -0
  1483. cuda/cccl/headers/include/thrust/system/cpp/detail/replace.h +29 -0
  1484. cuda/cccl/headers/include/thrust/system/cpp/detail/reverse.h +29 -0
  1485. cuda/cccl/headers/include/thrust/system/cpp/detail/scan.h +30 -0
  1486. cuda/cccl/headers/include/thrust/system/cpp/detail/scan_by_key.h +30 -0
  1487. cuda/cccl/headers/include/thrust/system/cpp/detail/scatter.h +29 -0
  1488. cuda/cccl/headers/include/thrust/system/cpp/detail/sequence.h +29 -0
  1489. cuda/cccl/headers/include/thrust/system/cpp/detail/set_operations.h +30 -0
  1490. cuda/cccl/headers/include/thrust/system/cpp/detail/sort.h +30 -0
  1491. cuda/cccl/headers/include/thrust/system/cpp/detail/swap_ranges.h +29 -0
  1492. cuda/cccl/headers/include/thrust/system/cpp/detail/tabulate.h +29 -0
  1493. cuda/cccl/headers/include/thrust/system/cpp/detail/temporary_buffer.h +29 -0
  1494. cuda/cccl/headers/include/thrust/system/cpp/detail/transform.h +29 -0
  1495. cuda/cccl/headers/include/thrust/system/cpp/detail/transform_reduce.h +29 -0
  1496. cuda/cccl/headers/include/thrust/system/cpp/detail/transform_scan.h +29 -0
  1497. cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_copy.h +29 -0
  1498. cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_fill.h +29 -0
  1499. cuda/cccl/headers/include/thrust/system/cpp/detail/unique.h +30 -0
  1500. cuda/cccl/headers/include/thrust/system/cpp/detail/unique_by_key.h +30 -0
  1501. cuda/cccl/headers/include/thrust/system/cpp/execution_policy.h +63 -0
  1502. cuda/cccl/headers/include/thrust/system/cpp/memory.h +106 -0
  1503. cuda/cccl/headers/include/thrust/system/cpp/memory_resource.h +72 -0
  1504. cuda/cccl/headers/include/thrust/system/cpp/pointer.h +120 -0
  1505. cuda/cccl/headers/include/thrust/system/cpp/vector.h +96 -0
  1506. cuda/cccl/headers/include/thrust/system/cuda/config.h +126 -0
  1507. cuda/cccl/headers/include/thrust/system/cuda/detail/adjacent_difference.h +219 -0
  1508. cuda/cccl/headers/include/thrust/system/cuda/detail/assign_value.h +124 -0
  1509. cuda/cccl/headers/include/thrust/system/cuda/detail/binary_search.h +29 -0
  1510. cuda/cccl/headers/include/thrust/system/cuda/detail/cdp_dispatch.h +72 -0
  1511. cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +273 -0
  1512. cuda/cccl/headers/include/thrust/system/cuda/detail/copy_if.h +255 -0
  1513. cuda/cccl/headers/include/thrust/system/cuda/detail/core/agent_launcher.h +289 -0
  1514. cuda/cccl/headers/include/thrust/system/cuda/detail/core/triple_chevron_launch.h +191 -0
  1515. cuda/cccl/headers/include/thrust/system/cuda/detail/core/util.h +593 -0
  1516. cuda/cccl/headers/include/thrust/system/cuda/detail/count.h +75 -0
  1517. cuda/cccl/headers/include/thrust/system/cuda/detail/cross_system.h +243 -0
  1518. cuda/cccl/headers/include/thrust/system/cuda/detail/dispatch.h +233 -0
  1519. cuda/cccl/headers/include/thrust/system/cuda/detail/equal.h +64 -0
  1520. cuda/cccl/headers/include/thrust/system/cuda/detail/error.inl +96 -0
  1521. cuda/cccl/headers/include/thrust/system/cuda/detail/execution_policy.h +264 -0
  1522. cuda/cccl/headers/include/thrust/system/cuda/detail/extrema.h +476 -0
  1523. cuda/cccl/headers/include/thrust/system/cuda/detail/fill.h +100 -0
  1524. cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +170 -0
  1525. cuda/cccl/headers/include/thrust/system/cuda/detail/for_each.h +83 -0
  1526. cuda/cccl/headers/include/thrust/system/cuda/detail/gather.h +91 -0
  1527. cuda/cccl/headers/include/thrust/system/cuda/detail/generate.h +60 -0
  1528. cuda/cccl/headers/include/thrust/system/cuda/detail/get_value.h +65 -0
  1529. cuda/cccl/headers/include/thrust/system/cuda/detail/inner_product.h +75 -0
  1530. cuda/cccl/headers/include/thrust/system/cuda/detail/iter_swap.h +80 -0
  1531. cuda/cccl/headers/include/thrust/system/cuda/detail/logical.h +29 -0
  1532. cuda/cccl/headers/include/thrust/system/cuda/detail/make_unsigned_special.h +61 -0
  1533. cuda/cccl/headers/include/thrust/system/cuda/detail/malloc_and_free.h +121 -0
  1534. cuda/cccl/headers/include/thrust/system/cuda/detail/memory.inl +57 -0
  1535. cuda/cccl/headers/include/thrust/system/cuda/detail/merge.h +228 -0
  1536. cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +223 -0
  1537. cuda/cccl/headers/include/thrust/system/cuda/detail/parallel_for.h +81 -0
  1538. cuda/cccl/headers/include/thrust/system/cuda/detail/partition.h +405 -0
  1539. cuda/cccl/headers/include/thrust/system/cuda/detail/per_device_resource.h +72 -0
  1540. cuda/cccl/headers/include/thrust/system/cuda/detail/reduce.h +785 -0
  1541. cuda/cccl/headers/include/thrust/system/cuda/detail/reduce_by_key.h +1001 -0
  1542. cuda/cccl/headers/include/thrust/system/cuda/detail/remove.h +107 -0
  1543. cuda/cccl/headers/include/thrust/system/cuda/detail/replace.h +122 -0
  1544. cuda/cccl/headers/include/thrust/system/cuda/detail/reverse.h +87 -0
  1545. cuda/cccl/headers/include/thrust/system/cuda/detail/scan.h +341 -0
  1546. cuda/cccl/headers/include/thrust/system/cuda/detail/scan_by_key.h +414 -0
  1547. cuda/cccl/headers/include/thrust/system/cuda/detail/scatter.h +91 -0
  1548. cuda/cccl/headers/include/thrust/system/cuda/detail/sequence.h +29 -0
  1549. cuda/cccl/headers/include/thrust/system/cuda/detail/set_operations.h +1734 -0
  1550. cuda/cccl/headers/include/thrust/system/cuda/detail/sort.h +469 -0
  1551. cuda/cccl/headers/include/thrust/system/cuda/detail/swap_ranges.h +98 -0
  1552. cuda/cccl/headers/include/thrust/system/cuda/detail/tabulate.h +61 -0
  1553. cuda/cccl/headers/include/thrust/system/cuda/detail/temporary_buffer.h +132 -0
  1554. cuda/cccl/headers/include/thrust/system/cuda/detail/terminate.h +53 -0
  1555. cuda/cccl/headers/include/thrust/system/cuda/detail/transform.h +429 -0
  1556. cuda/cccl/headers/include/thrust/system/cuda/detail/transform_reduce.h +143 -0
  1557. cuda/cccl/headers/include/thrust/system/cuda/detail/transform_scan.h +119 -0
  1558. cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_copy.h +117 -0
  1559. cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_fill.h +105 -0
  1560. cuda/cccl/headers/include/thrust/system/cuda/detail/unique.h +289 -0
  1561. cuda/cccl/headers/include/thrust/system/cuda/detail/unique_by_key.h +310 -0
  1562. cuda/cccl/headers/include/thrust/system/cuda/detail/util.h +253 -0
  1563. cuda/cccl/headers/include/thrust/system/cuda/error.h +168 -0
  1564. cuda/cccl/headers/include/thrust/system/cuda/execution_policy.h +15 -0
  1565. cuda/cccl/headers/include/thrust/system/cuda/memory.h +122 -0
  1566. cuda/cccl/headers/include/thrust/system/cuda/memory_resource.h +122 -0
  1567. cuda/cccl/headers/include/thrust/system/cuda/pointer.h +160 -0
  1568. cuda/cccl/headers/include/thrust/system/cuda/vector.h +108 -0
  1569. cuda/cccl/headers/include/thrust/system/detail/adl/adjacent_difference.h +51 -0
  1570. cuda/cccl/headers/include/thrust/system/detail/adl/assign_value.h +51 -0
  1571. cuda/cccl/headers/include/thrust/system/detail/adl/binary_search.h +51 -0
  1572. cuda/cccl/headers/include/thrust/system/detail/adl/copy.h +51 -0
  1573. cuda/cccl/headers/include/thrust/system/detail/adl/copy_if.h +52 -0
  1574. cuda/cccl/headers/include/thrust/system/detail/adl/count.h +51 -0
  1575. cuda/cccl/headers/include/thrust/system/detail/adl/equal.h +51 -0
  1576. cuda/cccl/headers/include/thrust/system/detail/adl/extrema.h +51 -0
  1577. cuda/cccl/headers/include/thrust/system/detail/adl/fill.h +51 -0
  1578. cuda/cccl/headers/include/thrust/system/detail/adl/find.h +51 -0
  1579. cuda/cccl/headers/include/thrust/system/detail/adl/for_each.h +51 -0
  1580. cuda/cccl/headers/include/thrust/system/detail/adl/gather.h +51 -0
  1581. cuda/cccl/headers/include/thrust/system/detail/adl/generate.h +51 -0
  1582. cuda/cccl/headers/include/thrust/system/detail/adl/get_value.h +51 -0
  1583. cuda/cccl/headers/include/thrust/system/detail/adl/inner_product.h +51 -0
  1584. cuda/cccl/headers/include/thrust/system/detail/adl/iter_swap.h +51 -0
  1585. cuda/cccl/headers/include/thrust/system/detail/adl/logical.h +51 -0
  1586. cuda/cccl/headers/include/thrust/system/detail/adl/malloc_and_free.h +51 -0
  1587. cuda/cccl/headers/include/thrust/system/detail/adl/merge.h +51 -0
  1588. cuda/cccl/headers/include/thrust/system/detail/adl/mismatch.h +51 -0
  1589. cuda/cccl/headers/include/thrust/system/detail/adl/partition.h +51 -0
  1590. cuda/cccl/headers/include/thrust/system/detail/adl/per_device_resource.h +51 -0
  1591. cuda/cccl/headers/include/thrust/system/detail/adl/reduce.h +51 -0
  1592. cuda/cccl/headers/include/thrust/system/detail/adl/reduce_by_key.h +51 -0
  1593. cuda/cccl/headers/include/thrust/system/detail/adl/remove.h +51 -0
  1594. cuda/cccl/headers/include/thrust/system/detail/adl/replace.h +51 -0
  1595. cuda/cccl/headers/include/thrust/system/detail/adl/reverse.h +51 -0
  1596. cuda/cccl/headers/include/thrust/system/detail/adl/scan.h +51 -0
  1597. cuda/cccl/headers/include/thrust/system/detail/adl/scan_by_key.h +51 -0
  1598. cuda/cccl/headers/include/thrust/system/detail/adl/scatter.h +51 -0
  1599. cuda/cccl/headers/include/thrust/system/detail/adl/sequence.h +51 -0
  1600. cuda/cccl/headers/include/thrust/system/detail/adl/set_operations.h +51 -0
  1601. cuda/cccl/headers/include/thrust/system/detail/adl/sort.h +51 -0
  1602. cuda/cccl/headers/include/thrust/system/detail/adl/swap_ranges.h +51 -0
  1603. cuda/cccl/headers/include/thrust/system/detail/adl/tabulate.h +51 -0
  1604. cuda/cccl/headers/include/thrust/system/detail/adl/temporary_buffer.h +51 -0
  1605. cuda/cccl/headers/include/thrust/system/detail/adl/transform.h +51 -0
  1606. cuda/cccl/headers/include/thrust/system/detail/adl/transform_reduce.h +51 -0
  1607. cuda/cccl/headers/include/thrust/system/detail/adl/transform_scan.h +51 -0
  1608. cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_copy.h +51 -0
  1609. cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_fill.h +51 -0
  1610. cuda/cccl/headers/include/thrust/system/detail/adl/unique.h +51 -0
  1611. cuda/cccl/headers/include/thrust/system/detail/adl/unique_by_key.h +51 -0
  1612. cuda/cccl/headers/include/thrust/system/detail/bad_alloc.h +61 -0
  1613. cuda/cccl/headers/include/thrust/system/detail/errno.h +120 -0
  1614. cuda/cccl/headers/include/thrust/system/detail/error_category.inl +302 -0
  1615. cuda/cccl/headers/include/thrust/system/detail/error_code.inl +173 -0
  1616. cuda/cccl/headers/include/thrust/system/detail/error_condition.inl +121 -0
  1617. cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.h +53 -0
  1618. cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.inl +79 -0
  1619. cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.h +161 -0
  1620. cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.inl +384 -0
  1621. cuda/cccl/headers/include/thrust/system/detail/generic/copy.h +45 -0
  1622. cuda/cccl/headers/include/thrust/system/detail/generic/copy.inl +64 -0
  1623. cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.h +58 -0
  1624. cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.inl +146 -0
  1625. cuda/cccl/headers/include/thrust/system/detail/generic/count.h +48 -0
  1626. cuda/cccl/headers/include/thrust/system/detail/generic/count.inl +84 -0
  1627. cuda/cccl/headers/include/thrust/system/detail/generic/equal.h +49 -0
  1628. cuda/cccl/headers/include/thrust/system/detail/generic/equal.inl +60 -0
  1629. cuda/cccl/headers/include/thrust/system/detail/generic/extrema.h +66 -0
  1630. cuda/cccl/headers/include/thrust/system/detail/generic/extrema.inl +252 -0
  1631. cuda/cccl/headers/include/thrust/system/detail/generic/fill.h +54 -0
  1632. cuda/cccl/headers/include/thrust/system/detail/generic/find.h +49 -0
  1633. cuda/cccl/headers/include/thrust/system/detail/generic/find.inl +137 -0
  1634. cuda/cccl/headers/include/thrust/system/detail/generic/for_each.h +58 -0
  1635. cuda/cccl/headers/include/thrust/system/detail/generic/gather.h +73 -0
  1636. cuda/cccl/headers/include/thrust/system/detail/generic/gather.inl +96 -0
  1637. cuda/cccl/headers/include/thrust/system/detail/generic/generate.h +45 -0
  1638. cuda/cccl/headers/include/thrust/system/detail/generic/generate.inl +63 -0
  1639. cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.h +60 -0
  1640. cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.inl +72 -0
  1641. cuda/cccl/headers/include/thrust/system/detail/generic/logical.h +59 -0
  1642. cuda/cccl/headers/include/thrust/system/detail/generic/memory.h +64 -0
  1643. cuda/cccl/headers/include/thrust/system/detail/generic/memory.inl +86 -0
  1644. cuda/cccl/headers/include/thrust/system/detail/generic/merge.h +99 -0
  1645. cuda/cccl/headers/include/thrust/system/detail/generic/merge.inl +148 -0
  1646. cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.h +49 -0
  1647. cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.inl +68 -0
  1648. cuda/cccl/headers/include/thrust/system/detail/generic/partition.h +129 -0
  1649. cuda/cccl/headers/include/thrust/system/detail/generic/partition.inl +207 -0
  1650. cuda/cccl/headers/include/thrust/system/detail/generic/per_device_resource.h +43 -0
  1651. cuda/cccl/headers/include/thrust/system/detail/generic/reduce.h +71 -0
  1652. cuda/cccl/headers/include/thrust/system/detail/generic/reduce.inl +100 -0
  1653. cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.h +83 -0
  1654. cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.inl +186 -0
  1655. cuda/cccl/headers/include/thrust/system/detail/generic/remove.h +86 -0
  1656. cuda/cccl/headers/include/thrust/system/detail/generic/remove.inl +121 -0
  1657. cuda/cccl/headers/include/thrust/system/detail/generic/replace.h +95 -0
  1658. cuda/cccl/headers/include/thrust/system/detail/generic/replace.inl +175 -0
  1659. cuda/cccl/headers/include/thrust/system/detail/generic/reverse.h +48 -0
  1660. cuda/cccl/headers/include/thrust/system/detail/generic/reverse.inl +67 -0
  1661. cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.h +63 -0
  1662. cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.inl +126 -0
  1663. cuda/cccl/headers/include/thrust/system/detail/generic/scan.h +72 -0
  1664. cuda/cccl/headers/include/thrust/system/detail/generic/scan.inl +85 -0
  1665. cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.h +126 -0
  1666. cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.inl +232 -0
  1667. cuda/cccl/headers/include/thrust/system/detail/generic/scatter.h +73 -0
  1668. cuda/cccl/headers/include/thrust/system/detail/generic/scatter.inl +85 -0
  1669. cuda/cccl/headers/include/thrust/system/detail/generic/select_system.h +104 -0
  1670. cuda/cccl/headers/include/thrust/system/detail/generic/sequence.h +70 -0
  1671. cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.h +282 -0
  1672. cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.inl +476 -0
  1673. cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.h +54 -0
  1674. cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.inl +125 -0
  1675. cuda/cccl/headers/include/thrust/system/detail/generic/sort.h +113 -0
  1676. cuda/cccl/headers/include/thrust/system/detail/generic/sort.inl +175 -0
  1677. cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.h +44 -0
  1678. cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.inl +76 -0
  1679. cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.h +41 -0
  1680. cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.inl +54 -0
  1681. cuda/cccl/headers/include/thrust/system/detail/generic/tag.h +47 -0
  1682. cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.h +54 -0
  1683. cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.inl +82 -0
  1684. cuda/cccl/headers/include/thrust/system/detail/generic/transform.h +395 -0
  1685. cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.h +50 -0
  1686. cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.inl +56 -0
  1687. cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.h +80 -0
  1688. cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.inl +113 -0
  1689. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.h +45 -0
  1690. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.inl +166 -0
  1691. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.h +45 -0
  1692. cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.inl +115 -0
  1693. cuda/cccl/headers/include/thrust/system/detail/generic/unique.h +71 -0
  1694. cuda/cccl/headers/include/thrust/system/detail/generic/unique.inl +113 -0
  1695. cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.h +81 -0
  1696. cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.inl +126 -0
  1697. cuda/cccl/headers/include/thrust/system/detail/internal/decompose.h +117 -0
  1698. cuda/cccl/headers/include/thrust/system/detail/sequential/adjacent_difference.h +70 -0
  1699. cuda/cccl/headers/include/thrust/system/detail/sequential/assign_value.h +42 -0
  1700. cuda/cccl/headers/include/thrust/system/detail/sequential/binary_search.h +136 -0
  1701. cuda/cccl/headers/include/thrust/system/detail/sequential/copy.h +49 -0
  1702. cuda/cccl/headers/include/thrust/system/detail/sequential/copy.inl +119 -0
  1703. cuda/cccl/headers/include/thrust/system/detail/sequential/copy_backward.h +49 -0
  1704. cuda/cccl/headers/include/thrust/system/detail/sequential/copy_if.h +71 -0
  1705. cuda/cccl/headers/include/thrust/system/detail/sequential/count.h +29 -0
  1706. cuda/cccl/headers/include/thrust/system/detail/sequential/equal.h +29 -0
  1707. cuda/cccl/headers/include/thrust/system/detail/sequential/execution_policy.h +52 -0
  1708. cuda/cccl/headers/include/thrust/system/detail/sequential/extrema.h +110 -0
  1709. cuda/cccl/headers/include/thrust/system/detail/sequential/fill.h +29 -0
  1710. cuda/cccl/headers/include/thrust/system/detail/sequential/find.h +62 -0
  1711. cuda/cccl/headers/include/thrust/system/detail/sequential/for_each.h +74 -0
  1712. cuda/cccl/headers/include/thrust/system/detail/sequential/gather.h +29 -0
  1713. cuda/cccl/headers/include/thrust/system/detail/sequential/general_copy.h +123 -0
  1714. cuda/cccl/headers/include/thrust/system/detail/sequential/generate.h +29 -0
  1715. cuda/cccl/headers/include/thrust/system/detail/sequential/get_value.h +43 -0
  1716. cuda/cccl/headers/include/thrust/system/detail/sequential/inner_product.h +29 -0
  1717. cuda/cccl/headers/include/thrust/system/detail/sequential/insertion_sort.h +141 -0
  1718. cuda/cccl/headers/include/thrust/system/detail/sequential/iter_swap.h +45 -0
  1719. cuda/cccl/headers/include/thrust/system/detail/sequential/logical.h +29 -0
  1720. cuda/cccl/headers/include/thrust/system/detail/sequential/malloc_and_free.h +50 -0
  1721. cuda/cccl/headers/include/thrust/system/detail/sequential/merge.h +75 -0
  1722. cuda/cccl/headers/include/thrust/system/detail/sequential/merge.inl +145 -0
  1723. cuda/cccl/headers/include/thrust/system/detail/sequential/mismatch.h +29 -0
  1724. cuda/cccl/headers/include/thrust/system/detail/sequential/partition.h +301 -0
  1725. cuda/cccl/headers/include/thrust/system/detail/sequential/per_device_resource.h +29 -0
  1726. cuda/cccl/headers/include/thrust/system/detail/sequential/reduce.h +64 -0
  1727. cuda/cccl/headers/include/thrust/system/detail/sequential/reduce_by_key.h +98 -0
  1728. cuda/cccl/headers/include/thrust/system/detail/sequential/remove.h +179 -0
  1729. cuda/cccl/headers/include/thrust/system/detail/sequential/replace.h +29 -0
  1730. cuda/cccl/headers/include/thrust/system/detail/sequential/reverse.h +29 -0
  1731. cuda/cccl/headers/include/thrust/system/detail/sequential/scan.h +154 -0
  1732. cuda/cccl/headers/include/thrust/system/detail/sequential/scan_by_key.h +145 -0
  1733. cuda/cccl/headers/include/thrust/system/detail/sequential/scatter.h +29 -0
  1734. cuda/cccl/headers/include/thrust/system/detail/sequential/sequence.h +29 -0
  1735. cuda/cccl/headers/include/thrust/system/detail/sequential/set_operations.h +206 -0
  1736. cuda/cccl/headers/include/thrust/system/detail/sequential/sort.h +59 -0
  1737. cuda/cccl/headers/include/thrust/system/detail/sequential/sort.inl +116 -0
  1738. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.h +55 -0
  1739. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.inl +356 -0
  1740. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.h +48 -0
  1741. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.inl +124 -0
  1742. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.h +48 -0
  1743. cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.inl +586 -0
  1744. cuda/cccl/headers/include/thrust/system/detail/sequential/swap_ranges.h +29 -0
  1745. cuda/cccl/headers/include/thrust/system/detail/sequential/tabulate.h +29 -0
  1746. cuda/cccl/headers/include/thrust/system/detail/sequential/temporary_buffer.h +29 -0
  1747. cuda/cccl/headers/include/thrust/system/detail/sequential/transform.h +29 -0
  1748. cuda/cccl/headers/include/thrust/system/detail/sequential/transform_reduce.h +29 -0
  1749. cuda/cccl/headers/include/thrust/system/detail/sequential/transform_scan.h +29 -0
  1750. cuda/cccl/headers/include/thrust/system/detail/sequential/trivial_copy.h +58 -0
  1751. cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_copy.h +29 -0
  1752. cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_fill.h +29 -0
  1753. cuda/cccl/headers/include/thrust/system/detail/sequential/unique.h +115 -0
  1754. cuda/cccl/headers/include/thrust/system/detail/sequential/unique_by_key.h +106 -0
  1755. cuda/cccl/headers/include/thrust/system/detail/system_error.inl +108 -0
  1756. cuda/cccl/headers/include/thrust/system/error_code.h +512 -0
  1757. cuda/cccl/headers/include/thrust/system/omp/detail/adjacent_difference.h +54 -0
  1758. cuda/cccl/headers/include/thrust/system/omp/detail/assign_value.h +30 -0
  1759. cuda/cccl/headers/include/thrust/system/omp/detail/binary_search.h +77 -0
  1760. cuda/cccl/headers/include/thrust/system/omp/detail/copy.h +50 -0
  1761. cuda/cccl/headers/include/thrust/system/omp/detail/copy.inl +74 -0
  1762. cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.h +56 -0
  1763. cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.inl +59 -0
  1764. cuda/cccl/headers/include/thrust/system/omp/detail/count.h +30 -0
  1765. cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.h +50 -0
  1766. cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.inl +65 -0
  1767. cuda/cccl/headers/include/thrust/system/omp/detail/equal.h +30 -0
  1768. cuda/cccl/headers/include/thrust/system/omp/detail/execution_policy.h +127 -0
  1769. cuda/cccl/headers/include/thrust/system/omp/detail/extrema.h +66 -0
  1770. cuda/cccl/headers/include/thrust/system/omp/detail/fill.h +30 -0
  1771. cuda/cccl/headers/include/thrust/system/omp/detail/find.h +53 -0
  1772. cuda/cccl/headers/include/thrust/system/omp/detail/for_each.h +56 -0
  1773. cuda/cccl/headers/include/thrust/system/omp/detail/for_each.inl +87 -0
  1774. cuda/cccl/headers/include/thrust/system/omp/detail/gather.h +30 -0
  1775. cuda/cccl/headers/include/thrust/system/omp/detail/generate.h +30 -0
  1776. cuda/cccl/headers/include/thrust/system/omp/detail/get_value.h +30 -0
  1777. cuda/cccl/headers/include/thrust/system/omp/detail/inner_product.h +30 -0
  1778. cuda/cccl/headers/include/thrust/system/omp/detail/iter_swap.h +30 -0
  1779. cuda/cccl/headers/include/thrust/system/omp/detail/logical.h +30 -0
  1780. cuda/cccl/headers/include/thrust/system/omp/detail/malloc_and_free.h +30 -0
  1781. cuda/cccl/headers/include/thrust/system/omp/detail/memory.inl +93 -0
  1782. cuda/cccl/headers/include/thrust/system/omp/detail/merge.h +30 -0
  1783. cuda/cccl/headers/include/thrust/system/omp/detail/mismatch.h +30 -0
  1784. cuda/cccl/headers/include/thrust/system/omp/detail/partition.h +88 -0
  1785. cuda/cccl/headers/include/thrust/system/omp/detail/partition.inl +102 -0
  1786. cuda/cccl/headers/include/thrust/system/omp/detail/per_device_resource.h +29 -0
  1787. cuda/cccl/headers/include/thrust/system/omp/detail/pragma_omp.h +54 -0
  1788. cuda/cccl/headers/include/thrust/system/omp/detail/reduce.h +54 -0
  1789. cuda/cccl/headers/include/thrust/system/omp/detail/reduce.inl +78 -0
  1790. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.h +64 -0
  1791. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.inl +65 -0
  1792. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.h +59 -0
  1793. cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.inl +103 -0
  1794. cuda/cccl/headers/include/thrust/system/omp/detail/remove.h +72 -0
  1795. cuda/cccl/headers/include/thrust/system/omp/detail/remove.inl +87 -0
  1796. cuda/cccl/headers/include/thrust/system/omp/detail/replace.h +30 -0
  1797. cuda/cccl/headers/include/thrust/system/omp/detail/reverse.h +30 -0
  1798. cuda/cccl/headers/include/thrust/system/omp/detail/scan.h +73 -0
  1799. cuda/cccl/headers/include/thrust/system/omp/detail/scan.inl +172 -0
  1800. cuda/cccl/headers/include/thrust/system/omp/detail/scan_by_key.h +36 -0
  1801. cuda/cccl/headers/include/thrust/system/omp/detail/scatter.h +30 -0
  1802. cuda/cccl/headers/include/thrust/system/omp/detail/sequence.h +30 -0
  1803. cuda/cccl/headers/include/thrust/system/omp/detail/set_operations.h +30 -0
  1804. cuda/cccl/headers/include/thrust/system/omp/detail/sort.h +60 -0
  1805. cuda/cccl/headers/include/thrust/system/omp/detail/sort.inl +265 -0
  1806. cuda/cccl/headers/include/thrust/system/omp/detail/swap_ranges.h +30 -0
  1807. cuda/cccl/headers/include/thrust/system/omp/detail/tabulate.h +30 -0
  1808. cuda/cccl/headers/include/thrust/system/omp/detail/temporary_buffer.h +29 -0
  1809. cuda/cccl/headers/include/thrust/system/omp/detail/transform.h +30 -0
  1810. cuda/cccl/headers/include/thrust/system/omp/detail/transform_reduce.h +30 -0
  1811. cuda/cccl/headers/include/thrust/system/omp/detail/transform_scan.h +30 -0
  1812. cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_copy.h +30 -0
  1813. cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_fill.h +30 -0
  1814. cuda/cccl/headers/include/thrust/system/omp/detail/unique.h +60 -0
  1815. cuda/cccl/headers/include/thrust/system/omp/detail/unique.inl +71 -0
  1816. cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.h +67 -0
  1817. cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.inl +75 -0
  1818. cuda/cccl/headers/include/thrust/system/omp/execution_policy.h +62 -0
  1819. cuda/cccl/headers/include/thrust/system/omp/memory.h +111 -0
  1820. cuda/cccl/headers/include/thrust/system/omp/memory_resource.h +75 -0
  1821. cuda/cccl/headers/include/thrust/system/omp/pointer.h +124 -0
  1822. cuda/cccl/headers/include/thrust/system/omp/vector.h +99 -0
  1823. cuda/cccl/headers/include/thrust/system/system_error.h +185 -0
  1824. cuda/cccl/headers/include/thrust/system/tbb/detail/adjacent_difference.h +54 -0
  1825. cuda/cccl/headers/include/thrust/system/tbb/detail/assign_value.h +30 -0
  1826. cuda/cccl/headers/include/thrust/system/tbb/detail/binary_search.h +30 -0
  1827. cuda/cccl/headers/include/thrust/system/tbb/detail/copy.h +50 -0
  1828. cuda/cccl/headers/include/thrust/system/tbb/detail/copy.inl +73 -0
  1829. cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.h +47 -0
  1830. cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.inl +136 -0
  1831. cuda/cccl/headers/include/thrust/system/tbb/detail/count.h +30 -0
  1832. cuda/cccl/headers/include/thrust/system/tbb/detail/equal.h +30 -0
  1833. cuda/cccl/headers/include/thrust/system/tbb/detail/execution_policy.h +109 -0
  1834. cuda/cccl/headers/include/thrust/system/tbb/detail/extrema.h +66 -0
  1835. cuda/cccl/headers/include/thrust/system/tbb/detail/fill.h +30 -0
  1836. cuda/cccl/headers/include/thrust/system/tbb/detail/find.h +49 -0
  1837. cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.h +51 -0
  1838. cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.inl +91 -0
  1839. cuda/cccl/headers/include/thrust/system/tbb/detail/gather.h +30 -0
  1840. cuda/cccl/headers/include/thrust/system/tbb/detail/generate.h +30 -0
  1841. cuda/cccl/headers/include/thrust/system/tbb/detail/get_value.h +30 -0
  1842. cuda/cccl/headers/include/thrust/system/tbb/detail/inner_product.h +30 -0
  1843. cuda/cccl/headers/include/thrust/system/tbb/detail/iter_swap.h +30 -0
  1844. cuda/cccl/headers/include/thrust/system/tbb/detail/logical.h +30 -0
  1845. cuda/cccl/headers/include/thrust/system/tbb/detail/malloc_and_free.h +30 -0
  1846. cuda/cccl/headers/include/thrust/system/tbb/detail/memory.inl +94 -0
  1847. cuda/cccl/headers/include/thrust/system/tbb/detail/merge.h +77 -0
  1848. cuda/cccl/headers/include/thrust/system/tbb/detail/merge.inl +327 -0
  1849. cuda/cccl/headers/include/thrust/system/tbb/detail/mismatch.h +30 -0
  1850. cuda/cccl/headers/include/thrust/system/tbb/detail/partition.h +84 -0
  1851. cuda/cccl/headers/include/thrust/system/tbb/detail/partition.inl +98 -0
  1852. cuda/cccl/headers/include/thrust/system/tbb/detail/per_device_resource.h +29 -0
  1853. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.h +54 -0
  1854. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.inl +137 -0
  1855. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.h +61 -0
  1856. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.inl +400 -0
  1857. cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_intervals.h +140 -0
  1858. cuda/cccl/headers/include/thrust/system/tbb/detail/remove.h +76 -0
  1859. cuda/cccl/headers/include/thrust/system/tbb/detail/remove.inl +87 -0
  1860. cuda/cccl/headers/include/thrust/system/tbb/detail/replace.h +30 -0
  1861. cuda/cccl/headers/include/thrust/system/tbb/detail/reverse.h +30 -0
  1862. cuda/cccl/headers/include/thrust/system/tbb/detail/scan.h +59 -0
  1863. cuda/cccl/headers/include/thrust/system/tbb/detail/scan.inl +312 -0
  1864. cuda/cccl/headers/include/thrust/system/tbb/detail/scan_by_key.h +33 -0
  1865. cuda/cccl/headers/include/thrust/system/tbb/detail/scatter.h +30 -0
  1866. cuda/cccl/headers/include/thrust/system/tbb/detail/sequence.h +30 -0
  1867. cuda/cccl/headers/include/thrust/system/tbb/detail/set_operations.h +30 -0
  1868. cuda/cccl/headers/include/thrust/system/tbb/detail/sort.h +60 -0
  1869. cuda/cccl/headers/include/thrust/system/tbb/detail/sort.inl +295 -0
  1870. cuda/cccl/headers/include/thrust/system/tbb/detail/swap_ranges.h +30 -0
  1871. cuda/cccl/headers/include/thrust/system/tbb/detail/tabulate.h +30 -0
  1872. cuda/cccl/headers/include/thrust/system/tbb/detail/temporary_buffer.h +29 -0
  1873. cuda/cccl/headers/include/thrust/system/tbb/detail/transform.h +30 -0
  1874. cuda/cccl/headers/include/thrust/system/tbb/detail/transform_reduce.h +30 -0
  1875. cuda/cccl/headers/include/thrust/system/tbb/detail/transform_scan.h +30 -0
  1876. cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_copy.h +30 -0
  1877. cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_fill.h +30 -0
  1878. cuda/cccl/headers/include/thrust/system/tbb/detail/unique.h +60 -0
  1879. cuda/cccl/headers/include/thrust/system/tbb/detail/unique.inl +71 -0
  1880. cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.h +67 -0
  1881. cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.inl +75 -0
  1882. cuda/cccl/headers/include/thrust/system/tbb/execution_policy.h +62 -0
  1883. cuda/cccl/headers/include/thrust/system/tbb/memory.h +111 -0
  1884. cuda/cccl/headers/include/thrust/system/tbb/memory_resource.h +75 -0
  1885. cuda/cccl/headers/include/thrust/system/tbb/pointer.h +124 -0
  1886. cuda/cccl/headers/include/thrust/system/tbb/vector.h +99 -0
  1887. cuda/cccl/headers/include/thrust/system_error.h +57 -0
  1888. cuda/cccl/headers/include/thrust/tabulate.h +125 -0
  1889. cuda/cccl/headers/include/thrust/transform.h +1045 -0
  1890. cuda/cccl/headers/include/thrust/transform_reduce.h +190 -0
  1891. cuda/cccl/headers/include/thrust/transform_scan.h +442 -0
  1892. cuda/cccl/headers/include/thrust/tuple.h +139 -0
  1893. cuda/cccl/headers/include/thrust/type_traits/integer_sequence.h +261 -0
  1894. cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +154 -0
  1895. cuda/cccl/headers/include/thrust/type_traits/is_execution_policy.h +65 -0
  1896. cuda/cccl/headers/include/thrust/type_traits/is_operator_less_or_greater_function_object.h +184 -0
  1897. cuda/cccl/headers/include/thrust/type_traits/is_operator_plus_function_object.h +116 -0
  1898. cuda/cccl/headers/include/thrust/type_traits/is_trivially_relocatable.h +336 -0
  1899. cuda/cccl/headers/include/thrust/type_traits/logical_metafunctions.h +42 -0
  1900. cuda/cccl/headers/include/thrust/type_traits/unwrap_contiguous_iterator.h +63 -0
  1901. cuda/cccl/headers/include/thrust/uninitialized_copy.h +300 -0
  1902. cuda/cccl/headers/include/thrust/uninitialized_fill.h +268 -0
  1903. cuda/cccl/headers/include/thrust/unique.h +1088 -0
  1904. cuda/cccl/headers/include/thrust/universal_allocator.h +93 -0
  1905. cuda/cccl/headers/include/thrust/universal_ptr.h +34 -0
  1906. cuda/cccl/headers/include/thrust/universal_vector.h +71 -0
  1907. cuda/cccl/headers/include/thrust/version.h +93 -0
  1908. cuda/cccl/headers/include/thrust/zip_function.h +176 -0
  1909. cuda/cccl/headers/include_paths.py +51 -0
  1910. cuda/cccl/parallel/__init__.py +9 -0
  1911. cuda/cccl/parallel/experimental/__init__.py +24 -0
  1912. cuda/cccl/py.typed +0 -0
  1913. cuda/compute/__init__.py +79 -0
  1914. cuda/compute/_bindings.py +79 -0
  1915. cuda/compute/_bindings.pyi +475 -0
  1916. cuda/compute/_bindings_impl.pyx +2273 -0
  1917. cuda/compute/_caching.py +71 -0
  1918. cuda/compute/_cccl_interop.py +422 -0
  1919. cuda/compute/_utils/__init__.py +0 -0
  1920. cuda/compute/_utils/protocols.py +132 -0
  1921. cuda/compute/_utils/temp_storage_buffer.py +86 -0
  1922. cuda/compute/algorithms/__init__.py +54 -0
  1923. cuda/compute/algorithms/_histogram.py +243 -0
  1924. cuda/compute/algorithms/_merge_sort.py +225 -0
  1925. cuda/compute/algorithms/_radix_sort.py +312 -0
  1926. cuda/compute/algorithms/_reduce.py +182 -0
  1927. cuda/compute/algorithms/_scan.py +331 -0
  1928. cuda/compute/algorithms/_segmented_reduce.py +257 -0
  1929. cuda/compute/algorithms/_three_way_partition.py +261 -0
  1930. cuda/compute/algorithms/_transform.py +329 -0
  1931. cuda/compute/algorithms/_unique_by_key.py +252 -0
  1932. cuda/compute/cccl/.gitkeep +0 -0
  1933. cuda/compute/cu12/_bindings_impl.cp313-win_amd64.pyd +0 -0
  1934. cuda/compute/cu12/cccl/cccl.c.parallel.dll +0 -0
  1935. cuda/compute/cu12/cccl/cccl.c.parallel.lib +0 -0
  1936. cuda/compute/cu13/_bindings_impl.cp313-win_amd64.pyd +0 -0
  1937. cuda/compute/cu13/cccl/cccl.c.parallel.dll +0 -0
  1938. cuda/compute/cu13/cccl/cccl.c.parallel.lib +0 -0
  1939. cuda/compute/iterators/__init__.py +21 -0
  1940. cuda/compute/iterators/_factories.py +219 -0
  1941. cuda/compute/iterators/_iterators.py +817 -0
  1942. cuda/compute/iterators/_zip_iterator.py +199 -0
  1943. cuda/compute/numba_utils.py +53 -0
  1944. cuda/compute/op.py +3 -0
  1945. cuda/compute/struct.py +272 -0
  1946. cuda/compute/typing.py +37 -0
  1947. cuda/coop/__init__.py +8 -0
  1948. cuda/coop/_caching.py +48 -0
  1949. cuda/coop/_common.py +275 -0
  1950. cuda/coop/_nvrtc.py +92 -0
  1951. cuda/coop/_scan_op.py +181 -0
  1952. cuda/coop/_types.py +937 -0
  1953. cuda/coop/_typing.py +107 -0
  1954. cuda/coop/block/__init__.py +39 -0
  1955. cuda/coop/block/_block_exchange.py +251 -0
  1956. cuda/coop/block/_block_load_store.py +215 -0
  1957. cuda/coop/block/_block_merge_sort.py +125 -0
  1958. cuda/coop/block/_block_radix_sort.py +214 -0
  1959. cuda/coop/block/_block_reduce.py +294 -0
  1960. cuda/coop/block/_block_scan.py +983 -0
  1961. cuda/coop/warp/__init__.py +9 -0
  1962. cuda/coop/warp/_warp_merge_sort.py +92 -0
  1963. cuda/coop/warp/_warp_reduce.py +153 -0
  1964. cuda/coop/warp/_warp_scan.py +78 -0
  1965. cuda_cccl-0.3.3.dist-info/METADATA +41 -0
  1966. cuda_cccl-0.3.3.dist-info/RECORD +1968 -0
  1967. cuda_cccl-0.3.3.dist-info/WHEEL +5 -0
  1968. cuda_cccl-0.3.3.dist-info/licenses/LICENSE +1 -0
@@ -0,0 +1,2983 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ *
17
+ * Licensed under the Apache License v2.0 with LLVM Exceptions.
18
+ * See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
19
+ */
20
+
21
+ /* Temporary helper #defines, #undef'ed at end of header */
22
+ #define NVTX3_CPP_VERSION_MAJOR 1
23
+ #define NVTX3_CPP_VERSION_MINOR 0
24
+
25
+ /* This section handles the decision of whether to provide unversioned symbols.
26
+ * If NVTX3_CPP_REQUIRE_EXPLICIT_VERSION is #defined, unversioned symbols are
27
+ * not provided, and explicit-version symbols such as nvtx3::v1::scoped_range
28
+ * and NVTX3_V1_FUNC_RANGE must be used. By default, the first #include of this
29
+ * header will define the unversioned symbols such as nvtx3::scoped_range and
30
+ * NVTX3_FUNC_RANGE. Subsequently including a different major version of this
31
+ * header without #defining NVTX3_CPP_REQUIRE_EXPLICIT_VERSION triggers an error
32
+ * since the symbols would conflict. Subsequently including a different
33
+ * minor version within the same major version is allowed. Functionality of
34
+ * minor versions is cumulative, regardless of include order.
35
+ *
36
+ * Since NVTX3_CPP_REQUIRE_EXPLICIT_VERSION allows all combinations of versions
37
+ * to coexist without problems within a translation unit, the recommended best
38
+ * practice for instrumenting header-based libraries with NVTX C++ Wrappers is
39
+ * to #define NVTX3_CPP_REQUIRE_EXPLICIT_VERSION before including nvtx3.hpp,
40
+ * #undef it afterward, and only use explicit-version symbols. This is not
41
+ * necessary in common cases, such as instrumenting a standalone application, or
42
+ * static/shared libraries in .cpp files or headers private to those projects.
43
+ */
44
+ /* clang-format off */
45
+ #if !defined(NVTX3_CPP_REQUIRE_EXPLICIT_VERSION)
46
+ /* Define macro used by all definitions in this header to indicate the
47
+ * unversioned symbols should be defined in addition to the versioned ones.
48
+ */
49
+ #define NVTX3_INLINE_THIS_VERSION
50
+
51
+ #if !defined(NVTX3_CPP_INLINED_VERSION_MAJOR)
52
+ /* First occurrence of this header in the translation unit. Define macros
53
+ * indicating which version shall be used for unversioned symbols.
54
+ */
55
+
56
+ /**
57
+ * @brief Semantic major version number for NVTX C++ wrappers of unversioned symbols
58
+ *
59
+ * Breaking changes may occur between major versions, and different major versions
60
+ * cannot provide unversioned symbols in the same translation unit (.cpp file).
61
+ *
62
+ * Note: If NVTX3_CPP_REQUIRE_EXPLICIT_VERSION is defined, this macro is not defined.
63
+ *
64
+ * Not to be confused with the version number of the NVTX core library.
65
+ */
66
+ #define NVTX3_CPP_INLINED_VERSION_MAJOR 1 // NVTX3_CPP_VERSION_MAJOR
67
+
68
+ /**
69
+ * @brief Semantic minor version number for NVTX C++ wrappers of unversioned symbols
70
+ *
71
+ * No breaking changes occur between minor versions -- minor version changes within
72
+ * a major version are purely additive.
73
+ *
74
+ * Note: If NVTX3_CPP_REQUIRE_EXPLICIT_VERSION is defined, this macro is not defined.
75
+ *
76
+ * Not to be confused with the version number of the NVTX core library.
77
+ */
78
+ #define NVTX3_CPP_INLINED_VERSION_MINOR 0 // NVTX3_CPP_VERSION_MINOR
79
+ #elif NVTX3_CPP_INLINED_VERSION_MAJOR != NVTX3_CPP_VERSION_MAJOR
80
+ /* Unsupported case -- cannot define unversioned symbols for different major versions
81
+ * in the same translation unit.
82
+ */
83
+ #error \
84
+ "Two different major versions of the NVTX C++ Wrappers are being included in a single .cpp file, with unversioned symbols enabled in both. Only one major version can enable unversioned symbols in a .cpp file. To disable unversioned symbols, #define NVTX3_CPP_REQUIRE_EXPLICIT_VERSION before #including nvtx3.hpp, and use the explicit-version symbols instead -- this is the preferred way to use nvtx3.hpp from a header file."
85
+ #elif (NVTX3_CPP_INLINED_VERSION_MAJOR == NVTX3_CPP_VERSION_MAJOR) && \
86
+ (NVTX3_CPP_INLINED_VERSION_MINOR < NVTX3_CPP_VERSION_MINOR)
87
+ /* An older minor version of the same major version already defined unversioned
88
+ * symbols. The new features provided in this header will be inlined, so
89
+ * redefine the minor version macro to this header's version.
90
+ */
91
+ #undef NVTX3_CPP_INLINED_VERSION_MINOR
92
+ #define NVTX3_CPP_INLINED_VERSION_MINOR 0 // NVTX3_CPP_VERSION_MINOR
93
+ // else, already have this version or newer, nothing to do
94
+ #endif
95
+ #endif
96
+ /* clang-format on */
97
+
98
+ /**
99
+ * @file nvtx3.hpp
100
+ *
101
+ * @brief Provides C++ constructs making the NVTX library safer and easier to
102
+ * use with zero overhead.
103
+ */
104
+
105
+ /**
106
+ * \mainpage
107
+ * \tableofcontents
108
+ *
109
+ * \section QUICK_START Quick Start
110
+ *
111
+ * To add NVTX ranges to your code, use the `nvtx3::scoped_range` RAII object. A
112
+ * range begins when the object is created, and ends when the object is
113
+ * destroyed.
114
+ *
115
+ * \code{.cpp}
116
+ * #include "nvtx3.hpp"
117
+ * void some_function() {
118
+ * // Begins a NVTX range with the message "some_function"
119
+ * // The range ends when some_function() returns and `r` is destroyed
120
+ * nvtx3::scoped_range r{"some_function"};
121
+ *
122
+ * for(int i = 0; i < 6; ++i) {
123
+ * nvtx3::scoped_range loop{"loop range"};
124
+ * std::this_thread::sleep_for(std::chrono::seconds{1});
125
+ * }
126
+ * } // Range ends when `r` is destroyed
127
+ * \endcode
128
+ *
129
+ * The example code above generates the following timeline view in Nsight
130
+ * Systems:
131
+ *
132
+ * \image html
133
+ * https://raw.githubusercontent.com/NVIDIA/NVTX/release-v3/docs/images/example_range.png
134
+ *
135
+ * Alternatively, use the \ref MACROS like `NVTX3_FUNC_RANGE()` to add
136
+ * ranges to your code that automatically use the name of the enclosing function
137
+ * as the range's message.
138
+ *
139
+ * \code{.cpp}
140
+ * #include "nvtx3.hpp"
141
+ * void some_function() {
142
+ * // Creates a range with a message "some_function" that ends when the
143
+ * // enclosing function returns
144
+ * NVTX3_FUNC_RANGE();
145
+ * ...
146
+ * }
147
+ * \endcode
148
+ *
149
+ *
150
+ * \section Overview Overview
151
+ *
152
+ * The NVTX library provides a set of functions for users to annotate their code
153
+ * to aid in performance profiling and optimization. These annotations provide
154
+ * information to tools like Nsight Systems to improve visualization of
155
+ * application timelines.
156
+ *
157
+ * \ref RANGES are one of the most commonly used NVTX constructs for annotating
158
+ * a span of time. For example, imagine a user wanted to see every time a
159
+ * function, `my_function`, is called and how long it takes to execute. This can
160
+ * be accomplished with an NVTX range created on the entry to the function and
161
+ * terminated on return from `my_function` using the push/pop C APIs:
162
+ *
163
+ * \code{.cpp}
164
+ * void my_function(...) {
165
+ * nvtxRangePushA("my_function"); // Begins NVTX range
166
+ * // do work
167
+ * nvtxRangePop(); // Ends NVTX range
168
+ * }
169
+ * \endcode
170
+ *
171
+ * One of the challenges with using the NVTX C API is that it requires manually
172
+ * terminating the end of the range with `nvtxRangePop`. This can be challenging
173
+ * if `my_function()` has multiple returns or can throw exceptions as it
174
+ * requires calling `nvtxRangePop()` before all possible return points.
175
+ *
176
+ * NVTX C++ solves this inconvenience through the "RAII" technique by providing
177
+ * a `nvtx3::scoped_range` class that begins a range at construction and ends
178
+ * the range on destruction. The above example then becomes:
179
+ *
180
+ * \code{.cpp}
181
+ * void my_function(...) {
182
+ * nvtx3::scoped_range r{"my_function"}; // Begins NVTX range
183
+ * // do work
184
+ * } // Range ends on exit from `my_function` when `r` is destroyed
185
+ * \endcode
186
+ *
187
+ * The range object `r` is deterministically destroyed whenever `my_function`
188
+ * returns---ending the NVTX range without manual intervention. For more
189
+ * information, see \ref RANGES and `nvtx3::scoped_range_in`.
190
+ *
191
+ * Another inconvenience of the NVTX C APIs is the several constructs where the
192
+ * user is expected to initialize an object at the beginning of an application
193
+ * and reuse that object throughout the lifetime of the application. For example
194
+ * see domains, categories, and registered messages.
195
+ *
196
+ * Example:
197
+ * \code{.cpp}
198
+ * nvtxDomainHandle_t D = nvtxDomainCreateA("my domain");
199
+ * // Reuse `D` throughout the rest of the application
200
+ * \endcode
201
+ *
202
+ * This can be problematic if the user application or library does not have an
203
+ * explicit initialization function called before all other functions to
204
+ * ensure that these long-lived objects are initialized before being used.
205
+ *
206
+ * NVTX C++ makes use of the "construct on first use" technique to alleviate
207
+ * this inconvenience. In short, a function local static object is constructed
208
+ * upon the first invocation of a function and returns a reference to that
209
+ * object on all future invocations. See the documentation for `nvtx3::domain`,
210
+ * `nvtx3::named_category`, `nvtx3::registered_string`, and
211
+ * https://isocpp.org/wiki/faq/ctors#static-init-order-on-first-use for more
212
+ * information.
213
+ *
214
+ * Using construct on first use, the above example becomes:
215
+ * \code{.cpp}
216
+ * struct my_domain{ static constexpr char const* name{"my domain"}; };
217
+ *
218
+ * // The first invocation of `domain::get` for the type `my_domain` will
219
+ * // construct a `nvtx3::domain` object and return a reference to it. Future
220
+ * // invocations simply return a reference.
221
+ * nvtx3::domain const& D = nvtx3::domain::get<my_domain>();
222
+ * \endcode
223
+ * See
224
+ * https://docs.nvidia.com/cuda/profiler-users-guide/index.html#nvtx and
225
+ * https://devblogs.nvidia.com/cuda-pro-tip-generate-custom-application-profile-timelines-nvtx/
226
+ * for more information about NVTX and how it can be used.
227
+ *
228
+ * \section RANGES Ranges
229
+ *
230
+ * Ranges are used to describe a span of time during the execution of an
231
+ * application. Common examples are using ranges to annotate the time it takes
232
+ * to execute a function or an iteration of a loop.
233
+ *
234
+ * NVTX C++ uses RAII to automate the generation of ranges that are tied to the
235
+ * lifetime of objects, similar to `std::lock_guard` in the C++ Standard
236
+ * Library.
237
+ *
238
+ * \subsection scoped_range Scoped Range
239
+ *
240
+ * `nvtx3::scoped_range_in` is a class that begins a range upon construction
241
+ * and ends the range at destruction. This is one of the most commonly used
242
+ * constructs in NVTX C++ and is useful for annotating spans of time on a
243
+ * particular thread. These ranges can be nested to arbitrary depths.
244
+ *
245
+ * `nvtx3::scoped_range` is an alias for a `nvtx3::scoped_range_in` in the
246
+ * global NVTX domain. For more information about Domains, see \ref DOMAINS.
247
+ *
248
+ * Various attributes of a range can be configured by constructing a
249
+ * `nvtx3::scoped_range_in` with a `nvtx3::event_attributes` object. For
250
+ * more information, see \ref ATTRIBUTES.
251
+ *
252
+ * Example:
253
+ *
254
+ * \code{.cpp}
255
+ * void some_function() {
256
+ * // Creates a range for the duration of `some_function`
257
+ * nvtx3::scoped_range r{};
258
+ *
259
+ * while(true) {
260
+ * // Creates a range for every loop iteration
261
+ * // `loop_range` is nested inside `r`
262
+ * nvtx3::scoped_range loop_range{};
263
+ * }
264
+ * }
265
+ * \endcode
266
+ *
267
+ * \subsection unique_range Unique Range
268
+ *
269
+ * `nvtx3::unique_range` is similar to `nvtx3::scoped_range`, with a few key differences:
270
+ * - `unique_range` objects can be destroyed in any order whereas `scoped_range` objects must be
271
+ * destroyed in exact reverse creation order
272
+ * - `unique_range` can start and end on different threads
273
+ * - `unique_range` is movable
274
+ * - `unique_range` objects can be constructed as heap objects
275
+ *
276
+ * There is extra overhead associated with `unique_range` constructs and therefore use of
277
+ * `nvtx3::scoped_range_in` should be preferred.
278
+ *
279
+ * \section MARKS Marks
280
+ *
281
+ * `nvtx3::mark` annotates an instantaneous point in time with a "marker".
282
+ *
283
+ * Unlike a "range" which has a beginning and an end, a marker is a single event
284
+ * in an application, such as detecting a problem:
285
+ *
286
+ * \code{.cpp}
287
+ * bool success = do_operation(...);
288
+ * if (!success) {
289
+ * nvtx3::mark("operation failed!");
290
+ * }
291
+ * \endcode
292
+ *
293
+ * \section DOMAINS Domains
294
+ *
295
+ * Similar to C++ namespaces, domains allow for scoping NVTX events. By default,
296
+ * all NVTX events belong to the "global" domain. Libraries and applications
297
+ * should scope their events to use a custom domain to differentiate where the
298
+ * events originate from.
299
+ *
300
+ * It is common for a library or application to have only a single domain and
301
+ * for the name of that domain to be known at compile time. Therefore, Domains
302
+ * in NVTX C++ are represented by _tag types_.
303
+ *
304
+ * For example, to define a custom domain, simply define a new concrete type
305
+ * (a `class` or `struct`) with a `static` member called `name` that contains
306
+ * the desired name of the domain.
307
+ *
308
+ * \code{.cpp}
309
+ * struct my_domain{ static constexpr char const* name{"my domain"}; };
310
+ * \endcode
311
+ *
312
+ * For any NVTX C++ construct that can be scoped to a domain, the type
313
+ * `my_domain` can be passed as an explicit template argument to scope it to
314
+ * the custom domain.
315
+ *
316
+ * The tag type `nvtx3::domain::global` represents the global NVTX domain.
317
+ *
318
+ * \code{.cpp}
319
+ * // By default, `scoped_range_in` belongs to the global domain
320
+ * nvtx3::scoped_range_in<> r0{};
321
+ *
322
+ * // Alias for a `scoped_range_in` in the global domain
323
+ * nvtx3::scoped_range r1{};
324
+ *
325
+ * // `r` belongs to the custom domain
326
+ * nvtx3::scoped_range_in<my_domain> r{};
327
+ * \endcode
328
+ *
329
+ * When using a custom domain, it is recommended to define type aliases for NVTX
330
+ * constructs in the custom domain.
331
+ * \code{.cpp}
332
+ * using my_scoped_range = nvtx3::scoped_range_in<my_domain>;
333
+ * using my_registered_string = nvtx3::registered_string_in<my_domain>;
334
+ * using my_named_category = nvtx3::named_category_in<my_domain>;
335
+ * \endcode
336
+ *
337
+ * See `nvtx3::domain` for more information.
338
+ *
339
+ * \section ATTRIBUTES Event Attributes
340
+ *
341
+ * NVTX events can be customized with various attributes to provide additional
342
+ * information (such as a custom message) or to control visualization of the
343
+ * event (such as the color used). These attributes can be specified per-event
344
+ * via arguments to a `nvtx3::event_attributes` object.
345
+ *
346
+ * NVTX events can be customized via four "attributes":
347
+ * - \ref COLOR : color used to visualize the event in tools.
348
+ * - \ref MESSAGES : Custom message string.
349
+ * - \ref PAYLOAD : User-defined numerical value.
350
+ * - \ref CATEGORY : Intra-domain grouping.
351
+ *
352
+ * It is possible to construct a `nvtx3::event_attributes` from any number of
353
+ * attribute objects (nvtx3::color, nvtx3::message, nvtx3::payload,
354
+ * nvtx3::category) in any order. If an attribute is not specified, a tool
355
+ * specific default value is used. See `nvtx3::event_attributes` for more
356
+ * information.
357
+ *
358
+ * \code{.cpp}
359
+ * // Set message, same as passing nvtx3::message{"message"}
360
+ * nvtx3::event_attributes attr{"message"};
361
+ *
362
+ * // Set message and color
363
+ * nvtx3::event_attributes attr{"message", nvtx3::rgb{127, 255, 0}};
364
+ *
365
+ * // Set message, color, payload, category
366
+ * nvtx3::event_attributes attr{"message",
367
+ * nvtx3::rgb{127, 255, 0},
368
+ * nvtx3::payload{42},
369
+ * nvtx3::category{1}};
370
+ *
371
+ * // Same as above -- can use any order of arguments
372
+ * nvtx3::event_attributes attr{nvtx3::payload{42},
373
+ * nvtx3::category{1},
374
+ * "message",
375
+ * nvtx3::rgb{127, 255, 0}};
376
+ *
377
+ * // Multiple arguments of the same type are allowed, but only the first is
378
+ * // used -- in this example, payload is set to 42:
379
+ * nvtx3::event_attributes attr{ nvtx3::payload{42}, nvtx3::payload{7} };
380
+ *
381
+ * // Using the nvtx3 namespace in a local scope makes the syntax more succinct:
382
+ * using namespace nvtx3;
383
+ * event_attributes attr{"message", rgb{127, 255, 0}, payload{42}, category{1}};
384
+ * \endcode
385
+ *
386
+ * \subsection MESSAGES message
387
+ *
388
+ * `nvtx3::message` sets the message string for an NVTX event.
389
+ *
390
+ * Example:
391
+ * \code{.cpp}
392
+ * // Create an `event_attributes` with the message "my message"
393
+ * nvtx3::event_attributes attr{nvtx3::message{"my message"}};
394
+ *
395
+ * // strings and string literals are implicitly assumed to be a `nvtx3::message`
396
+ * nvtx3::event_attributes attr{"my message"};
397
+ * \endcode
398
+ *
399
+ * \subsubsection REGISTERED_MESSAGE Registered Messages
400
+ *
401
+ * Associating a `nvtx3::message` with an event requires copying the contents of
402
+ * the message every time the message is used, i.e., copying the entire message
403
+ * string. This may cause non-trivial overhead in performance sensitive code.
404
+ *
405
+ * To eliminate this overhead, NVTX allows registering a message string,
406
+ * yielding a "handle" that is inexpensive to copy that may be used in place of
407
+ * a message string. When visualizing the events, tools such as Nsight Systems
408
+ * will take care of mapping the message handle to its string.
409
+ *
410
+ * A message should be registered once and the handle reused throughout the rest
411
+ * of the application. This can be done by either explicitly creating static
412
+ * `nvtx3::registered_string` objects, or using the
413
+ * `nvtx3::registered_string::get` construct on first use helper (recommended).
414
+ *
415
+ * Similar to \ref DOMAINS, `nvtx3::registered_string::get` requires defining a
416
+ * custom tag type with a static `message` member whose value will be the
417
+ * contents of the registered string.
418
+ *
419
+ * Example:
420
+ * \code{.cpp}
421
+ * // Explicitly constructed, static `registered_string` in my_domain:
422
+ * static nvtx3::registered_string_in<my_domain> static_message{"my message"};
423
+ *
424
+ * // Or use construct on first use:
425
+ * // Define a tag type with a `message` member string to register
426
+ * struct my_message{ static constexpr char const* message{ "my message" }; };
427
+ *
428
+ * // Uses construct on first use to register the contents of
429
+ * // `my_message::message`
430
+ * auto& msg = nvtx3::registered_string_in<my_domain>::get<my_message>();
431
+ * \endcode
432
+ *
433
+ * \subsection COLOR color
434
+ *
435
+ * Associating a `nvtx3::color` with an event allows controlling how the event
436
+ * is visualized in a tool such as Nsight Systems. This is a convenient way to
437
+ * visually differentiate among different events.
438
+ *
439
+ * \code{.cpp}
440
+ * // Define a color via rgb color values
441
+ * nvtx3::color c{nvtx3::rgb{127, 255, 0}};
442
+ * nvtx3::event_attributes attr{c};
443
+ *
444
+ * // rgb color values can be passed directly to an `event_attributes`
445
+ * nvtx3::event_attributes attr1{nvtx3::rgb{127,255,0}};
446
+ * \endcode
447
+ *
448
+ * \subsection CATEGORY category
449
+ *
450
+ * A `nvtx3::category` is simply an integer id that allows for fine-grain
451
+ * grouping of NVTX events. For example, one might use separate categories for
452
+ * IO, memory allocation, compute, etc.
453
+ *
454
+ * \code{.cpp}
455
+ * nvtx3::event_attributes{nvtx3::category{1}};
456
+ * \endcode
457
+ *
458
+ * \subsubsection NAMED_CATEGORIES Named Categories
459
+ *
460
+ * Associates a `name` string with a category `id` to help differentiate among
461
+ * categories.
462
+ *
463
+ * For any given category id `Id`, a `named_category{Id, "name"}` should only
464
+ * be constructed once and reused throughout an application. This can be done by
465
+ * either explicitly creating static `nvtx3::named_category` objects, or using
466
+ * the `nvtx3::named_category::get` construct on first use helper (recommended).
467
+ *
468
+ * Similar to \ref DOMAINS, `nvtx3::named_category::get` requires defining a
469
+ * custom tag type with static `name` and `id` members.
470
+ *
471
+ * \code{.cpp}
472
+ * // Explicitly constructed, static `named_category` in my_domain:
473
+ * static nvtx3::named_category_in<my_domain> static_category{42, "my category"};
474
+ *
475
+ * // Or use construct on first use:
476
+ * // Define a tag type with `name` and `id` members
477
+ * struct my_category {
478
+ * static constexpr char const* name{"my category"}; // category name
479
+ * static constexpr uint32_t id{42}; // category id
480
+ * };
481
+ *
482
+ * // Use construct on first use to name the category id `42`
483
+ * // with name "my category":
484
+ * auto& cat = nvtx3::named_category_in<my_domain>::get<my_category>();
485
+ *
486
+ * // `attr` is associated with category id `42`
487
+ * nvtx3::event_attributes attr{cat};
488
+ * \endcode
489
+ *
490
+ * \subsection PAYLOAD payload
491
+ *
492
+ * Allows associating a user-defined numerical value with an event.
493
+ *
494
+ * \code{.cpp}
495
+ * // Constructs a payload from the `int32_t` value 42
496
+ * nvtx3::event_attributes attr{nvtx3::payload{42}};
497
+ * \endcode
498
+ *
499
+ *
500
+ * \section EXAMPLE Example
501
+ *
502
+ * Putting it all together:
503
+ * \code{.cpp}
504
+ * // Define a custom domain tag type
505
+ * struct my_domain{ static constexpr char const* name{"my domain"}; };
506
+ *
507
+ * // Define a named category tag type
508
+ * struct my_category{
509
+ * static constexpr char const* name{"my category"};
510
+ * static constexpr uint32_t id{42};
511
+ * };
512
+ *
513
+ * // Define a registered string tag type
514
+ * struct my_message{ static constexpr char const* message{"my message"}; };
515
+ *
516
+ * // For convenience, use aliases for domain scoped objects
517
+ * using my_scoped_range = nvtx3::scoped_range_in<my_domain>;
518
+ * using my_registered_string = nvtx3::registered_string_in<my_domain>;
519
+ * using my_named_category = nvtx3::named_category_in<my_domain>;
520
+ *
521
+ * // Default values for all attributes
522
+ * nvtx3::event_attributes attr{};
523
+ * my_scoped_range r0{attr};
524
+ *
525
+ * // Custom (unregistered) message, and unnamed category
526
+ * nvtx3::event_attributes attr1{"message", nvtx3::category{2}};
527
+ * my_scoped_range r1{attr1};
528
+ *
529
+ * // Alternatively, pass arguments of `event_attributes` constructor directly
530
+ * // to `my_scoped_range`
531
+ * my_scoped_range r2{"message", nvtx3::category{2}};
532
+ *
533
+ * // construct on first use a registered string
534
+ * auto& msg = my_registered_string::get<my_message>();
535
+ *
536
+ * // construct on first use a named category
537
+ * auto& cat = my_named_category::get<my_category>();
538
+ *
539
+ * // Use registered string and named category with a custom payload
540
+ * my_scoped_range r3{msg, cat, nvtx3::payload{42}};
541
+ *
542
+ * // Any number of arguments in any order
543
+ * my_scoped_range r{nvtx3::rgb{127, 255,0}, msg};
544
+ *
545
+ * \endcode
546
+ * \section MACROS Convenience Macros
547
+ *
548
+ * Oftentimes users want to quickly and easily add NVTX ranges to their library
549
+ * or application to aid in profiling and optimization.
550
+ *
551
+ * A convenient way to do this is to use the \ref NVTX3_FUNC_RANGE and
552
+ * \ref NVTX3_FUNC_RANGE_IN macros. These macros take care of constructing an
553
+ * `nvtx3::scoped_range_in` with the name of the enclosing function as the
554
+ * range's message.
555
+ *
556
+ * \code{.cpp}
557
+ * void some_function() {
558
+ * // Automatically generates an NVTX range for the duration of the function
559
+ * // using "some_function" as the event's message.
560
+ * NVTX3_FUNC_RANGE();
561
+ * }
562
+ * \endcode
563
+ *
564
+ */
565
+
566
+ /* Temporary helper #defines, removed with #undef at end of header */
567
+
568
+ /* Some compilers do not correctly support SFINAE, which is used in this API
569
+ * to detect common usage errors and provide clearer error messages (by using
570
+ * static_assert) than the compiler would produce otherwise. These compilers
571
+ * will generate errors while compiling this file such as:
572
+ *
573
+ * error: 'name' is not a member of 'nvtx3::v1::domain::global'
574
+ *
575
+ * The following compiler versions are known to have this problem, and so are
576
+ * set by default to disable the SFINAE-based checks:
577
+ *
578
+ * - All MSVC versions prior to VS2017 Update 7 (15.7)
579
+ * - GCC 8.1-8.3 (the problem was fixed in GCC 8.4)
580
+ *
581
+ * If you find your compiler hits this problem, you can work around it by
582
+ * defining NVTX3_USE_CHECKED_OVERLOADS_FOR_GET to 0 before including this
583
+ * header, or you can add a check for your compiler version to this #if.
584
+ * Also, please report the issue on the NVTX GitHub page.
585
+ */
586
+ #if !defined(NVTX3_USE_CHECKED_OVERLOADS_FOR_GET)
587
+ # if defined(_MSC_VER) && _MSC_VER < 1914 || defined(__GNUC__) && __GNUC__ == 8 && __GNUC_MINOR__ < 4
588
+ # define NVTX3_USE_CHECKED_OVERLOADS_FOR_GET 0
589
+ # else
590
+ # define NVTX3_USE_CHECKED_OVERLOADS_FOR_GET 1
591
+ # endif
592
+ # define NVTX3_USE_CHECKED_OVERLOADS_FOR_GET_DEFINED_HERE
593
+ #endif
594
+
595
+ /* Within this header, nvtx3::NVTX3_VERSION_NAMESPACE resolves to nvtx3::vX,
596
+ * where "X" is the major version number. */
597
+ #define NVTX3_CONCAT(A, B) A##B
598
+ #define NVTX3_NAMESPACE_FOR(VERSION) NVTX3_CONCAT(v, VERSION)
599
+ #define NVTX3_VERSION_NAMESPACE NVTX3_NAMESPACE_FOR(NVTX3_CPP_VERSION_MAJOR)
600
+
601
+ /* Avoid duplicating #if defined(NVTX3_INLINE_THIS_VERSION) for namespaces
602
+ * in each minor version by making a macro to use unconditionally, which
603
+ * resolves to "inline" or nothing as appropriate. */
604
+ #if defined(NVTX3_INLINE_THIS_VERSION)
605
+ # define NVTX3_INLINE_IF_REQUESTED inline
606
+ #else
607
+ # define NVTX3_INLINE_IF_REQUESTED
608
+ #endif
609
+
610
+ /* Enables the use of constexpr when support for C++14 constexpr is present.
611
+ *
612
+ * Initialization of a class member that is a union to a specific union member
613
+ * can only be done in the body of a constructor, not in a member initializer
614
+ * list. A constexpr constructor must have an empty body until C++14, so there
615
+ * is no way to make an initializer of a member union constexpr in C++11. This
616
+ * macro allows making functions constexpr in C++14 or newer, but non-constexpr
617
+ * in C++11 compilation. It is used here on constructors that initialize their
618
+ * member unions.
619
+ */
620
+ #if __cpp_constexpr >= 201304L
621
+ # define NVTX3_CONSTEXPR_IF_CPP14 constexpr
622
+ #else
623
+ # define NVTX3_CONSTEXPR_IF_CPP14
624
+ #endif
625
+
626
+ // Macro wrappers for C++ attributes
627
+ #if !defined(__has_cpp_attribute)
628
+ # define __has_cpp_attribute(x) 0
629
+ #endif
630
+ #if __has_cpp_attribute(maybe_unused)
631
+ # define NVTX3_MAYBE_UNUSED [[maybe_unused]]
632
+ #else
633
+ # define NVTX3_MAYBE_UNUSED
634
+ #endif
635
+ #if __has_cpp_attribute(nodiscard)
636
+ # define NVTX3_NO_DISCARD [[nodiscard]]
637
+ #else
638
+ # define NVTX3_NO_DISCARD
639
+ #endif
640
+
641
+ /* Use a macro for static asserts, which defaults to static_assert, but that
642
+ * testing tools can replace with a logging function. For example:
643
+ * #define NVTX3_STATIC_ASSERT(c, m) \
644
+ * do { if (!(c)) printf("static_assert would fail: %s\n", m); } while (0)
645
+ */
646
+ #if !defined(NVTX3_STATIC_ASSERT)
647
+ # define NVTX3_STATIC_ASSERT(condition, message) static_assert(condition, message)
648
+ # define NVTX3_STATIC_ASSERT_DEFINED_HERE
649
+ #endif
650
+
651
+ /* Implementation sections, enclosed in guard macros for each minor version */
652
+
653
+ #ifndef NVTX3_CPP_DEFINITIONS_V1_0
654
+ # define NVTX3_CPP_DEFINITIONS_V1_0
655
+
656
+ # include <cuda/std/__cccl/memory_wrapper.h>
657
+
658
+ # include <cstddef>
659
+ # include <string>
660
+ # include <type_traits>
661
+ # include <utility>
662
+
663
+ # include <nvtx3/nvToolsExt.h>
664
+
665
+ namespace nvtx3
666
+ {
667
+
668
+ NVTX3_INLINE_IF_REQUESTED namespace NVTX3_VERSION_NAMESPACE
669
+ {
670
+ namespace detail
671
+ {
672
+
673
+ template <typename Unused>
674
+ struct always_false : std::false_type
675
+ {};
676
+
677
+ template <typename T, typename = void>
678
+ struct has_name : std::false_type
679
+ {};
680
+ template <typename T>
681
+ struct has_name<T, decltype((void) T::name, void())> : std::true_type
682
+ {};
683
+
684
+ template <typename T, typename = void>
685
+ struct has_id : std::false_type
686
+ {};
687
+ template <typename T>
688
+ struct has_id<T, decltype((void) T::id, void())> : std::true_type
689
+ {};
690
+
691
+ template <typename T, typename = void>
692
+ struct has_message : std::false_type
693
+ {};
694
+ template <typename T>
695
+ struct has_message<T, decltype((void) T::message, void())> : std::true_type
696
+ {};
697
+
698
+ template <typename T, typename = void>
699
+ struct is_c_string : std::false_type
700
+ {};
701
+ template <typename T>
702
+ struct is_c_string<T,
703
+ typename std::enable_if<std::is_convertible<T, char const*>::value
704
+ || std::is_convertible<T, wchar_t const*>::value>::type> : std::true_type
705
+ {};
706
+
707
+ template <typename T>
708
+ using is_uint32 = std::is_same<typename std::decay<T>::type, uint32_t>;
709
+
710
+ } // namespace detail
711
+
712
+ /**
713
+ * @brief `domain`s allow for grouping NVTX events into a single scope to
714
+ * differentiate them from events in other `domain`s.
715
+ *
716
+ * By default, all NVTX constructs are placed in the "global" NVTX domain.
717
+ *
718
+ * A custom `domain` may be used in order to differentiate a library's or
719
+ * application's NVTX events from other events.
720
+ *
721
+ * `domain`s are expected to be long-lived and unique to a library or
722
+ * application. As such, it is assumed a domain's name is known at compile
723
+ * time. Therefore, all NVTX constructs that can be associated with a domain
724
+ * require the domain to be specified via a *type* `D` passed as an
725
+ * explicit template parameter.
726
+ *
727
+ * The type `domain::global` may be used to indicate that the global NVTX
728
+ * domain should be used.
729
+ *
730
+ * None of the C++ NVTX constructs require the user to manually construct a
731
+ * `domain` object. Instead, if a custom domain is desired, the user is
732
+ * expected to define a type `D` that contains a member
733
+ * `D::name` which resolves to either a `char const*` or `wchar_t
734
+ * const*`. The value of `D::name` is used to name and uniquely
735
+ * identify the custom domain.
736
+ *
737
+ * Upon the first use of an NVTX construct associated with the type
738
+ * `D`, the "construct on first use" pattern is used to construct a
739
+ * function local static `domain` object. All future NVTX constructs
740
+ * associated with `D` will use a reference to the previously
741
+ * constructed `domain` object. See `domain::get`.
742
+ *
743
+ * Example:
744
+ * \code{.cpp}
745
+ * // The type `my_domain` defines a `name` member used to name and identify
746
+ * // the `domain` object identified by `my_domain`.
747
+ * struct my_domain{ static constexpr char const* name{"my_domain"}; };
748
+ *
749
+ * // The NVTX range `r` will be grouped with all other NVTX constructs
750
+ * // associated with `my_domain`.
751
+ * nvtx3::scoped_range_in<my_domain> r{};
752
+ *
753
+ * // An alias can be created for a `scoped_range_in` in the custom domain
754
+ * using my_scoped_range = nvtx3::scoped_range_in<my_domain>;
755
+ * my_scoped_range my_range{};
756
+ *
757
+ * // `domain::global` indicates that the global NVTX domain is used
758
+ * nvtx3::scoped_range_in<domain::global> r2{};
759
+ *
760
+ * // For convenience, `nvtx3::scoped_range` is an alias for a range in the
761
+ * // global domain
762
+ * nvtx3::scoped_range r3{};
763
+ * \endcode
764
+ */
765
+ class domain
766
+ {
767
+ public:
768
+ domain(domain const&) = delete;
769
+ domain& operator=(domain const&) = delete;
770
+ domain(domain&&) = delete;
771
+ domain& operator=(domain&&) = delete;
772
+
773
+ /**
774
+ * @brief Tag type for the "global" NVTX domain.
775
+ *
776
+ * This type may be passed as a template argument to any function/class
777
+ * expecting a type to identify a domain to indicate that the global domain
778
+ * should be used.
779
+ *
780
+ * All NVTX events in the global domain across all libraries and
781
+ * applications will be grouped together.
782
+ *
783
+ */
784
+ struct global
785
+ {};
786
+
787
+ # if NVTX3_USE_CHECKED_OVERLOADS_FOR_GET
788
+ /**
789
+ * @brief Returns reference to an instance of a function local static
790
+ * `domain` object.
791
+ *
792
+ * Uses the "construct on first use" idiom to safely ensure the `domain`
793
+ * object is initialized exactly once upon first invocation of
794
+ * `domain::get<D>()`. All following invocations will return a
795
+ * reference to the previously constructed `domain` object. See
796
+ * https://isocpp.org/wiki/faq/ctors#static-init-order-on-first-use
797
+ *
798
+ * None of the constructs in this header require the user to directly invoke
799
+ * `domain::get`. It is automatically invoked when constructing objects like
800
+ * a `scoped_range_in` or `category`. Advanced users may wish to use
801
+ * `domain::get` for the convenience of the "construct on first use" idiom
802
+ * when using domains with their own use of the NVTX C API.
803
+ *
804
+ * This function is thread-safe as of C++11. If two or more threads call
805
+ * `domain::get<D>` concurrently, exactly one of them is guaranteed
806
+ * to construct the `domain` object and the other(s) will receive a
807
+ * reference to the object after it is fully constructed.
808
+ *
809
+ * The domain's name is specified via the type `D` passed as an
810
+ * explicit template parameter. `D` is required to contain a
811
+ * member `D::name` that resolves to either a `char const*` or
812
+ * `wchar_t const*`. The value of `D::name` is used to name and
813
+ * uniquely identify the `domain`.
814
+ *
815
+ * Example:
816
+ * \code{.cpp}
817
+ * // The type `my_domain` defines a `name` member used to name and identify
818
+ * // the `domain` object identified by `my_domain`.
819
+ * struct my_domain{ static constexpr char const* name{"my domain"}; };
820
+ *
821
+ * auto& D1 = domain::get<my_domain>(); // First invocation constructs a
822
+ * // `domain` with the name "my domain"
823
+ *
824
+ * auto& D2 = domain::get<my_domain>(); // Quickly returns reference to
825
+ * // previously constructed `domain`.
826
+ * \endcode
827
+ *
828
+ * @tparam D Type that contains a `D::name` member used to
829
+ * name the `domain` object.
830
+ * @return Reference to the `domain` corresponding to the type `D`.
831
+ */
832
+ template <typename D = global, typename std::enable_if<detail::is_c_string<decltype(D::name)>::value, int>::type = 0>
833
+ NVTX3_NO_DISCARD static domain const& get() noexcept
834
+ {
835
+ static domain const d(D::name);
836
+ return d;
837
+ }
838
+
839
+ /**
840
+ * @brief Overload of `domain::get` to provide a clear compile error when
841
+ * `D` has a `name` member that is not directly convertible to either
842
+ * `char const*` or `wchar_t const*`.
843
+ */
844
+ template <typename D = global,
845
+ typename std::enable_if<!detail::is_c_string<decltype(D::name)>::value, int>::type = 0>
846
+ NVTX3_NO_DISCARD static domain const& get() noexcept
847
+ {
848
+ NVTX3_STATIC_ASSERT(detail::always_false<D>::value,
849
+ "Type used to identify an NVTX domain must contain a static constexpr member "
850
+ "called 'name' of type const char* or const wchar_t* -- 'name' member is not "
851
+ "convertible to either of those types");
852
+ static domain const unused;
853
+ return unused; // Function must compile for static_assert to be triggered
854
+ }
855
+
856
+ /**
857
+ * @brief Overload of `domain::get` to provide a clear compile error when
858
+ * `D` does not have a `name` member.
859
+ */
860
+ template <typename D = global, typename std::enable_if<!detail::has_name<D>::value, int>::type = 0>
861
+ NVTX3_NO_DISCARD static domain const& get() noexcept
862
+ {
863
+ NVTX3_STATIC_ASSERT(detail::always_false<D>::value,
864
+ "Type used to identify an NVTX domain must contain a static constexpr member "
865
+ "called 'name' of type const char* or const wchar_t* -- 'name' member is missing");
866
+ static domain const unused;
867
+ return unused; // Function must compile for static_assert to be triggered
868
+ }
869
+ # else
870
+ template <typename D = global>
871
+ NVTX3_NO_DISCARD static domain const& get() noexcept
872
+ {
873
+ static domain const d(D::name);
874
+ return d;
875
+ }
876
+ # endif
877
+
878
+ /**
879
+ * @brief Conversion operator to `nvtxDomainHandle_t`.
880
+ *
881
+ * Allows transparently passing a domain object into an API expecting a
882
+ * native `nvtxDomainHandle_t` object.
883
+ */
884
+ operator nvtxDomainHandle_t() const noexcept
885
+ {
886
+ return _domain;
887
+ }
888
+
889
+ private:
890
+ /**
891
+ * @brief Construct a new domain with the specified `name`.
892
+ *
893
+ * This constructor is private as it is intended that `domain` objects only
894
+ * be created through the `domain::get` function.
895
+ *
896
+ * @param name A unique name identifying the domain
897
+ */
898
+ explicit domain(char const* name) noexcept
899
+ : _domain{nvtxDomainCreateA(name)}
900
+ {}
901
+
902
+ /**
903
+ * @brief Construct a new domain with the specified `name`.
904
+ *
905
+ * This constructor is private as it is intended that `domain` objects only
906
+ * be created through the `domain::get` function.
907
+ *
908
+ * @param name A unique name identifying the domain
909
+ */
910
+ explicit domain(wchar_t const* name) noexcept
911
+ : _domain{nvtxDomainCreateW(name)}
912
+ {}
913
+
914
+ /**
915
+ * @brief Construct a new domain with the specified `name`.
916
+ *
917
+ * This constructor is private as it is intended that `domain` objects only
918
+ * be created through the `domain::get` function.
919
+ *
920
+ * @param name A unique name identifying the domain
921
+ */
922
+ explicit domain(std::string const& name) noexcept
923
+ : domain{name.c_str()}
924
+ {}
925
+
926
+ /**
927
+ * @brief Construct a new domain with the specified `name`.
928
+ *
929
+ * This constructor is private as it is intended that `domain` objects only
930
+ * be created through the `domain::get` function.
931
+ *
932
+ * @param name A unique name identifying the domain
933
+ */
934
+ explicit domain(std::wstring const& name) noexcept
935
+ : domain{name.c_str()}
936
+ {}
937
+
938
+ /**
939
+ * @brief Default constructor creates a `domain` representing the
940
+ * "global" NVTX domain.
941
+ *
942
+ * All events not associated with a custom `domain` are grouped in the
943
+ * "global" NVTX domain.
944
+ *
945
+ */
946
+ constexpr domain() noexcept {}
947
+
948
+ /**
949
+ * @brief Intentionally avoid calling nvtxDomainDestroy on the `domain` object.
950
+ *
951
+ * No currently-available tools attempt to free domain resources when the
952
+ * nvtxDomainDestroy function is called, due to the thread-safety and
953
+ * efficiency challenges of freeing thread-local storage for other threads.
954
+ * Since libraries may be disallowed from introducing static destructors,
955
+ * and destroying the domain is likely to have no effect, the destructor
956
+ * for `domain` intentionally chooses to not destroy the domain.
957
+ *
958
+ * In a situation where domain destruction is necessary, either manually
959
+ * call nvtxDomainDestroy on the domain's handle, or make a class that
960
+ * derives from `domain` and calls nvtxDomainDestroy in its destructor.
961
+ */
962
+ ~domain() = default;
963
+
964
+ private:
965
+ nvtxDomainHandle_t const _domain{}; ///< The `domain`s NVTX handle
966
+ };
967
+
968
+ /**
969
+ * @brief Returns reference to the `domain` object that represents the global
970
+ * NVTX domain.
971
+ *
972
+ * This specialization for `domain::global` returns a default-constructed
973
+ * `domain` object for use when the "global" domain is desired.
974
+ *
975
+ * All NVTX events in the global domain across all libraries and applications
976
+ * will be grouped together.
977
+ *
978
+ * @return Reference to the `domain` corresponding to the global NVTX domain.
979
+ *
980
+ */
981
+ template <>
982
+ NVTX3_NO_DISCARD inline domain const& domain::get<domain::global>() noexcept
983
+ {
984
+ static domain const d{};
985
+ return d;
986
+ }
987
+
988
+ /**
989
+ * @brief Indicates the values of the red, green, and blue color channels for
990
+ * an RGB color to use as an event attribute (assumes no transparency).
991
+ *
992
+ */
993
+ struct rgb
994
+ {
995
+ /// Type used for component values
996
+ using component_type = uint8_t;
997
+
998
+ /**
999
+ * @brief Construct an rgb with red, green, and blue channels
1000
+ * specified by `red_`, `green_`, and `blue_`, respectively.
1001
+ *
1002
+ * Valid values are in the range `[0,255]`.
1003
+ *
1004
+ * @param red_ Value of the red channel
1005
+ * @param green_ Value of the green channel
1006
+ * @param blue_ Value of the blue channel
1007
+ */
1008
+ constexpr rgb(component_type red_, component_type green_, component_type blue_) noexcept
1009
+ : red{red_}
1010
+ , green{green_}
1011
+ , blue{blue_}
1012
+ {}
1013
+
1014
+ component_type red{}; ///< Red channel value
1015
+ component_type green{}; ///< Green channel value
1016
+ component_type blue{}; ///< Blue channel value
1017
+ };
1018
+
1019
+ /**
1020
+ * @brief Indicates the value of the alpha, red, green, and blue color
1021
+ * channels for an ARGB color to use as an event attribute.
1022
+ *
1023
+ */
1024
+ struct argb final : rgb
1025
+ {
1026
+ /**
1027
+ * @brief Construct an argb with alpha, red, green, and blue channels
1028
+ * specified by `alpha_`, `red_`, `green_`, and `blue_`, respectively.
1029
+ *
1030
+ * Valid values are in the range `[0,255]`.
1031
+ *
1032
+ * @param alpha_ Value of the alpha channel (opacity)
1033
+ * @param red_ Value of the red channel
1034
+ * @param green_ Value of the green channel
1035
+ * @param blue_ Value of the blue channel
1036
+ *
1037
+ */
1038
+ constexpr argb(component_type alpha_, component_type red_, component_type green_, component_type blue_) noexcept
1039
+ : rgb{red_, green_, blue_}
1040
+ , alpha{alpha_}
1041
+ {}
1042
+
1043
+ component_type alpha{}; ///< Alpha channel value
1044
+ };
1045
+
1046
+ /**
1047
+ * @brief Represents a custom color that can be associated with an NVTX event
1048
+ * via its `event_attributes`.
1049
+ *
1050
+ * Specifying colors for NVTX events is a convenient way to visually
1051
+ * differentiate among different events in a visualization tool such as Nsight
1052
+ * Systems.
1053
+ *
1054
+ */
1055
+ class color
1056
+ {
1057
+ public:
1058
+ /// Type used for the color's value
1059
+ using value_type = uint32_t;
1060
+
1061
+ /**
1062
+ * @brief Constructs a `color` using the value provided by `hex_code`.
1063
+ *
1064
+ * `hex_code` is expected to be a 4 byte argb hex code.
1065
+ *
1066
+ * The most significant byte indicates the value of the alpha channel
1067
+ * (opacity) (0-255)
1068
+ *
1069
+ * The next byte indicates the value of the red channel (0-255)
1070
+ *
1071
+ * The next byte indicates the value of the green channel (0-255)
1072
+ *
1073
+ * The least significant byte indicates the value of the blue channel
1074
+ * (0-255)
1075
+ *
1076
+ * @param hex_code The hex code used to construct the `color`
1077
+ */
1078
+ constexpr explicit color(value_type hex_code) noexcept
1079
+ : _value{hex_code}
1080
+ {}
1081
+
1082
+ /**
1083
+ * @brief Construct a `color` using the alpha, red, green, blue components
1084
+ * in `argb`.
1085
+ *
1086
+ * @param argb_ The alpha, red, green, blue components of the desired `color`
1087
+ */
1088
+ constexpr color(argb argb_) noexcept
1089
+ : color{from_bytes_msb_to_lsb(argb_.alpha, argb_.red, argb_.green, argb_.blue)} // delegate to the hex-code constructor; bytes packed alpha, red, green, blue from most to least significant
1090
+ {}
1091
+
1092
+ /**
1093
+ * @brief Construct a `color` using the red, green, blue components in
1094
+ * `rgb`.
1095
+ *
1096
+ * Uses maximum value for the alpha channel (opacity) of the `color`.
1097
+ *
1098
+ * @param rgb_ The red, green, blue components of the desired `color`
1099
+ */
1100
+ constexpr color(rgb rgb_) noexcept
1101
+ : color{from_bytes_msb_to_lsb(0xFF, rgb_.red, rgb_.green, rgb_.blue)} // 0xFF is the maximum alpha value, since `rgb` carries no alpha channel
1102
+ {}
1103
+
1104
+ /**
1105
+ * @brief Returns the `color`s argb hex code
1106
+ *
1107
+ */
1108
+ constexpr value_type get_value() const noexcept
1109
+ {
1110
+ return _value;
1111
+ }
1112
+
1113
+ /**
1114
+ * @brief Return the NVTX color type of the color.
1115
+ *
1116
+ */
1117
+ constexpr nvtxColorType_t get_type() const noexcept
1118
+ {
1119
+ return _type;
1120
+ }
1121
+
1122
+ color() = delete;
1123
+ ~color() = default;
1124
+ color(color const&) = default;
1125
+ color& operator=(color const&) = default;
1126
+ color(color&&) = default;
1127
+ color& operator=(color&&) = default;
1128
+
1129
+ private:
1130
+ /**
1131
+ * @brief Constructs an unsigned, 4B integer from the component bytes in
1132
+ * most to least significant byte order.
1133
+ *
1134
+ */
1135
+ constexpr static value_type
1136
+ from_bytes_msb_to_lsb(uint8_t byte3, uint8_t byte2, uint8_t byte1, uint8_t byte0) noexcept
1137
+ { // byte3 becomes bits 31..24, byte2 bits 23..16, byte1 bits 15..8, byte0 bits 7..0
1138
+ return uint32_t{byte3} << 24 | uint32_t{byte2} << 16 | uint32_t{byte1} << 8 | uint32_t{byte0}; // each byte is widened to uint32_t first so the shift is done on an unsigned 32-bit value rather than on a promoted int
1139
+ }
1140
+
1141
+ value_type _value{}; ///< color's argb color code
1142
+ nvtxColorType_t _type{NVTX_COLOR_ARGB}; ///< NVTX color type code
1143
+ };
1144
+
1145
+ /**
1146
+ * @brief Object for intra-domain grouping of NVTX events.
1147
+ *
1148
+ * A `category` is simply an integer id that allows for fine-grain grouping of
1149
+ * NVTX events. For example, one might use separate categories for IO, memory
1150
+ * allocation, compute, etc.
1151
+ *
1152
+ * Example:
1153
+ * \code{.cpp}
1154
+ * nvtx3::category cat1{1};
1155
+ *
1156
+ * // Range `r1` belongs to the category identified by the value `1`.
1157
+ * nvtx3::scoped_range r1{cat1};
1158
+ *
1159
+ * // Range `r2` belongs to the same category as `r1`
1160
+ * nvtx3::scoped_range r2{nvtx3::category{1}};
1161
+ * \endcode
1162
+ *
1163
+ * To associate a name string with a category id, see `named_category`.
1164
+ *
1165
+ */
1166
+ class category
1167
+ {
1168
+ public:
1169
+ /// Type used for `category`s integer id.
1170
+ using id_type = uint32_t;
1171
+
1172
+ /**
1173
+ * @brief Construct a `category` with the specified `id`.
1174
+ *
1175
+ * The `category` will be unnamed and identified only by its `id` value.
1176
+ *
1177
+ * All `category`s in a domain sharing the same `id` are equivalent.
1178
+ *
1179
+ * @param[in] id The `category`'s identifying value
1180
+ */
1181
+ constexpr explicit category(id_type id) noexcept
1182
+ : id_{id}
1183
+ {}
1184
+
1185
+ /**
1186
+ * @brief Returns the id of the category.
1187
+ *
1188
+ */
1189
+ constexpr id_type get_id() const noexcept
1190
+ {
1191
+ return id_;
1192
+ }
1193
+
1194
+ category() = delete;
1195
+ ~category() = default;
1196
+ category(category const&) = default;
1197
+ category& operator=(category const&) = default;
1198
+ category(category&&) = default;
1199
+ category& operator=(category&&) = default;
1200
+
1201
+ private:
1202
+ id_type id_{}; ///< category's unique identifier
1203
+ };
1204
+
1205
+ /**
1206
+ * @brief A `category` with an associated name string.
1207
+ *
1208
+ * Associates a `name` string with a category `id` to help differentiate among
1209
+ * categories.
1210
+ *
1211
+ * For any given category id `Id`, a `named_category(Id, "name")` should only
1212
+ * be constructed once and reused throughout an application. This can be done
1213
+ * by either explicitly creating static `named_category` objects, or using the
1214
+ * `named_category::get` construct on first use helper (recommended).
1215
+ *
1216
+ * Creating two or more `named_category` objects with the same value for `id`
1217
+ * in the same domain results in undefined behavior.
1218
+ *
1219
+ * Similarly, behavior is undefined when a `named_category` and `category`
1220
+ * share the same value of `id`.
1221
+ *
1222
+ * Example:
1223
+ * \code{.cpp}
1224
+ * // Explicitly constructed, static `named_category` in global domain:
1225
+ * static nvtx3::named_category static_category{42, "my category"};
1226
+ *
1227
+ * // Range `r` associated with category id `42`
1228
+ * nvtx3::scoped_range r{static_category};
1229
+ *
1230
+ * // OR use construct on first use:
1231
+ *
1232
+ * // Define a type with `name` and `id` members
1233
+ * struct my_category {
1234
+ * static constexpr char const* name{"my category"}; // category name
1235
+ * static constexpr uint32_t id{42}; // category id
1236
+ * };
1237
+ *
1238
+ * // Use construct on first use to name the category id `42`
1239
+ * // with name "my category"
1240
+ * auto& cat = named_category_in<my_domain>::get<my_category>();
1241
+ *
1242
+ * // Range `r` associated with category id `42`
1243
+ * nvtx3::scoped_range r{cat};
1244
+ * \endcode
1245
+ *
1246
+ * `named_category_in<D>`'s association of a name to a category id is local to
1247
+ * the domain specified by the type `D`. An id may have a different name in
1248
+ * another domain.
1249
+ *
1250
+ * @tparam D Type containing `name` member used to identify the `domain` to
1251
+ * which the `named_category_in` belongs. Else, `domain::global` to indicate
1252
+ * that the global NVTX domain should be used.
1253
+ */
1254
+ template <typename D = domain::global>
1255
+ class named_category_in final : public category
1256
+ {
1257
+ public:
1258
+ # if NVTX3_USE_CHECKED_OVERLOADS_FOR_GET
1259
+ /**
1260
+ * @brief Returns a global instance of a `named_category_in` as a
1261
+ * function-local static.
1262
+ *
1263
+ * Creates a `named_category_in<D>` with name and id specified by the contents
1264
+ * of a type `C`. `C::name` determines the name and `C::id` determines the
1265
+ * category id.
1266
+ *
1267
+ * This function is useful for constructing a named `category` exactly once
1268
+ * and reusing the same instance throughout an application.
1269
+ *
1270
+ * Example:
1271
+ * \code{.cpp}
1272
+ * // Define a type with `name` and `id` members
1273
+ * struct my_category {
1274
+ * static constexpr char const* name{"my category"}; // category name
1275
+ * static constexpr uint32_t id{42}; // category id
1276
+ * };
1277
+ *
1278
+ * // Use construct on first use to name the category id `42`
1279
+ * // with name "my category"
1280
+ * auto& cat = named_category_in<my_domain>::get<my_category>();
1281
+ *
1282
+ * // Range `r` associated with category id `42`
1283
+ * nvtx3::scoped_range r{cat};
1284
+ * \endcode
1285
+ *
1286
+ * Uses the "construct on first use" idiom to safely ensure the `category`
1287
+ * object is initialized exactly once. See
1288
+ * https://isocpp.org/wiki/faq/ctors#static-init-order-on-first-use
1289
+ *
1290
+ * @tparam C Type containing a member `C::name` that resolves to either a
1291
+ * `char const*` or `wchar_t const*` and `C::id`.
1292
+ */
1293
+ template <
1294
+ typename C,
1295
+ typename std::enable_if<detail::is_c_string<decltype(C::name)>::value && detail::is_uint32<decltype(C::id)>::value,
1296
+ int>::type = 0>
1297
+ NVTX3_NO_DISCARD static named_category_in const& get() noexcept // marked no-discard like every other `get` overload, so both NVTX3_USE_CHECKED_OVERLOADS_FOR_GET settings diagnose a dropped result the same way
1298
+ {
1299
+ static named_category_in const cat(C::id, C::name); // function-local static: the category is constructed (and named) on the first call for this `C`
1300
+ return cat; // later calls return the same instance
1301
+ }
1302
+
1303
+ /**
1304
+ * @brief Overload of `named_category_in::get` to provide a clear compile error
1305
+ * when `C` has the required `name` and `id` members, but they are not the
1306
+ * required types. `name` must be directly convertible to `char const*` or
1307
+ * `wchar_t const*`, and `id` must be `uint32_t`.
1308
+ */
1309
+ template <typename C,
1310
+ typename std::enable_if<!detail::is_c_string<decltype(C::name)>::value
1311
+ || !detail::is_uint32<decltype(C::id)>::value,
1312
+ int>::type = 0>
1313
+ NVTX3_NO_DISCARD static named_category_in const& get() noexcept
1314
+ {
1315
+ NVTX3_STATIC_ASSERT(detail::is_c_string<decltype(C::name)>::value,
1316
+ "Type used to name an NVTX category must contain a static constexpr member "
1317
+ "called 'name' of type const char* or const wchar_t* -- 'name' member is not "
1318
+ "convertible to either of those types");
1319
+ NVTX3_STATIC_ASSERT(detail::is_uint32<decltype(C::id)>::value,
1320
+ "Type used to name an NVTX category must contain a static constexpr member "
1321
+ "called 'id' of type uint32_t -- 'id' member is the wrong type");
1322
+ static named_category_in const unused;
1323
+ return unused; // Function must compile for static_assert to be triggered
1324
+ }
1325
+
1326
+ /**
1327
+ * @brief Overload of `named_category_in::get` to provide a clear compile error
1328
+ * when `C` does not have the required `name` and `id` members.
1329
+ */
1330
+ template <typename C,
1331
+ typename std::enable_if<!detail::has_name<C>::value || !detail::has_id<C>::value, int>::type = 0>
1332
+ NVTX3_NO_DISCARD static named_category_in const& get() noexcept
1333
+ {
1334
+ NVTX3_STATIC_ASSERT(detail::has_name<C>::value,
1335
+ "Type used to name an NVTX category must contain a static constexpr member "
1336
+ "called 'name' of type const char* or const wchar_t* -- 'name' member is missing");
1337
+ NVTX3_STATIC_ASSERT(detail::has_id<C>::value,
1338
+ "Type used to name an NVTX category must contain a static constexpr member "
1339
+ "called 'id' of type uint32_t -- 'id' member is missing");
1340
+ static named_category_in const unused;
1341
+ return unused; // Function must compile for static_assert to be triggered
1342
+ }
1343
+ # else
1344
+ template <typename C>
1345
+ NVTX3_NO_DISCARD static named_category_in const& get() noexcept
1346
+ {
1347
+ static named_category_in const cat(C::id, C::name);
1348
+ return cat;
1349
+ }
1350
+ # endif
1351
+
1352
+ private:
1353
+ // Default constructor is only used internally for static_assert(false) cases.
1354
+ named_category_in() noexcept
1355
+ : category{0}
1356
+ {}
1357
+
1358
+ public:
1359
+ /**
1360
+ * @brief Construct a `named_category_in` with the specified `id` and `name`.
1361
+ *
1362
+ * The name `name` will be registered with `id`.
1363
+ *
1364
+ * Every unique value of `id` should only be named once.
1365
+ *
1366
+ * @param[in] id The category id to name
1367
+ * @param[in] name The name to associate with `id`
1368
+ */
1369
+ named_category_in(id_type id, char const* name) noexcept
1370
+ : category{id}
1371
+ {
1372
+ # ifndef NVTX_DISABLE
1373
+ nvtxDomainNameCategoryA(domain::get<D>(), get_id(), name); // narrow-character (`char`) variant: names this id within the domain selected by `D`
1374
+ # else
1375
+ (void) id; // NVTX_DISABLE build: no naming call is made; the casts mark the parameters as used
1376
+ (void) name;
1377
+ # endif
1378
+ }
1379
+
1380
+ /**
1381
+ * @brief Construct a `named_category_in` with the specified `id` and `name`.
1382
+ *
1383
+ * The name `name` will be registered with `id`.
1384
+ *
1385
+ * Every unique value of `id` should only be named once.
1386
+ *
1387
+ * @param[in] id The category id to name
1388
+ * @param[in] name The name to associate with `id`
1389
+ */
1390
+ named_category_in(id_type id, wchar_t const* name) noexcept
1391
+ : category{id}
1392
+ {
1393
+ # ifndef NVTX_DISABLE
1394
+ nvtxDomainNameCategoryW(domain::get<D>(), get_id(), name); // wide-character (`wchar_t`) variant: names this id within the domain selected by `D`
1395
+ # else
1396
+ (void) id; // NVTX_DISABLE build: no naming call is made; the casts mark the parameters as used
1397
+ (void) name;
1398
+ # endif
1399
+ }
1400
+ };
1401
+
1402
+ /**
1403
+ * @brief Alias for a `named_category_in` in the global NVTX domain.
1404
+ *
1405
+ */
1406
+ using named_category = named_category_in<domain::global>;
1407
+
1408
+ /**
1409
+ * @brief A message registered with NVTX.
1410
+ *
1411
+ * Normally, associating a `message` with an NVTX event requires copying the
1412
+ * contents of the message string. This may cause non-trivial overhead in
1413
+ * highly performance sensitive regions of code.
1414
+ *
1415
+ * message registration is an optimization to lower the overhead of
1416
+ * associating a message with an NVTX event. Registering a message yields a
1417
+ * handle that is inexpensive to copy that may be used in place of a message
1418
+ * string.
1419
+ *
1420
+ * A particular message should only be registered once and the handle
1421
+ * reused throughout the rest of the application. This can be done by either
1422
+ * explicitly creating static `registered_string_in` objects, or using the
1423
+ * `registered_string_in::get` construct on first use helper (recommended).
1424
+ *
1425
+ * Example:
1426
+ * \code{.cpp}
1427
+ * // Explicitly constructed, static `registered_string` in my_domain:
1428
+ * static registered_string_in<my_domain> static_message{"message"};
1429
+ *
1430
+ * // "message" is associated with the range `r`
1431
+ * nvtx3::scoped_range r{static_message};
1432
+ *
1433
+ * // Or use construct on first use:
1434
+ *
1435
+ * // Define a type with a `message` member that defines the contents of the
1436
+ * // registered string
1437
+ * struct my_message{ static constexpr char const* message{ "my message" }; };
1438
+ *
1439
+ * // Uses construct on first use to register the contents of
1440
+ * // `my_message::message`
1441
+ * auto& msg = registered_string_in<my_domain>::get<my_message>();
1442
+ *
1443
+ * // "my message" is associated with the range `r`
1444
+ * nvtx3::scoped_range r{msg};
1445
+ * \endcode
1446
+ *
1447
+ * `registered_string_in`s are local to a particular domain specified via
1448
+ * the type `D`.
1449
+ *
1450
+ * @tparam D Type containing `name` member used to identify the `domain` to
1451
+ * which the `registered_string_in` belongs. Else, `domain::global` to indicate
1452
+ * that the global NVTX domain should be used.
1453
+ */
1454
+ template <typename D = domain::global>
1455
+ class registered_string_in
1456
+ {
1457
+ public:
1458
+ # if NVTX3_USE_CHECKED_OVERLOADS_FOR_GET
1459
+ /**
1460
+ * @brief Returns a global instance of a `registered_string_in` as a function
1461
+ * local static.
1462
+ *
1463
+ * Provides a convenient way to register a message with NVTX without having
1464
+ * to explicitly register the message.
1465
+ *
1466
+ * Upon first invocation, constructs a `registered_string_in` whose contents
1467
+ * are specified by `M::message`.
1468
+ *
1469
+ * All future invocations will return a reference to the object constructed
1470
+ * in the first invocation.
1471
+ *
1472
+ * Example:
1473
+ * \code{.cpp}
1474
+ * // Define a type with a `message` member that defines the contents of the
1475
+ * // registered string
1476
+ * struct my_message{ static constexpr char const* message{ "my message" };
1477
+ * };
1478
+ *
1479
+ * // Uses construct on first use to register the contents of
1480
+ * // `my_message::message`
1481
+ * auto& msg = registered_string_in<my_domain>::get<my_message>();
1482
+ *
1483
+ * // "my message" is associated with the range `r`
1484
+ * nvtx3::scoped_range r{msg};
1485
+ * \endcode
1486
+ *
1487
+ * @tparam M Type required to contain a member `M::message` that
1488
+ * resolves to either a `char const*` or `wchar_t const*` used as the
1489
+ * registered string's contents.
1490
+ * @return Reference to a `registered_string_in` associated with the type `M`.
1491
+ */
1492
+ template <typename M, typename std::enable_if<detail::is_c_string<decltype(M::message)>::value, int>::type = 0>
1493
+ NVTX3_NO_DISCARD static registered_string_in const& get() noexcept
1494
+ {
1495
+ static registered_string_in const regstr(M::message);
1496
+ return regstr;
1497
+ }
1498
+
1499
+ /**
1500
+ * @brief Overload of `registered_string_in::get` to provide a clear compile error
1501
+ * when `M` has a `message` member that is not directly convertible to either
1502
+ * `char const*` or `wchar_t const*`.
1503
+ */
1504
+ template <typename M, typename std::enable_if<!detail::is_c_string<decltype(M::message)>::value, int>::type = 0>
1505
+ NVTX3_NO_DISCARD static registered_string_in const& get() noexcept
1506
+ {
1507
+ NVTX3_STATIC_ASSERT(detail::always_false<M>::value,
1508
+ "Type used to register an NVTX string must contain a static constexpr member "
1509
+ "called 'message' of type const char* or const wchar_t* -- 'message' member is "
1510
+ "not convertible to either of those types");
1511
+ static registered_string_in const unused;
1512
+ return unused; // Function must compile for static_assert to be triggered
1513
+ }
1514
+
1515
+ /**
1516
+ * @brief Overload of `registered_string_in::get` to provide a clear compile error when
1517
+ * `M` does not have a `message` member.
1518
+ */
1519
+ template <typename M, typename std::enable_if<!detail::has_message<M>::value, int>::type = 0>
1520
+ NVTX3_NO_DISCARD static registered_string_in const& get() noexcept
1521
+ {
1522
+ NVTX3_STATIC_ASSERT(detail::always_false<M>::value,
1523
+ "Type used to register an NVTX string must contain a static constexpr member "
1524
+ "called 'message' of type const char* or const wchar_t* -- 'message' member "
1525
+ "is missing");
1526
+ static registered_string_in const unused;
1527
+ return unused; // Function must compile for static_assert to be triggered
1528
+ }
1529
+ # else
1530
+ template <typename M>
1531
+ NVTX3_NO_DISCARD static registered_string_in const& get() noexcept
1532
+ {
1533
+ static registered_string_in const regstr(M::message);
1534
+ return regstr;
1535
+ }
1536
+ # endif
1537
+
1538
+ /**
1539
+ * @brief Constructs a `registered_string_in` from the specified `msg` string.
1540
+ *
1541
+ * Registers `msg` with NVTX and associates a handle with the registered
1542
+ * message.
1543
+ *
1544
+ * A particular message should only be registered once and the handle
1545
+ * reused throughout the rest of the application.
1546
+ *
1547
+ * @param msg The contents of the message
1548
+ */
1549
+ explicit registered_string_in(char const* msg) noexcept
1550
+ : handle_{nvtxDomainRegisterStringA(domain::get<D>(), msg)} // narrow-character registration in the domain selected by `D`; the returned handle is stored
1551
+ {}
1552
+
1553
+ /**
1554
+ * @brief Constructs a `registered_string_in` from the specified `msg` string.
1555
+ *
1556
+ * Registers `msg` with NVTX and associates a handle with the registered
1557
+ * message.
1558
+ *
1559
+ * A particular message should only be registered once and the handle
1560
+ * reused throughout the rest of the application.
1561
+ *
1562
+ * @param msg The contents of the message
1563
+ */
1564
+ explicit registered_string_in(std::string const& msg) noexcept
1565
+ : registered_string_in{msg.c_str()} // delegates to the `char const*` constructor
1566
+ {}
1567
+
1568
+ /**
1569
+ * @brief Constructs a `registered_string_in` from the specified `msg` string.
1570
+ *
1571
+ * Registers `msg` with NVTX and associates a handle with the registered
1572
+ * message.
1573
+ *
1574
+ * A particular message should only be registered once and the handle
1575
+ * reused throughout the rest of the application.
1576
+ *
1577
+ * @param msg The contents of the message
1578
+ */
1579
+ explicit registered_string_in(wchar_t const* msg) noexcept
1580
+ : handle_{nvtxDomainRegisterStringW(domain::get<D>(), msg)} // wide-character registration in the domain selected by `D`; the returned handle is stored
1581
+ {}
1582
+
1583
+ /**
1584
+ * @brief Constructs a `registered_string_in` from the specified `msg` string.
1585
+ *
1586
+ * Registers `msg` with NVTX and associates a handle with the registered
1587
+ * message.
1588
+ *
1589
+ * A particular message should only be registered once and the handle
1590
+ * reused throughout the rest of the application.
1591
+ *
1592
+ * @param msg The contents of the message
1593
+ */
1594
+ explicit registered_string_in(std::wstring const& msg) noexcept
1595
+ : registered_string_in{msg.c_str()} // delegates to the `wchar_t const*` constructor
1596
+ {}
1597
+
1598
+ /**
1599
+ * @brief Returns the registered string's handle
1600
+ *
1601
+ */
1602
+ nvtxStringHandle_t get_handle() const noexcept
1603
+ {
1604
+ return handle_;
1605
+ }
1606
+
1607
+ private:
1608
+ // Default constructor is only used internally for static_assert(false) cases.
1609
+ registered_string_in() noexcept {} // registers nothing; `handle_` is left to its default member initializer
1610
+
1611
+ public:
1612
+ ~registered_string_in() = default;
1613
+ registered_string_in(registered_string_in const&) = default;
1614
+ registered_string_in& operator=(registered_string_in const&) = default;
1615
+ registered_string_in(registered_string_in&&) = default;
1616
+ registered_string_in& operator=(registered_string_in&&) = default;
1617
+
1618
+ private:
1619
+ nvtxStringHandle_t handle_{}; ///< The handle returned from
1620
+ ///< registering the message with NVTX
1621
+ };
1622
+
1623
+ /**
1624
+ * @brief Alias for a `registered_string_in` in the global NVTX domain.
1625
+ *
1626
+ */
1627
+ using registered_string = registered_string_in<domain::global>;
1628
+
1629
+ /**
1630
+ * @brief Allows associating a message string with an NVTX event via
1631
+ * its `event_attributes`.
1632
+ *
1633
+ * Associating a `message` with an NVTX event through its `event_attributes`
1634
+ * allows for naming events to easily differentiate them from other events.
1635
+ *
1636
+ * Every time an NVTX event is created with an associated `message`, the
1637
+ * contents of the message string must be copied. This may cause non-trivial
1638
+ * overhead in highly performance sensitive sections of code. Use of a
1639
+ * `nvtx3::registered_string` is recommended in these situations.
1640
+ *
1641
+ * Example:
1642
+ * \code{.cpp}
1643
+ * // Creates an `event_attributes` with message "message 0"
1644
+ * nvtx3::event_attributes attr0{nvtx3::message{"message 0"}};
1645
+ *
1646
+ * // `range0` contains message "message 0"
1647
+ * nvtx3::scoped_range range0{attr0};
1648
+ *
1649
+ * // `std::string` and string literals are implicitly assumed to be
1650
+ * // the contents of an `nvtx3::message`
1651
+ * // Creates an `event_attributes` with message "message 1"
1652
+ * nvtx3::event_attributes attr1{"message 1"};
1653
+ *
1654
+ * // `range1` contains message "message 1"
1655
+ * nvtx3::scoped_range range1{attr1};
1656
+ *
1657
+ * // `range2` contains message "message 2"
1658
+ * nvtx3::scoped_range range2{nvtx3::message{"message 2"}};
1659
+ *
1660
+ * // `std::string` and string literals are implicitly assumed to be
1661
+ * // the contents of an `nvtx3::message`
1662
+ * // `range3` contains message "message 3"
1663
+ * nvtx3::scoped_range range3{"message 3"};
1664
+ * \endcode
1665
+ */
1666
+ class message
1667
+ {
1668
+ public:
1669
+ using value_type = nvtxMessageValue_t;
1670
+
1671
+ /**
1672
+ * @brief Construct a `message` whose contents are specified by `msg`.
1673
+ *
1674
+ * @param msg The contents of the message
1675
+ */
1676
+ NVTX3_CONSTEXPR_IF_CPP14 message(char const* msg) noexcept
1677
+ : type_{NVTX_MESSAGE_TYPE_ASCII}
1678
+ {
1679
+ value_.ascii = msg; // only the pointer is stored; the characters are not copied here
1680
+ }
1681
+
1682
+ /**
1683
+ * @brief Construct a `message` whose contents are specified by `msg`.
1684
+ *
1685
+ * @param msg The contents of the message
1686
+ */
1687
+ message(std::string const& msg) noexcept
1688
+ : message{msg.c_str()} // stores a pointer into `msg`'s buffer, so `msg` must outlive this `message`
1689
+ {}
1690
+
1691
+ /**
1692
+ * @brief Disallow construction for `std::string` r-value
1693
+ *
1694
+ * `message` is a non-owning type and therefore cannot take ownership of an
1695
+ * r-value. Therefore, constructing from an r-value is disallowed to prevent
1696
+ * a dangling pointer.
1697
+ *
1698
+ */
1699
+ message(std::string&&) = delete;
1700
+
1701
+ /**
1702
+ * @brief Construct a `message` whose contents are specified by `msg`.
1703
+ *
1704
+ * @param msg The contents of the message
1705
+ */
1706
+ NVTX3_CONSTEXPR_IF_CPP14 message(wchar_t const* msg) noexcept
1707
+ : type_{NVTX_MESSAGE_TYPE_UNICODE}
1708
+ {
1709
+ value_.unicode = msg; // only the pointer is stored; the characters are not copied here
1710
+ }
1711
+
1712
+ /**
1713
+ * @brief Construct a `message` whose contents are specified by `msg`.
1714
+ *
1715
+ * @param msg The contents of the message
1716
+ */
1717
+ message(std::wstring const& msg) noexcept
1718
+ : message{msg.c_str()} // stores a pointer into `msg`'s buffer, so `msg` must outlive this `message`
1719
+ {}
1720
+
1721
+ /**
1722
+ * @brief Disallow construction for `std::wstring` r-value
1723
+ *
1724
+ * `message` is a non-owning type and therefore cannot take ownership of an
1725
+ * r-value. Therefore, constructing from an r-value is disallowed to prevent
1726
+ * a dangling pointer.
1727
+ *
1728
+ */
1729
+ message(std::wstring&&) = delete;
1730
+
1731
+ /**
1732
+ * @brief Construct a `message` from a `registered_string_in`.
1733
+ *
1734
+ * @tparam D Type containing `name` member used to identify the `domain`
1735
+ * to which the `registered_string_in` belongs. Else, `domain::global` to
1736
+ * indicate that the global NVTX domain should be used.
1737
+ * @param msg The message that has already been registered with NVTX.
1738
+ */
1739
+ template <typename D>
1740
+ NVTX3_CONSTEXPR_IF_CPP14 message(registered_string_in<D> const& msg) noexcept
1741
+ : type_{NVTX_MESSAGE_TYPE_REGISTERED}
1742
+ {
1743
+ value_.registered = msg.get_handle(); // copies the handle value; no reference to `msg` is retained
1744
+ }
1745
+
1746
+ /**
1747
+ * @brief Construct a `message` from NVTX C API type and value.
1748
+ *
1749
+ * @param type nvtxMessageType_t enum value indicating type of the message
1750
+ * @param value nvtxMessageValue_t union containing message
1751
+ */
1752
+ constexpr message(nvtxMessageType_t const& type, nvtxMessageValue_t const& value) noexcept
1753
+ : type_{type}
1754
+ , value_(value)
1755
+ {}
1756
+
1757
+ /**
1758
+ * @brief Construct a `message` from NVTX C API registered string handle.
1759
+ *
1760
+ * @param handle nvtxStringHandle_t value of registered string handle
1761
+ */
1762
+ NVTX3_CONSTEXPR_IF_CPP14 message(nvtxStringHandle_t handle) noexcept
1763
+ : type_{NVTX_MESSAGE_TYPE_REGISTERED}
1764
+ {
1765
+ value_.registered = handle;
1766
+ }
1767
+
1768
+ /**
1769
+ * @brief Return the union holding the value of the message.
1770
+ *
1771
+ */
1772
+ constexpr value_type get_value() const noexcept
1773
+ {
1774
+ return value_;
1775
+ }
1776
+
1777
+ /**
1778
+ * @brief Return the type information about the value the union holds.
1779
+ *
1780
+ */
1781
+ constexpr nvtxMessageType_t get_type() const noexcept
1782
+ {
1783
+ return type_;
1784
+ }
1785
+
1786
+ private:
1787
+ nvtxMessageType_t type_{}; ///< message type
1788
+ nvtxMessageValue_t value_{}; ///< message contents
1789
+ };
1790
+
1791
+ /**
1792
+ * @brief A numerical value that can be associated with an NVTX event via
1793
+ * its `event_attributes`.
1794
+ *
1795
+ * Example:
1796
+ * \code{.cpp}
1797
+ * // Constructs a payload from the int32_t value 42
1798
+ * nvtx3::event_attributes attr{nvtx3::payload{42}};
1799
+ *
1800
+ * // `range0` will have an int32_t payload of 42
1801
+ * nvtx3::scoped_range range0{attr};
1802
+ *
1803
+ * // range1 has double payload of 3.14
1804
+ * nvtx3::scoped_range range1{nvtx3::payload{3.14}};
1805
+ * \endcode
1806
+ */
1807
+ class payload
1808
+ {
1809
+ public:
1810
+ using value_type = typename nvtxEventAttributes_v2::payload_t;
1811
+
1812
+ /**
1813
+ * @brief Construct a `payload` from a signed, 8 byte integer.
1814
+ *
1815
+ * @param value Value to use as contents of the payload
1816
+ */
1817
+ NVTX3_CONSTEXPR_IF_CPP14 explicit payload(int64_t value) noexcept
1818
+ : type_{NVTX_PAYLOAD_TYPE_INT64}
1819
+ , value_{}
1820
+ {
1821
+ value_.llValue = value;
1822
+ }
1823
+
1824
+ /**
1825
+ * @brief Construct a `payload` from a signed, 4 byte integer.
1826
+ *
1827
+ * @param value Value to use as contents of the payload
1828
+ */
1829
+ NVTX3_CONSTEXPR_IF_CPP14 explicit payload(int32_t value) noexcept
1830
+ : type_{NVTX_PAYLOAD_TYPE_INT32}
1831
+ , value_{}
1832
+ {
1833
+ value_.iValue = value;
1834
+ }
1835
+
1836
+ /**
1837
+ * @brief Construct a `payload` from an unsigned, 8 byte integer.
1838
+ *
1839
+ * @param value Value to use as contents of the payload
1840
+ */
1841
+ NVTX3_CONSTEXPR_IF_CPP14 explicit payload(uint64_t value) noexcept
1842
+ : type_{NVTX_PAYLOAD_TYPE_UNSIGNED_INT64}
1843
+ , value_{}
1844
+ {
1845
+ value_.ullValue = value;
1846
+ }
1847
+
1848
+ /**
1849
+ * @brief Construct a `payload` from an unsigned, 4 byte integer.
1850
+ *
1851
+ * @param value Value to use as contents of the payload
1852
+ */
1853
+ NVTX3_CONSTEXPR_IF_CPP14 explicit payload(uint32_t value) noexcept
1854
+ : type_{NVTX_PAYLOAD_TYPE_UNSIGNED_INT32}
1855
+ , value_{}
1856
+ {
1857
+ value_.uiValue = value;
1858
+ }
1859
+
1860
+ /**
1861
+ * @brief Construct a `payload` from a single-precision floating point
1862
+ * value.
1863
+ *
1864
+ * @param value Value to use as contents of the payload
1865
+ */
1866
+ NVTX3_CONSTEXPR_IF_CPP14 explicit payload(float value) noexcept
1867
+ : type_{NVTX_PAYLOAD_TYPE_FLOAT}
1868
+ , value_{}
1869
+ {
1870
+ value_.fValue = value;
1871
+ }
1872
+
1873
+ /**
1874
+ * @brief Construct a `payload` from a double-precision floating point
1875
+ * value.
1876
+ *
1877
+ * @param value Value to use as contents of the payload
1878
+ */
1879
+ NVTX3_CONSTEXPR_IF_CPP14 explicit payload(double value) noexcept
1880
+ : type_{NVTX_PAYLOAD_TYPE_DOUBLE}
1881
+ , value_{}
1882
+ {
1883
+ value_.dValue = value;
1884
+ }
1885
+
1886
+ /**
1887
+ * @brief Construct a `payload` from NVTX C API type and value.
1888
+ *
1889
+ * @param type nvtxPayloadType_t enum value indicating type of the payload
1890
+ * @param value nvtxEventAttributes_t::payload_t union containing payload
1891
+ */
1892
+ constexpr payload(nvtxPayloadType_t const& type, value_type const& value) noexcept
1893
+ : type_{type}
1894
+ , value_(value)
1895
+ {}
1896
+
1897
+ /**
1898
+ * @brief Return the union holding the value of the payload
1899
+ *
1900
+ */
1901
+ constexpr value_type get_value() const noexcept
1902
+ {
1903
+ return value_;
1904
+ }
1905
+
1906
+ /**
1907
+ * @brief Return the information about the type the union holds.
1908
+ *
1909
+ */
1910
+ constexpr nvtxPayloadType_t get_type() const noexcept
1911
+ {
1912
+ return type_;
1913
+ }
1914
+
1915
+ private:
1916
+ nvtxPayloadType_t type_; ///< Type of the payload value
1917
+ value_type value_; ///< Union holding the payload value
1918
+ };
1919
+
1920
+ /**
1921
+ * @brief Describes the attributes of a NVTX event.
1922
+ *
1923
+ * NVTX events can be customized via four "attributes":
1924
+ *
1925
+ * - color: color used to visualize the event in tools such as Nsight
1926
+ * Systems. See `color`.
1927
+ * - message: Custom message string. See `message`.
1928
+ * - payload: User-defined numerical value. See `payload`.
1929
+ * - category: Intra-domain grouping. See `category`.
1930
+ *
1931
+ * These component attributes are specified via an `event_attributes` object.
1932
+ * See `nvtx3::color`, `nvtx3::message`, `nvtx3::payload`, and
1933
+ * `nvtx3::category` for how these individual attributes are constructed.
1934
+ *
1935
+ * While it is possible to specify all four attributes, it is common to want
1936
+ * to only specify a subset of attributes and use default values for the
1937
+ * others. For convenience, `event_attributes` can be constructed from any
1938
+ * number of attribute components in any order.
1939
+ *
1940
+ * Example:
1941
+ * \code{.cpp}
1942
+ * // Set message, same as using nvtx3::message{"message"}
1943
+ * event_attributes attr{"message"};
1944
+ *
1945
+ * // Set message and color
1946
+ * event_attributes attr{"message", nvtx3::rgb{127, 255, 0}};
1947
+ *
1948
+ * // Set message, color, payload, category
1949
+ * event_attributes attr{"message",
1950
+ * nvtx3::rgb{127, 255, 0},
1951
+ * nvtx3::payload{42},
1952
+ * nvtx3::category{1}};
1953
+ *
1954
+ * // Same as above -- can use any order of arguments
1955
+ * event_attributes attr{nvtx3::payload{42},
1956
+ * nvtx3::category{1},
1957
+ * "message",
1958
+ * nvtx3::rgb{127, 255, 0}};
1959
+ *
1960
+ * // Multiple arguments of the same type are allowed, but only the first is
1961
+ * // used -- in this example, payload is set to 42:
1962
+ * event_attributes attr{ nvtx3::payload{42}, nvtx3::payload{7} };
1963
+ *
1964
+ * // Range `r` will be customized according to the attributes in `attr`
1965
+ * nvtx3::scoped_range r{attr};
1966
+ *
1967
+ * // For convenience, `event_attributes` constructor arguments may be passed
1968
+ * // to the `scoped_range_in` constructor -- they are forwarded to the
1969
+ * // `event_attributes` constructor
1970
+ * nvtx3::scoped_range r{nvtx3::payload{42}, nvtx3::category{1}, "message"};
1971
+ *
1972
+ * // Using the nvtx3 namespace in a local scope makes the syntax more succinct:
1973
+ * using namespace nvtx3;
1974
+ * scoped_range r{payload{42}, category{1}, "message"};
1975
+ * \endcode
1976
+ *
1977
+ */
1978
+ class event_attributes
1979
+ {
1980
+ public:
1981
+ using value_type = nvtxEventAttributes_t;
1982
+
1983
+ /**
1984
+ * @brief Default constructor creates an `event_attributes` with no
1985
+ * category, color, payload, nor message.
1986
+ */
1987
+ constexpr event_attributes() noexcept
1988
+ : attributes_{
1989
+ NVTX_VERSION, // version
1990
+ sizeof(nvtxEventAttributes_t), // size
1991
+ 0, // category
1992
+ NVTX_COLOR_UNKNOWN, // color type
1993
+ 0, // color value
1994
+ NVTX_PAYLOAD_UNKNOWN, // payload type
1995
+ 0, // reserved 4B
1996
+ {0}, // payload value (union)
1997
+ NVTX_MESSAGE_UNKNOWN, // message type
1998
+ {0} // message value (union)
1999
+ }
2000
+ {}
2001
+
2002
+ /**
2003
+ * @brief Variadic constructor where the first argument is a `category`.
2004
+ *
2005
+ * Sets the category of the `event_attributes` based on `c` and
2006
+ * forwards the remaining variadic parameter pack to the next constructor.
2007
+ *
2008
+ */
2009
+ template <typename... Args>
2010
+ NVTX3_CONSTEXPR_IF_CPP14 explicit event_attributes(category const& c, Args const&... args) noexcept
2011
+ : event_attributes(args...)
2012
+ {
2013
+ attributes_.category = c.get_id();
2014
+ }
2015
+
2016
+ /**
2017
+ * @brief Variadic constructor where the first argument is a `color`.
2018
+ *
2019
+ * Sets the color of the `event_attributes` based on `c` and forwards
2020
+ * the remaining variadic parameter pack to the next constructor.
2021
+ *
2022
+ */
2023
+ template <typename... Args>
2024
+ NVTX3_CONSTEXPR_IF_CPP14 explicit event_attributes(color const& c, Args const&... args) noexcept
2025
+ : event_attributes(args...)
2026
+ {
2027
+ attributes_.color = c.get_value();
2028
+ attributes_.colorType = c.get_type();
2029
+ }
2030
+
2031
+ /**
2032
+ * @brief Variadic constructor where the first argument is a `payload`.
2033
+ *
2034
+ * Sets the payload of the `event_attributes` based on `p` and forwards
2035
+ * the remaining variadic parameter pack to the next constructor.
2036
+ *
2037
+ */
2038
+ template <typename... Args>
2039
+ NVTX3_CONSTEXPR_IF_CPP14 explicit event_attributes(payload const& p, Args const&... args) noexcept
2040
+ : event_attributes(args...)
2041
+ {
2042
+ attributes_.payload = p.get_value();
2043
+ attributes_.payloadType = p.get_type();
2044
+ }
2045
+
2046
+ /**
2047
+ * @brief Variadic constructor where the first argument is a `message`.
2048
+ *
2049
+ * Sets the message of the `event_attributes` based on `m` and forwards
2050
+ * the remaining variadic parameter pack to the next constructor.
2051
+ *
2052
+ */
2053
+ template <typename... Args>
2054
+ NVTX3_CONSTEXPR_IF_CPP14 explicit event_attributes(message const& m, Args const&... args) noexcept
2055
+ : event_attributes(args...)
2056
+ {
2057
+ attributes_.message = m.get_value();
2058
+ attributes_.messageType = m.get_type();
2059
+ }
2060
+
2061
+ ~event_attributes() = default;
2062
+ event_attributes(event_attributes const&) = default;
2063
+ event_attributes& operator=(event_attributes const&) = default;
2064
+ event_attributes(event_attributes&&) = default;
2065
+ event_attributes& operator=(event_attributes&&) = default;
2066
+
2067
+ /**
2068
+ * @brief Get raw pointer to underlying NVTX attributes object.
2069
+ *
2070
+ */
2071
+ constexpr value_type const* get() const noexcept
2072
+ {
2073
+ return &attributes_;
2074
+ }
2075
+
2076
+ private:
2077
+ value_type attributes_{}; ///< The NVTX attributes structure
2078
+ };
2079
+
2080
+ /**
2081
+ * @brief A RAII object for creating a NVTX range local to a thread within a
2082
+ * domain.
2083
+ *
2084
+ * When constructed, begins a nested NVTX range on the calling thread in the
2085
+ * specified domain. Upon destruction, ends the NVTX range.
2086
+ *
2087
+ * Behavior is undefined if a `scoped_range_in` object is
2088
+ * created/destroyed on different threads.
2089
+ *
2090
+ * `scoped_range_in` is neither movable nor copyable.
2091
+ *
2092
+ * `scoped_range_in`s may be nested within other ranges.
2093
+ *
2094
+ * The domain of the range is specified by the template type parameter `D`.
2095
+ * By default, the `domain::global` is used, which scopes the range to the
2096
+ * global NVTX domain. The convenience alias `scoped_range` is provided for
2097
+ * ranges scoped to the global domain.
2098
+ *
2099
+ * A custom domain can be defined by creating a type, `D`, with a static
2100
+ * member `D::name` whose value is used to name the domain associated with
2101
+ * `D`. `D::name` must resolve to either `char const*` or `wchar_t const*`
2102
+ *
2103
+ * Example:
2104
+ * \code{.cpp}
2105
+ * // Define a type `my_domain` with a member `name` used to name the domain
2106
+ * // associated with the type `my_domain`.
2107
+ * struct my_domain{
2108
+ * static constexpr char const* name{"my domain"};
2109
+ * };
2110
+ * \endcode
2111
+ *
2112
+ * Usage:
2113
+ * \code{.cpp}
2114
+ * nvtx3::scoped_range_in<my_domain> r1{"range 1"}; // Range in my domain
2115
+ *
2116
+ * // Three equivalent ways to make a range in the global domain:
2117
+ * nvtx3::scoped_range_in<nvtx3::domain::global> r2{"range 2"};
2118
+ * nvtx3::scoped_range_in<> r3{"range 3"};
2119
+ * nvtx3::scoped_range r4{"range 4"};
2120
+ *
2121
+ * // Create an alias to succinctly make ranges in my domain:
2122
+ * using my_scoped_range = nvtx3::scoped_range_in<my_domain>;
2123
+ *
2124
+ * my_scoped_range r5{"range 5"};
2125
+ * \endcode
2126
+ */
2127
+ template <class D = domain::global>
2128
+ class NVTX3_MAYBE_UNUSED scoped_range_in
2129
+ {
2130
+ public:
2131
+ /**
2132
+ * @brief Construct a `scoped_range_in` with the specified
2133
+ * `event_attributes`
2134
+ *
2135
+ * Example:
2136
+ * \code{cpp}
2137
+ * nvtx3::event_attributes attr{"msg", nvtx3::rgb{127,255,0}};
2138
+ * nvtx3::scoped_range range{attr}; // Creates a range with message contents
2139
+ * // "msg" and green color
2140
+ * \endcode
2141
+ *
2142
+ * @param[in] attr `event_attributes` that describes the desired attributes
2143
+ * of the range.
2144
+ */
2145
+ explicit scoped_range_in(event_attributes const& attr) noexcept
2146
+ {
2147
+ # ifndef NVTX_DISABLE
2148
+ nvtxDomainRangePushEx(domain::get<D>(), attr.get());
2149
+ # else
2150
+ (void) attr;
2151
+ # endif
2152
+ }
2153
+
2154
+ /**
2155
+ * @brief Constructs a `scoped_range_in` from the constructor arguments
2156
+ * of an `event_attributes`.
2157
+ *
2158
+ * Forwards the arguments `args...` to construct an
2159
+ * `event_attributes` object. The `event_attributes` object is then
2160
+ * associated with the `scoped_range_in`.
2161
+ *
2162
+ * For more detail, see `event_attributes` documentation.
2163
+ *
2164
+ * Example:
2165
+ * \code{cpp}
2166
+ * // Creates a range with message "message" and green color
2167
+ * nvtx3::scoped_range r{"message", nvtx3::rgb{127,255,0}};
2168
+ * \endcode
2169
+ *
2170
+ * @param[in] args Arguments used to construct an `event_attributes` associated with this
2171
+ * range.
2172
+ *
2173
+ */
2174
+ template <typename... Args>
2175
+ explicit scoped_range_in(Args const&... args) noexcept
2176
+ : scoped_range_in{event_attributes{args...}}
2177
+ {}
2178
+
2179
+ /**
2180
+ * @brief Default constructor creates a `scoped_range_in` with no
2181
+ * message, color, payload, nor category.
2182
+ *
2183
+ */
2184
+ scoped_range_in() noexcept
2185
+ : scoped_range_in{event_attributes{}}
2186
+ {}
2187
+
2188
+ /**
2189
+ * @brief Delete `operator new` to disallow heap allocated objects.
2190
+ *
2191
+ * `scoped_range_in` must follow RAII semantics to guarantee proper push/pop semantics.
2192
+ *
2193
+ */
2194
+ void* operator new(std::size_t) = delete;
2195
+
2196
+ scoped_range_in(scoped_range_in const&) = delete;
2197
+ scoped_range_in& operator=(scoped_range_in const&) = delete;
2198
+ scoped_range_in(scoped_range_in&&) = delete;
2199
+ scoped_range_in& operator=(scoped_range_in&&) = delete;
2200
+
2201
+ /**
2202
+ * @brief Destroy the scoped_range_in, ending the NVTX range event.
2203
+ */
2204
+ ~scoped_range_in() noexcept
2205
+ {
2206
+ # ifndef NVTX_DISABLE
2207
+ nvtxDomainRangePop(domain::get<D>());
2208
+ # endif
2209
+ }
2210
+ };
2211
+
2212
+ /**
2213
+ * @brief Alias for a `scoped_range_in` in the global NVTX domain.
2214
+ *
2215
+ */
2216
+ using scoped_range = scoped_range_in<domain::global>;
2217
+
2218
+ namespace detail
2219
+ {
2220
+
2221
+ /// @cond internal
2222
+ template <typename D = domain::global>
2223
+ class NVTX3_MAYBE_UNUSED optional_scoped_range_in
2224
+ {
2225
+ public:
2226
+ optional_scoped_range_in() = default;
2227
+
2228
+ void begin(event_attributes const& attr) noexcept
2229
+ {
2230
+ # ifndef NVTX_DISABLE
2231
+ // This class is not meant to be part of the public NVTX C++ API and should
2232
+ // only be used in the `NVTX3_FUNC_RANGE_IF` and `NVTX3_FUNC_RANGE_IF_IN`
2233
+ // macros. However, to prevent developers from misusing this class, make
2234
+ // sure to not start multiple ranges.
2235
+ if (initialized)
2236
+ {
2237
+ return;
2238
+ }
2239
+
2240
+ nvtxDomainRangePushEx(domain::get<D>(), attr.get());
2241
+ initialized = true;
2242
+ # endif
2243
+ }
2244
+
2245
+ ~optional_scoped_range_in() noexcept
2246
+ {
2247
+ # ifndef NVTX_DISABLE
2248
+ if (initialized)
2249
+ {
2250
+ nvtxDomainRangePop(domain::get<D>());
2251
+ }
2252
+ # endif
2253
+ }
2254
+
2255
+ void* operator new(std::size_t) = delete;
2256
+ optional_scoped_range_in(optional_scoped_range_in const&) = delete;
2257
+ optional_scoped_range_in& operator=(optional_scoped_range_in const&) = delete;
2258
+ optional_scoped_range_in(optional_scoped_range_in&&) = delete;
2259
+ optional_scoped_range_in& operator=(optional_scoped_range_in&&) = delete;
2260
+
2261
+ private:
2262
+ # ifndef NVTX_DISABLE
2263
+ bool initialized = false;
2264
+ # endif
2265
+ };
2266
+ /// @endcond
2267
+
2268
+ } // namespace detail
2269
+
2270
+ /**
2271
+ * @brief Handle used for correlating explicit range start and end events.
2272
+ *
2273
+ * A handle is "null" if it does not correspond to any range.
2274
+ *
2275
+ */
2276
+ struct range_handle
2277
+ {
2278
+ /// Type used for the handle's value
2279
+ using value_type = nvtxRangeId_t;
2280
+
2281
+ /**
2282
+ * @brief Construct a `range_handle` from the given id.
2283
+ *
2284
+ */
2285
+ constexpr explicit range_handle(value_type id) noexcept
2286
+ : _range_id{id}
2287
+ {}
2288
+
2289
+ /**
2290
+ * @brief Constructs a null range handle.
2291
+ *
2292
+ * A null range_handle corresponds to no range. Calling `end_range` on a
2293
+ * null handle is undefined behavior when a tool is active.
2294
+ *
2295
+ */
2296
+ constexpr range_handle() noexcept = default;
2297
+
2298
+ /**
2299
+ * @brief Checks whether this handle is null
2300
+ *
2301
+ * Provides contextual conversion to `bool`.
2302
+ *
2303
+ * \code{cpp}
2304
+ * range_handle handle{};
2305
+ * if (handle) {...}
2306
+ * \endcode
2307
+ *
2308
+ */
2309
+ constexpr explicit operator bool() const noexcept
2310
+ {
2311
+ return get_value() != null_range_id;
2312
+ }
2313
+
2314
+ /**
2315
+ * @brief Implicit conversion from `nullptr` constructs a null handle.
2316
+ *
2317
+ * Satisfies the "NullablePointer" requirement to make `range_handle` comparable with `nullptr`.
2318
+ *
2319
+ */
2320
+ constexpr range_handle(std::nullptr_t) noexcept {}
2321
+
2322
+ /**
2323
+ * @brief Returns the `range_handle`'s value
2324
+ *
2325
+ * @return value_type The handle's value
2326
+ */
2327
+ constexpr value_type get_value() const noexcept
2328
+ {
2329
+ return _range_id;
2330
+ }
2331
+
2332
+ private:
2333
+ /// Sentinel value for a null handle that corresponds to no range
2334
+ static constexpr value_type null_range_id = nvtxRangeId_t{0};
2335
+
2336
+ value_type _range_id{null_range_id}; ///< The underlying NVTX range id
2337
+ };
2338
+
2339
+ /**
2340
+ * @brief Compares two range_handles for equality
2341
+ *
2342
+ * @param lhs The first range_handle to compare
2343
+ * @param rhs The second range_handle to compare
2344
+ */
2345
+ inline constexpr bool operator==(range_handle lhs, range_handle rhs) noexcept
2346
+ {
2347
+ return lhs.get_value() == rhs.get_value();
2348
+ }
2349
+
2350
+ /**
2351
+ * @brief Compares two range_handles for inequality
2352
+ *
2353
+ * @param lhs The first range_handle to compare
2354
+ * @param rhs The second range_handle to compare
2355
+ */
2356
+ inline constexpr bool operator!=(range_handle lhs, range_handle rhs) noexcept
2357
+ {
2358
+ return !(lhs == rhs);
2359
+ }
2360
+
2361
+ /**
2362
+ * @brief Manually begin an NVTX range.
2363
+ *
2364
+ * Explicitly begins an NVTX range and returns a unique handle. To end the
2365
+ * range, pass the handle to `end_range_in<D>()`.
2366
+ *
2367
+ * `nvtx3::start_range(...)` is equivalent to `nvtx3::start_range_in<>(...)` and
2368
+ * `nvtx3::start_range_in<nvtx3::domain::global>(...)`.
2369
+ *
2370
+ * `start_range_in/end_range_in` are the most explicit and lowest level APIs
2371
+ * provided for creating ranges. Use of `nvtx3::unique_range_in` should be
2372
+ * preferred unless one is unable to tie the range to the lifetime of an object.
2373
+ *
2374
+ * Example:
2375
+ * \code{.cpp}
2376
+ * nvtx3::event_attributes attr{"msg", nvtx3::rgb{127,255,0}};
2377
+ * // Manually begin a range
2378
+ * nvtx3::range_handle h = nvtx3::start_range_in<my_domain>(attr);
2379
+ * ...
2380
+ * nvtx3::end_range_in<my_domain>(h); // End the range
2381
+ * \endcode
2382
+ *
2383
+ * @tparam D Type containing `name` member used to identify the `domain`
2384
+ * to which the range belongs. Else, `domain::global` to indicate that the
2385
+ * global NVTX domain should be used.
2386
+ * @param[in] attr `event_attributes` that describes the desired attributes
2387
+ * of the range.
2388
+ * @return Unique handle to be passed to `end_range_in` to end the range.
2389
+ */
2390
+ template <typename D = domain::global>
2391
+ NVTX3_NO_DISCARD inline range_handle start_range_in(event_attributes const& attr) noexcept
2392
+ {
2393
+ # ifndef NVTX_DISABLE
2394
+ return range_handle{nvtxDomainRangeStartEx(domain::get<D>(), attr.get())};
2395
+ # else
2396
+ (void) attr;
2397
+ return {};
2398
+ # endif
2399
+ }
2400
+
2401
+ /**
2402
+ * @brief Manually begin an NVTX range.
2403
+ *
2404
+ * Explicitly begins an NVTX range and returns a unique handle. To end the
2405
+ * range, pass the handle to `end_range_in<D>()`.
2406
+ *
2407
+ * `nvtx3::start_range(...)` is equivalent to `nvtx3::start_range_in<>(...)` and
2408
+ * `nvtx3::start_range_in<nvtx3::domain::global>(...)`.
2409
+ *
2410
+ * `start_range_in/end_range_in` are the most explicit and lowest level APIs
2411
+ * provided for creating ranges. Use of `nvtx3::unique_range_in` should be
2412
+ * preferred unless one is unable to tie the range to the lifetime of an object.
2413
+ *
2414
+ * This overload uses `args...` to construct an `event_attributes` to
2415
+ * associate with the range. For more detail, see `event_attributes`.
2416
+ *
2417
+ * Example:
2418
+ * \code{cpp}
2419
+ * // Manually begin a range
2420
+ * nvtx3::range_handle h = nvtx3::start_range_in<D>("msg", nvtx3::rgb{127,255,0});
2421
+ * ...
2422
+ * nvtx3::end_range_in<D>(h); // Ends the range
2423
+ * \endcode
2424
+ *
2425
+ * @tparam D Type containing `name` member used to identify the `domain`
2426
+ * to which the range belongs. Else, `domain::global` to indicate that the
2427
+ * global NVTX domain should be used.
2428
+ * @param[in] args Variadic parameter pack of the arguments for an `event_attributes`.
2429
+ * @return Unique handle to be passed to `end_range_in` to end the range.
2430
+ */
2431
+ template <typename D = domain::global, typename... Args>
2432
+ NVTX3_NO_DISCARD inline range_handle start_range_in(Args const&... args) noexcept
2433
+ {
2434
+ # ifndef NVTX_DISABLE
2435
+ return start_range_in<D>(event_attributes{args...});
2436
+ # else
2437
+ return {};
2438
+ # endif
2439
+ }
2440
+
2441
+ /**
2442
+ * @brief Manually begin an NVTX range in the global domain.
2443
+ *
2444
+ * Explicitly begins an NVTX range and returns a unique handle. To end the
2445
+ * range, pass the handle to `end_range()`.
2446
+ *
2447
+ * `nvtx3::start_range(...)` is equivalent to `nvtx3::start_range_in<>(...)` and
2448
+ * `nvtx3::start_range_in<nvtx3::domain::global>(...)`.
2449
+ *
2450
+ * `start_range/end_range` are the most explicit and lowest level APIs
2451
+ * provided for creating ranges. Use of `nvtx3::unique_range` should be
2452
+ * preferred unless one is unable to tie the range to the lifetime of an object.
2453
+ *
2454
+ * Example:
2455
+ * \code{.cpp}
2456
+ * nvtx3::event_attributes attr{"msg", nvtx3::rgb{127,255,0}};
2457
+ * // Manually begin a range
2458
+ * nvtx3::range_handle h = nvtx3::start_range(attr);
2459
+ * ...
2460
+ * nvtx3::end_range(h); // End the range
2461
+ * \endcode
2462
+ *
2463
+ * @param[in] attr `event_attributes` that describes the desired attributes
2464
+ * of the range.
2465
+ * @return Unique handle to be passed to `end_range` to end the range.
2466
+ */
2467
+ NVTX3_NO_DISCARD inline range_handle start_range(event_attributes const& attr) noexcept
2468
+ {
2469
+ # ifndef NVTX_DISABLE
2470
+ return start_range_in<domain::global>(attr);
2471
+ # else
2472
+ (void) attr;
2473
+ return {};
2474
+ # endif
2475
+ }
2476
+
2477
+ /**
2478
+ * @brief Manually begin an NVTX range in the global domain.
2479
+ *
2480
+ * Explicitly begins an NVTX range and returns a unique handle. To end the
2481
+ * range, pass the handle to `end_range()`.
2482
+ *
2483
+ * `nvtx3::start_range(...)` is equivalent to `nvtx3::start_range_in<>(...)` and
2484
+ * `nvtx3::start_range_in<nvtx3::domain::global>(...)`.
2485
+ *
2486
+ * `start_range/end_range` are the most explicit and lowest level APIs
2486
+ * provided for creating ranges. Use of `nvtx3::unique_range` should be
2488
+ * preferred unless one is unable to tie the range to the lifetime of an object.
2489
+ *
2490
+ * This overload uses `args...` to construct an `event_attributes` to
2491
+ * associate with the range. For more detail, see `event_attributes`.
2492
+ *
2493
+ * Example:
2494
+ * \code{cpp}
2495
+ * // Manually begin a range
2496
+ * nvtx3::range_handle h = nvtx3::start_range("msg", nvtx3::rgb{127,255,0});
2497
+ * ...
2498
+ * nvtx3::end_range(h); // Ends the range
2499
+ * \endcode
2500
+ *
2501
+ * @param[in] args Variadic parameter pack of the arguments for an `event_attributes`.
2502
+ * @return Unique handle to be passed to `end_range` to end the range.
2503
+ */
2504
+ template <typename... Args>
2505
+ NVTX3_NO_DISCARD inline range_handle start_range(Args const&... args) noexcept
2506
+ {
2507
+ # ifndef NVTX_DISABLE
2508
+ return start_range_in<domain::global>(args...);
2509
+ # else
2510
+ return {};
2511
+ # endif
2512
+ }
2513
+
2514
+ /**
2515
+ * @brief Manually end the range associated with the handle `r` in domain `D`.
2516
+ *
2517
+ * Explicitly ends the NVTX range indicated by the handle `r` returned from a
2518
+ * prior call to `start_range_in<D>`. The range may end on a different thread
2519
+ * from where it began.
2520
+ *
2521
+ * @tparam D Type containing `name` member used to identify the `domain` to
2522
+ * which the range belongs. Else, `domain::global` to indicate that the global
2523
+ * NVTX domain should be used.
2524
+ * @param r Handle to a range started by a prior call to `start_range_in`.
2525
+ *
2526
+ * @warning The domain type specified as template parameter to this function
2527
+ * must be the same that was specified on the associated `start_range_in` call.
2528
+ */
2529
+ template <typename D = domain::global>
2530
+ inline void end_range_in(range_handle r) noexcept
2531
+ {
2532
+ # ifndef NVTX_DISABLE
2533
+ nvtxDomainRangeEnd(domain::get<D>(), r.get_value());
2534
+ # else
2535
+ (void) r;
2536
+ # endif
2537
+ }
2538
+
2539
+ /**
2540
+ * @brief Manually end the range associated with the handle `r` in the global
2541
+ * domain.
2542
+ *
2543
+ * Explicitly ends the NVTX range indicated by the handle `r` returned from a
2544
+ * prior call to `start_range`. The range may end on a different thread from
2545
+ * where it began.
2546
+ *
2547
+ * @param r Handle to a range started by a prior call to `start_range`.
2548
+ *
2549
+ * @warning The handle must refer to a range started in the global domain;
2549
+ * use `end_range_in<D>` for a range started with `start_range_in<D>`.
2551
+ */
2552
+ inline void end_range(range_handle r) noexcept
2553
+ {
2554
+ # ifndef NVTX_DISABLE
2555
+ end_range_in<domain::global>(r);
2556
+ # else
2557
+ (void) r;
2558
+ # endif
2559
+ }
2560
+
2561
+ /**
2562
+ * @brief A RAII object for creating a NVTX range within a domain that can
2563
+ * be created and destroyed on different threads.
2564
+ *
2565
+ * When constructed, begins a NVTX range in the specified domain. Upon
2566
+ * destruction, ends the NVTX range.
2567
+ *
2568
+ * Similar to `nvtx3::scoped_range_in`, with a few key differences:
2569
+ * - `unique_range` objects can be destroyed in any order whereas `scoped_range` objects must be
2570
+ * destroyed in exact reverse creation order
2571
+ * - `unique_range` can start and end on different threads
2572
+ * - `unique_range` is movable
2573
+ * - `unique_range` objects can be constructed as heap objects
2574
+ *
2575
+ * There is extra overhead associated with `unique_range` constructs and therefore use of
2576
+ * `nvtx3::scoped_range_in` should be preferred.
2577
+ *
2578
+ * @tparam D Type containing `name` member used to identify the `domain`
2579
+ * to which the `unique_range_in` belongs. Else, `domain::global` to
2580
+ * indicate that the global NVTX domain should be used.
2581
+ */
2582
+ template <typename D = domain::global>
2583
+ class NVTX3_MAYBE_UNUSED unique_range_in
2584
+ {
2585
+ public:
2586
+ /**
2587
+ * @brief Construct a new unique_range_in object with the specified event attributes
2588
+ *
2589
+ * Example:
2590
+ * \code{cpp}
2591
+ * nvtx3::event_attributes attr{"msg", nvtx3::rgb{127,255,0}};
2592
+ * nvtx3::unique_range_in<my_domain> range{attr}; // Creates a range with message contents
2593
+ * // "msg" and green color
2594
+ * \endcode
2595
+ *
2596
+ * @param[in] attr `event_attributes` that describes the desired attributes
2597
+ * of the range.
2598
+ */
2599
+ explicit unique_range_in(event_attributes const& attr) noexcept
2600
+ : handle_{start_range_in<D>(attr)}
2601
+ {}
2602
+
2603
+ /**
2604
+ * @brief Constructs a `unique_range_in` from the constructor arguments
2605
+ * of an `event_attributes`.
2606
+ *
2607
+ * Forwards the arguments `args...` to construct an
2608
+ * `event_attributes` object. The `event_attributes` object is then
2609
+ * associated with the `unique_range_in`.
2610
+ *
2611
+ * For more detail, see `event_attributes` documentation.
2612
+ *
2613
+ * Example:
2614
+ * \code{.cpp}
2615
+ * // Creates a range with message "message" and green color
2616
+ * nvtx3::unique_range_in<> r{"message", nvtx3::rgb{127,255,0}};
2617
+ * \endcode
2618
+ *
2619
+ * @param[in] args Variadic parameter pack of arguments to construct an `event_attributes`
2620
+ * associated with this range.
2621
+ */
2622
+ template <typename... Args>
2623
+ explicit unique_range_in(Args const&... args) noexcept
2624
+ : unique_range_in{event_attributes{args...}}
2625
+ {}
2626
+
2627
+ /**
2628
+ * @brief Default constructor creates a `unique_range_in` with no
2629
+ * message, color, payload, nor category.
2630
+ *
2631
+ */
2632
+ constexpr unique_range_in() noexcept
2633
+ : unique_range_in{event_attributes{}}
2634
+ {}
2635
+
2636
+ /**
2637
+ * @brief Destroy the `unique_range_in` ending the range.
2638
+ *
2639
+ */
2640
+ ~unique_range_in() noexcept = default;
2641
+
2642
+ /**
2643
+ * @brief Move constructor allows taking ownership of the NVTX range from
2644
+ * another `unique_range_in`.
2645
+ *
2646
+ * @param other The range to take ownership of
2647
+ */
2648
+ unique_range_in(unique_range_in&& other) noexcept = default;
2649
+
2650
+ /**
2651
+ * @brief Move assignment operator allows taking ownership of an NVTX range
2652
+ * from another `unique_range_in`.
2653
+ *
2654
+ * @param other The range to take ownership of
2655
+ */
2656
+ unique_range_in& operator=(unique_range_in&& other) noexcept = default;
2657
+
2658
+ /// Copy construction is not allowed to prevent multiple objects from owning
2659
+ /// the same range handle
2660
+ unique_range_in(unique_range_in const&) = delete;
2661
+
2662
+ /// Copy assignment is not allowed to prevent multiple objects from owning the
2663
+ /// same range handle
2664
+ unique_range_in& operator=(unique_range_in const&) = delete;
2665
+
2666
+ private:
2667
+ struct end_range_handle
2668
+ {
2669
+ using pointer = range_handle; /// Override the pointer type of the unique_ptr
2670
+ void operator()(range_handle h) const noexcept
2671
+ {
2672
+ end_range_in<D>(h);
2673
+ }
2674
+ };
2675
+
2676
+ /// Range handle used to correlate the start/end of the range
2677
+ std::unique_ptr<range_handle, end_range_handle> handle_;
2678
+ };
2679
+
2680
+ /**
2681
+ * @brief Alias for a `unique_range_in` in the global NVTX domain.
2682
+ *
2683
+ */
2684
+ using unique_range = unique_range_in<domain::global>;
2685
+
2686
+ /**
2687
+ * @brief Annotates an instantaneous point in time with a "marker", using the
2688
+ * attributes specified by `attr`.
2689
+ *
2690
+ * Unlike a "range" which has a beginning and an end, a marker is a single event
2691
+ * in an application, such as detecting a problem:
2692
+ *
2693
+ * \code{.cpp}
2694
+ * bool success = do_operation(...);
2695
+ * if (!success) {
2696
+ * nvtx3::event_attributes attr{"operation failed!", nvtx3::rgb{255,0,0}};
2697
+ * nvtx3::mark_in<my_domain>(attr);
2698
+ * }
2699
+ * \endcode
2700
+ *
2701
+ * Note that nvtx3::mark_in<D> is a function, not a class like scoped_range_in<D>.
2702
+ *
2703
+ * @tparam D Type containing `name` member used to identify the `domain`
2704
+ * to which the mark belongs. Else, `domain::global` to
2705
+ * indicate that the global NVTX domain should be used.
2706
+ * @param[in] attr `event_attributes` that describes the desired attributes
2707
+ * of the mark.
2708
+ */
2709
+ template <typename D = domain::global>
2710
+ inline void mark_in(event_attributes const& attr) noexcept
2711
+ {
2712
+ # ifndef NVTX_DISABLE
2713
+ nvtxDomainMarkEx(domain::get<D>(), attr.get());
2714
+ # else
2715
+ (void) (attr);
2716
+ # endif
2717
+ }
2718
+
2719
+ /**
2720
+ * @brief Annotates an instantaneous point in time with a "marker", using the
2721
+ * arguments to construct an `event_attributes`.
2722
+ *
2723
+ * Unlike a "range" which has a beginning and an end, a marker is a single event
2724
+ * in an application, such as detecting a problem:
2725
+ *
2726
+ * \code{.cpp}
2727
+ * bool success = do_operation(...);
2728
+ * if (!success) {
2729
+ * nvtx3::mark_in<my_domain>("operation failed!", nvtx3::rgb{255,0,0});
2730
+ * }
2731
+ * \endcode
2732
+ *
2733
+ * Note that nvtx3::mark_in<D> is a function, not a class like scoped_range_in<D>.
2734
+ *
2735
+ * Forwards the arguments `args...` to construct an `event_attributes` object.
2736
+ * The attributes are then associated with the marker. For more detail, see
2737
+ * the `event_attributes` documentation.
2738
+ *
2739
+ * @tparam D Type containing `name` member used to identify the `domain`
2740
+ * to which the mark belongs. Else, `domain::global` to
2741
+ * indicate that the global NVTX domain should be used.
2742
+ * @param[in] args Variadic parameter pack of arguments to construct an `event_attributes`
2743
+ * associated with this mark.
2744
+ *
2745
+ */
2746
+ template <typename D = domain::global, typename... Args>
2747
+ inline void mark_in(Args const&... args) noexcept
2748
+ {
2749
+ # ifndef NVTX_DISABLE
2750
+ mark_in<D>(event_attributes{args...});
2751
+ # endif
2752
+ }
2753
+
2754
+ /**
2755
+ * @brief Annotates an instantaneous point in time with a "marker", using the
2756
+ * attributes specified by `attr`, in the global domain.
2757
+ *
2758
+ * Unlike a "range" which has a beginning and an end, a marker is a single event
2759
+ * in an application, such as detecting a problem:
2760
+ *
2761
+ * \code{.cpp}
2762
+ * bool success = do_operation(...);
2763
+ * if (!success) {
2764
+ * nvtx3::event_attributes attr{"operation failed!", nvtx3::rgb{255,0,0}};
2765
+ * nvtx3::mark(attr);
2766
+ * }
2767
+ * \endcode
2768
+ *
2769
+ * Note that nvtx3::mark is a function, not a class like scoped_range.
2770
+ *
2771
+ * @param[in] attr `event_attributes` that describes the desired attributes
2772
+ * of the mark.
2773
+ */
2774
+ inline void mark(event_attributes const& attr) noexcept
2775
+ {
2776
+ // No preprocessor guard here: `mark_in` handles `NVTX_DISABLE` itself and
2777
+ // discards `attr` in that configuration, so `attr` is never left unused.
2778
+ mark_in<domain::global>(attr);
2779
+ }
2780
+
2781
+ /**
2782
+ * @brief Annotates an instantaneous point in time with a "marker", using the
2783
+ * arguments to construct an `event_attributes`, in the global domain.
2784
+ *
2785
+ * Unlike a "range" which has a beginning and an end, a marker is a single event
2786
+ * in an application, such as detecting a problem:
2787
+ *
2788
+ * \code{.cpp}
2789
+ * bool success = do_operation(...);
2790
+ * if (!success) {
2791
+ * nvtx3::mark("operation failed!", nvtx3::rgb{255,0,0});
2792
+ * }
2793
+ * \endcode
2794
+ *
2795
+ * Note that nvtx3::mark is a function, not a class like scoped_range.
2796
+ *
2797
+ * Forwards the arguments `args...` to construct an `event_attributes` object.
2798
+ * The attributes are then associated with the marker. For more detail, see
2799
+ * the `event_attributes` documentation.
2800
+ *
2801
+ * @param[in] args Variadic parameter pack of arguments to construct an
2802
+ * `event_attributes` associated with this marker.
2803
+ *
2804
+ */
2805
+ template <typename... Args>
2806
+ inline void mark(Args const&... args) noexcept
2807
+ {
2808
+ # ifndef NVTX_DISABLE
2809
+ mark_in<domain::global>(args...); // pass the pack on unchanged to `mark_in`, pinned to the global domain
2810
+ # endif
2811
+ }
2812
+
2813
+ } // namespace NVTX3_VERSION_NAMESPACE
2814
+
2815
+ } // namespace nvtx3
2816
+
2817
+ # ifndef NVTX_DISABLE
2818
+ /**
2819
+ * @brief Convenience macro for generating a range in the specified `domain`
2820
+ * from the lifetime of a function.
2821
+ *
2822
+ * This macro is useful for generating an NVTX range in `domain` from
2823
+ * the entry point of a function to its exit. It is intended to be the first
2824
+ * line of the function.
2825
+ *
2826
+ * Constructs a static `registered_string_in` using the name of the immediately
2827
+ * enclosing function returned by `__func__` and constructs a
2828
+ * `nvtx3::scoped_range_in` using the registered function name as the range's
2829
+ * message. The registered string and its `event_attributes` are declared `static`.
2830
+ *
2831
+ * Example:
2832
+ * \code{.cpp}
2833
+ * struct my_domain{static constexpr char const* name{"my_domain"};};
2834
+ *
2835
+ * void foo(...) {
2836
+ * NVTX3_FUNC_RANGE_IN(my_domain); // Range begins on entry to foo()
2837
+ * // do stuff
2838
+ * ...
2839
+ * } // Range ends on return from foo()
2840
+ * \endcode
2841
+ *
2842
+ * @param[in] D Type containing `name` member used to identify the
2843
+ * `domain` to which the `registered_string_in` belongs. Else,
2844
+ * `domain::global` to indicate that the global NVTX domain should be used.
2845
+ */
2846
+ # define NVTX3_V1_FUNC_RANGE_IN(D) \
2847
+ static ::nvtx3::v1::registered_string_in<D> const nvtx3_func_name__{__func__}; \
2848
+ static ::nvtx3::v1::event_attributes const nvtx3_func_attr__{nvtx3_func_name__}; \
2849
+ ::nvtx3::v1::scoped_range_in<D> const nvtx3_range__{nvtx3_func_attr__};
2850
+
2851
+ /**
2852
+ * @brief Convenience macro for generating a range in the specified `domain`
2853
+ * from the lifetime of a function if the given boolean expression evaluates
2854
+ * to true.
2855
+ *
2856
+ * Similar to `NVTX3_V1_FUNC_RANGE_IN(D)`, the only difference being that
2857
+ * `NVTX3_V1_FUNC_RANGE_IF_IN(D, C)` only generates a range if the given boolean
2858
+ * expression evaluates to true.
2859
+ *
2860
+ * @param[in] D Type containing `name` member used to identify the
2861
+ * `domain` to which the `registered_string_in` belongs. Else,
2862
+ * `domain::global` to indicate that the global NVTX domain should be used.
2863
+ *
2864
+ * @param[in] C Boolean expression used to determine if a range should be
2865
+ * generated. It appears once in the expansion, as the condition of an `if`.
2866
+ */
2867
+ # define NVTX3_V1_FUNC_RANGE_IF_IN(D, C) \
2868
+ ::nvtx3::v1::detail::optional_scoped_range_in<D> optional_nvtx3_range__; \
2869
+ if (C) \
2870
+ { \
2871
+ static ::nvtx3::v1::registered_string_in<D> const nvtx3_func_name__{__func__}; \
2872
+ static ::nvtx3::v1::event_attributes const nvtx3_func_attr__{nvtx3_func_name__}; \
2873
+ optional_nvtx3_range__.begin(nvtx3_func_attr__); \
2874
+ }
2875
+ # else
2876
+ # define NVTX3_V1_FUNC_RANGE_IN(D) /* NVTX_DISABLE is defined: expands to nothing */
2877
+ # define NVTX3_V1_FUNC_RANGE_IF_IN(D, C) /* NVTX_DISABLE is defined: expands to nothing, so `C` is not evaluated */
2878
+ # endif // NVTX_DISABLE
2879
+
2880
+ /**
2881
+ * @brief Convenience macro for generating a range in the global domain from the
2882
+ * lifetime of a function.
2883
+ *
2884
+ * This macro is useful for generating an NVTX range in the global domain from
2885
+ * the entry point of a function to its exit. It is intended to be the first
2886
+ * line of the function.
2887
+ *
2888
+ * Constructs a static `registered_string_in` using the name of the immediately
2889
+ * enclosing function returned by `__func__` and constructs a
2890
+ * `nvtx3::scoped_range` using the registered function name as the range's
2891
+ * message. Expands to `NVTX3_V1_FUNC_RANGE_IN(::nvtx3::v1::domain::global)`.
2892
+ *
2893
+ * Example:
2894
+ * \code{.cpp}
2895
+ * void foo(...) {
2896
+ * NVTX3_FUNC_RANGE(); // Range begins on entry to foo()
2897
+ * // do stuff
2898
+ * ...
2899
+ * } // Range ends on return from foo()
2900
+ * \endcode
2901
+ */
2902
+ # define NVTX3_V1_FUNC_RANGE() NVTX3_V1_FUNC_RANGE_IN(::nvtx3::v1::domain::global)
2903
+
2904
+ /**
2905
+ * @brief Convenience macro for generating a range in the global domain from the
2906
+ * lifetime of a function if the given boolean expression evaluates to true.
2907
+ *
2908
+ * Similar to `NVTX3_V1_FUNC_RANGE()`, the only difference being that
2909
+ * `NVTX3_V1_FUNC_RANGE_IF(C)` only generates a range if the given boolean
2910
+ * expression evaluates to true.
2911
+ *
2912
+ * @param[in] C Boolean expression used to determine if a range should be
2913
+ * generated. Passed unchanged to `NVTX3_V1_FUNC_RANGE_IF_IN` with the global domain.
2914
+ */
2915
+ # define NVTX3_V1_FUNC_RANGE_IF(C) NVTX3_V1_FUNC_RANGE_IF_IN(::nvtx3::v1::domain::global, C)
2916
+
2917
+ /* When inlining this version, versioned macros must have unversioned aliases.
2918
+ * For each NVTX3_Vx_ #define, make an NVTX3_ alias of it here. */
2919
+ # if defined(NVTX3_INLINE_THIS_VERSION)
2920
+ /* clang-format off */
2921
+ # define NVTX3_FUNC_RANGE NVTX3_V1_FUNC_RANGE
2922
+ # define NVTX3_FUNC_RANGE_IF NVTX3_V1_FUNC_RANGE_IF
2923
+ # define NVTX3_FUNC_RANGE_IN NVTX3_V1_FUNC_RANGE_IN
2924
+ # define NVTX3_FUNC_RANGE_IF_IN NVTX3_V1_FUNC_RANGE_IF_IN
2925
+ /* clang-format on */
2926
+ # endif
2927
+
2928
+ #endif // NVTX3_CPP_DEFINITIONS_V1_0
2929
+
2930
+ /* Add functionality for new minor versions here, by copying the above section enclosed
2931
+ * in #ifndef NVTX3_CPP_DEFINITIONS_Vx_y, and incrementing the minor version. This code
2932
+ * is an example of how additions for version 1.2 would look, indented for clarity. Note
2933
+ * that the versioned symbols and macros are always provided, and the unversioned symbols
2934
+ * are only provided if NVTX3_INLINE_THIS_VERSION was defined at the top of this header.
2935
+ *
2936
+ * \code{.cpp}
2937
+ * #ifndef NVTX3_CPP_DEFINITIONS_V1_2
2938
+ * #define NVTX3_CPP_DEFINITIONS_V1_2
2939
+ * namespace nvtx3 {
2940
+ * NVTX3_INLINE_IF_REQUESTED namespace NVTX3_VERSION_NAMESPACE {
2941
+ * class new_class {};
2942
+ * inline void new_function() {}
2943
+ * }
2944
+ * }
2945
+ *
2946
+ * // Macros must have the major version in their names:
2947
+ * #define NVTX3_V1_NEW_MACRO_A() ...
2948
+ * #define NVTX3_V1_NEW_MACRO_B() ...
2949
+ *
2950
+ * // If inlining, make aliases for the macros with the version number omitted
2951
+ * #if defined(NVTX3_INLINE_THIS_VERSION)
2952
+ * #define NVTX3_NEW_MACRO_A NVTX3_V1_NEW_MACRO_A
2953
+ * #define NVTX3_NEW_MACRO_B NVTX3_V1_NEW_MACRO_B
2954
+ * #endif
2955
+ * #endif // NVTX3_CPP_DEFINITIONS_V1_2
2956
+ * \endcode
2957
+ */
2958
+
2959
+ /* Undefine all temporarily-defined unversioned macros, which would conflict with
2960
+ * subsequent includes of different versions of this header. */
2961
+ #undef NVTX3_CPP_VERSION_MAJOR
2962
+ #undef NVTX3_CPP_VERSION_MINOR
2963
+ #undef NVTX3_CONCAT
2964
+ #undef NVTX3_NAMESPACE_FOR
2965
+ #undef NVTX3_VERSION_NAMESPACE
2966
+ #undef NVTX3_INLINE_IF_REQUESTED
2967
+ #undef NVTX3_CONSTEXPR_IF_CPP14
2968
+ #undef NVTX3_MAYBE_UNUSED
2969
+ #undef NVTX3_NO_DISCARD
2970
+
2971
+ #if defined(NVTX3_INLINE_THIS_VERSION) // the unversioned NVTX3_* aliases defined above stay defined; only this request flag is cleared
2972
+ # undef NVTX3_INLINE_THIS_VERSION
2973
+ #endif
2974
+
2975
+ #if defined(NVTX3_USE_CHECKED_OVERLOADS_FOR_GET_DEFINED_HERE) // NOTE(review): marker presumably set only when this header itself defined the macro below; confirm near the top of the file
2976
+ # undef NVTX3_USE_CHECKED_OVERLOADS_FOR_GET_DEFINED_HERE
2977
+ # undef NVTX3_USE_CHECKED_OVERLOADS_FOR_GET
2978
+ #endif
2979
+
2980
+ #if defined(NVTX3_STATIC_ASSERT_DEFINED_HERE) // NOTE(review): same marker pattern, presumably guarding a header-provided NVTX3_STATIC_ASSERT; confirm
2981
+ # undef NVTX3_STATIC_ASSERT_DEFINED_HERE
2982
+ # undef NVTX3_STATIC_ASSERT
2983
+ #endif