cuda-cccl 0.3.3__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cuda-cccl might be problematic. Click here for more details.
- cuda/cccl/__init__.py +27 -0
- cuda/cccl/_cuda_version_utils.py +24 -0
- cuda/cccl/cooperative/__init__.py +9 -0
- cuda/cccl/cooperative/experimental/__init__.py +24 -0
- cuda/cccl/headers/__init__.py +7 -0
- cuda/cccl/headers/include/__init__.py +1 -0
- cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +259 -0
- cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +1182 -0
- cuda/cccl/headers/include/cub/agent/agent_for.cuh +81 -0
- cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +709 -0
- cuda/cccl/headers/include/cub/agent/agent_merge.cuh +234 -0
- cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +748 -0
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +786 -0
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +286 -0
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +703 -0
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +555 -0
- cuda/cccl/headers/include/cub/agent/agent_reduce.cuh +619 -0
- cuda/cccl/headers/include/cub/agent/agent_reduce_by_key.cuh +806 -0
- cuda/cccl/headers/include/cub/agent/agent_rle.cuh +1124 -0
- cuda/cccl/headers/include/cub/agent/agent_scan.cuh +589 -0
- cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +474 -0
- cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +289 -0
- cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +1117 -0
- cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +346 -0
- cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +606 -0
- cuda/cccl/headers/include/cub/agent/agent_topk.cuh +764 -0
- cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +631 -0
- cuda/cccl/headers/include/cub/agent/single_pass_scan_operators.cuh +1424 -0
- cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +963 -0
- cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +1227 -0
- cuda/cccl/headers/include/cub/block/block_exchange.cuh +1313 -0
- cuda/cccl/headers/include/cub/block/block_histogram.cuh +424 -0
- cuda/cccl/headers/include/cub/block/block_load.cuh +1264 -0
- cuda/cccl/headers/include/cub/block/block_load_to_shared.cuh +432 -0
- cuda/cccl/headers/include/cub/block/block_merge_sort.cuh +800 -0
- cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +1225 -0
- cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +2196 -0
- cuda/cccl/headers/include/cub/block/block_raking_layout.cuh +150 -0
- cuda/cccl/headers/include/cub/block/block_reduce.cuh +667 -0
- cuda/cccl/headers/include/cub/block/block_run_length_decode.cuh +434 -0
- cuda/cccl/headers/include/cub/block/block_scan.cuh +2315 -0
- cuda/cccl/headers/include/cub/block/block_shuffle.cuh +346 -0
- cuda/cccl/headers/include/cub/block/block_store.cuh +1247 -0
- cuda/cccl/headers/include/cub/block/radix_rank_sort_operations.cuh +624 -0
- cuda/cccl/headers/include/cub/block/specializations/block_histogram_atomic.cuh +86 -0
- cuda/cccl/headers/include/cub/block/specializations/block_histogram_sort.cuh +240 -0
- cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking.cuh +252 -0
- cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking_commutative_only.cuh +238 -0
- cuda/cccl/headers/include/cub/block/specializations/block_reduce_warp_reductions.cuh +281 -0
- cuda/cccl/headers/include/cub/block/specializations/block_scan_raking.cuh +790 -0
- cuda/cccl/headers/include/cub/block/specializations/block_scan_warp_scans.cuh +538 -0
- cuda/cccl/headers/include/cub/config.cuh +53 -0
- cuda/cccl/headers/include/cub/cub.cuh +120 -0
- cuda/cccl/headers/include/cub/detail/array_utils.cuh +78 -0
- cuda/cccl/headers/include/cub/detail/choose_offset.cuh +161 -0
- cuda/cccl/headers/include/cub/detail/detect_cuda_runtime.cuh +74 -0
- cuda/cccl/headers/include/cub/detail/device_double_buffer.cuh +96 -0
- cuda/cccl/headers/include/cub/detail/device_memory_resource.cuh +62 -0
- cuda/cccl/headers/include/cub/detail/fast_modulo_division.cuh +253 -0
- cuda/cccl/headers/include/cub/detail/integer_utils.cuh +88 -0
- cuda/cccl/headers/include/cub/detail/launcher/cuda_driver.cuh +142 -0
- cuda/cccl/headers/include/cub/detail/launcher/cuda_runtime.cuh +100 -0
- cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +114 -0
- cuda/cccl/headers/include/cub/detail/ptx-json/README.md +71 -0
- cuda/cccl/headers/include/cub/detail/ptx-json/array.h +68 -0
- cuda/cccl/headers/include/cub/detail/ptx-json/json.h +62 -0
- cuda/cccl/headers/include/cub/detail/ptx-json/object.h +100 -0
- cuda/cccl/headers/include/cub/detail/ptx-json/string.h +53 -0
- cuda/cccl/headers/include/cub/detail/ptx-json/value.h +95 -0
- cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +63 -0
- cuda/cccl/headers/include/cub/detail/rfa.cuh +731 -0
- cuda/cccl/headers/include/cub/detail/strong_load.cuh +189 -0
- cuda/cccl/headers/include/cub/detail/strong_store.cuh +220 -0
- cuda/cccl/headers/include/cub/detail/temporary_storage.cuh +384 -0
- cuda/cccl/headers/include/cub/detail/type_traits.cuh +187 -0
- cuda/cccl/headers/include/cub/detail/uninitialized_copy.cuh +73 -0
- cuda/cccl/headers/include/cub/detail/unsafe_bitcast.cuh +56 -0
- cuda/cccl/headers/include/cub/device/device_adjacent_difference.cuh +596 -0
- cuda/cccl/headers/include/cub/device/device_copy.cuh +276 -0
- cuda/cccl/headers/include/cub/device/device_for.cuh +1063 -0
- cuda/cccl/headers/include/cub/device/device_histogram.cuh +1509 -0
- cuda/cccl/headers/include/cub/device/device_memcpy.cuh +195 -0
- cuda/cccl/headers/include/cub/device/device_merge.cuh +203 -0
- cuda/cccl/headers/include/cub/device/device_merge_sort.cuh +979 -0
- cuda/cccl/headers/include/cub/device/device_partition.cuh +668 -0
- cuda/cccl/headers/include/cub/device/device_radix_sort.cuh +3437 -0
- cuda/cccl/headers/include/cub/device/device_reduce.cuh +2518 -0
- cuda/cccl/headers/include/cub/device/device_run_length_encode.cuh +370 -0
- cuda/cccl/headers/include/cub/device/device_scan.cuh +2212 -0
- cuda/cccl/headers/include/cub/device/device_segmented_radix_sort.cuh +1496 -0
- cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +1430 -0
- cuda/cccl/headers/include/cub/device/device_segmented_sort.cuh +2811 -0
- cuda/cccl/headers/include/cub/device/device_select.cuh +1228 -0
- cuda/cccl/headers/include/cub/device/device_topk.cuh +511 -0
- cuda/cccl/headers/include/cub/device/device_transform.cuh +668 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_adjacent_difference.cuh +315 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_batch_memcpy.cuh +719 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_common.cuh +43 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_copy_mdspan.cuh +79 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_for.cuh +198 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_histogram.cuh +1046 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +303 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge_sort.cuh +473 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +1744 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +1310 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_by_key.cuh +655 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +531 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh +313 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_rle.cuh +615 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan.cuh +517 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan_by_key.cuh +602 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +975 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_select_if.cuh +842 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +341 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +440 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_three_way_partition.cuh +389 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_topk.cuh +627 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +569 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_unique_by_key.cuh +545 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/for_each.cuh +261 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/histogram.cuh +505 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/merge_sort.cuh +334 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/radix_sort.cuh +803 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/reduce.cuh +583 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/scan.cuh +189 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_reduce.cuh +321 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_sort.cuh +522 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/three_way_partition.cuh +201 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +1028 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/unique_by_key.cuh +176 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +67 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +118 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +60 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +275 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +76 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +126 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +1065 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce.cuh +493 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +942 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +673 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +618 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +1010 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +398 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_select_if.cuh +1588 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +440 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_topk.cuh +85 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +481 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +884 -0
- cuda/cccl/headers/include/cub/grid/grid_even_share.cuh +227 -0
- cuda/cccl/headers/include/cub/grid/grid_mapping.cuh +106 -0
- cuda/cccl/headers/include/cub/grid/grid_queue.cuh +202 -0
- cuda/cccl/headers/include/cub/iterator/arg_index_input_iterator.cuh +254 -0
- cuda/cccl/headers/include/cub/iterator/cache_modified_input_iterator.cuh +259 -0
- cuda/cccl/headers/include/cub/iterator/cache_modified_output_iterator.cuh +250 -0
- cuda/cccl/headers/include/cub/iterator/tex_obj_input_iterator.cuh +320 -0
- cuda/cccl/headers/include/cub/thread/thread_load.cuh +349 -0
- cuda/cccl/headers/include/cub/thread/thread_operators.cuh +688 -0
- cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +548 -0
- cuda/cccl/headers/include/cub/thread/thread_scan.cuh +498 -0
- cuda/cccl/headers/include/cub/thread/thread_search.cuh +199 -0
- cuda/cccl/headers/include/cub/thread/thread_simd.cuh +458 -0
- cuda/cccl/headers/include/cub/thread/thread_sort.cuh +102 -0
- cuda/cccl/headers/include/cub/thread/thread_store.cuh +365 -0
- cuda/cccl/headers/include/cub/util_allocator.cuh +921 -0
- cuda/cccl/headers/include/cub/util_arch.cuh +167 -0
- cuda/cccl/headers/include/cub/util_cpp_dialect.cuh +95 -0
- cuda/cccl/headers/include/cub/util_debug.cuh +207 -0
- cuda/cccl/headers/include/cub/util_device.cuh +800 -0
- cuda/cccl/headers/include/cub/util_macro.cuh +97 -0
- cuda/cccl/headers/include/cub/util_math.cuh +118 -0
- cuda/cccl/headers/include/cub/util_namespace.cuh +176 -0
- cuda/cccl/headers/include/cub/util_policy_wrapper_t.cuh +55 -0
- cuda/cccl/headers/include/cub/util_ptx.cuh +513 -0
- cuda/cccl/headers/include/cub/util_temporary_storage.cuh +122 -0
- cuda/cccl/headers/include/cub/util_type.cuh +1120 -0
- cuda/cccl/headers/include/cub/util_vsmem.cuh +253 -0
- cuda/cccl/headers/include/cub/version.cuh +89 -0
- cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_shfl.cuh +329 -0
- cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_smem.cuh +177 -0
- cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +737 -0
- cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +408 -0
- cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +952 -0
- cuda/cccl/headers/include/cub/warp/specializations/warp_scan_smem.cuh +715 -0
- cuda/cccl/headers/include/cub/warp/warp_exchange.cuh +405 -0
- cuda/cccl/headers/include/cub/warp/warp_load.cuh +614 -0
- cuda/cccl/headers/include/cub/warp/warp_merge_sort.cuh +169 -0
- cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +829 -0
- cuda/cccl/headers/include/cub/warp/warp_scan.cuh +1890 -0
- cuda/cccl/headers/include/cub/warp/warp_store.cuh +521 -0
- cuda/cccl/headers/include/cub/warp/warp_utils.cuh +61 -0
- cuda/cccl/headers/include/cuda/__algorithm/common.h +68 -0
- cuda/cccl/headers/include/cuda/__algorithm/copy.h +196 -0
- cuda/cccl/headers/include/cuda/__algorithm/fill.h +107 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/access_property.h +165 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/access_property_encoding.h +172 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr.h +217 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr_base.h +100 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/apply_access_property.h +83 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/associate_access_property.h +128 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/createpolicy.h +210 -0
- cuda/cccl/headers/include/cuda/__atomic/atomic.h +145 -0
- cuda/cccl/headers/include/cuda/__barrier/async_contract_fulfillment.h +39 -0
- cuda/cccl/headers/include/cuda/__barrier/barrier.h +65 -0
- cuda/cccl/headers/include/cuda/__barrier/barrier_arrive_tx.h +102 -0
- cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +487 -0
- cuda/cccl/headers/include/cuda/__barrier/barrier_expect_tx.h +74 -0
- cuda/cccl/headers/include/cuda/__barrier/barrier_native_handle.h +45 -0
- cuda/cccl/headers/include/cuda/__barrier/barrier_thread_scope.h +60 -0
- cuda/cccl/headers/include/cuda/__bit/bit_reverse.h +171 -0
- cuda/cccl/headers/include/cuda/__bit/bitfield.h +122 -0
- cuda/cccl/headers/include/cuda/__bit/bitmask.h +90 -0
- cuda/cccl/headers/include/cuda/__cccl_config +37 -0
- cuda/cccl/headers/include/cuda/__cmath/ceil_div.h +124 -0
- cuda/cccl/headers/include/cuda/__cmath/fast_modulo_division.h +178 -0
- cuda/cccl/headers/include/cuda/__cmath/ilog.h +195 -0
- cuda/cccl/headers/include/cuda/__cmath/ipow.h +107 -0
- cuda/cccl/headers/include/cuda/__cmath/isqrt.h +80 -0
- cuda/cccl/headers/include/cuda/__cmath/mul_hi.h +146 -0
- cuda/cccl/headers/include/cuda/__cmath/neg.h +47 -0
- cuda/cccl/headers/include/cuda/__cmath/pow2.h +74 -0
- cuda/cccl/headers/include/cuda/__cmath/round_down.h +102 -0
- cuda/cccl/headers/include/cuda/__cmath/round_up.h +104 -0
- cuda/cccl/headers/include/cuda/__cmath/uabs.h +57 -0
- cuda/cccl/headers/include/cuda/__complex/complex.h +238 -0
- cuda/cccl/headers/include/cuda/__complex/get_real_imag.h +89 -0
- cuda/cccl/headers/include/cuda/__complex/traits.h +64 -0
- cuda/cccl/headers/include/cuda/__complex_ +28 -0
- cuda/cccl/headers/include/cuda/__device/all_devices.h +140 -0
- cuda/cccl/headers/include/cuda/__device/arch_id.h +176 -0
- cuda/cccl/headers/include/cuda/__device/arch_traits.h +537 -0
- cuda/cccl/headers/include/cuda/__device/attributes.h +772 -0
- cuda/cccl/headers/include/cuda/__device/compute_capability.h +171 -0
- cuda/cccl/headers/include/cuda/__device/device_ref.h +156 -0
- cuda/cccl/headers/include/cuda/__device/physical_device.h +172 -0
- cuda/cccl/headers/include/cuda/__driver/driver_api.h +835 -0
- cuda/cccl/headers/include/cuda/__event/event.h +171 -0
- cuda/cccl/headers/include/cuda/__event/event_ref.h +157 -0
- cuda/cccl/headers/include/cuda/__event/timed_event.h +120 -0
- cuda/cccl/headers/include/cuda/__execution/determinism.h +91 -0
- cuda/cccl/headers/include/cuda/__execution/output_ordering.h +89 -0
- cuda/cccl/headers/include/cuda/__execution/require.h +75 -0
- cuda/cccl/headers/include/cuda/__execution/tune.h +70 -0
- cuda/cccl/headers/include/cuda/__functional/address_stability.h +131 -0
- cuda/cccl/headers/include/cuda/__functional/for_each_canceled.h +321 -0
- cuda/cccl/headers/include/cuda/__functional/maximum.h +58 -0
- cuda/cccl/headers/include/cuda/__functional/minimum.h +58 -0
- cuda/cccl/headers/include/cuda/__functional/proclaim_return_type.h +108 -0
- cuda/cccl/headers/include/cuda/__fwd/barrier.h +38 -0
- cuda/cccl/headers/include/cuda/__fwd/barrier_native_handle.h +42 -0
- cuda/cccl/headers/include/cuda/__fwd/complex.h +48 -0
- cuda/cccl/headers/include/cuda/__fwd/devices.h +44 -0
- cuda/cccl/headers/include/cuda/__fwd/get_stream.h +38 -0
- cuda/cccl/headers/include/cuda/__fwd/pipeline.h +37 -0
- cuda/cccl/headers/include/cuda/__fwd/zip_iterator.h +58 -0
- cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +315 -0
- cuda/cccl/headers/include/cuda/__iterator/counting_iterator.h +483 -0
- cuda/cccl/headers/include/cuda/__iterator/discard_iterator.h +324 -0
- cuda/cccl/headers/include/cuda/__iterator/permutation_iterator.h +456 -0
- cuda/cccl/headers/include/cuda/__iterator/shuffle_iterator.h +334 -0
- cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +418 -0
- cuda/cccl/headers/include/cuda/__iterator/tabulate_output_iterator.h +367 -0
- cuda/cccl/headers/include/cuda/__iterator/transform_input_output_iterator.h +528 -0
- cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +527 -0
- cuda/cccl/headers/include/cuda/__iterator/transform_output_iterator.h +486 -0
- cuda/cccl/headers/include/cuda/__iterator/zip_common.h +148 -0
- cuda/cccl/headers/include/cuda/__iterator/zip_function.h +112 -0
- cuda/cccl/headers/include/cuda/__iterator/zip_iterator.h +557 -0
- cuda/cccl/headers/include/cuda/__iterator/zip_transform_iterator.h +592 -0
- cuda/cccl/headers/include/cuda/__latch/latch.h +44 -0
- cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +533 -0
- cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +238 -0
- cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +152 -0
- cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +117 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/check_preconditions.h +79 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/completion_mechanism.h +47 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_bulk_shared_global.h +60 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_fallback.h +72 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_shared_global.h +148 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/dispatch_memcpy_async.h +165 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/is_local_smem_barrier.h +53 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async.h +179 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_barrier.h +99 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_tx.h +104 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_completion.h +170 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/try_get_barrier_handle.h +59 -0
- cuda/cccl/headers/include/cuda/__memory/address_space.h +227 -0
- cuda/cccl/headers/include/cuda/__memory/align_down.h +56 -0
- cuda/cccl/headers/include/cuda/__memory/align_up.h +56 -0
- cuda/cccl/headers/include/cuda/__memory/aligned_size.h +61 -0
- cuda/cccl/headers/include/cuda/__memory/check_address.h +111 -0
- cuda/cccl/headers/include/cuda/__memory/discard_memory.h +64 -0
- cuda/cccl/headers/include/cuda/__memory/get_device_address.h +58 -0
- cuda/cccl/headers/include/cuda/__memory/is_aligned.h +47 -0
- cuda/cccl/headers/include/cuda/__memory/ptr_in_range.h +93 -0
- cuda/cccl/headers/include/cuda/__memory/ptr_rebind.h +75 -0
- cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +82 -0
- cuda/cccl/headers/include/cuda/__memory_resource/get_property.h +153 -0
- cuda/cccl/headers/include/cuda/__memory_resource/properties.h +113 -0
- cuda/cccl/headers/include/cuda/__memory_resource/resource.h +125 -0
- cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +652 -0
- cuda/cccl/headers/include/cuda/__numeric/add_overflow.h +306 -0
- cuda/cccl/headers/include/cuda/__numeric/narrow.h +108 -0
- cuda/cccl/headers/include/cuda/__numeric/overflow_cast.h +59 -0
- cuda/cccl/headers/include/cuda/__numeric/overflow_result.h +43 -0
- cuda/cccl/headers/include/cuda/__nvtx/nvtx.h +120 -0
- cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2983 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/barrier_cluster.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/bfind.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/bmsk.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/clusterlaunchcontrol.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk.h +44 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_commit_group.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_tensor.h +45 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_wait_group.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_mbarrier_arrive.h +42 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk.h +60 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk_tensor.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/elect_sync.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/exit.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/fence.h +49 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/barrier_cluster.h +115 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bfind.h +190 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bmsk.h +54 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/clusterlaunchcontrol.h +242 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk.h +197 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_commit_group.h +25 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_multicast.h +54 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor.h +997 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_gather_scatter.h +318 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_multicast.h +671 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_wait_group.h +46 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive.h +26 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive_noinc.h +26 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk.h +1470 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_bf16.h +132 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_f16.h +132 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_tensor.h +601 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/elect_sync.h +36 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/exit.h +25 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence.h +208 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_mbarrier_init.h +31 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_alias.h +25 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async.h +58 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async_generic_sync_restrict.h +64 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_tensormap_generic.h +102 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_sync_restrict.h +64 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/get_sreg.h +949 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/getctarank.h +32 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/ld.h +5542 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive.h +399 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_expect_tx.h +184 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_no_complete.h +34 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_expect_tx.h +102 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_init.h +27 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait.h +143 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait_parity.h +144 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait.h +286 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait_parity.h +290 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_ld_reduce.h +2202 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_red.h +1362 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_st.h +236 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/prmt.h +230 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/red_async.h +460 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shl.h +96 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shr.h +168 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st.h +1490 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_async.h +123 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_bulk.h +31 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_alloc.h +132 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_commit.h +99 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_cp.h +765 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_fence.h +58 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_ld.h +4927 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma.h +4291 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma_ws.h +7110 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_shift.h +42 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_st.h +5063 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_wait.h +56 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_cp_fenceproxy.h +71 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_replace.h +1030 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/trap.h +25 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/get_sreg.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/getctarank.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/ld.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_arrive.h +45 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_expect_tx.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_init.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_wait.h +46 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_ld_reduce.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_red.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_st.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/prmt.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/red_async.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/shfl_sync.h +244 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/shl.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/shr.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/st.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/st_async.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/st_bulk.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_alloc.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_commit.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_cp.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_fence.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_ld.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma_ws.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_shift.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_st.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_wait.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_cp_fenceproxy.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_replace.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/trap.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/pragmas/enable_smem_spilling.h +47 -0
- cuda/cccl/headers/include/cuda/__ptx/ptx_dot_variants.h +230 -0
- cuda/cccl/headers/include/cuda/__ptx/ptx_helper_functions.h +176 -0
- cuda/cccl/headers/include/cuda/__random/feistel_bijection.h +105 -0
- cuda/cccl/headers/include/cuda/__random/random_bijection.h +88 -0
- cuda/cccl/headers/include/cuda/__runtime/ensure_current_context.h +99 -0
- cuda/cccl/headers/include/cuda/__runtime/types.h +41 -0
- cuda/cccl/headers/include/cuda/__semaphore/counting_semaphore.h +53 -0
- cuda/cccl/headers/include/cuda/__stream/get_stream.h +110 -0
- cuda/cccl/headers/include/cuda/__stream/stream.h +141 -0
- cuda/cccl/headers/include/cuda/__stream/stream_ref.h +303 -0
- cuda/cccl/headers/include/cuda/__type_traits/is_floating_point.h +47 -0
- cuda/cccl/headers/include/cuda/__type_traits/is_specialization_of.h +37 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/access.h +88 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/any_cast.h +83 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_base.h +148 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_from.h +96 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_fwd.h +128 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_ptr.h +304 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_ref.h +337 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_value.h +590 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/conversions.h +169 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/dynamic_any_cast.h +107 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/interfaces.h +359 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/iset.h +142 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/overrides.h +64 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/rtti.h +257 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/semiregular.h +322 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/storage.h +79 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/tagged_ptr.h +58 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/virtcall.h +162 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_functions.h +184 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_ptrs.h +80 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_tables.h +155 -0
- cuda/cccl/headers/include/cuda/__utility/basic_any.h +507 -0
- cuda/cccl/headers/include/cuda/__utility/immovable.h +50 -0
- cuda/cccl/headers/include/cuda/__utility/in_range.h +65 -0
- cuda/cccl/headers/include/cuda/__utility/inherit.h +36 -0
- cuda/cccl/headers/include/cuda/__utility/no_init.h +29 -0
- cuda/cccl/headers/include/cuda/__utility/static_for.h +79 -0
- cuda/cccl/headers/include/cuda/__warp/lane_mask.h +326 -0
- cuda/cccl/headers/include/cuda/__warp/warp_match_all.h +65 -0
- cuda/cccl/headers/include/cuda/__warp/warp_shuffle.h +251 -0
- cuda/cccl/headers/include/cuda/access_property +26 -0
- cuda/cccl/headers/include/cuda/algorithm +27 -0
- cuda/cccl/headers/include/cuda/annotated_ptr +29 -0
- cuda/cccl/headers/include/cuda/atomic +27 -0
- cuda/cccl/headers/include/cuda/barrier +267 -0
- cuda/cccl/headers/include/cuda/bit +29 -0
- cuda/cccl/headers/include/cuda/cmath +37 -0
- cuda/cccl/headers/include/cuda/devices +33 -0
- cuda/cccl/headers/include/cuda/discard_memory +32 -0
- cuda/cccl/headers/include/cuda/functional +32 -0
- cuda/cccl/headers/include/cuda/iterator +39 -0
- cuda/cccl/headers/include/cuda/latch +27 -0
- cuda/cccl/headers/include/cuda/mdspan +28 -0
- cuda/cccl/headers/include/cuda/memory +35 -0
- cuda/cccl/headers/include/cuda/memory_resource +35 -0
- cuda/cccl/headers/include/cuda/numeric +29 -0
- cuda/cccl/headers/include/cuda/pipeline +579 -0
- cuda/cccl/headers/include/cuda/ptx +129 -0
- cuda/cccl/headers/include/cuda/semaphore +31 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/adjacent_find.h +59 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/all_of.h +45 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/any_of.h +45 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/binary_search.h +53 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/clamp.h +48 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/comp.h +58 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/comp_ref_type.h +85 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/copy.h +142 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/copy_backward.h +80 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/copy_if.h +47 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/copy_n.h +73 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/count.h +49 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/count_if.h +49 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/equal.h +128 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +101 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/fill.h +58 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/fill_n.h +51 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/find.h +62 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/find_end.h +225 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/find_first_of.h +73 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/find_if.h +46 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/find_if_not.h +46 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/for_each.h +42 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/for_each_n.h +48 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/generate.h +41 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/generate_n.h +46 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/half_positive.h +49 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/in_fun_result.h +55 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +90 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/is_heap.h +50 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/is_heap_until.h +83 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/is_partitioned.h +57 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/is_permutation.h +252 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted.h +49 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted_until.h +68 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/iter_swap.h +82 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/iterator_operations.h +185 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/lexicographical_compare.h +68 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +82 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/make_heap.h +70 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +88 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/max.h +62 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/max_element.h +67 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/merge.h +89 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/min.h +62 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +87 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/minmax.h +66 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +139 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/mismatch.h +83 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/move.h +86 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/move_backward.h +84 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/next_permutation.h +88 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/none_of.h +45 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort.h +102 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +122 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/partition.h +120 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/partition_copy.h +59 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/partition_point.h +61 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/pop_heap.h +93 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/prev_permutation.h +88 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/push_heap.h +100 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/ranges_for_each.h +84 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/ranges_for_each_n.h +68 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/ranges_iterator_concept.h +65 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min.h +98 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min_element.h +68 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/remove.h +55 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy.h +47 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy_if.h +47 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/remove_if.h +56 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/replace.h +45 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy.h +54 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy_if.h +50 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/replace_if.h +45 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/reverse.h +81 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/reverse_copy.h +43 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/rotate.h +261 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/rotate_copy.h +40 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/search.h +185 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/search_n.h +163 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/set_difference.h +95 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/set_intersection.h +122 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/set_symmetric_difference.h +134 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/set_union.h +128 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/shift_left.h +84 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/shift_right.h +144 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/sift_down.h +139 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/sort_heap.h +70 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/swap_ranges.h +78 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/transform.h +59 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/unique.h +76 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/unique_copy.h +155 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_iter.h +95 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_range.h +126 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +83 -0
- cuda/cccl/headers/include/cuda/std/__algorithm_ +26 -0
- cuda/cccl/headers/include/cuda/std/__atomic/api/common.h +192 -0
- cuda/cccl/headers/include/cuda/std/__atomic/api/owned.h +136 -0
- cuda/cccl/headers/include/cuda/std/__atomic/api/reference.h +118 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions/common.h +58 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_local.h +208 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_derived.h +401 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated.h +3971 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated_helper.h +177 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions/host.h +211 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions.h +33 -0
- cuda/cccl/headers/include/cuda/std/__atomic/order.h +159 -0
- cuda/cccl/headers/include/cuda/std/__atomic/platform/msvc_to_builtins.h +654 -0
- cuda/cccl/headers/include/cuda/std/__atomic/platform.h +93 -0
- cuda/cccl/headers/include/cuda/std/__atomic/scopes.h +105 -0
- cuda/cccl/headers/include/cuda/std/__atomic/types/base.h +249 -0
- cuda/cccl/headers/include/cuda/std/__atomic/types/common.h +104 -0
- cuda/cccl/headers/include/cuda/std/__atomic/types/locked.h +225 -0
- cuda/cccl/headers/include/cuda/std/__atomic/types/reference.h +72 -0
- cuda/cccl/headers/include/cuda/std/__atomic/types/small.h +228 -0
- cuda/cccl/headers/include/cuda/std/__atomic/types.h +52 -0
- cuda/cccl/headers/include/cuda/std/__atomic/wait/notify_wait.h +95 -0
- cuda/cccl/headers/include/cuda/std/__atomic/wait/polling.h +65 -0
- cuda/cccl/headers/include/cuda/std/__barrier/barrier.h +227 -0
- cuda/cccl/headers/include/cuda/std/__barrier/empty_completion.h +37 -0
- cuda/cccl/headers/include/cuda/std/__barrier/poll_tester.h +82 -0
- cuda/cccl/headers/include/cuda/std/__bit/bit_cast.h +76 -0
- cuda/cccl/headers/include/cuda/std/__bit/byteswap.h +185 -0
- cuda/cccl/headers/include/cuda/std/__bit/countl.h +174 -0
- cuda/cccl/headers/include/cuda/std/__bit/countr.h +185 -0
- cuda/cccl/headers/include/cuda/std/__bit/endian.h +39 -0
- cuda/cccl/headers/include/cuda/std/__bit/has_single_bit.h +43 -0
- cuda/cccl/headers/include/cuda/std/__bit/integral.h +126 -0
- cuda/cccl/headers/include/cuda/std/__bit/popcount.h +154 -0
- cuda/cccl/headers/include/cuda/std/__bit/reference.h +1272 -0
- cuda/cccl/headers/include/cuda/std/__bit/rotate.h +94 -0
- cuda/cccl/headers/include/cuda/std/__cccl/algorithm_wrapper.h +36 -0
- cuda/cccl/headers/include/cuda/std/__cccl/architecture.h +78 -0
- cuda/cccl/headers/include/cuda/std/__cccl/assert.h +161 -0
- cuda/cccl/headers/include/cuda/std/__cccl/attributes.h +206 -0
- cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +673 -0
- cuda/cccl/headers/include/cuda/std/__cccl/compiler.h +217 -0
- cuda/cccl/headers/include/cuda/std/__cccl/cuda_capabilities.h +51 -0
- cuda/cccl/headers/include/cuda/std/__cccl/cuda_toolkit.h +56 -0
- cuda/cccl/headers/include/cuda/std/__cccl/deprecated.h +88 -0
- cuda/cccl/headers/include/cuda/std/__cccl/diagnostic.h +131 -0
- cuda/cccl/headers/include/cuda/std/__cccl/dialect.h +123 -0
- cuda/cccl/headers/include/cuda/std/__cccl/epilogue.h +344 -0
- cuda/cccl/headers/include/cuda/std/__cccl/exceptions.h +91 -0
- cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +74 -0
- cuda/cccl/headers/include/cuda/std/__cccl/extended_data_types.h +160 -0
- cuda/cccl/headers/include/cuda/std/__cccl/host_std_lib.h +52 -0
- cuda/cccl/headers/include/cuda/std/__cccl/is_non_narrowing_convertible.h +73 -0
- cuda/cccl/headers/include/cuda/std/__cccl/memory_wrapper.h +36 -0
- cuda/cccl/headers/include/cuda/std/__cccl/numeric_wrapper.h +36 -0
- cuda/cccl/headers/include/cuda/std/__cccl/os.h +54 -0
- cuda/cccl/headers/include/cuda/std/__cccl/preprocessor.h +1286 -0
- cuda/cccl/headers/include/cuda/std/__cccl/prologue.h +281 -0
- cuda/cccl/headers/include/cuda/std/__cccl/ptx_isa.h +253 -0
- cuda/cccl/headers/include/cuda/std/__cccl/rtti.h +72 -0
- cuda/cccl/headers/include/cuda/std/__cccl/sequence_access.h +87 -0
- cuda/cccl/headers/include/cuda/std/__cccl/system_header.h +38 -0
- cuda/cccl/headers/include/cuda/std/__cccl/unreachable.h +31 -0
- cuda/cccl/headers/include/cuda/std/__cccl/version.h +26 -0
- cuda/cccl/headers/include/cuda/std/__cccl/visibility.h +171 -0
- cuda/cccl/headers/include/cuda/std/__charconv/chars_format.h +81 -0
- cuda/cccl/headers/include/cuda/std/__charconv/from_chars.h +154 -0
- cuda/cccl/headers/include/cuda/std/__charconv/from_chars_result.h +56 -0
- cuda/cccl/headers/include/cuda/std/__charconv/to_chars.h +148 -0
- cuda/cccl/headers/include/cuda/std/__charconv/to_chars_result.h +56 -0
- cuda/cccl/headers/include/cuda/std/__charconv_ +31 -0
- cuda/cccl/headers/include/cuda/std/__chrono/calendar.h +54 -0
- cuda/cccl/headers/include/cuda/std/__chrono/day.h +162 -0
- cuda/cccl/headers/include/cuda/std/__chrono/duration.h +503 -0
- cuda/cccl/headers/include/cuda/std/__chrono/file_clock.h +55 -0
- cuda/cccl/headers/include/cuda/std/__chrono/high_resolution_clock.h +46 -0
- cuda/cccl/headers/include/cuda/std/__chrono/month.h +187 -0
- cuda/cccl/headers/include/cuda/std/__chrono/steady_clock.h +60 -0
- cuda/cccl/headers/include/cuda/std/__chrono/system_clock.h +80 -0
- cuda/cccl/headers/include/cuda/std/__chrono/time_point.h +259 -0
- cuda/cccl/headers/include/cuda/std/__chrono/year.h +186 -0
- cuda/cccl/headers/include/cuda/std/__cmath/abs.h +127 -0
- cuda/cccl/headers/include/cuda/std/__cmath/copysign.h +88 -0
- cuda/cccl/headers/include/cuda/std/__cmath/error_functions.h +200 -0
- cuda/cccl/headers/include/cuda/std/__cmath/exponential_functions.h +784 -0
- cuda/cccl/headers/include/cuda/std/__cmath/fdim.h +118 -0
- cuda/cccl/headers/include/cuda/std/__cmath/fma.h +125 -0
- cuda/cccl/headers/include/cuda/std/__cmath/fpclassify.h +231 -0
- cuda/cccl/headers/include/cuda/std/__cmath/gamma.h +205 -0
- cuda/cccl/headers/include/cuda/std/__cmath/hyperbolic_functions.h +286 -0
- cuda/cccl/headers/include/cuda/std/__cmath/hypot.h +221 -0
- cuda/cccl/headers/include/cuda/std/__cmath/inverse_hyperbolic_functions.h +286 -0
- cuda/cccl/headers/include/cuda/std/__cmath/inverse_trigonometric_functions.h +371 -0
- cuda/cccl/headers/include/cuda/std/__cmath/isfinite.h +167 -0
- cuda/cccl/headers/include/cuda/std/__cmath/isinf.h +205 -0
- cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +186 -0
- cuda/cccl/headers/include/cuda/std/__cmath/isnormal.h +138 -0
- cuda/cccl/headers/include/cuda/std/__cmath/lerp.h +101 -0
- cuda/cccl/headers/include/cuda/std/__cmath/logarithms.h +534 -0
- cuda/cccl/headers/include/cuda/std/__cmath/min_max.h +287 -0
- cuda/cccl/headers/include/cuda/std/__cmath/modulo.h +208 -0
- cuda/cccl/headers/include/cuda/std/__cmath/nan.h +54 -0
- cuda/cccl/headers/include/cuda/std/__cmath/remainder.h +206 -0
- cuda/cccl/headers/include/cuda/std/__cmath/roots.h +199 -0
- cuda/cccl/headers/include/cuda/std/__cmath/rounding_functions.h +984 -0
- cuda/cccl/headers/include/cuda/std/__cmath/signbit.h +56 -0
- cuda/cccl/headers/include/cuda/std/__cmath/traits.h +238 -0
- cuda/cccl/headers/include/cuda/std/__cmath/trigonometric_functions.h +328 -0
- cuda/cccl/headers/include/cuda/std/__complex/arg.h +84 -0
- cuda/cccl/headers/include/cuda/std/__complex/complex.h +669 -0
- cuda/cccl/headers/include/cuda/std/__complex/exponential_functions.h +411 -0
- cuda/cccl/headers/include/cuda/std/__complex/hyperbolic_functions.h +117 -0
- cuda/cccl/headers/include/cuda/std/__complex/inverse_hyperbolic_functions.h +216 -0
- cuda/cccl/headers/include/cuda/std/__complex/inverse_trigonometric_functions.h +131 -0
- cuda/cccl/headers/include/cuda/std/__complex/literals.h +86 -0
- cuda/cccl/headers/include/cuda/std/__complex/logarithms.h +303 -0
- cuda/cccl/headers/include/cuda/std/__complex/math.h +159 -0
- cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +323 -0
- cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +322 -0
- cuda/cccl/headers/include/cuda/std/__complex/roots.h +214 -0
- cuda/cccl/headers/include/cuda/std/__complex/trigonometric_functions.h +61 -0
- cuda/cccl/headers/include/cuda/std/__complex/tuple.h +107 -0
- cuda/cccl/headers/include/cuda/std/__complex/vector_support.h +130 -0
- cuda/cccl/headers/include/cuda/std/__concepts/arithmetic.h +56 -0
- cuda/cccl/headers/include/cuda/std/__concepts/assignable.h +64 -0
- cuda/cccl/headers/include/cuda/std/__concepts/boolean_testable.h +63 -0
- cuda/cccl/headers/include/cuda/std/__concepts/class_or_enum.h +45 -0
- cuda/cccl/headers/include/cuda/std/__concepts/common_reference_with.h +69 -0
- cuda/cccl/headers/include/cuda/std/__concepts/common_with.h +82 -0
- cuda/cccl/headers/include/cuda/std/__concepts/concept_macros.h +341 -0
- cuda/cccl/headers/include/cuda/std/__concepts/constructible.h +174 -0
- cuda/cccl/headers/include/cuda/std/__concepts/convertible_to.h +70 -0
- cuda/cccl/headers/include/cuda/std/__concepts/copyable.h +60 -0
- cuda/cccl/headers/include/cuda/std/__concepts/derived_from.h +56 -0
- cuda/cccl/headers/include/cuda/std/__concepts/destructible.h +76 -0
- cuda/cccl/headers/include/cuda/std/__concepts/different_from.h +38 -0
- cuda/cccl/headers/include/cuda/std/__concepts/equality_comparable.h +100 -0
- cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +80 -0
- cuda/cccl/headers/include/cuda/std/__concepts/movable.h +58 -0
- cuda/cccl/headers/include/cuda/std/__concepts/predicate.h +54 -0
- cuda/cccl/headers/include/cuda/std/__concepts/regular.h +54 -0
- cuda/cccl/headers/include/cuda/std/__concepts/relation.h +77 -0
- cuda/cccl/headers/include/cuda/std/__concepts/same_as.h +39 -0
- cuda/cccl/headers/include/cuda/std/__concepts/semiregular.h +54 -0
- cuda/cccl/headers/include/cuda/std/__concepts/swappable.h +206 -0
- cuda/cccl/headers/include/cuda/std/__concepts/totally_ordered.h +101 -0
- cuda/cccl/headers/include/cuda/std/__cstddef/byte.h +113 -0
- cuda/cccl/headers/include/cuda/std/__cstddef/types.h +52 -0
- cuda/cccl/headers/include/cuda/std/__cstdlib/abs.h +57 -0
- cuda/cccl/headers/include/cuda/std/__cstdlib/aligned_alloc.h +66 -0
- cuda/cccl/headers/include/cuda/std/__cstdlib/div.h +96 -0
- cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +70 -0
- cuda/cccl/headers/include/cuda/std/__cstring/memcpy.h +61 -0
- cuda/cccl/headers/include/cuda/std/__cstring/memset.h +46 -0
- cuda/cccl/headers/include/cuda/std/__cuda/api_wrapper.h +62 -0
- cuda/cccl/headers/include/cuda/std/__exception/cuda_error.h +139 -0
- cuda/cccl/headers/include/cuda/std/__exception/terminate.h +73 -0
- cuda/cccl/headers/include/cuda/std/__execution/env.h +455 -0
- cuda/cccl/headers/include/cuda/std/__execution/policy.h +88 -0
- cuda/cccl/headers/include/cuda/std/__expected/bad_expected_access.h +127 -0
- cuda/cccl/headers/include/cuda/std/__expected/expected.h +1941 -0
- cuda/cccl/headers/include/cuda/std/__expected/expected_base.h +1050 -0
- cuda/cccl/headers/include/cuda/std/__expected/unexpect.h +37 -0
- cuda/cccl/headers/include/cuda/std/__expected/unexpected.h +165 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/arithmetic.h +56 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/cast.h +812 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/cccl_fp.h +125 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/common_type.h +48 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/constants.h +376 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/conversion_rank_order.h +124 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/cuda_fp_types.h +116 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/decompose.h +69 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/format.h +162 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/fp.h +40 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/mask.h +78 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/native_type.h +81 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/overflow_handler.h +139 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/properties.h +229 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/storage.h +248 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/traits.h +172 -0
- cuda/cccl/headers/include/cuda/std/__format/buffer.h +48 -0
- cuda/cccl/headers/include/cuda/std/__format/concepts.h +69 -0
- cuda/cccl/headers/include/cuda/std/__format/format_arg.h +282 -0
- cuda/cccl/headers/include/cuda/std/__format/format_arg_store.h +279 -0
- cuda/cccl/headers/include/cuda/std/__format/format_args.h +122 -0
- cuda/cccl/headers/include/cuda/std/__format/format_context.h +92 -0
- cuda/cccl/headers/include/cuda/std/__format/format_error.h +76 -0
- cuda/cccl/headers/include/cuda/std/__format/format_integral.h +237 -0
- cuda/cccl/headers/include/cuda/std/__format/format_parse_context.h +124 -0
- cuda/cccl/headers/include/cuda/std/__format/format_spec_parser.h +1230 -0
- cuda/cccl/headers/include/cuda/std/__format/formatter.h +59 -0
- cuda/cccl/headers/include/cuda/std/__format/formatters/bool.h +101 -0
- cuda/cccl/headers/include/cuda/std/__format/formatters/char.h +124 -0
- cuda/cccl/headers/include/cuda/std/__format/formatters/fp.h +101 -0
- cuda/cccl/headers/include/cuda/std/__format/formatters/int.h +174 -0
- cuda/cccl/headers/include/cuda/std/__format/formatters/ptr.h +104 -0
- cuda/cccl/headers/include/cuda/std/__format/formatters/str.h +178 -0
- cuda/cccl/headers/include/cuda/std/__format/output_utils.h +272 -0
- cuda/cccl/headers/include/cuda/std/__format/parse_arg_id.h +138 -0
- cuda/cccl/headers/include/cuda/std/__format_ +45 -0
- cuda/cccl/headers/include/cuda/std/__functional/binary_function.h +63 -0
- cuda/cccl/headers/include/cuda/std/__functional/binary_negate.h +65 -0
- cuda/cccl/headers/include/cuda/std/__functional/bind.h +334 -0
- cuda/cccl/headers/include/cuda/std/__functional/bind_back.h +80 -0
- cuda/cccl/headers/include/cuda/std/__functional/bind_front.h +73 -0
- cuda/cccl/headers/include/cuda/std/__functional/binder1st.h +74 -0
- cuda/cccl/headers/include/cuda/std/__functional/binder2nd.h +74 -0
- cuda/cccl/headers/include/cuda/std/__functional/compose.h +68 -0
- cuda/cccl/headers/include/cuda/std/__functional/default_searcher.h +75 -0
- cuda/cccl/headers/include/cuda/std/__functional/function.h +1275 -0
- cuda/cccl/headers/include/cuda/std/__functional/hash.h +649 -0
- cuda/cccl/headers/include/cuda/std/__functional/identity.h +57 -0
- cuda/cccl/headers/include/cuda/std/__functional/invoke.h +296 -0
- cuda/cccl/headers/include/cuda/std/__functional/is_transparent.h +41 -0
- cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +66 -0
- cuda/cccl/headers/include/cuda/std/__functional/mem_fun_ref.h +211 -0
- cuda/cccl/headers/include/cuda/std/__functional/not_fn.h +120 -0
- cuda/cccl/headers/include/cuda/std/__functional/operations.h +534 -0
- cuda/cccl/headers/include/cuda/std/__functional/perfect_forward.h +128 -0
- cuda/cccl/headers/include/cuda/std/__functional/pointer_to_binary_function.h +64 -0
- cuda/cccl/headers/include/cuda/std/__functional/pointer_to_unary_function.h +63 -0
- cuda/cccl/headers/include/cuda/std/__functional/ranges_operations.h +113 -0
- cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +113 -0
- cuda/cccl/headers/include/cuda/std/__functional/unary_function.h +62 -0
- cuda/cccl/headers/include/cuda/std/__functional/unary_negate.h +65 -0
- cuda/cccl/headers/include/cuda/std/__functional/unwrap_ref.h +56 -0
- cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +262 -0
- cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +53 -0
- cuda/cccl/headers/include/cuda/std/__fwd/array.h +42 -0
- cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +74 -0
- cuda/cccl/headers/include/cuda/std/__fwd/complex.h +75 -0
- cuda/cccl/headers/include/cuda/std/__fwd/expected.h +46 -0
- cuda/cccl/headers/include/cuda/std/__fwd/format.h +84 -0
- cuda/cccl/headers/include/cuda/std/__fwd/fp.h +37 -0
- cuda/cccl/headers/include/cuda/std/__fwd/get.h +123 -0
- cuda/cccl/headers/include/cuda/std/__fwd/hash.h +34 -0
- cuda/cccl/headers/include/cuda/std/__fwd/iterator.h +43 -0
- cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +122 -0
- cuda/cccl/headers/include/cuda/std/__fwd/memory_resource.h +37 -0
- cuda/cccl/headers/include/cuda/std/__fwd/optional.h +39 -0
- cuda/cccl/headers/include/cuda/std/__fwd/pair.h +47 -0
- cuda/cccl/headers/include/cuda/std/__fwd/reference_wrapper.h +34 -0
- cuda/cccl/headers/include/cuda/std/__fwd/span.h +45 -0
- cuda/cccl/headers/include/cuda/std/__fwd/string.h +112 -0
- cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +91 -0
- cuda/cccl/headers/include/cuda/std/__fwd/subrange.h +55 -0
- cuda/cccl/headers/include/cuda/std/__fwd/tuple.h +34 -0
- cuda/cccl/headers/include/cuda/std/__fwd/unexpected.h +40 -0
- cuda/cccl/headers/include/cuda/std/__internal/cpp_dialect.h +44 -0
- cuda/cccl/headers/include/cuda/std/__internal/features.h +72 -0
- cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +143 -0
- cuda/cccl/headers/include/cuda/std/__iterator/access.h +128 -0
- cuda/cccl/headers/include/cuda/std/__iterator/advance.h +228 -0
- cuda/cccl/headers/include/cuda/std/__iterator/back_insert_iterator.h +163 -0
- cuda/cccl/headers/include/cuda/std/__iterator/bounded_iter.h +253 -0
- cuda/cccl/headers/include/cuda/std/__iterator/concepts.h +645 -0
- cuda/cccl/headers/include/cuda/std/__iterator/counted_iterator.h +464 -0
- cuda/cccl/headers/include/cuda/std/__iterator/data.h +61 -0
- cuda/cccl/headers/include/cuda/std/__iterator/default_sentinel.h +36 -0
- cuda/cccl/headers/include/cuda/std/__iterator/distance.h +126 -0
- cuda/cccl/headers/include/cuda/std/__iterator/empty.h +53 -0
- cuda/cccl/headers/include/cuda/std/__iterator/erase_if_container.h +53 -0
- cuda/cccl/headers/include/cuda/std/__iterator/front_insert_iterator.h +99 -0
- cuda/cccl/headers/include/cuda/std/__iterator/incrementable_traits.h +143 -0
- cuda/cccl/headers/include/cuda/std/__iterator/indirectly_comparable.h +55 -0
- cuda/cccl/headers/include/cuda/std/__iterator/insert_iterator.h +107 -0
- cuda/cccl/headers/include/cuda/std/__iterator/istream_iterator.h +146 -0
- cuda/cccl/headers/include/cuda/std/__iterator/istreambuf_iterator.h +161 -0
- cuda/cccl/headers/include/cuda/std/__iterator/iter_move.h +161 -0
- cuda/cccl/headers/include/cuda/std/__iterator/iter_swap.h +163 -0
- cuda/cccl/headers/include/cuda/std/__iterator/iterator.h +44 -0
- cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +847 -0
- cuda/cccl/headers/include/cuda/std/__iterator/mergeable.h +72 -0
- cuda/cccl/headers/include/cuda/std/__iterator/move_iterator.h +432 -0
- cuda/cccl/headers/include/cuda/std/__iterator/move_sentinel.h +73 -0
- cuda/cccl/headers/include/cuda/std/__iterator/next.h +101 -0
- cuda/cccl/headers/include/cuda/std/__iterator/ostream_iterator.h +95 -0
- cuda/cccl/headers/include/cuda/std/__iterator/ostreambuf_iterator.h +100 -0
- cuda/cccl/headers/include/cuda/std/__iterator/permutable.h +54 -0
- cuda/cccl/headers/include/cuda/std/__iterator/prev.h +90 -0
- cuda/cccl/headers/include/cuda/std/__iterator/projected.h +61 -0
- cuda/cccl/headers/include/cuda/std/__iterator/readable_traits.h +156 -0
- cuda/cccl/headers/include/cuda/std/__iterator/reverse_access.h +142 -0
- cuda/cccl/headers/include/cuda/std/__iterator/reverse_iterator.h +371 -0
- cuda/cccl/headers/include/cuda/std/__iterator/size.h +69 -0
- cuda/cccl/headers/include/cuda/std/__iterator/sortable.h +55 -0
- cuda/cccl/headers/include/cuda/std/__iterator/unreachable_sentinel.h +84 -0
- cuda/cccl/headers/include/cuda/std/__iterator/wrap_iter.h +245 -0
- cuda/cccl/headers/include/cuda/std/__latch/latch.h +88 -0
- cuda/cccl/headers/include/cuda/std/__limits/numeric_limits.h +617 -0
- cuda/cccl/headers/include/cuda/std/__limits/numeric_limits_ext.h +753 -0
- cuda/cccl/headers/include/cuda/std/__linalg/conj_if_needed.h +78 -0
- cuda/cccl/headers/include/cuda/std/__linalg/conjugate_transposed.h +54 -0
- cuda/cccl/headers/include/cuda/std/__linalg/conjugated.h +139 -0
- cuda/cccl/headers/include/cuda/std/__linalg/scaled.h +132 -0
- cuda/cccl/headers/include/cuda/std/__linalg/transposed.h +321 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/aligned_accessor.h +97 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/concepts.h +139 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/default_accessor.h +73 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/empty_base.h +352 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +759 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/layout_left.h +314 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/layout_right.h +307 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/layout_stride.h +605 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +512 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_extents.h +193 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_helper.h +189 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_mapping.h +344 -0
- cuda/cccl/headers/include/cuda/std/__memory/addressof.h +67 -0
- cuda/cccl/headers/include/cuda/std/__memory/align.h +67 -0
- cuda/cccl/headers/include/cuda/std/__memory/allocate_at_least.h +81 -0
- cuda/cccl/headers/include/cuda/std/__memory/allocation_guard.h +100 -0
- cuda/cccl/headers/include/cuda/std/__memory/allocator.h +320 -0
- cuda/cccl/headers/include/cuda/std/__memory/allocator_arg_t.h +84 -0
- cuda/cccl/headers/include/cuda/std/__memory/allocator_destructor.h +59 -0
- cuda/cccl/headers/include/cuda/std/__memory/allocator_traits.h +525 -0
- cuda/cccl/headers/include/cuda/std/__memory/assume_aligned.h +60 -0
- cuda/cccl/headers/include/cuda/std/__memory/builtin_new_allocator.h +87 -0
- cuda/cccl/headers/include/cuda/std/__memory/compressed_pair.h +225 -0
- cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +246 -0
- cuda/cccl/headers/include/cuda/std/__memory/destruct_n.h +91 -0
- cuda/cccl/headers/include/cuda/std/__memory/is_sufficiently_aligned.h +46 -0
- cuda/cccl/headers/include/cuda/std/__memory/pointer_traits.h +246 -0
- cuda/cccl/headers/include/cuda/std/__memory/runtime_assume_aligned.h +62 -0
- cuda/cccl/headers/include/cuda/std/__memory/temporary_buffer.h +92 -0
- cuda/cccl/headers/include/cuda/std/__memory/uninitialized_algorithms.h +678 -0
- cuda/cccl/headers/include/cuda/std/__memory/unique_ptr.h +765 -0
- cuda/cccl/headers/include/cuda/std/__memory/uses_allocator.h +54 -0
- cuda/cccl/headers/include/cuda/std/__memory/voidify.h +41 -0
- cuda/cccl/headers/include/cuda/std/__memory_ +34 -0
- cuda/cccl/headers/include/cuda/std/__new/allocate.h +126 -0
- cuda/cccl/headers/include/cuda/std/__new/bad_alloc.h +57 -0
- cuda/cccl/headers/include/cuda/std/__new/launder.h +53 -0
- cuda/cccl/headers/include/cuda/std/__new_ +29 -0
- cuda/cccl/headers/include/cuda/std/__numeric/accumulate.h +56 -0
- cuda/cccl/headers/include/cuda/std/__numeric/adjacent_difference.h +72 -0
- cuda/cccl/headers/include/cuda/std/__numeric/exclusive_scan.h +66 -0
- cuda/cccl/headers/include/cuda/std/__numeric/gcd_lcm.h +78 -0
- cuda/cccl/headers/include/cuda/std/__numeric/inclusive_scan.h +73 -0
- cuda/cccl/headers/include/cuda/std/__numeric/inner_product.h +62 -0
- cuda/cccl/headers/include/cuda/std/__numeric/iota.h +42 -0
- cuda/cccl/headers/include/cuda/std/__numeric/midpoint.h +97 -0
- cuda/cccl/headers/include/cuda/std/__numeric/partial_sum.h +69 -0
- cuda/cccl/headers/include/cuda/std/__numeric/reduce.h +60 -0
- cuda/cccl/headers/include/cuda/std/__numeric/transform_exclusive_scan.h +51 -0
- cuda/cccl/headers/include/cuda/std/__numeric/transform_inclusive_scan.h +65 -0
- cuda/cccl/headers/include/cuda/std/__numeric/transform_reduce.h +72 -0
- cuda/cccl/headers/include/cuda/std/__optional/bad_optional_access.h +74 -0
- cuda/cccl/headers/include/cuda/std/__optional/hash.h +53 -0
- cuda/cccl/headers/include/cuda/std/__optional/make_optional.h +61 -0
- cuda/cccl/headers/include/cuda/std/__optional/nullopt.h +43 -0
- cuda/cccl/headers/include/cuda/std/__optional/optional.h +859 -0
- cuda/cccl/headers/include/cuda/std/__optional/optional_base.h +433 -0
- cuda/cccl/headers/include/cuda/std/__optional/optional_ref.h +324 -0
- cuda/cccl/headers/include/cuda/std/__random/generate_canonical.h +56 -0
- cuda/cccl/headers/include/cuda/std/__random/is_seed_sequence.h +39 -0
- cuda/cccl/headers/include/cuda/std/__random/is_valid.h +106 -0
- cuda/cccl/headers/include/cuda/std/__random/linear_congruential_engine.h +398 -0
- cuda/cccl/headers/include/cuda/std/__random/uniform_int_distribution.h +335 -0
- cuda/cccl/headers/include/cuda/std/__random/uniform_real_distribution.h +183 -0
- cuda/cccl/headers/include/cuda/std/__random_ +29 -0
- cuda/cccl/headers/include/cuda/std/__ranges/access.h +303 -0
- cuda/cccl/headers/include/cuda/std/__ranges/all.h +98 -0
- cuda/cccl/headers/include/cuda/std/__ranges/compressed_movable_box.h +892 -0
- cuda/cccl/headers/include/cuda/std/__ranges/concepts.h +302 -0
- cuda/cccl/headers/include/cuda/std/__ranges/counted.h +90 -0
- cuda/cccl/headers/include/cuda/std/__ranges/dangling.h +54 -0
- cuda/cccl/headers/include/cuda/std/__ranges/data.h +136 -0
- cuda/cccl/headers/include/cuda/std/__ranges/empty.h +109 -0
- cuda/cccl/headers/include/cuda/std/__ranges/empty_view.h +77 -0
- cuda/cccl/headers/include/cuda/std/__ranges/enable_borrowed_range.h +41 -0
- cuda/cccl/headers/include/cuda/std/__ranges/enable_view.h +78 -0
- cuda/cccl/headers/include/cuda/std/__ranges/from_range.h +36 -0
- cuda/cccl/headers/include/cuda/std/__ranges/iota_view.h +266 -0
- cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +410 -0
- cuda/cccl/headers/include/cuda/std/__ranges/owning_view.h +162 -0
- cuda/cccl/headers/include/cuda/std/__ranges/range_adaptor.h +110 -0
- cuda/cccl/headers/include/cuda/std/__ranges/rbegin.h +175 -0
- cuda/cccl/headers/include/cuda/std/__ranges/ref_view.h +121 -0
- cuda/cccl/headers/include/cuda/std/__ranges/rend.h +182 -0
- cuda/cccl/headers/include/cuda/std/__ranges/repeat_view.h +345 -0
- cuda/cccl/headers/include/cuda/std/__ranges/single_view.h +155 -0
- cuda/cccl/headers/include/cuda/std/__ranges/size.h +201 -0
- cuda/cccl/headers/include/cuda/std/__ranges/subrange.h +513 -0
- cuda/cccl/headers/include/cuda/std/__ranges/take_view.h +476 -0
- cuda/cccl/headers/include/cuda/std/__ranges/take_while_view.h +259 -0
- cuda/cccl/headers/include/cuda/std/__ranges/transform_view.h +522 -0
- cuda/cccl/headers/include/cuda/std/__ranges/unwrap_end.h +53 -0
- cuda/cccl/headers/include/cuda/std/__ranges/view_interface.h +183 -0
- cuda/cccl/headers/include/cuda/std/__ranges/views.h +38 -0
- cuda/cccl/headers/include/cuda/std/__semaphore/atomic_semaphore.h +234 -0
- cuda/cccl/headers/include/cuda/std/__semaphore/counting_semaphore.h +51 -0
- cuda/cccl/headers/include/cuda/std/__string/char_traits.h +191 -0
- cuda/cccl/headers/include/cuda/std/__string/constexpr_c_functions.h +581 -0
- cuda/cccl/headers/include/cuda/std/__string/helper_functions.h +296 -0
- cuda/cccl/headers/include/cuda/std/__string/string_view.h +244 -0
- cuda/cccl/headers/include/cuda/std/__string_ +29 -0
- cuda/cccl/headers/include/cuda/std/__system_error/errc.h +51 -0
- cuda/cccl/headers/include/cuda/std/__system_error_ +26 -0
- cuda/cccl/headers/include/cuda/std/__thread/threading_support.h +106 -0
- cuda/cccl/headers/include/cuda/std/__thread/threading_support_cuda.h +47 -0
- cuda/cccl/headers/include/cuda/std/__thread/threading_support_external.h +41 -0
- cuda/cccl/headers/include/cuda/std/__thread/threading_support_pthread.h +143 -0
- cuda/cccl/headers/include/cuda/std/__thread/threading_support_win32.h +87 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/ignore.h +51 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/make_tuple_types.h +120 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/sfinae_helpers.h +260 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/structured_bindings.h +212 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_element.h +70 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_indices.h +44 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +84 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like_ext.h +68 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_size.h +79 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_types.h +35 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/vector_types.h +290 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/add_const.h +40 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/add_cv.h +40 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/add_lvalue_reference.h +62 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/add_pointer.h +65 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/add_rvalue_reference.h +62 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/add_volatile.h +40 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/aligned_storage.h +149 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/aligned_union.h +62 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/alignment_of.h +41 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/always_false.h +35 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/can_extract_key.h +68 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/common_reference.h +262 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/common_type.h +173 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/conditional.h +65 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/conjunction.h +67 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/copy_cv.h +50 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/copy_cvref.h +148 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/decay.h +83 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/dependent_type.h +35 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/disjunction.h +77 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/enable_if.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/extent.h +68 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/fold.h +47 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/has_unique_object_representation.h +46 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/has_virtual_destructor.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/integral_constant.h +62 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_abstract.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_aggregate.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_allocator.h +46 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_arithmetic.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_array.h +62 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_assignable.h +78 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_base_of.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_bounded_array.h +44 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_callable.h +60 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_char_like_type.h +38 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_class.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_compound.h +58 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_const.h +56 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_constant_evaluated.h +51 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_constructible.h +174 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_convertible.h +211 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_assignable.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_constructible.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_core_convertible.h +47 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_corresponding_member.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_default_constructible.h +40 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_destructible.h +115 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_empty.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_enum.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_execution_policy.h +81 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_arithmetic.h +38 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_floating_point.h +79 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_final.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_floating_point.h +53 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_function.h +61 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_fundamental.h +56 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_implicitly_default_constructible.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_integer.h +45 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_integral.h +123 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_layout_compatible.h +45 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_literal_type.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_member_function_pointer.h +79 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_member_object_pointer.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_member_pointer.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_move_assignable.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_move_constructible.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_assignable.h +70 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_constructible.h +84 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_convertible.h +59 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_assignable.h +60 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_constructible.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_default_constructible.h +54 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_destructible.h +82 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_assignable.h +60 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_constructible.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_null_pointer.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_object.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_one_of.h +37 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_pod.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer.h +60 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_base_of.h +84 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_with_class.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_polymorphic.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +121 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_reference.h +95 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_reference_wrapper.h +50 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_referenceable.h +55 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_same.h +88 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_scalar.h +60 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_scoped_enum.h +49 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_signed.h +65 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_signed_integer.h +59 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_standard_layout.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_swappable.h +202 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivial.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_assignable.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_constructible.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_assignable.h +46 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_constructible.h +45 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copyable.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_default_constructible.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_destructible.h +58 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_assignable.h +45 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_constructible.h +44 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_unbounded_array.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_union.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned.h +66 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned_integer.h +59 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_valid_expansion.h +41 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_void.h +55 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_volatile.h +56 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/lazy.h +35 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/make_const_lvalue_ref.h +36 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/make_nbit_int.h +107 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/make_signed.h +140 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/make_unsigned.h +151 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/maybe_const.h +36 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/nat.h +39 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/negation.h +44 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/num_bits.h +122 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/promote.h +163 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/rank.h +60 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/reference_constructs_from_temporary.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/reference_converts_from_temporary.h +56 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_all_extents.h +66 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_const.h +59 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_const_ref.h +37 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_cv.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_cvref.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_extent.h +65 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_pointer.h +73 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_reference.h +72 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_volatile.h +58 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +47 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/type_identity.h +40 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/type_list.h +1067 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/type_set.h +131 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/underlying_type.h +52 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/void_t.h +34 -0
- cuda/cccl/headers/include/cuda/std/__utility/as_const.h +52 -0
- cuda/cccl/headers/include/cuda/std/__utility/auto_cast.h +34 -0
- cuda/cccl/headers/include/cuda/std/__utility/cmp.h +116 -0
- cuda/cccl/headers/include/cuda/std/__utility/convert_to_integral.h +101 -0
- cuda/cccl/headers/include/cuda/std/__utility/declval.h +76 -0
- cuda/cccl/headers/include/cuda/std/__utility/exception_guard.h +161 -0
- cuda/cccl/headers/include/cuda/std/__utility/exchange.h +46 -0
- cuda/cccl/headers/include/cuda/std/__utility/forward.h +59 -0
- cuda/cccl/headers/include/cuda/std/__utility/forward_like.h +55 -0
- cuda/cccl/headers/include/cuda/std/__utility/in_place.h +86 -0
- cuda/cccl/headers/include/cuda/std/__utility/integer_sequence.h +251 -0
- cuda/cccl/headers/include/cuda/std/__utility/monostate.h +99 -0
- cuda/cccl/headers/include/cuda/std/__utility/move.h +74 -0
- cuda/cccl/headers/include/cuda/std/__utility/pair.h +791 -0
- cuda/cccl/headers/include/cuda/std/__utility/piecewise_construct.h +37 -0
- cuda/cccl/headers/include/cuda/std/__utility/pod_tuple.h +527 -0
- cuda/cccl/headers/include/cuda/std/__utility/priority_tag.h +40 -0
- cuda/cccl/headers/include/cuda/std/__utility/rel_ops.h +63 -0
- cuda/cccl/headers/include/cuda/std/__utility/swap.h +64 -0
- cuda/cccl/headers/include/cuda/std/__utility/to_underlying.h +40 -0
- cuda/cccl/headers/include/cuda/std/__utility/typeid.h +421 -0
- cuda/cccl/headers/include/cuda/std/__utility/undefined.h +34 -0
- cuda/cccl/headers/include/cuda/std/__utility/unreachable.h +37 -0
- cuda/cccl/headers/include/cuda/std/array +518 -0
- cuda/cccl/headers/include/cuda/std/atomic +810 -0
- cuda/cccl/headers/include/cuda/std/barrier +42 -0
- cuda/cccl/headers/include/cuda/std/bit +35 -0
- cuda/cccl/headers/include/cuda/std/bitset +994 -0
- cuda/cccl/headers/include/cuda/std/cassert +28 -0
- cuda/cccl/headers/include/cuda/std/ccomplex +15 -0
- cuda/cccl/headers/include/cuda/std/cfloat +59 -0
- cuda/cccl/headers/include/cuda/std/chrono +26 -0
- cuda/cccl/headers/include/cuda/std/climits +61 -0
- cuda/cccl/headers/include/cuda/std/cmath +87 -0
- cuda/cccl/headers/include/cuda/std/complex +50 -0
- cuda/cccl/headers/include/cuda/std/concepts +48 -0
- cuda/cccl/headers/include/cuda/std/cstddef +28 -0
- cuda/cccl/headers/include/cuda/std/cstdint +178 -0
- cuda/cccl/headers/include/cuda/std/cstdlib +30 -0
- cuda/cccl/headers/include/cuda/std/cstring +110 -0
- cuda/cccl/headers/include/cuda/std/ctime +154 -0
- cuda/cccl/headers/include/cuda/std/detail/__config +45 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +207 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/algorithm +1721 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/chrono +2509 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/iosfwd +128 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/stdexcept +120 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/tuple +1365 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +2144 -0
- cuda/cccl/headers/include/cuda/std/execution +29 -0
- cuda/cccl/headers/include/cuda/std/expected +30 -0
- cuda/cccl/headers/include/cuda/std/functional +56 -0
- cuda/cccl/headers/include/cuda/std/initializer_list +44 -0
- cuda/cccl/headers/include/cuda/std/inplace_vector +2170 -0
- cuda/cccl/headers/include/cuda/std/iterator +70 -0
- cuda/cccl/headers/include/cuda/std/latch +34 -0
- cuda/cccl/headers/include/cuda/std/limits +28 -0
- cuda/cccl/headers/include/cuda/std/linalg +30 -0
- cuda/cccl/headers/include/cuda/std/mdspan +38 -0
- cuda/cccl/headers/include/cuda/std/memory +39 -0
- cuda/cccl/headers/include/cuda/std/numbers +346 -0
- cuda/cccl/headers/include/cuda/std/numeric +41 -0
- cuda/cccl/headers/include/cuda/std/optional +31 -0
- cuda/cccl/headers/include/cuda/std/ranges +69 -0
- cuda/cccl/headers/include/cuda/std/ratio +416 -0
- cuda/cccl/headers/include/cuda/std/semaphore +31 -0
- cuda/cccl/headers/include/cuda/std/source_location +83 -0
- cuda/cccl/headers/include/cuda/std/span +628 -0
- cuda/cccl/headers/include/cuda/std/string_view +925 -0
- cuda/cccl/headers/include/cuda/std/tuple +26 -0
- cuda/cccl/headers/include/cuda/std/type_traits +177 -0
- cuda/cccl/headers/include/cuda/std/utility +70 -0
- cuda/cccl/headers/include/cuda/std/variant +25 -0
- cuda/cccl/headers/include/cuda/std/version +240 -0
- cuda/cccl/headers/include/cuda/stream +31 -0
- cuda/cccl/headers/include/cuda/stream_ref +59 -0
- cuda/cccl/headers/include/cuda/type_traits +27 -0
- cuda/cccl/headers/include/cuda/utility +28 -0
- cuda/cccl/headers/include/cuda/version +16 -0
- cuda/cccl/headers/include/cuda/warp +28 -0
- cuda/cccl/headers/include/cuda/work_stealing +26 -0
- cuda/cccl/headers/include/nv/detail/__preprocessor +169 -0
- cuda/cccl/headers/include/nv/detail/__target_macros +718 -0
- cuda/cccl/headers/include/nv/target +240 -0
- cuda/cccl/headers/include/thrust/addressof.h +22 -0
- cuda/cccl/headers/include/thrust/adjacent_difference.h +254 -0
- cuda/cccl/headers/include/thrust/advance.h +57 -0
- cuda/cccl/headers/include/thrust/allocate_unique.h +299 -0
- cuda/cccl/headers/include/thrust/binary_search.h +1910 -0
- cuda/cccl/headers/include/thrust/complex.h +858 -0
- cuda/cccl/headers/include/thrust/copy.h +506 -0
- cuda/cccl/headers/include/thrust/count.h +245 -0
- cuda/cccl/headers/include/thrust/detail/adjacent_difference.inl +95 -0
- cuda/cccl/headers/include/thrust/detail/alignment.h +81 -0
- cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +626 -0
- cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +192 -0
- cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +96 -0
- cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +81 -0
- cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +78 -0
- cuda/cccl/headers/include/thrust/detail/allocator/no_throw_allocator.h +76 -0
- cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +115 -0
- cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +116 -0
- cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +77 -0
- cuda/cccl/headers/include/thrust/detail/allocator_aware_execution_policy.h +99 -0
- cuda/cccl/headers/include/thrust/detail/binary_search.inl +525 -0
- cuda/cccl/headers/include/thrust/detail/caching_allocator.h +47 -0
- cuda/cccl/headers/include/thrust/detail/complex/arithmetic.h +255 -0
- cuda/cccl/headers/include/thrust/detail/complex/c99math.h +64 -0
- cuda/cccl/headers/include/thrust/detail/complex/catrig.h +875 -0
- cuda/cccl/headers/include/thrust/detail/complex/catrigf.h +589 -0
- cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +233 -0
- cuda/cccl/headers/include/thrust/detail/complex/ccoshf.h +161 -0
- cuda/cccl/headers/include/thrust/detail/complex/cexp.h +195 -0
- cuda/cccl/headers/include/thrust/detail/complex/cexpf.h +173 -0
- cuda/cccl/headers/include/thrust/detail/complex/clog.h +223 -0
- cuda/cccl/headers/include/thrust/detail/complex/clogf.h +210 -0
- cuda/cccl/headers/include/thrust/detail/complex/complex.inl +263 -0
- cuda/cccl/headers/include/thrust/detail/complex/cpow.h +50 -0
- cuda/cccl/headers/include/thrust/detail/complex/cproj.h +81 -0
- cuda/cccl/headers/include/thrust/detail/complex/csinh.h +228 -0
- cuda/cccl/headers/include/thrust/detail/complex/csinhf.h +168 -0
- cuda/cccl/headers/include/thrust/detail/complex/csqrt.h +178 -0
- cuda/cccl/headers/include/thrust/detail/complex/csqrtf.h +174 -0
- cuda/cccl/headers/include/thrust/detail/complex/ctanh.h +208 -0
- cuda/cccl/headers/include/thrust/detail/complex/ctanhf.h +133 -0
- cuda/cccl/headers/include/thrust/detail/complex/math_private.h +138 -0
- cuda/cccl/headers/include/thrust/detail/complex/stream.h +73 -0
- cuda/cccl/headers/include/thrust/detail/config/compiler.h +38 -0
- cuda/cccl/headers/include/thrust/detail/config/config.h +43 -0
- cuda/cccl/headers/include/thrust/detail/config/cpp_dialect.h +78 -0
- cuda/cccl/headers/include/thrust/detail/config/device_system.h +55 -0
- cuda/cccl/headers/include/thrust/detail/config/host_system.h +48 -0
- cuda/cccl/headers/include/thrust/detail/config/memory_resource.h +41 -0
- cuda/cccl/headers/include/thrust/detail/config/namespace.h +162 -0
- cuda/cccl/headers/include/thrust/detail/config/simple_defines.h +48 -0
- cuda/cccl/headers/include/thrust/detail/config.h +36 -0
- cuda/cccl/headers/include/thrust/detail/contiguous_storage.h +228 -0
- cuda/cccl/headers/include/thrust/detail/contiguous_storage.inl +273 -0
- cuda/cccl/headers/include/thrust/detail/copy.h +72 -0
- cuda/cccl/headers/include/thrust/detail/copy.inl +129 -0
- cuda/cccl/headers/include/thrust/detail/copy_if.h +62 -0
- cuda/cccl/headers/include/thrust/detail/copy_if.inl +102 -0
- cuda/cccl/headers/include/thrust/detail/count.h +55 -0
- cuda/cccl/headers/include/thrust/detail/count.inl +89 -0
- cuda/cccl/headers/include/thrust/detail/device_ptr.inl +48 -0
- cuda/cccl/headers/include/thrust/detail/equal.inl +93 -0
- cuda/cccl/headers/include/thrust/detail/event_error.h +160 -0
- cuda/cccl/headers/include/thrust/detail/execute_with_allocator.h +81 -0
- cuda/cccl/headers/include/thrust/detail/execute_with_allocator_fwd.h +61 -0
- cuda/cccl/headers/include/thrust/detail/execution_policy.h +120 -0
- cuda/cccl/headers/include/thrust/detail/extrema.inl +184 -0
- cuda/cccl/headers/include/thrust/detail/fill.inl +86 -0
- cuda/cccl/headers/include/thrust/detail/find.inl +113 -0
- cuda/cccl/headers/include/thrust/detail/for_each.inl +84 -0
- cuda/cccl/headers/include/thrust/detail/function.h +49 -0
- cuda/cccl/headers/include/thrust/detail/functional/actor.h +214 -0
- cuda/cccl/headers/include/thrust/detail/functional/operators.h +386 -0
- cuda/cccl/headers/include/thrust/detail/gather.inl +173 -0
- cuda/cccl/headers/include/thrust/detail/generate.inl +86 -0
- cuda/cccl/headers/include/thrust/detail/get_iterator_value.h +62 -0
- cuda/cccl/headers/include/thrust/detail/inner_product.inl +118 -0
- cuda/cccl/headers/include/thrust/detail/internal_functional.h +328 -0
- cuda/cccl/headers/include/thrust/detail/logical.inl +113 -0
- cuda/cccl/headers/include/thrust/detail/malloc_and_free.h +77 -0
- cuda/cccl/headers/include/thrust/detail/malloc_and_free_fwd.h +45 -0
- cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +209 -0
- cuda/cccl/headers/include/thrust/detail/merge.inl +276 -0
- cuda/cccl/headers/include/thrust/detail/mismatch.inl +94 -0
- cuda/cccl/headers/include/thrust/detail/overlapped_copy.h +124 -0
- cuda/cccl/headers/include/thrust/detail/partition.inl +378 -0
- cuda/cccl/headers/include/thrust/detail/pointer.h +309 -0
- cuda/cccl/headers/include/thrust/detail/preprocessor.h +652 -0
- cuda/cccl/headers/include/thrust/detail/random_bijection.h +177 -0
- cuda/cccl/headers/include/thrust/detail/range/head_flags.h +116 -0
- cuda/cccl/headers/include/thrust/detail/range/tail_flags.h +130 -0
- cuda/cccl/headers/include/thrust/detail/raw_pointer_cast.h +52 -0
- cuda/cccl/headers/include/thrust/detail/raw_reference_cast.h +192 -0
- cuda/cccl/headers/include/thrust/detail/reduce.inl +377 -0
- cuda/cccl/headers/include/thrust/detail/reference.h +494 -0
- cuda/cccl/headers/include/thrust/detail/reference_forward_declaration.h +35 -0
- cuda/cccl/headers/include/thrust/detail/remove.inl +213 -0
- cuda/cccl/headers/include/thrust/detail/replace.inl +231 -0
- cuda/cccl/headers/include/thrust/detail/reverse.inl +88 -0
- cuda/cccl/headers/include/thrust/detail/scan.inl +518 -0
- cuda/cccl/headers/include/thrust/detail/scatter.inl +157 -0
- cuda/cccl/headers/include/thrust/detail/seq.h +66 -0
- cuda/cccl/headers/include/thrust/detail/sequence.inl +109 -0
- cuda/cccl/headers/include/thrust/detail/set_operations.inl +981 -0
- cuda/cccl/headers/include/thrust/detail/shuffle.inl +86 -0
- cuda/cccl/headers/include/thrust/detail/sort.inl +373 -0
- cuda/cccl/headers/include/thrust/detail/static_assert.h +58 -0
- cuda/cccl/headers/include/thrust/detail/static_map.h +167 -0
- cuda/cccl/headers/include/thrust/detail/swap_ranges.inl +65 -0
- cuda/cccl/headers/include/thrust/detail/tabulate.inl +62 -0
- cuda/cccl/headers/include/thrust/detail/temporary_array.h +153 -0
- cuda/cccl/headers/include/thrust/detail/temporary_array.inl +120 -0
- cuda/cccl/headers/include/thrust/detail/temporary_buffer.h +81 -0
- cuda/cccl/headers/include/thrust/detail/transform_reduce.inl +69 -0
- cuda/cccl/headers/include/thrust/detail/transform_scan.inl +161 -0
- cuda/cccl/headers/include/thrust/detail/trivial_sequence.h +130 -0
- cuda/cccl/headers/include/thrust/detail/tuple_meta_transform.h +61 -0
- cuda/cccl/headers/include/thrust/detail/type_deduction.h +62 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/has_member_function.h +47 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/has_nested_type.h +43 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/is_call_possible.h +167 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/is_commutative.h +69 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/is_metafunction_defined.h +39 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/is_thrust_pointer.h +59 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/iterator/is_output_iterator.h +46 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/minimum_type.h +89 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/pointer_traits.h +332 -0
- cuda/cccl/headers/include/thrust/detail/type_traits.h +136 -0
- cuda/cccl/headers/include/thrust/detail/uninitialized_copy.inl +90 -0
- cuda/cccl/headers/include/thrust/detail/uninitialized_fill.inl +86 -0
- cuda/cccl/headers/include/thrust/detail/unique.inl +373 -0
- cuda/cccl/headers/include/thrust/detail/use_default.h +34 -0
- cuda/cccl/headers/include/thrust/detail/vector_base.h +613 -0
- cuda/cccl/headers/include/thrust/detail/vector_base.inl +1210 -0
- cuda/cccl/headers/include/thrust/device_allocator.h +134 -0
- cuda/cccl/headers/include/thrust/device_delete.h +74 -0
- cuda/cccl/headers/include/thrust/device_free.h +85 -0
- cuda/cccl/headers/include/thrust/device_make_unique.h +56 -0
- cuda/cccl/headers/include/thrust/device_malloc.h +84 -0
- cuda/cccl/headers/include/thrust/device_malloc_allocator.h +190 -0
- cuda/cccl/headers/include/thrust/device_new.h +112 -0
- cuda/cccl/headers/include/thrust/device_new_allocator.h +179 -0
- cuda/cccl/headers/include/thrust/device_ptr.h +196 -0
- cuda/cccl/headers/include/thrust/device_reference.h +983 -0
- cuda/cccl/headers/include/thrust/device_vector.h +576 -0
- cuda/cccl/headers/include/thrust/distance.h +43 -0
- cuda/cccl/headers/include/thrust/equal.h +247 -0
- cuda/cccl/headers/include/thrust/execution_policy.h +251 -0
- cuda/cccl/headers/include/thrust/extrema.h +657 -0
- cuda/cccl/headers/include/thrust/fill.h +200 -0
- cuda/cccl/headers/include/thrust/find.h +382 -0
- cuda/cccl/headers/include/thrust/for_each.h +261 -0
- cuda/cccl/headers/include/thrust/functional.h +395 -0
- cuda/cccl/headers/include/thrust/gather.h +464 -0
- cuda/cccl/headers/include/thrust/generate.h +193 -0
- cuda/cccl/headers/include/thrust/host_vector.h +576 -0
- cuda/cccl/headers/include/thrust/inner_product.h +264 -0
- cuda/cccl/headers/include/thrust/iterator/constant_iterator.h +221 -0
- cuda/cccl/headers/include/thrust/iterator/counting_iterator.h +335 -0
- cuda/cccl/headers/include/thrust/iterator/detail/any_assign.h +48 -0
- cuda/cccl/headers/include/thrust/iterator/detail/any_system_tag.h +43 -0
- cuda/cccl/headers/include/thrust/iterator/detail/device_system_tag.h +38 -0
- cuda/cccl/headers/include/thrust/iterator/detail/host_system_tag.h +38 -0
- cuda/cccl/headers/include/thrust/iterator/detail/iterator_adaptor_base.h +81 -0
- cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_system.h +60 -0
- cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_traversal.h +65 -0
- cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h +57 -0
- cuda/cccl/headers/include/thrust/iterator/detail/iterator_facade_category.h +182 -0
- cuda/cccl/headers/include/thrust/iterator/detail/minimum_system.h +58 -0
- cuda/cccl/headers/include/thrust/iterator/detail/normal_iterator.h +69 -0
- cuda/cccl/headers/include/thrust/iterator/detail/retag.h +104 -0
- cuda/cccl/headers/include/thrust/iterator/detail/tagged_iterator.h +81 -0
- cuda/cccl/headers/include/thrust/iterator/detail/tuple_of_iterator_references.h +174 -0
- cuda/cccl/headers/include/thrust/iterator/discard_iterator.h +163 -0
- cuda/cccl/headers/include/thrust/iterator/iterator_adaptor.h +251 -0
- cuda/cccl/headers/include/thrust/iterator/iterator_categories.h +211 -0
- cuda/cccl/headers/include/thrust/iterator/iterator_facade.h +659 -0
- cuda/cccl/headers/include/thrust/iterator/iterator_traits.h +334 -0
- cuda/cccl/headers/include/thrust/iterator/iterator_traversal_tags.h +64 -0
- cuda/cccl/headers/include/thrust/iterator/offset_iterator.h +194 -0
- cuda/cccl/headers/include/thrust/iterator/permutation_iterator.h +204 -0
- cuda/cccl/headers/include/thrust/iterator/retag.h +72 -0
- cuda/cccl/headers/include/thrust/iterator/reverse_iterator.h +51 -0
- cuda/cccl/headers/include/thrust/iterator/shuffle_iterator.h +185 -0
- cuda/cccl/headers/include/thrust/iterator/strided_iterator.h +152 -0
- cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +152 -0
- cuda/cccl/headers/include/thrust/iterator/transform_input_output_iterator.h +226 -0
- cuda/cccl/headers/include/thrust/iterator/transform_iterator.h +351 -0
- cuda/cccl/headers/include/thrust/iterator/transform_output_iterator.h +190 -0
- cuda/cccl/headers/include/thrust/iterator/zip_iterator.h +359 -0
- cuda/cccl/headers/include/thrust/logical.h +290 -0
- cuda/cccl/headers/include/thrust/memory.h +299 -0
- cuda/cccl/headers/include/thrust/merge.h +725 -0
- cuda/cccl/headers/include/thrust/mismatch.h +261 -0
- cuda/cccl/headers/include/thrust/mr/allocator.h +229 -0
- cuda/cccl/headers/include/thrust/mr/device_memory_resource.h +41 -0
- cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +528 -0
- cuda/cccl/headers/include/thrust/mr/disjoint_sync_pool.h +118 -0
- cuda/cccl/headers/include/thrust/mr/disjoint_tls_pool.h +67 -0
- cuda/cccl/headers/include/thrust/mr/fancy_pointer_resource.h +67 -0
- cuda/cccl/headers/include/thrust/mr/host_memory_resource.h +38 -0
- cuda/cccl/headers/include/thrust/mr/memory_resource.h +217 -0
- cuda/cccl/headers/include/thrust/mr/new.h +100 -0
- cuda/cccl/headers/include/thrust/mr/polymorphic_adaptor.h +63 -0
- cuda/cccl/headers/include/thrust/mr/pool.h +528 -0
- cuda/cccl/headers/include/thrust/mr/pool_options.h +174 -0
- cuda/cccl/headers/include/thrust/mr/sync_pool.h +114 -0
- cuda/cccl/headers/include/thrust/mr/tls_pool.h +64 -0
- cuda/cccl/headers/include/thrust/mr/universal_memory_resource.h +29 -0
- cuda/cccl/headers/include/thrust/mr/validator.h +56 -0
- cuda/cccl/headers/include/thrust/pair.h +99 -0
- cuda/cccl/headers/include/thrust/partition.h +1391 -0
- cuda/cccl/headers/include/thrust/per_device_resource.h +98 -0
- cuda/cccl/headers/include/thrust/random/detail/discard_block_engine.inl +184 -0
- cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine.inl +155 -0
- cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine_discard.h +104 -0
- cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine.inl +151 -0
- cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h +53 -0
- cuda/cccl/headers/include/thrust/random/detail/mod.h +101 -0
- cuda/cccl/headers/include/thrust/random/detail/normal_distribution.inl +187 -0
- cuda/cccl/headers/include/thrust/random/detail/normal_distribution_base.h +160 -0
- cuda/cccl/headers/include/thrust/random/detail/random_core_access.h +63 -0
- cuda/cccl/headers/include/thrust/random/detail/subtract_with_carry_engine.inl +201 -0
- cuda/cccl/headers/include/thrust/random/detail/uniform_int_distribution.inl +198 -0
- cuda/cccl/headers/include/thrust/random/detail/uniform_real_distribution.inl +200 -0
- cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine.inl +183 -0
- cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine_max.h +187 -0
- cuda/cccl/headers/include/thrust/random/discard_block_engine.h +240 -0
- cuda/cccl/headers/include/thrust/random/linear_congruential_engine.h +289 -0
- cuda/cccl/headers/include/thrust/random/linear_feedback_shift_engine.h +217 -0
- cuda/cccl/headers/include/thrust/random/normal_distribution.h +257 -0
- cuda/cccl/headers/include/thrust/random/subtract_with_carry_engine.h +247 -0
- cuda/cccl/headers/include/thrust/random/uniform_int_distribution.h +261 -0
- cuda/cccl/headers/include/thrust/random/uniform_real_distribution.h +258 -0
- cuda/cccl/headers/include/thrust/random/xor_combine_engine.h +255 -0
- cuda/cccl/headers/include/thrust/random.h +120 -0
- cuda/cccl/headers/include/thrust/reduce.h +1113 -0
- cuda/cccl/headers/include/thrust/remove.h +768 -0
- cuda/cccl/headers/include/thrust/replace.h +826 -0
- cuda/cccl/headers/include/thrust/reverse.h +215 -0
- cuda/cccl/headers/include/thrust/scan.h +1671 -0
- cuda/cccl/headers/include/thrust/scatter.h +446 -0
- cuda/cccl/headers/include/thrust/sequence.h +277 -0
- cuda/cccl/headers/include/thrust/set_operations.h +3026 -0
- cuda/cccl/headers/include/thrust/shuffle.h +182 -0
- cuda/cccl/headers/include/thrust/sort.h +1320 -0
- cuda/cccl/headers/include/thrust/swap.h +147 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/adjacent_difference.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/assign_value.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/binary_search.h +32 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/copy.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/copy_if.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/count.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/equal.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/execution_policy.h +109 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/extrema.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/fill.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/find.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/for_each.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/gather.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/generate.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/get_value.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/inner_product.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/iter_swap.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/logical.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/malloc_and_free.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/memory.inl +60 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/merge.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/mismatch.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/partition.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/per_device_resource.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/reduce.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/reduce_by_key.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/remove.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/replace.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/reverse.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/scan.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/scan_by_key.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/scatter.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/sequence.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/set_operations.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/sort.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/swap_ranges.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/tabulate.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/temporary_buffer.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/transform.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/transform_reduce.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/transform_scan.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_copy.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_fill.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/unique.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/unique_by_key.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/execution_policy.h +63 -0
- cuda/cccl/headers/include/thrust/system/cpp/memory.h +106 -0
- cuda/cccl/headers/include/thrust/system/cpp/memory_resource.h +72 -0
- cuda/cccl/headers/include/thrust/system/cpp/pointer.h +120 -0
- cuda/cccl/headers/include/thrust/system/cpp/vector.h +96 -0
- cuda/cccl/headers/include/thrust/system/cuda/config.h +126 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/adjacent_difference.h +219 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/assign_value.h +124 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/binary_search.h +29 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/cdp_dispatch.h +72 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +273 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/copy_if.h +255 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/core/agent_launcher.h +289 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/core/triple_chevron_launch.h +191 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/core/util.h +593 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/count.h +75 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/cross_system.h +243 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/dispatch.h +233 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/equal.h +64 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/error.inl +96 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/execution_policy.h +264 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/extrema.h +476 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/fill.h +100 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +170 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/for_each.h +83 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/gather.h +91 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/generate.h +60 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/get_value.h +65 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/inner_product.h +75 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/iter_swap.h +80 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/logical.h +29 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/make_unsigned_special.h +61 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/malloc_and_free.h +121 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/memory.inl +57 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/merge.h +228 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +223 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/parallel_for.h +81 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/partition.h +405 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/per_device_resource.h +72 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/reduce.h +785 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/reduce_by_key.h +1001 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/remove.h +107 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/replace.h +122 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/reverse.h +87 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/scan.h +341 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/scan_by_key.h +414 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/scatter.h +91 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/sequence.h +29 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/set_operations.h +1734 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/sort.h +469 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/swap_ranges.h +98 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/tabulate.h +61 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/temporary_buffer.h +132 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/terminate.h +53 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/transform.h +429 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/transform_reduce.h +143 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/transform_scan.h +119 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_copy.h +117 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_fill.h +105 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/unique.h +289 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/unique_by_key.h +310 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/util.h +253 -0
- cuda/cccl/headers/include/thrust/system/cuda/error.h +168 -0
- cuda/cccl/headers/include/thrust/system/cuda/execution_policy.h +15 -0
- cuda/cccl/headers/include/thrust/system/cuda/memory.h +122 -0
- cuda/cccl/headers/include/thrust/system/cuda/memory_resource.h +122 -0
- cuda/cccl/headers/include/thrust/system/cuda/pointer.h +160 -0
- cuda/cccl/headers/include/thrust/system/cuda/vector.h +108 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/adjacent_difference.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/assign_value.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/binary_search.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/copy.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/copy_if.h +52 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/count.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/equal.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/extrema.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/fill.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/find.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/for_each.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/gather.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/generate.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/get_value.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/inner_product.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/iter_swap.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/logical.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/malloc_and_free.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/merge.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/mismatch.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/partition.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/per_device_resource.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/reduce.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/reduce_by_key.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/remove.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/replace.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/reverse.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/scan.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/scan_by_key.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/scatter.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/sequence.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/set_operations.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/sort.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/swap_ranges.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/tabulate.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/temporary_buffer.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/transform.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/transform_reduce.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/transform_scan.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_copy.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_fill.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/unique.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/unique_by_key.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/bad_alloc.h +61 -0
- cuda/cccl/headers/include/thrust/system/detail/errno.h +120 -0
- cuda/cccl/headers/include/thrust/system/detail/error_category.inl +302 -0
- cuda/cccl/headers/include/thrust/system/detail/error_code.inl +173 -0
- cuda/cccl/headers/include/thrust/system/detail/error_condition.inl +121 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.h +53 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.inl +79 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.h +161 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.inl +384 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/copy.h +45 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/copy.inl +64 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.h +58 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.inl +146 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/count.h +48 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/count.inl +84 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/equal.h +49 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/equal.inl +60 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/extrema.h +66 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/extrema.inl +252 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/fill.h +54 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/find.h +49 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/find.inl +137 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/for_each.h +58 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/gather.h +73 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/gather.inl +96 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/generate.h +45 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/generate.inl +63 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.h +60 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.inl +72 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/logical.h +59 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/memory.h +64 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/memory.inl +86 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/merge.h +99 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/merge.inl +148 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.h +49 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.inl +68 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/partition.h +129 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/partition.inl +207 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/per_device_resource.h +43 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/reduce.h +71 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/reduce.inl +100 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.h +83 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.inl +186 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/remove.h +86 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/remove.inl +121 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/replace.h +95 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/replace.inl +175 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/reverse.h +48 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/reverse.inl +67 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.h +63 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.inl +126 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scan.h +72 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scan.inl +85 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.h +126 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.inl +232 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scatter.h +73 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scatter.inl +85 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/select_system.h +104 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/sequence.h +70 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.h +282 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.inl +476 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.h +54 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.inl +125 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/sort.h +113 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/sort.inl +175 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.h +44 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.inl +76 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.h +41 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.inl +54 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/tag.h +47 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.h +54 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.inl +82 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/transform.h +395 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.h +50 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.inl +56 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.h +80 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.inl +113 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.h +45 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.inl +166 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.h +45 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.inl +115 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/unique.h +71 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/unique.inl +113 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.h +81 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.inl +126 -0
- cuda/cccl/headers/include/thrust/system/detail/internal/decompose.h +117 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/adjacent_difference.h +70 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/assign_value.h +42 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/binary_search.h +136 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/copy.h +49 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/copy.inl +119 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/copy_backward.h +49 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/copy_if.h +71 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/count.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/equal.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/execution_policy.h +52 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/extrema.h +110 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/fill.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/find.h +62 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/for_each.h +74 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/gather.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/general_copy.h +123 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/generate.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/get_value.h +43 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/inner_product.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/insertion_sort.h +141 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/iter_swap.h +45 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/logical.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/malloc_and_free.h +50 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/merge.h +75 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/merge.inl +145 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/mismatch.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/partition.h +301 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/per_device_resource.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/reduce.h +64 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/reduce_by_key.h +98 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/remove.h +179 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/replace.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/reverse.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/scan.h +154 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/scan_by_key.h +145 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/scatter.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/sequence.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/set_operations.h +206 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/sort.h +59 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/sort.inl +116 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.h +55 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.inl +356 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.h +48 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.inl +124 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.h +48 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.inl +586 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/swap_ranges.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/tabulate.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/temporary_buffer.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/transform.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/transform_reduce.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/transform_scan.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/trivial_copy.h +58 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_copy.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_fill.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/unique.h +115 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/unique_by_key.h +106 -0
- cuda/cccl/headers/include/thrust/system/detail/system_error.inl +108 -0
- cuda/cccl/headers/include/thrust/system/error_code.h +512 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/adjacent_difference.h +54 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/assign_value.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/binary_search.h +77 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/copy.h +50 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/copy.inl +74 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.h +56 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.inl +59 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/count.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.h +50 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.inl +65 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/equal.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/execution_policy.h +127 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/extrema.h +66 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/fill.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/find.h +53 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/for_each.h +56 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/for_each.inl +87 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/gather.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/generate.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/get_value.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/inner_product.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/iter_swap.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/logical.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/malloc_and_free.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/memory.inl +93 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/merge.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/mismatch.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/partition.h +88 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/partition.inl +102 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/per_device_resource.h +29 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/pragma_omp.h +54 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce.h +54 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce.inl +78 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.h +64 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.inl +65 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.h +59 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.inl +103 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/remove.h +72 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/remove.inl +87 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/replace.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reverse.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/scan.h +73 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/scan.inl +172 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/scan_by_key.h +36 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/scatter.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/sequence.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/set_operations.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/sort.h +60 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/sort.inl +265 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/swap_ranges.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/tabulate.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/temporary_buffer.h +29 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/transform.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/transform_reduce.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/transform_scan.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_copy.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_fill.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/unique.h +60 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/unique.inl +71 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.h +67 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.inl +75 -0
- cuda/cccl/headers/include/thrust/system/omp/execution_policy.h +62 -0
- cuda/cccl/headers/include/thrust/system/omp/memory.h +111 -0
- cuda/cccl/headers/include/thrust/system/omp/memory_resource.h +75 -0
- cuda/cccl/headers/include/thrust/system/omp/pointer.h +124 -0
- cuda/cccl/headers/include/thrust/system/omp/vector.h +99 -0
- cuda/cccl/headers/include/thrust/system/system_error.h +185 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/adjacent_difference.h +54 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/assign_value.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/binary_search.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/copy.h +50 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/copy.inl +73 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.h +47 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.inl +136 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/count.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/equal.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/execution_policy.h +109 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/extrema.h +66 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/fill.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/find.h +49 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.h +51 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.inl +91 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/gather.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/generate.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/get_value.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/inner_product.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/iter_swap.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/logical.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/malloc_and_free.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/memory.inl +94 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/merge.h +77 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/merge.inl +327 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/mismatch.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/partition.h +84 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/partition.inl +98 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/per_device_resource.h +29 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.h +54 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.inl +137 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.h +61 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.inl +400 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_intervals.h +140 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/remove.h +76 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/remove.inl +87 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/replace.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/reverse.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/scan.h +59 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/scan.inl +312 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/scan_by_key.h +33 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/scatter.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/sequence.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/set_operations.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/sort.h +60 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/sort.inl +295 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/swap_ranges.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/tabulate.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/temporary_buffer.h +29 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/transform.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/transform_reduce.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/transform_scan.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_copy.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_fill.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/unique.h +60 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/unique.inl +71 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.h +67 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.inl +75 -0
- cuda/cccl/headers/include/thrust/system/tbb/execution_policy.h +62 -0
- cuda/cccl/headers/include/thrust/system/tbb/memory.h +111 -0
- cuda/cccl/headers/include/thrust/system/tbb/memory_resource.h +75 -0
- cuda/cccl/headers/include/thrust/system/tbb/pointer.h +124 -0
- cuda/cccl/headers/include/thrust/system/tbb/vector.h +99 -0
- cuda/cccl/headers/include/thrust/system_error.h +57 -0
- cuda/cccl/headers/include/thrust/tabulate.h +125 -0
- cuda/cccl/headers/include/thrust/transform.h +1045 -0
- cuda/cccl/headers/include/thrust/transform_reduce.h +190 -0
- cuda/cccl/headers/include/thrust/transform_scan.h +442 -0
- cuda/cccl/headers/include/thrust/tuple.h +139 -0
- cuda/cccl/headers/include/thrust/type_traits/integer_sequence.h +261 -0
- cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +154 -0
- cuda/cccl/headers/include/thrust/type_traits/is_execution_policy.h +65 -0
- cuda/cccl/headers/include/thrust/type_traits/is_operator_less_or_greater_function_object.h +184 -0
- cuda/cccl/headers/include/thrust/type_traits/is_operator_plus_function_object.h +116 -0
- cuda/cccl/headers/include/thrust/type_traits/is_trivially_relocatable.h +336 -0
- cuda/cccl/headers/include/thrust/type_traits/logical_metafunctions.h +42 -0
- cuda/cccl/headers/include/thrust/type_traits/unwrap_contiguous_iterator.h +63 -0
- cuda/cccl/headers/include/thrust/uninitialized_copy.h +300 -0
- cuda/cccl/headers/include/thrust/uninitialized_fill.h +268 -0
- cuda/cccl/headers/include/thrust/unique.h +1088 -0
- cuda/cccl/headers/include/thrust/universal_allocator.h +93 -0
- cuda/cccl/headers/include/thrust/universal_ptr.h +34 -0
- cuda/cccl/headers/include/thrust/universal_vector.h +71 -0
- cuda/cccl/headers/include/thrust/version.h +93 -0
- cuda/cccl/headers/include/thrust/zip_function.h +176 -0
- cuda/cccl/headers/include_paths.py +51 -0
- cuda/cccl/parallel/__init__.py +9 -0
- cuda/cccl/parallel/experimental/__init__.py +24 -0
- cuda/cccl/py.typed +0 -0
- cuda/compute/__init__.py +79 -0
- cuda/compute/_bindings.py +79 -0
- cuda/compute/_bindings.pyi +475 -0
- cuda/compute/_bindings_impl.pyx +2273 -0
- cuda/compute/_caching.py +71 -0
- cuda/compute/_cccl_interop.py +422 -0
- cuda/compute/_utils/__init__.py +0 -0
- cuda/compute/_utils/protocols.py +132 -0
- cuda/compute/_utils/temp_storage_buffer.py +86 -0
- cuda/compute/algorithms/__init__.py +54 -0
- cuda/compute/algorithms/_histogram.py +243 -0
- cuda/compute/algorithms/_merge_sort.py +225 -0
- cuda/compute/algorithms/_radix_sort.py +312 -0
- cuda/compute/algorithms/_reduce.py +182 -0
- cuda/compute/algorithms/_scan.py +331 -0
- cuda/compute/algorithms/_segmented_reduce.py +257 -0
- cuda/compute/algorithms/_three_way_partition.py +261 -0
- cuda/compute/algorithms/_transform.py +329 -0
- cuda/compute/algorithms/_unique_by_key.py +252 -0
- cuda/compute/cccl/.gitkeep +0 -0
- cuda/compute/cu12/_bindings_impl.cp313-win_amd64.pyd +0 -0
- cuda/compute/cu12/cccl/cccl.c.parallel.dll +0 -0
- cuda/compute/cu12/cccl/cccl.c.parallel.lib +0 -0
- cuda/compute/cu13/_bindings_impl.cp313-win_amd64.pyd +0 -0
- cuda/compute/cu13/cccl/cccl.c.parallel.dll +0 -0
- cuda/compute/cu13/cccl/cccl.c.parallel.lib +0 -0
- cuda/compute/iterators/__init__.py +21 -0
- cuda/compute/iterators/_factories.py +219 -0
- cuda/compute/iterators/_iterators.py +817 -0
- cuda/compute/iterators/_zip_iterator.py +199 -0
- cuda/compute/numba_utils.py +53 -0
- cuda/compute/op.py +3 -0
- cuda/compute/struct.py +272 -0
- cuda/compute/typing.py +37 -0
- cuda/coop/__init__.py +8 -0
- cuda/coop/_caching.py +48 -0
- cuda/coop/_common.py +275 -0
- cuda/coop/_nvrtc.py +92 -0
- cuda/coop/_scan_op.py +181 -0
- cuda/coop/_types.py +937 -0
- cuda/coop/_typing.py +107 -0
- cuda/coop/block/__init__.py +39 -0
- cuda/coop/block/_block_exchange.py +251 -0
- cuda/coop/block/_block_load_store.py +215 -0
- cuda/coop/block/_block_merge_sort.py +125 -0
- cuda/coop/block/_block_radix_sort.py +214 -0
- cuda/coop/block/_block_reduce.py +294 -0
- cuda/coop/block/_block_scan.py +983 -0
- cuda/coop/warp/__init__.py +9 -0
- cuda/coop/warp/_warp_merge_sort.py +92 -0
- cuda/coop/warp/_warp_reduce.py +153 -0
- cuda/coop/warp/_warp_scan.py +78 -0
- cuda_cccl-0.3.3.dist-info/METADATA +41 -0
- cuda_cccl-0.3.3.dist-info/RECORD +1968 -0
- cuda_cccl-0.3.3.dist-info/WHEEL +5 -0
- cuda_cccl-0.3.3.dist-info/licenses/LICENSE +1 -0
|
@@ -0,0 +1,2983 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
4
|
+
*
|
|
5
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
* you may not use this file except in compliance with the License.
|
|
7
|
+
* You may obtain a copy of the License at
|
|
8
|
+
*
|
|
9
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
*
|
|
11
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
* See the License for the specific language governing permissions and
|
|
15
|
+
* limitations under the License.
|
|
16
|
+
*
|
|
17
|
+
* Licensed under the Apache License v2.0 with LLVM Exceptions.
|
|
18
|
+
* See https://nvidia.github.io/NVTX/LICENSE.txt for license information.
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
/* Temporary helper #defines, #undef'ed at end of header */
|
|
22
|
+
#define NVTX3_CPP_VERSION_MAJOR 1
|
|
23
|
+
#define NVTX3_CPP_VERSION_MINOR 0
|
|
24
|
+
|
|
25
|
+
/* This section handles the decision of whether to provide unversioned symbols.
|
|
26
|
+
* If NVTX3_CPP_REQUIRE_EXPLICIT_VERSION is #defined, unversioned symbols are
|
|
27
|
+
* not provided, and explicit-version symbols such as nvtx3::v1::scoped_range
|
|
28
|
+
* and NVTX3_V1_FUNC_RANGE must be used. By default, the first #include of this
|
|
29
|
+
* header will define the unversioned symbols such as nvtx3::scoped_range and
|
|
30
|
+
* NVTX3_FUNC_RANGE. Subsequently including a different major version of this
|
|
31
|
+
* header without #defining NVTX3_CPP_REQUIRE_EXPLICIT_VERSION triggers an error
|
|
32
|
+
* since the symbols would conflict. Subsequently including a different
|
|
33
|
+
* minor version within the same major version is allowed. Functionality of
|
|
34
|
+
* minor versions is cumulative, regardless of include order.
|
|
35
|
+
*
|
|
36
|
+
* Since NVTX3_CPP_REQUIRE_EXPLICIT_VERSION allows all combinations of versions
|
|
37
|
+
* to coexist without problems within a translation unit, the recommended best
|
|
38
|
+
* practice for instrumenting header-based libraries with NVTX C++ Wrappers is
|
|
39
|
+
* to #define NVTX3_CPP_REQUIRE_EXPLICIT_VERSION before including nvtx3.hpp,
|
|
40
|
+
* #undef it afterward, and only use explicit-version symbols. This is not
|
|
41
|
+
* necessary in common cases, such as instrumenting a standalone application, or
|
|
42
|
+
* static/shared libraries in .cpp files or headers private to those projects.
|
|
43
|
+
*/
|
|
44
|
+
/* clang-format off */
|
|
45
|
+
#if !defined(NVTX3_CPP_REQUIRE_EXPLICIT_VERSION)
|
|
46
|
+
/* Define macro used by all definitions in this header to indicate the
|
|
47
|
+
* unversioned symbols should be defined in addition to the versioned ones.
|
|
48
|
+
*/
|
|
49
|
+
#define NVTX3_INLINE_THIS_VERSION
|
|
50
|
+
|
|
51
|
+
#if !defined(NVTX3_CPP_INLINED_VERSION_MAJOR)
|
|
52
|
+
/* First occurrence of this header in the translation unit. Define macros
|
|
53
|
+
* indicating which version shall be used for unversioned symbols.
|
|
54
|
+
*/
|
|
55
|
+
|
|
56
|
+
/**
|
|
57
|
+
* @brief Semantic major version number for NVTX C++ wrappers of unversioned symbols
|
|
58
|
+
*
|
|
59
|
+
* Breaking changes may occur between major versions, and different major versions
|
|
60
|
+
* cannot provide unversioned symbols in the same translation unit (.cpp file).
|
|
61
|
+
*
|
|
62
|
+
* Note: If NVTX3_CPP_REQUIRE_EXPLICIT_VERSION is defined, this macro is not defined.
|
|
63
|
+
*
|
|
64
|
+
* Not to be confused with the version number of the NVTX core library.
|
|
65
|
+
*/
|
|
66
|
+
#define NVTX3_CPP_INLINED_VERSION_MAJOR 1 // NVTX3_CPP_VERSION_MAJOR
|
|
67
|
+
|
|
68
|
+
/**
|
|
69
|
+
* @brief Semantic minor version number for NVTX C++ wrappers of unversioned symbols
|
|
70
|
+
*
|
|
71
|
+
* No breaking changes occur between minor versions -- minor version changes within
|
|
72
|
+
* a major version are purely additive.
|
|
73
|
+
*
|
|
74
|
+
* Note: If NVTX3_CPP_REQUIRE_EXPLICIT_VERSION is defined, this macro is not defined.
|
|
75
|
+
*
|
|
76
|
+
* Not to be confused with the version number of the NVTX core library.
|
|
77
|
+
*/
|
|
78
|
+
#define NVTX3_CPP_INLINED_VERSION_MINOR 0 // NVTX3_CPP_VERSION_MINOR
|
|
79
|
+
#elif NVTX3_CPP_INLINED_VERSION_MAJOR != NVTX3_CPP_VERSION_MAJOR
|
|
80
|
+
/* Unsupported case -- cannot define unversioned symbols for different major versions
|
|
81
|
+
* in the same translation unit.
|
|
82
|
+
*/
|
|
83
|
+
#error \
|
|
84
|
+
"Two different major versions of the NVTX C++ Wrappers are being included in a single .cpp file, with unversioned symbols enabled in both. Only one major version can enable unversioned symbols in a .cpp file. To disable unversioned symbols, #define NVTX3_CPP_REQUIRE_EXPLICIT_VERSION before #including nvtx3.hpp, and use the explicit-version symbols instead -- this is the preferred way to use nvtx3.hpp from a header file."
|
|
85
|
+
#elif (NVTX3_CPP_INLINED_VERSION_MAJOR == NVTX3_CPP_VERSION_MAJOR) && \
|
|
86
|
+
(NVTX3_CPP_INLINED_VERSION_MINOR < NVTX3_CPP_VERSION_MINOR)
|
|
87
|
+
/* An older minor version of the same major version already defined unversioned
|
|
88
|
+
* symbols. The new features provided in this header will be inlined, so
|
|
89
|
+
* redefine the minor version macro to this header's version.
|
|
90
|
+
*/
|
|
91
|
+
#undef NVTX3_CPP_INLINED_VERSION_MINOR
|
|
92
|
+
#define NVTX3_CPP_INLINED_VERSION_MINOR 0 // NVTX3_CPP_VERSION_MINOR
|
|
93
|
+
// else, already have this version or newer, nothing to do
|
|
94
|
+
#endif
|
|
95
|
+
#endif
|
|
96
|
+
/* clang-format on */
|
|
97
|
+
|
|
98
|
+
/**
|
|
99
|
+
* @file nvtx3.hpp
|
|
100
|
+
*
|
|
101
|
+
* @brief Provides C++ constructs making the NVTX library safer and easier to
|
|
102
|
+
* use with zero overhead.
|
|
103
|
+
*/
|
|
104
|
+
|
|
105
|
+
/**
|
|
106
|
+
* \mainpage
|
|
107
|
+
* \tableofcontents
|
|
108
|
+
*
|
|
109
|
+
* \section QUICK_START Quick Start
|
|
110
|
+
*
|
|
111
|
+
* To add NVTX ranges to your code, use the `nvtx3::scoped_range` RAII object. A
|
|
112
|
+
* range begins when the object is created, and ends when the object is
|
|
113
|
+
* destroyed.
|
|
114
|
+
*
|
|
115
|
+
* \code{.cpp}
|
|
116
|
+
* #include "nvtx3.hpp"
|
|
117
|
+
* void some_function() {
|
|
118
|
+
* // Begins a NVTX range with the message "some_function"
|
|
119
|
+
* // The range ends when some_function() returns and `r` is destroyed
|
|
120
|
+
* nvtx3::scoped_range r{"some_function"};
|
|
121
|
+
*
|
|
122
|
+
* for(int i = 0; i < 6; ++i) {
|
|
123
|
+
* nvtx3::scoped_range loop{"loop range"};
|
|
124
|
+
* std::this_thread::sleep_for(std::chrono::seconds{1});
|
|
125
|
+
* }
|
|
126
|
+
* } // Range ends when `r` is destroyed
|
|
127
|
+
* \endcode
|
|
128
|
+
*
|
|
129
|
+
* The example code above generates the following timeline view in Nsight
|
|
130
|
+
* Systems:
|
|
131
|
+
*
|
|
132
|
+
* \image html
|
|
133
|
+
* https://raw.githubusercontent.com/NVIDIA/NVTX/release-v3/docs/images/example_range.png
|
|
134
|
+
*
|
|
135
|
+
* Alternatively, use the \ref MACROS like `NVTX3_FUNC_RANGE()` to add
|
|
136
|
+
* ranges to your code that automatically use the name of the enclosing function
|
|
137
|
+
* as the range's message.
|
|
138
|
+
*
|
|
139
|
+
* \code{.cpp}
|
|
140
|
+
* #include "nvtx3.hpp"
|
|
141
|
+
* void some_function() {
|
|
142
|
+
* // Creates a range with a message "some_function" that ends when the
|
|
143
|
+
* // enclosing function returns
|
|
144
|
+
* NVTX3_FUNC_RANGE();
|
|
145
|
+
* ...
|
|
146
|
+
* }
|
|
147
|
+
* \endcode
|
|
148
|
+
*
|
|
149
|
+
*
|
|
150
|
+
* \section Overview
|
|
151
|
+
*
|
|
152
|
+
* The NVTX library provides a set of functions for users to annotate their code
|
|
153
|
+
* to aid in performance profiling and optimization. These annotations provide
|
|
154
|
+
* information to tools like Nsight Systems to improve visualization of
|
|
155
|
+
* application timelines.
|
|
156
|
+
*
|
|
157
|
+
* \ref RANGES are one of the most commonly used NVTX constructs for annotating
|
|
158
|
+
* a span of time. For example, imagine a user wanted to see every time a
|
|
159
|
+
* function, `my_function`, is called and how long it takes to execute. This can
|
|
160
|
+
* be accomplished with an NVTX range created on the entry to the function and
|
|
161
|
+
* terminated on return from `my_function` using the push/pop C APIs:
|
|
162
|
+
*
|
|
163
|
+
* \code{.cpp}
|
|
164
|
+
* void my_function(...) {
|
|
165
|
+
* nvtxRangePushA("my_function"); // Begins NVTX range
|
|
166
|
+
* // do work
|
|
167
|
+
* nvtxRangePop(); // Ends NVTX range
|
|
168
|
+
* }
|
|
169
|
+
* \endcode
|
|
170
|
+
*
|
|
171
|
+
* One of the challenges with using the NVTX C API is that it requires manually
|
|
172
|
+
* terminating the range with `nvtxRangePop`. This can be challenging
|
|
173
|
+
* if `my_function()` has multiple returns or can throw exceptions as it
|
|
174
|
+
* requires calling `nvtxRangePop()` before all possible return points.
|
|
175
|
+
*
|
|
176
|
+
* NVTX C++ solves this inconvenience through the "RAII" technique by providing
|
|
177
|
+
* a `nvtx3::scoped_range` class that begins a range at construction and ends
|
|
178
|
+
* the range on destruction. The above example then becomes:
|
|
179
|
+
*
|
|
180
|
+
* \code{.cpp}
|
|
181
|
+
* void my_function(...) {
|
|
182
|
+
* nvtx3::scoped_range r{"my_function"}; // Begins NVTX range
|
|
183
|
+
* // do work
|
|
184
|
+
* } // Range ends on exit from `my_function` when `r` is destroyed
|
|
185
|
+
* \endcode
|
|
186
|
+
*
|
|
187
|
+
* The range object `r` is deterministically destroyed whenever `my_function`
|
|
188
|
+
* returns---ending the NVTX range without manual intervention. For more
|
|
189
|
+
* information, see \ref RANGES and `nvtx3::scoped_range_in`.
|
|
190
|
+
*
|
|
191
|
+
* Another inconvenience of the NVTX C APIs is the several constructs where the
|
|
192
|
+
* user is expected to initialize an object at the beginning of an application
|
|
193
|
+
* and reuse that object throughout the lifetime of the application. For example
|
|
194
|
+
* see domains, categories, and registered messages.
|
|
195
|
+
*
|
|
196
|
+
* Example:
|
|
197
|
+
* \code{.cpp}
|
|
198
|
+
* nvtxDomainHandle_t D = nvtxDomainCreateA("my domain");
|
|
199
|
+
* // Reuse `D` throughout the rest of the application
|
|
200
|
+
* \endcode
|
|
201
|
+
*
|
|
202
|
+
* This can be problematic if the user application or library does not have an
|
|
203
|
+
* explicit initialization function called before all other functions to
|
|
204
|
+
* ensure that these long-lived objects are initialized before being used.
|
|
205
|
+
*
|
|
206
|
+
* NVTX C++ makes use of the "construct on first use" technique to alleviate
|
|
207
|
+
* this inconvenience. In short, a function local static object is constructed
|
|
208
|
+
* upon the first invocation of a function and returns a reference to that
|
|
209
|
+
* object on all future invocations. See the documentation for `nvtx3::domain`,
|
|
210
|
+
* `nvtx3::named_category`, `nvtx3::registered_string`, and
|
|
211
|
+
* https://isocpp.org/wiki/faq/ctors#static-init-order-on-first-use for more
|
|
212
|
+
* information.
|
|
213
|
+
*
|
|
214
|
+
* Using construct on first use, the above example becomes:
|
|
215
|
+
* \code{.cpp}
|
|
216
|
+
* struct my_domain{ static constexpr char const* name{"my domain"}; };
|
|
217
|
+
*
|
|
218
|
+
* // The first invocation of `domain::get` for the type `my_domain` will
|
|
219
|
+
* // construct a `nvtx3::domain` object and return a reference to it. Future
|
|
220
|
+
* // invocations simply return a reference.
|
|
221
|
+
* nvtx3::domain const& D = nvtx3::domain::get<my_domain>();
|
|
222
|
+
* \endcode
|
|
223
|
+
* To learn about NVTX and how it can be used, see
|
|
224
|
+
* https://docs.nvidia.com/cuda/profiler-users-guide/index.html#nvtx and
|
|
225
|
+
* https://devblogs.nvidia.com/cuda-pro-tip-generate-custom-application-profile-timelines-nvtx/
|
|
226
|
+
* for more information.
|
|
227
|
+
*
|
|
228
|
+
* \section RANGES Ranges
|
|
229
|
+
*
|
|
230
|
+
* Ranges are used to describe a span of time during the execution of an
|
|
231
|
+
* application. Common examples are using ranges to annotate the time it takes
|
|
232
|
+
* to execute a function or an iteration of a loop.
|
|
233
|
+
*
|
|
234
|
+
* NVTX C++ uses RAII to automate the generation of ranges that are tied to the
|
|
235
|
+
* lifetime of objects, similar to `std::lock_guard` in the C++ Standard
|
|
236
|
+
* Template Library.
|
|
237
|
+
*
|
|
238
|
+
* \subsection scoped_range Scoped Range
|
|
239
|
+
*
|
|
240
|
+
* `nvtx3::scoped_range_in` is a class that begins a range upon construction
|
|
241
|
+
* and ends the range at destruction. This is one of the most commonly used
|
|
242
|
+
* constructs in NVTX C++ and is useful for annotating spans of time on a
|
|
243
|
+
* particular thread. These ranges can be nested to arbitrary depths.
|
|
244
|
+
*
|
|
245
|
+
* `nvtx3::scoped_range` is an alias for a `nvtx3::scoped_range_in` in the
|
|
246
|
+
* global NVTX domain. For more information about Domains, see \ref DOMAINS.
|
|
247
|
+
*
|
|
248
|
+
* Various attributes of a range can be configured by constructing a
|
|
249
|
+
* `nvtx3::scoped_range_in` with a `nvtx3::event_attributes` object. For
|
|
250
|
+
* more information, see \ref ATTRIBUTES.
|
|
251
|
+
*
|
|
252
|
+
* Example:
|
|
253
|
+
*
|
|
254
|
+
* \code{.cpp}
|
|
255
|
+
* void some_function() {
|
|
256
|
+
* // Creates a range for the duration of `some_function`
|
|
257
|
+
* nvtx3::scoped_range r{};
|
|
258
|
+
*
|
|
259
|
+
* while(true) {
|
|
260
|
+
* // Creates a range for every loop iteration
|
|
261
|
+
* // `loop_range` is nested inside `r`
|
|
262
|
+
* nvtx3::scoped_range loop_range{};
|
|
263
|
+
* }
|
|
264
|
+
* }
|
|
265
|
+
* \endcode
|
|
266
|
+
*
|
|
267
|
+
* \subsection unique_range Unique Range
|
|
268
|
+
*
|
|
269
|
+
* `nvtx3::unique_range` is similar to `nvtx3::scoped_range`, with a few key differences:
|
|
270
|
+
* - `unique_range` objects can be destroyed in any order whereas `scoped_range` objects must be
|
|
271
|
+
* destroyed in exact reverse creation order
|
|
272
|
+
* - `unique_range` can start and end on different threads
|
|
273
|
+
* - `unique_range` is movable
|
|
274
|
+
* - `unique_range` objects can be constructed as heap objects
|
|
275
|
+
*
|
|
276
|
+
* There is extra overhead associated with `unique_range` constructs and therefore use of
|
|
277
|
+
* `nvtx3::scoped_range_in` should be preferred.
|
|
278
|
+
*
|
|
279
|
+
* \section MARKS Marks
|
|
280
|
+
*
|
|
281
|
+
* `nvtx3::mark` annotates an instantaneous point in time with a "marker".
|
|
282
|
+
*
|
|
283
|
+
* Unlike a "range" which has a beginning and an end, a marker is a single event
|
|
284
|
+
* in an application, such as detecting a problem:
|
|
285
|
+
*
|
|
286
|
+
* \code{.cpp}
|
|
287
|
+
* bool success = do_operation(...);
|
|
288
|
+
* if (!success) {
|
|
289
|
+
* nvtx3::mark("operation failed!");
|
|
290
|
+
* }
|
|
291
|
+
* \endcode
|
|
292
|
+
*
|
|
293
|
+
* \section DOMAINS Domains
|
|
294
|
+
*
|
|
295
|
+
* Similar to C++ namespaces, domains allow for scoping NVTX events. By default,
|
|
296
|
+
* all NVTX events belong to the "global" domain. Libraries and applications
|
|
297
|
+
* should scope their events to use a custom domain to differentiate where the
|
|
298
|
+
* events originate from.
|
|
299
|
+
*
|
|
300
|
+
* It is common for a library or application to have only a single domain and
|
|
301
|
+
* for the name of that domain to be known at compile time. Therefore, Domains
|
|
302
|
+
* in NVTX C++ are represented by _tag types_.
|
|
303
|
+
*
|
|
304
|
+
* For example, to define a custom domain, simply define a new concrete type
|
|
305
|
+
* (a `class` or `struct`) with a `static` member called `name` that contains
|
|
306
|
+
* the desired name of the domain.
|
|
307
|
+
*
|
|
308
|
+
* \code{.cpp}
|
|
309
|
+
* struct my_domain{ static constexpr char const* name{"my domain"}; };
|
|
310
|
+
* \endcode
|
|
311
|
+
*
|
|
312
|
+
* For any NVTX C++ construct that can be scoped to a domain, the type
|
|
313
|
+
* `my_domain` can be passed as an explicit template argument to scope it to
|
|
314
|
+
* the custom domain.
|
|
315
|
+
*
|
|
316
|
+
* The tag type `nvtx3::domain::global` represents the global NVTX domain.
|
|
317
|
+
*
|
|
318
|
+
* \code{.cpp}
|
|
319
|
+
* // By default, `scoped_range_in` belongs to the global domain
|
|
320
|
+
* nvtx3::scoped_range_in<> r0{};
|
|
321
|
+
*
|
|
322
|
+
* // Alias for a `scoped_range_in` in the global domain
|
|
323
|
+
* nvtx3::scoped_range r1{};
|
|
324
|
+
*
|
|
325
|
+
* // `r` belongs to the custom domain
|
|
326
|
+
* nvtx3::scoped_range_in<my_domain> r{};
|
|
327
|
+
* \endcode
|
|
328
|
+
*
|
|
329
|
+
* When using a custom domain, it is recommended to define type aliases for NVTX
|
|
330
|
+
* constructs in the custom domain.
|
|
331
|
+
* \code{.cpp}
|
|
332
|
+
* using my_scoped_range = nvtx3::scoped_range_in<my_domain>;
|
|
333
|
+
* using my_registered_string = nvtx3::registered_string_in<my_domain>;
|
|
334
|
+
* using my_named_category = nvtx3::named_category_in<my_domain>;
|
|
335
|
+
* \endcode
|
|
336
|
+
*
|
|
337
|
+
* See `nvtx3::domain` for more information.
|
|
338
|
+
*
|
|
339
|
+
* \section ATTRIBUTES Event Attributes
|
|
340
|
+
*
|
|
341
|
+
* NVTX events can be customized with various attributes to provide additional
|
|
342
|
+
* information (such as a custom message) or to control visualization of the
|
|
343
|
+
* event (such as the color used). These attributes can be specified per-event
|
|
344
|
+
* via arguments to a `nvtx3::event_attributes` object.
|
|
345
|
+
*
|
|
346
|
+
* NVTX events can be customized via four "attributes":
|
|
347
|
+
* - \ref COLOR : color used to visualize the event in tools.
|
|
348
|
+
* - \ref MESSAGES : Custom message string.
|
|
349
|
+
* - \ref PAYLOAD : User-defined numerical value.
|
|
350
|
+
* - \ref CATEGORY : Intra-domain grouping.
|
|
351
|
+
*
|
|
352
|
+
* It is possible to construct a `nvtx3::event_attributes` from any number of
|
|
353
|
+
* attribute objects (nvtx3::color, nvtx3::message, nvtx3::payload,
|
|
354
|
+
* nvtx3::category) in any order. If an attribute is not specified, a tool
|
|
355
|
+
* specific default value is used. See `nvtx3::event_attributes` for more
|
|
356
|
+
* information.
|
|
357
|
+
*
|
|
358
|
+
* \code{.cpp}
|
|
359
|
+
* // Set message, same as passing nvtx3::message{"message"}
|
|
360
|
+
* nvtx3::event_attributes attr{"message"};
|
|
361
|
+
*
|
|
362
|
+
* // Set message and color
|
|
363
|
+
* nvtx3::event_attributes attr{"message", nvtx3::rgb{127, 255, 0}};
|
|
364
|
+
*
|
|
365
|
+
* // Set message, color, payload, category
|
|
366
|
+
* nvtx3::event_attributes attr{"message",
|
|
367
|
+
* nvtx3::rgb{127, 255, 0},
|
|
368
|
+
* nvtx3::payload{42},
|
|
369
|
+
* nvtx3::category{1}};
|
|
370
|
+
*
|
|
371
|
+
* // Same as above -- can use any order of arguments
|
|
372
|
+
* nvtx3::event_attributes attr{nvtx3::payload{42},
|
|
373
|
+
* nvtx3::category{1},
|
|
374
|
+
* "message",
|
|
375
|
+
* nvtx3::rgb{127, 255, 0}};
|
|
376
|
+
*
|
|
377
|
+
* // Multiple arguments of the same type are allowed, but only the first is
|
|
378
|
+
* // used -- in this example, payload is set to 42:
|
|
379
|
+
* nvtx3::event_attributes attr{ nvtx3::payload{42}, nvtx3::payload{7} };
|
|
380
|
+
*
|
|
381
|
+
* // Using the nvtx3 namespace in a local scope makes the syntax more succinct:
|
|
382
|
+
* using namespace nvtx3;
|
|
383
|
+
* event_attributes attr{"message", rgb{127, 255, 0}, payload{42}, category{1}};
|
|
384
|
+
* \endcode
|
|
385
|
+
*
|
|
386
|
+
* \subsection MESSAGES message
|
|
387
|
+
*
|
|
388
|
+
* `nvtx3::message` sets the message string for an NVTX event.
|
|
389
|
+
*
|
|
390
|
+
* Example:
|
|
391
|
+
* \code{.cpp}
|
|
392
|
+
* // Create an `event_attributes` with the message "my message"
|
|
393
|
+
* nvtx3::event_attributes attr{nvtx3::message{"my message"}};
|
|
394
|
+
*
|
|
395
|
+
* // strings and string literals implicitly assumed to be a `nvtx3::message`
|
|
396
|
+
* nvtx3::event_attributes attr{"my message"};
|
|
397
|
+
* \endcode
|
|
398
|
+
*
|
|
399
|
+
* \subsubsection REGISTERED_MESSAGE Registered Messages
|
|
400
|
+
*
|
|
401
|
+
* Associating a `nvtx3::message` with an event requires copying the contents of
|
|
402
|
+
* the message every time the message is used, i.e., copying the entire message
|
|
403
|
+
* string. This may cause non-trivial overhead in performance sensitive code.
|
|
404
|
+
*
|
|
405
|
+
* To eliminate this overhead, NVTX allows registering a message string,
|
|
406
|
+
* yielding a "handle" that is inexpensive to copy that may be used in place of
|
|
407
|
+
* a message string. When visualizing the events, tools such as Nsight Systems
|
|
408
|
+
* will take care of mapping the message handle to its string.
|
|
409
|
+
*
|
|
410
|
+
* A message should be registered once and the handle reused throughout the rest
|
|
411
|
+
* of the application. This can be done by either explicitly creating static
|
|
412
|
+
* `nvtx3::registered_string` objects, or using the
|
|
413
|
+
* `nvtx3::registered_string::get` construct-on-first-use helper (recommended).
|
|
414
|
+
*
|
|
415
|
+
* Similar to \ref DOMAINS, `nvtx3::registered_string::get` requires defining a
|
|
416
|
+
* custom tag type with a static `message` member whose value will be the
|
|
417
|
+
* contents of the registered string.
|
|
418
|
+
*
|
|
419
|
+
* Example:
|
|
420
|
+
* \code{.cpp}
|
|
421
|
+
* // Explicitly constructed, static `registered_string` in my_domain:
|
|
422
|
+
* static nvtx3::registered_string_in<my_domain> static_message{"my message"};
|
|
423
|
+
*
|
|
424
|
+
* // Or use construct on first use:
|
|
425
|
+
* // Define a tag type with a `message` member string to register
|
|
426
|
+
* struct my_message{ static constexpr char const* message{ "my message" }; };
|
|
427
|
+
*
|
|
428
|
+
* // Uses construct on first use to register the contents of
|
|
429
|
+
* // `my_message::message`
|
|
430
|
+
* auto& msg = nvtx3::registered_string_in<my_domain>::get<my_message>();
|
|
431
|
+
* \endcode
|
|
432
|
+
*
|
|
433
|
+
* \subsection COLOR color
|
|
434
|
+
*
|
|
435
|
+
* Associating a `nvtx3::color` with an event allows controlling how the event
|
|
436
|
+
* is visualized in a tool such as Nsight Systems. This is a convenient way to
|
|
437
|
+
* visually differentiate among different events.
|
|
438
|
+
*
|
|
439
|
+
* \code{.cpp}
|
|
440
|
+
* // Define a color via rgb color values
|
|
441
|
+
* nvtx3::color c{nvtx3::rgb{127, 255, 0}};
|
|
442
|
+
* nvtx3::event_attributes attr{c};
|
|
443
|
+
*
|
|
444
|
+
* // rgb color values can be passed directly to an `event_attributes`
|
|
445
|
+
* nvtx3::event_attributes attr1{nvtx3::rgb{127, 255, 0}};
|
|
446
|
+
* \endcode
|
|
447
|
+
*
|
|
448
|
+
* \subsection CATEGORY category
|
|
449
|
+
*
|
|
450
|
+
* A `nvtx3::category` is simply an integer id that allows for fine-grain
|
|
451
|
+
* grouping of NVTX events. For example, one might use separate categories for
|
|
452
|
+
* IO, memory allocation, compute, etc.
|
|
453
|
+
*
|
|
454
|
+
* \code{.cpp}
|
|
455
|
+
* nvtx3::event_attributes{nvtx3::category{1}};
|
|
456
|
+
* \endcode
|
|
457
|
+
*
|
|
458
|
+
* \subsubsection NAMED_CATEGORIES Named Categories
|
|
459
|
+
*
|
|
460
|
+
* Associates a `name` string with a category `id` to help differentiate among
|
|
461
|
+
* categories.
|
|
462
|
+
*
|
|
463
|
+
* For any given category id `Id`, a `named_category{Id, "name"}` should only
|
|
464
|
+
* be constructed once and reused throughout an application. This can be done by
|
|
465
|
+
* either explicitly creating static `nvtx3::named_category` objects, or using
|
|
466
|
+
* the `nvtx3::named_category::get` construct on first use helper (recommended).
|
|
467
|
+
*
|
|
468
|
+
* Similar to \ref DOMAINS, `nvtx3::named_category::get` requires defining a
|
|
469
|
+
* custom tag type with static `name` and `id` members.
|
|
470
|
+
*
|
|
471
|
+
* \code{.cpp}
|
|
472
|
+
* // Explicitly constructed, static `named_category` in my_domain:
|
|
473
|
+
* static nvtx3::named_category_in<my_domain> static_category{42, "my category"};
|
|
474
|
+
*
|
|
475
|
+
* // Or use construct on first use:
|
|
476
|
+
* // Define a tag type with `name` and `id` members
|
|
477
|
+
* struct my_category {
|
|
478
|
+
* static constexpr char const* name{"my category"}; // category name
|
|
479
|
+
* static constexpr uint32_t id{42}; // category id
|
|
480
|
+
* };
|
|
481
|
+
*
|
|
482
|
+
* // Use construct on first use to name the category id `42`
|
|
483
|
+
* // with name "my category":
|
|
484
|
+
* auto& cat = nvtx3::named_category_in<my_domain>::get<my_category>();
|
|
485
|
+
*
|
|
486
|
+
* // `attr` is associated with category id `42`
|
|
487
|
+
* nvtx3::event_attributes attr{cat};
|
|
488
|
+
* \endcode
|
|
489
|
+
*
|
|
490
|
+
* \subsection PAYLOAD payload
|
|
491
|
+
*
|
|
492
|
+
* Allows associating a user-defined numerical value with an event.
|
|
493
|
+
*
|
|
494
|
+
* \code{.cpp}
|
|
495
|
+
* // Constructs a payload from the `int32_t` value 42
|
|
496
|
+
* nvtx3::event_attributes attr{nvtx3::payload{42}};
|
|
497
|
+
* \endcode
|
|
498
|
+
*
|
|
499
|
+
*
|
|
500
|
+
* \section EXAMPLE Example
|
|
501
|
+
*
|
|
502
|
+
* Putting it all together:
|
|
503
|
+
* \code{.cpp}
|
|
504
|
+
* // Define a custom domain tag type
|
|
505
|
+
* struct my_domain{ static constexpr char const* name{"my domain"}; };
|
|
506
|
+
*
|
|
507
|
+
* // Define a named category tag type
|
|
508
|
+
* struct my_category{
|
|
509
|
+
* static constexpr char const* name{"my category"};
|
|
510
|
+
* static constexpr uint32_t id{42};
|
|
511
|
+
* };
|
|
512
|
+
*
|
|
513
|
+
* // Define a registered string tag type
|
|
514
|
+
* struct my_message{ static constexpr char const* message{"my message"}; };
|
|
515
|
+
*
|
|
516
|
+
* // For convenience, use aliases for domain scoped objects
|
|
517
|
+
* using my_scoped_range = nvtx3::scoped_range_in<my_domain>;
|
|
518
|
+
* using my_registered_string = nvtx3::registered_string_in<my_domain>;
|
|
519
|
+
* using my_named_category = nvtx3::named_category_in<my_domain>;
|
|
520
|
+
*
|
|
521
|
+
* // Default values for all attributes
|
|
522
|
+
* nvtx3::event_attributes attr{};
|
|
523
|
+
* my_scoped_range r0{attr};
|
|
524
|
+
*
|
|
525
|
+
* // Custom (unregistered) message, and unnamed category
|
|
526
|
+
* nvtx3::event_attributes attr1{"message", nvtx3::category{2}};
|
|
527
|
+
* my_scoped_range r1{attr1};
|
|
528
|
+
*
|
|
529
|
+
* // Alternatively, pass arguments of `event_attributes` constructor directly
|
|
530
|
+
* // to `my_scoped_range`
|
|
531
|
+
* my_scoped_range r2{"message", nvtx3::category{2}};
|
|
532
|
+
*
|
|
533
|
+
* // construct on first use a registered string
|
|
534
|
+
* auto& msg = my_registered_string::get<my_message>();
|
|
535
|
+
*
|
|
536
|
+
* // construct on first use a named category
|
|
537
|
+
* auto& cat = my_named_category::get<my_category>();
|
|
538
|
+
*
|
|
539
|
+
* // Use registered string and named category with a custom payload
|
|
540
|
+
* my_scoped_range r3{msg, cat, nvtx3::payload{42}};
|
|
541
|
+
*
|
|
542
|
+
* // Any number of arguments in any order
|
|
543
|
+
* my_scoped_range r{nvtx3::rgb{127, 255, 0}, msg};
|
|
544
|
+
*
|
|
545
|
+
* \endcode
|
|
546
|
+
* \section MACROS Convenience Macros
|
|
547
|
+
*
|
|
548
|
+
* Oftentimes users want to quickly and easily add NVTX ranges to their library
|
|
549
|
+
* or application to aid in profiling and optimization.
|
|
550
|
+
*
|
|
551
|
+
* A convenient way to do this is to use the \ref NVTX3_FUNC_RANGE and
|
|
552
|
+
* \ref NVTX3_FUNC_RANGE_IN macros. These macros take care of constructing an
|
|
553
|
+
* `nvtx3::scoped_range_in` with the name of the enclosing function as the
|
|
554
|
+
* range's message.
|
|
555
|
+
*
|
|
556
|
+
* \code{.cpp}
|
|
557
|
+
* void some_function() {
|
|
558
|
+
* // Automatically generates an NVTX range for the duration of the function
|
|
559
|
+
* // using "some_function" as the event's message.
|
|
560
|
+
* NVTX3_FUNC_RANGE();
|
|
561
|
+
* }
|
|
562
|
+
* \endcode
|
|
563
|
+
*
|
|
564
|
+
*/
|
|
565
|
+
|
|
566
|
+
/* Temporary helper #defines, removed with #undef at end of header */
|
|
567
|
+
|
|
568
|
+
/* Some compilers do not correctly support SFINAE, which is used in this API
|
|
569
|
+
* to detect common usage errors and provide clearer error messages (by using
|
|
570
|
+
* static_assert) than the compiler would produce otherwise. These compilers
|
|
571
|
+
* will generate errors while compiling this file such as:
|
|
572
|
+
*
|
|
573
|
+
* error: 'name' is not a member of 'nvtx3::v1::domain::global'
|
|
574
|
+
*
|
|
575
|
+
* The following compiler versions are known to have this problem, and so are
|
|
576
|
+
* set by default to disable the SFINAE-based checks:
|
|
577
|
+
*
|
|
578
|
+
* - All MSVC versions prior to VS2017 Update 7 (15.7)
|
|
579
|
+
* - GCC 8.1-8.3 (the problem was fixed in GCC 8.4)
|
|
580
|
+
*
|
|
581
|
+
* If you find your compiler hits this problem, you can work around it by
|
|
582
|
+
* defining NVTX3_USE_CHECKED_OVERLOADS_FOR_GET to 0 before including this
|
|
583
|
+
* header, or you can add a check for your compiler version to this #if.
|
|
584
|
+
* Also, please report the issue on the NVTX GitHub page.
|
|
585
|
+
*/
|
|
586
|
+
#if !defined(NVTX3_USE_CHECKED_OVERLOADS_FOR_GET)
|
|
587
|
+
# if defined(_MSC_VER) && _MSC_VER < 1914 || defined(__GNUC__) && __GNUC__ == 8 && __GNUC_MINOR__ < 4
|
|
588
|
+
# define NVTX3_USE_CHECKED_OVERLOADS_FOR_GET 0
|
|
589
|
+
# else
|
|
590
|
+
# define NVTX3_USE_CHECKED_OVERLOADS_FOR_GET 1
|
|
591
|
+
# endif
|
|
592
|
+
# define NVTX3_USE_CHECKED_OVERLOADS_FOR_GET_DEFINED_HERE
|
|
593
|
+
#endif
|
|
594
|
+
|
|
595
|
+
/* Within this header, nvtx3::NVTX3_VERSION_NAMESPACE resolves to nvtx3::vX,
|
|
596
|
+
* where "X" is the major version number. */
|
|
597
|
+
#define NVTX3_CONCAT(A, B) A##B
|
|
598
|
+
#define NVTX3_NAMESPACE_FOR(VERSION) NVTX3_CONCAT(v, VERSION)
|
|
599
|
+
#define NVTX3_VERSION_NAMESPACE NVTX3_NAMESPACE_FOR(NVTX3_CPP_VERSION_MAJOR)
|
|
600
|
+
|
|
601
|
+
/* Avoid duplicating #if defined(NVTX3_INLINE_THIS_VERSION) for namespaces
|
|
602
|
+
* in each minor version by making a macro to use unconditionally, which
|
|
603
|
+
* resolves to "inline" or nothing as appropriate. */
|
|
604
|
+
#if defined(NVTX3_INLINE_THIS_VERSION)
|
|
605
|
+
# define NVTX3_INLINE_IF_REQUESTED inline
|
|
606
|
+
#else
|
|
607
|
+
# define NVTX3_INLINE_IF_REQUESTED
|
|
608
|
+
#endif
|
|
609
|
+
|
|
610
|
+
/* Enables the use of constexpr when support for C++14 constexpr is present.
|
|
611
|
+
*
|
|
612
|
+
* Initialization of a class member that is a union to a specific union member
|
|
613
|
+
* can only be done in the body of a constructor, not in a member initializer
|
|
614
|
+
* list. A constexpr constructor must have an empty body until C++14, so there
|
|
615
|
+
* is no way to make an initializer of a member union constexpr in C++11. This
|
|
616
|
+
* macro allows making functions constexpr in C++14 or newer, but non-constexpr
|
|
617
|
+
* in C++11 compilation. It is used here on constructors that initialize their
|
|
618
|
+
* member unions.
|
|
619
|
+
*/
|
|
620
|
+
#if __cpp_constexpr >= 201304L
|
|
621
|
+
# define NVTX3_CONSTEXPR_IF_CPP14 constexpr
|
|
622
|
+
#else
|
|
623
|
+
# define NVTX3_CONSTEXPR_IF_CPP14
|
|
624
|
+
#endif
|
|
625
|
+
|
|
626
|
+
// Macro wrappers for C++ attributes
|
|
627
|
+
#if !defined(__has_cpp_attribute)
|
|
628
|
+
# define __has_cpp_attribute(x) 0
|
|
629
|
+
#endif
|
|
630
|
+
#if __has_cpp_attribute(maybe_unused)
|
|
631
|
+
# define NVTX3_MAYBE_UNUSED [[maybe_unused]]
|
|
632
|
+
#else
|
|
633
|
+
# define NVTX3_MAYBE_UNUSED
|
|
634
|
+
#endif
|
|
635
|
+
#if __has_cpp_attribute(nodiscard)
|
|
636
|
+
# define NVTX3_NO_DISCARD [[nodiscard]]
|
|
637
|
+
#else
|
|
638
|
+
# define NVTX3_NO_DISCARD
|
|
639
|
+
#endif
|
|
640
|
+
|
|
641
|
+
/* Use a macro for static asserts, which defaults to static_assert, but that
|
|
642
|
+
* testing tools can replace with a logging function. For example:
|
|
643
|
+
* #define NVTX3_STATIC_ASSERT(c, m) \
|
|
644
|
+
* do { if (!(c)) printf("static_assert would fail: %s\n", m); } while (0)
|
|
645
|
+
*/
|
|
646
|
+
#if !defined(NVTX3_STATIC_ASSERT)
|
|
647
|
+
# define NVTX3_STATIC_ASSERT(condition, message) static_assert(condition, message)
|
|
648
|
+
# define NVTX3_STATIC_ASSERT_DEFINED_HERE
|
|
649
|
+
#endif
|
|
650
|
+
|
|
651
|
+
/* Implementation sections, enclosed in guard macros for each minor version */
|
|
652
|
+
|
|
653
|
+
#ifndef NVTX3_CPP_DEFINITIONS_V1_0
|
|
654
|
+
# define NVTX3_CPP_DEFINITIONS_V1_0
|
|
655
|
+
|
|
656
|
+
# include <cuda/std/__cccl/memory_wrapper.h>
|
|
657
|
+
|
|
658
|
+
# include <cstddef>
|
|
659
|
+
# include <string>
|
|
660
|
+
# include <type_traits>
|
|
661
|
+
# include <utility>
|
|
662
|
+
|
|
663
|
+
# include <nvtx3/nvToolsExt.h>
|
|
664
|
+
|
|
665
|
+
namespace nvtx3
|
|
666
|
+
{
|
|
667
|
+
|
|
668
|
+
NVTX3_INLINE_IF_REQUESTED namespace NVTX3_VERSION_NAMESPACE
|
|
669
|
+
{
|
|
670
|
+
namespace detail
|
|
671
|
+
{
|
|
672
|
+
|
|
673
|
+
template <typename Unused>
|
|
674
|
+
struct always_false : std::false_type
|
|
675
|
+
{};
|
|
676
|
+
|
|
677
|
+
template <typename T, typename = void>
|
|
678
|
+
struct has_name : std::false_type
|
|
679
|
+
{};
|
|
680
|
+
template <typename T>
|
|
681
|
+
struct has_name<T, decltype((void) T::name, void())> : std::true_type
|
|
682
|
+
{};
|
|
683
|
+
|
|
684
|
+
template <typename T, typename = void>
|
|
685
|
+
struct has_id : std::false_type
|
|
686
|
+
{};
|
|
687
|
+
template <typename T>
|
|
688
|
+
struct has_id<T, decltype((void) T::id, void())> : std::true_type
|
|
689
|
+
{};
|
|
690
|
+
|
|
691
|
+
template <typename T, typename = void>
|
|
692
|
+
struct has_message : std::false_type
|
|
693
|
+
{};
|
|
694
|
+
template <typename T>
|
|
695
|
+
struct has_message<T, decltype((void) T::message, void())> : std::true_type
|
|
696
|
+
{};
|
|
697
|
+
|
|
698
|
+
template <typename T, typename = void>
|
|
699
|
+
struct is_c_string : std::false_type
|
|
700
|
+
{};
|
|
701
|
+
template <typename T>
|
|
702
|
+
struct is_c_string<T,
|
|
703
|
+
typename std::enable_if<std::is_convertible<T, char const*>::value
|
|
704
|
+
|| std::is_convertible<T, wchar_t const*>::value>::type> : std::true_type
|
|
705
|
+
{};
|
|
706
|
+
|
|
707
|
+
template <typename T>
|
|
708
|
+
using is_uint32 = std::is_same<typename std::decay<T>::type, uint32_t>;
|
|
709
|
+
|
|
710
|
+
} // namespace detail
|
|
711
|
+
|
|
712
|
+
/**
|
|
713
|
+
* @brief `domain`s allow for grouping NVTX events into a single scope to
|
|
714
|
+
* differentiate them from events in other `domain`s.
|
|
715
|
+
*
|
|
716
|
+
* By default, all NVTX constructs are placed in the "global" NVTX domain.
|
|
717
|
+
*
|
|
718
|
+
* A custom `domain` may be used in order to differentiate a library's or
|
|
719
|
+
* application's NVTX events from other events.
|
|
720
|
+
*
|
|
721
|
+
* `domain`s are expected to be long-lived and unique to a library or
|
|
722
|
+
* application. As such, it is assumed a domain's name is known at compile
|
|
723
|
+
* time. Therefore, all NVTX constructs that can be associated with a domain
|
|
724
|
+
* require the domain to be specified via a *type* `D` passed as an
|
|
725
|
+
* explicit template parameter.
|
|
726
|
+
*
|
|
727
|
+
* The type `domain::global` may be used to indicate that the global NVTX
|
|
728
|
+
* domain should be used.
|
|
729
|
+
*
|
|
730
|
+
* None of the C++ NVTX constructs require the user to manually construct a
|
|
731
|
+
* `domain` object. Instead, if a custom domain is desired, the user is
|
|
732
|
+
* expected to define a type `D` that contains a member
|
|
733
|
+
* `D::name` which resolves to either a `char const*` or `wchar_t
|
|
734
|
+
* const*`. The value of `D::name` is used to name and uniquely
|
|
735
|
+
* identify the custom domain.
|
|
736
|
+
*
|
|
737
|
+
* Upon the first use of an NVTX construct associated with the type
|
|
738
|
+
* `D`, the "construct on first use" pattern is used to construct a
|
|
739
|
+
* function local static `domain` object. All future NVTX constructs
|
|
740
|
+
* associated with `D` will use a reference to the previously
|
|
741
|
+
* constructed `domain` object. See `domain::get`.
|
|
742
|
+
*
|
|
743
|
+
* Example:
|
|
744
|
+
* \code{.cpp}
|
|
745
|
+
* // The type `my_domain` defines a `name` member used to name and identify
|
|
746
|
+
* // the `domain` object identified by `my_domain`.
|
|
747
|
+
* struct my_domain{ static constexpr char const* name{"my_domain"}; };
|
|
748
|
+
*
|
|
749
|
+
* // The NVTX range `r` will be grouped with all other NVTX constructs
|
|
750
|
+
* // associated with `my_domain`.
|
|
751
|
+
* nvtx3::scoped_range_in<my_domain> r{};
|
|
752
|
+
*
|
|
753
|
+
* // An alias can be created for a `scoped_range_in` in the custom domain
|
|
754
|
+
* using my_scoped_range = nvtx3::scoped_range_in<my_domain>;
|
|
755
|
+
* my_scoped_range my_range{};
|
|
756
|
+
*
|
|
757
|
+
* // `domain::global` indicates that the global NVTX domain is used
|
|
758
|
+
* nvtx3::scoped_range_in<domain::global> r2{};
|
|
759
|
+
*
|
|
760
|
+
* // For convenience, `nvtx3::scoped_range` is an alias for a range in the
|
|
761
|
+
* // global domain
|
|
762
|
+
* nvtx3::scoped_range r3{};
|
|
763
|
+
* \endcode
|
|
764
|
+
*/
|
|
765
|
+
class domain
|
|
766
|
+
{
|
|
767
|
+
public:
|
|
768
|
+
domain(domain const&) = delete;
|
|
769
|
+
domain& operator=(domain const&) = delete;
|
|
770
|
+
domain(domain&&) = delete;
|
|
771
|
+
domain& operator=(domain&&) = delete;
|
|
772
|
+
|
|
773
|
+
/**
|
|
774
|
+
* @brief Tag type for the "global" NVTX domain.
|
|
775
|
+
*
|
|
776
|
+
* This type may be passed as a template argument to any function/class
|
|
777
|
+
* expecting a type to identify a domain to indicate that the global domain
|
|
778
|
+
* should be used.
|
|
779
|
+
*
|
|
780
|
+
* All NVTX events in the global domain across all libraries and
|
|
781
|
+
* applications will be grouped together.
|
|
782
|
+
*
|
|
783
|
+
*/
|
|
784
|
+
struct global
|
|
785
|
+
{};
|
|
786
|
+
|
|
787
|
+
# if NVTX3_USE_CHECKED_OVERLOADS_FOR_GET
|
|
788
|
+
/**
|
|
789
|
+
* @brief Returns reference to an instance of a function local static
|
|
790
|
+
* `domain` object.
|
|
791
|
+
*
|
|
792
|
+
* Uses the "construct on first use" idiom to safely ensure the `domain`
|
|
793
|
+
* object is initialized exactly once upon first invocation of
|
|
794
|
+
* `domain::get<D>()`. All following invocations will return a
|
|
795
|
+
* reference to the previously constructed `domain` object. See
|
|
796
|
+
* https://isocpp.org/wiki/faq/ctors#static-init-order-on-first-use
|
|
797
|
+
*
|
|
798
|
+
* None of the constructs in this header require the user to directly invoke
|
|
799
|
+
* `domain::get`. It is automatically invoked when constructing objects like
|
|
800
|
+
* a `scoped_range_in` or `named_category_in`. Advanced users may wish to use
|
|
801
|
+
* `domain::get` for the convenience of the "construct on first use" idiom
|
|
802
|
+
* when using domains with their own use of the NVTX C API.
|
|
803
|
+
*
|
|
804
|
+
* This function is thread-safe as of C++11. If two or more threads call
|
|
805
|
+
* `domain::get<D>` concurrently, exactly one of them is guaranteed
|
|
806
|
+
* to construct the `domain` object and the other(s) will receive a
|
|
807
|
+
* reference to the object after it is fully constructed.
|
|
808
|
+
*
|
|
809
|
+
* The domain's name is specified via the type `D` passed as an
|
|
810
|
+
* explicit template parameter. `D` is required to contain a
|
|
811
|
+
* member `D::name` that resolves to either a `char const*` or
|
|
812
|
+
* `wchar_t const*`. The value of `D::name` is used to name and
|
|
813
|
+
* uniquely identify the `domain`.
|
|
814
|
+
*
|
|
815
|
+
* Example:
|
|
816
|
+
* \code{.cpp}
|
|
817
|
+
* // The type `my_domain` defines a `name` member used to name and identify
|
|
818
|
+
* // the `domain` object identified by `my_domain`.
|
|
819
|
+
* struct my_domain{ static constexpr char const* name{"my domain"}; };
|
|
820
|
+
*
|
|
821
|
+
* auto& D1 = domain::get<my_domain>(); // First invocation constructs a
|
|
822
|
+
* // `domain` with the name "my domain"
|
|
823
|
+
*
|
|
824
|
+
* auto& D2 = domain::get<my_domain>(); // Quickly returns reference to
|
|
825
|
+
* // previously constructed `domain`.
|
|
826
|
+
* \endcode
|
|
827
|
+
*
|
|
828
|
+
* @tparam D Type that contains a `D::name` member used to
|
|
829
|
+
* name the `domain` object.
|
|
830
|
+
* @return Reference to the `domain` corresponding to the type `D`.
|
|
831
|
+
*/
|
|
832
|
+
template <typename D = global, typename std::enable_if<detail::is_c_string<decltype(D::name)>::value, int>::type = 0>
|
|
833
|
+
NVTX3_NO_DISCARD static domain const& get() noexcept
|
|
834
|
+
{
|
|
835
|
+
static domain const d(D::name);
|
|
836
|
+
return d;
|
|
837
|
+
}
|
|
838
|
+
|
|
839
|
+
/**
|
|
840
|
+
* @brief Overload of `domain::get` to provide a clear compile error when
|
|
841
|
+
* `D` has a `name` member that is not directly convertible to either
|
|
842
|
+
* `char const*` or `wchar_t const*`.
|
|
843
|
+
*/
|
|
844
|
+
template <typename D = global,
|
|
845
|
+
typename std::enable_if<!detail::is_c_string<decltype(D::name)>::value, int>::type = 0>
|
|
846
|
+
NVTX3_NO_DISCARD static domain const& get() noexcept
|
|
847
|
+
{
|
|
848
|
+
NVTX3_STATIC_ASSERT(detail::always_false<D>::value,
|
|
849
|
+
"Type used to identify an NVTX domain must contain a static constexpr member "
|
|
850
|
+
"called 'name' of type const char* or const wchar_t* -- 'name' member is not "
|
|
851
|
+
"convertible to either of those types");
|
|
852
|
+
static domain const unused;
|
|
853
|
+
return unused; // Function must compile for static_assert to be triggered
|
|
854
|
+
}
|
|
855
|
+
|
|
856
|
+
/**
|
|
857
|
+
* @brief Overload of `domain::get` to provide a clear compile error when
|
|
858
|
+
* `D` does not have a `name` member.
|
|
859
|
+
*/
|
|
860
|
+
template <typename D = global, typename std::enable_if<!detail::has_name<D>::value, int>::type = 0>
|
|
861
|
+
NVTX3_NO_DISCARD static domain const& get() noexcept
|
|
862
|
+
{
|
|
863
|
+
NVTX3_STATIC_ASSERT(detail::always_false<D>::value,
|
|
864
|
+
"Type used to identify an NVTX domain must contain a static constexpr member "
|
|
865
|
+
"called 'name' of type const char* or const wchar_t* -- 'name' member is missing");
|
|
866
|
+
static domain const unused;
|
|
867
|
+
return unused; // Function must compile for static_assert to be triggered
|
|
868
|
+
}
|
|
869
|
+
# else
|
|
870
|
+
template <typename D = global>
|
|
871
|
+
NVTX3_NO_DISCARD static domain const& get() noexcept
|
|
872
|
+
{
|
|
873
|
+
static domain const d(D::name);
|
|
874
|
+
return d;
|
|
875
|
+
}
|
|
876
|
+
# endif
|
|
877
|
+
|
|
878
|
+
/**
|
|
879
|
+
* @brief Conversion operator to `nvtxDomainHandle_t`.
|
|
880
|
+
*
|
|
881
|
+
* Allows transparently passing a domain object into an API expecting a
|
|
882
|
+
* native `nvtxDomainHandle_t` object.
|
|
883
|
+
*/
|
|
884
|
+
operator nvtxDomainHandle_t() const noexcept
|
|
885
|
+
{
|
|
886
|
+
return _domain;
|
|
887
|
+
}
|
|
888
|
+
|
|
889
|
+
private:
|
|
890
|
+
/**
|
|
891
|
+
* @brief Construct a new domain with the specified `name`.
|
|
892
|
+
*
|
|
893
|
+
* This constructor is private as it is intended that `domain` objects only
|
|
894
|
+
* be created through the `domain::get` function.
|
|
895
|
+
*
|
|
896
|
+
* @param name A unique name identifying the domain
|
|
897
|
+
*/
|
|
898
|
+
explicit domain(char const* name) noexcept
|
|
899
|
+
: _domain{nvtxDomainCreateA(name)}
|
|
900
|
+
{}
|
|
901
|
+
|
|
902
|
+
/**
|
|
903
|
+
* @brief Construct a new domain with the specified `name`.
|
|
904
|
+
*
|
|
905
|
+
* This constructor is private as it is intended that `domain` objects only
|
|
906
|
+
* be created through the `domain::get` function.
|
|
907
|
+
*
|
|
908
|
+
* @param name A unique name identifying the domain
|
|
909
|
+
*/
|
|
910
|
+
explicit domain(wchar_t const* name) noexcept
|
|
911
|
+
: _domain{nvtxDomainCreateW(name)}
|
|
912
|
+
{}
|
|
913
|
+
|
|
914
|
+
/**
|
|
915
|
+
* @brief Construct a new domain with the specified `name`.
|
|
916
|
+
*
|
|
917
|
+
* This constructor is private as it is intended that `domain` objects only
|
|
918
|
+
* be created through the `domain::get` function.
|
|
919
|
+
*
|
|
920
|
+
* @param name A unique name identifying the domain
|
|
921
|
+
*/
|
|
922
|
+
explicit domain(std::string const& name) noexcept
|
|
923
|
+
: domain{name.c_str()}
|
|
924
|
+
{}
|
|
925
|
+
|
|
926
|
+
/**
|
|
927
|
+
* @brief Construct a new domain with the specified `name`.
|
|
928
|
+
*
|
|
929
|
+
* This constructor is private as it is intended that `domain` objects only
|
|
930
|
+
* be created through the `domain::get` function.
|
|
931
|
+
*
|
|
932
|
+
* @param name A unique name identifying the domain
|
|
933
|
+
*/
|
|
934
|
+
explicit domain(std::wstring const& name) noexcept
|
|
935
|
+
: domain{name.c_str()}
|
|
936
|
+
{}
|
|
937
|
+
|
|
938
|
+
/**
|
|
939
|
+
* @brief Default constructor creates a `domain` representing the
|
|
940
|
+
* "global" NVTX domain.
|
|
941
|
+
*
|
|
942
|
+
* All events not associated with a custom `domain` are grouped in the
|
|
943
|
+
* "global" NVTX domain.
|
|
944
|
+
*
|
|
945
|
+
*/
|
|
946
|
+
constexpr domain() noexcept {}
|
|
947
|
+
|
|
948
|
+
/**
|
|
949
|
+
* @brief Intentionally avoid calling nvtxDomainDestroy on the `domain` object.
|
|
950
|
+
*
|
|
951
|
+
* No currently-available tools attempt to free domain resources when the
|
|
952
|
+
* nvtxDomainDestroy function is called, due to the thread-safety and
|
|
953
|
+
* efficiency challenges of freeing thread-local storage for other threads.
|
|
954
|
+
* Since libraries may be disallowed from introducing static destructors,
|
|
955
|
+
* and destroying the domain is likely to have no effect, the destructor
|
|
956
|
+
* for `domain` intentionally chooses to not destroy the domain.
|
|
957
|
+
*
|
|
958
|
+
* In a situation where domain destruction is necessary, either manually
|
|
959
|
+
* call nvtxDomainDestroy on the domain's handle, or make a class that
|
|
960
|
+
* derives from `domain` and calls nvtxDomainDestroy in its destructor.
|
|
961
|
+
*/
|
|
962
|
+
~domain() = default;
|
|
963
|
+
|
|
964
|
+
private:
|
|
965
|
+
nvtxDomainHandle_t const _domain{}; ///< The `domain`s NVTX handle
|
|
966
|
+
};
|
|
967
|
+
|
|
968
|
+
/**
|
|
969
|
+
* @brief Returns reference to the `domain` object that represents the global
|
|
970
|
+
* NVTX domain.
|
|
971
|
+
*
|
|
972
|
+
* This specialization for `domain::global` returns a default-constructed
|
|
973
|
+
* `domain` object for use when the "global" domain is desired.
|
|
974
|
+
*
|
|
975
|
+
* All NVTX events in the global domain across all libraries and applications
|
|
976
|
+
* will be grouped together.
|
|
977
|
+
*
|
|
978
|
+
* @return Reference to the `domain` corresponding to the global NVTX domain.
|
|
979
|
+
*
|
|
980
|
+
*/
|
|
981
|
+
template <>
|
|
982
|
+
NVTX3_NO_DISCARD inline domain const& domain::get<domain::global>() noexcept
|
|
983
|
+
{
|
|
984
|
+
static domain const d{};
|
|
985
|
+
return d;
|
|
986
|
+
}
|
|
987
|
+
|
|
988
|
+
/**
|
|
989
|
+
* @brief Indicates the values of the red, green, and blue color channels for
|
|
990
|
+
* an RGB color to use as an event attribute (assumes no transparency).
|
|
991
|
+
*
|
|
992
|
+
*/
|
|
993
|
+
struct rgb
|
|
994
|
+
{
|
|
995
|
+
/// Type used for component values
|
|
996
|
+
using component_type = uint8_t;
|
|
997
|
+
|
|
998
|
+
/**
|
|
999
|
+
* @brief Construct a rgb with red, green, and blue channels
|
|
1000
|
+
* specified by `red_`, `green_`, and `blue_`, respectively.
|
|
1001
|
+
*
|
|
1002
|
+
* Valid values are in the range `[0,255]`.
|
|
1003
|
+
*
|
|
1004
|
+
* @param red_ Value of the red channel
|
|
1005
|
+
* @param green_ Value of the green channel
|
|
1006
|
+
* @param blue_ Value of the blue channel
|
|
1007
|
+
*/
|
|
1008
|
+
constexpr rgb(component_type red_, component_type green_, component_type blue_) noexcept
|
|
1009
|
+
: red{red_}
|
|
1010
|
+
, green{green_}
|
|
1011
|
+
, blue{blue_}
|
|
1012
|
+
{}
|
|
1013
|
+
|
|
1014
|
+
component_type red{}; ///< Red channel value
|
|
1015
|
+
component_type green{}; ///< Green channel value
|
|
1016
|
+
component_type blue{}; ///< Blue channel value
|
|
1017
|
+
};
|
|
1018
|
+
|
|
1019
|
+
/**
|
|
1020
|
+
* @brief Indicates the value of the alpha, red, green, and blue color
|
|
1021
|
+
* channels for an ARGB color to use as an event attribute.
|
|
1022
|
+
*
|
|
1023
|
+
*/
|
|
1024
|
+
struct argb final : rgb
|
|
1025
|
+
{
|
|
1026
|
+
/**
|
|
1027
|
+
* @brief Construct an argb with alpha, red, green, and blue channels
|
|
1028
|
+
* specified by `alpha_`, `red_`, `green_`, and `blue_`, respectively.
|
|
1029
|
+
*
|
|
1030
|
+
* Valid values are in the range `[0,255]`.
|
|
1031
|
+
*
|
|
1032
|
+
* @param alpha_ Value of the alpha channel (opacity)
|
|
1033
|
+
* @param red_ Value of the red channel
|
|
1034
|
+
* @param green_ Value of the green channel
|
|
1035
|
+
* @param blue_ Value of the blue channel
|
|
1036
|
+
*
|
|
1037
|
+
*/
|
|
1038
|
+
constexpr argb(component_type alpha_, component_type red_, component_type green_, component_type blue_) noexcept
|
|
1039
|
+
: rgb{red_, green_, blue_}
|
|
1040
|
+
, alpha{alpha_}
|
|
1041
|
+
{}
|
|
1042
|
+
|
|
1043
|
+
component_type alpha{}; ///< Alpha channel value
|
|
1044
|
+
};
|
|
1045
|
+
|
|
1046
|
+
/**
|
|
1047
|
+
* @brief Represents a custom color that can be associated with an NVTX event
|
|
1048
|
+
* via its `event_attributes`.
|
|
1049
|
+
*
|
|
1050
|
+
* Specifying colors for NVTX events is a convenient way to visually
|
|
1051
|
+
* differentiate among different events in a visualization tool such as Nsight
|
|
1052
|
+
* Systems.
|
|
1053
|
+
*
|
|
1054
|
+
*/
|
|
1055
|
+
class color
|
|
1056
|
+
{
|
|
1057
|
+
public:
|
|
1058
|
+
/// Type used for the color's value
|
|
1059
|
+
using value_type = uint32_t;
|
|
1060
|
+
|
|
1061
|
+
/**
|
|
1062
|
+
* @brief Constructs a `color` using the value provided by `hex_code`.
|
|
1063
|
+
*
|
|
1064
|
+
* `hex_code` is expected to be a 4 byte argb hex code.
|
|
1065
|
+
*
|
|
1066
|
+
* The most significant byte indicates the value of the alpha channel
|
|
1067
|
+
* (opacity) (0-255)
|
|
1068
|
+
*
|
|
1069
|
+
* The next byte indicates the value of the red channel (0-255)
|
|
1070
|
+
*
|
|
1071
|
+
* The next byte indicates the value of the green channel (0-255)
|
|
1072
|
+
*
|
|
1073
|
+
* The least significant byte indicates the value of the blue channel
|
|
1074
|
+
* (0-255)
|
|
1075
|
+
*
|
|
1076
|
+
* @param hex_code The hex code used to construct the `color`
|
|
1077
|
+
*/
|
|
1078
|
+
constexpr explicit color(value_type hex_code) noexcept
|
|
1079
|
+
: _value{hex_code}
|
|
1080
|
+
{}
|
|
1081
|
+
|
|
1082
|
+
/**
|
|
1083
|
+
* @brief Construct a `color` using the alpha, red, green, blue components
|
|
1084
|
+
* in `argb`.
|
|
1085
|
+
*
|
|
1086
|
+
* @param argb_ The alpha, red, green, blue components of the desired `color`
|
|
1087
|
+
*/
|
|
1088
|
+
constexpr color(argb argb_) noexcept
|
|
1089
|
+
: color{from_bytes_msb_to_lsb(argb_.alpha, argb_.red, argb_.green, argb_.blue)}
|
|
1090
|
+
{}
|
|
1091
|
+
|
|
1092
|
+
/**
|
|
1093
|
+
* @brief Construct a `color` using the red, green, blue components in
|
|
1094
|
+
* `rgb`.
|
|
1095
|
+
*
|
|
1096
|
+
* Uses maximum value for the alpha channel (opacity) of the `color`.
|
|
1097
|
+
*
|
|
1098
|
+
* @param rgb_ The red, green, blue components of the desired `color`
|
|
1099
|
+
*/
|
|
1100
|
+
constexpr color(rgb rgb_) noexcept
|
|
1101
|
+
: color{from_bytes_msb_to_lsb(0xFF, rgb_.red, rgb_.green, rgb_.blue)}
|
|
1102
|
+
{}
|
|
1103
|
+
|
|
1104
|
+
/**
|
|
1105
|
+
* @brief Returns the `color`'s ARGB hex code
|
|
1106
|
+
*
|
|
1107
|
+
*/
|
|
1108
|
+
constexpr value_type get_value() const noexcept
|
|
1109
|
+
{
|
|
1110
|
+
return _value;
|
|
1111
|
+
}
|
|
1112
|
+
|
|
1113
|
+
/**
|
|
1114
|
+
* @brief Return the NVTX color type of the color.
|
|
1115
|
+
*
|
|
1116
|
+
*/
|
|
1117
|
+
constexpr nvtxColorType_t get_type() const noexcept
|
|
1118
|
+
{
|
|
1119
|
+
return _type;
|
|
1120
|
+
}
|
|
1121
|
+
|
|
1122
|
+
color() = delete;
|
|
1123
|
+
~color() = default;
|
|
1124
|
+
color(color const&) = default;
|
|
1125
|
+
color& operator=(color const&) = default;
|
|
1126
|
+
color(color&&) = default;
|
|
1127
|
+
color& operator=(color&&) = default;
|
|
1128
|
+
|
|
1129
|
+
private:
|
|
1130
|
+
/**
|
|
1131
|
+
* @brief Constructs an unsigned 4-byte integer from the component bytes in
|
|
1132
|
+
* most to least significant byte order.
|
|
1133
|
+
*
|
|
1134
|
+
*/
|
|
1135
|
+
constexpr static value_type
|
|
1136
|
+
from_bytes_msb_to_lsb(uint8_t byte3, uint8_t byte2, uint8_t byte1, uint8_t byte0) noexcept
|
|
1137
|
+
{
|
|
1138
|
+
return uint32_t{byte3} << 24 | uint32_t{byte2} << 16 | uint32_t{byte1} << 8 | uint32_t{byte0};
|
|
1139
|
+
}
|
|
1140
|
+
|
|
1141
|
+
value_type _value{}; ///< color's argb color code
|
|
1142
|
+
nvtxColorType_t _type{NVTX_COLOR_ARGB}; ///< NVTX color type code
|
|
1143
|
+
};
|
|
1144
|
+
|
|
1145
|
+
/**
|
|
1146
|
+
* @brief Object for intra-domain grouping of NVTX events.
|
|
1147
|
+
*
|
|
1148
|
+
* A `category` is simply an integer id that allows for fine-grain grouping of
|
|
1149
|
+
* NVTX events. For example, one might use separate categories for IO, memory
|
|
1150
|
+
* allocation, compute, etc.
|
|
1151
|
+
*
|
|
1152
|
+
* Example:
|
|
1153
|
+
* \code{.cpp}
|
|
1154
|
+
* nvtx3::category cat1{1};
|
|
1155
|
+
*
|
|
1156
|
+
* // Range `r1` belongs to the category identified by the value `1`.
|
|
1157
|
+
* nvtx3::scoped_range r1{cat1};
|
|
1158
|
+
*
|
|
1159
|
+
* // Range `r2` belongs to the same category as `r1`
|
|
1160
|
+
* nvtx3::scoped_range r2{nvtx3::category{1}};
|
|
1161
|
+
* \endcode
|
|
1162
|
+
*
|
|
1163
|
+
* To associate a name string with a category id, see `named_category`.
|
|
1164
|
+
*
|
|
1165
|
+
*/
|
|
1166
|
+
class category
|
|
1167
|
+
{
|
|
1168
|
+
public:
|
|
1169
|
+
/// Type used for `category`s integer id.
|
|
1170
|
+
using id_type = uint32_t;
|
|
1171
|
+
|
|
1172
|
+
/**
|
|
1173
|
+
* @brief Construct a `category` with the specified `id`.
|
|
1174
|
+
*
|
|
1175
|
+
* The `category` will be unnamed and identified only by its `id` value.
|
|
1176
|
+
*
|
|
1177
|
+
* All `category`s in a domain sharing the same `id` are equivalent.
|
|
1178
|
+
*
|
|
1179
|
+
* @param[in] id The `category`'s identifying value
|
|
1180
|
+
*/
|
|
1181
|
+
constexpr explicit category(id_type id) noexcept
|
|
1182
|
+
: id_{id}
|
|
1183
|
+
{}
|
|
1184
|
+
|
|
1185
|
+
/**
|
|
1186
|
+
* @brief Returns the id of the category.
|
|
1187
|
+
*
|
|
1188
|
+
*/
|
|
1189
|
+
constexpr id_type get_id() const noexcept
|
|
1190
|
+
{
|
|
1191
|
+
return id_;
|
|
1192
|
+
}
|
|
1193
|
+
|
|
1194
|
+
category() = delete;
|
|
1195
|
+
~category() = default;
|
|
1196
|
+
category(category const&) = default;
|
|
1197
|
+
category& operator=(category const&) = default;
|
|
1198
|
+
category(category&&) = default;
|
|
1199
|
+
category& operator=(category&&) = default;
|
|
1200
|
+
|
|
1201
|
+
private:
|
|
1202
|
+
id_type id_{}; ///< category's unique identifier
|
|
1203
|
+
};
|
|
1204
|
+
|
|
1205
|
+
/**
|
|
1206
|
+
* @brief A `category` with an associated name string.
|
|
1207
|
+
*
|
|
1208
|
+
* Associates a `name` string with a category `id` to help differentiate among
|
|
1209
|
+
* categories.
|
|
1210
|
+
*
|
|
1211
|
+
* For any given category id `Id`, a `named_category(Id, "name")` should only
|
|
1212
|
+
* be constructed once and reused throughout an application. This can be done
|
|
1213
|
+
* by either explicitly creating static `named_category` objects, or using the
|
|
1214
|
+
* `named_category::get` construct on first use helper (recommended).
|
|
1215
|
+
*
|
|
1216
|
+
* Creating two or more `named_category` objects with the same value for `id`
|
|
1217
|
+
* in the same domain results in undefined behavior.
|
|
1218
|
+
*
|
|
1219
|
+
* Similarly, behavior is undefined when a `named_category` and `category`
|
|
1220
|
+
* share the same value of `id`.
|
|
1221
|
+
*
|
|
1222
|
+
* Example:
|
|
1223
|
+
* \code{.cpp}
|
|
1224
|
+
* // Explicitly constructed, static `named_category` in global domain:
|
|
1225
|
+
* static nvtx3::named_category static_category{42, "my category"};
|
|
1226
|
+
*
|
|
1227
|
+
* // Range `r` associated with category id `42`
|
|
1228
|
+
* nvtx3::scoped_range r{static_category};
|
|
1229
|
+
*
|
|
1230
|
+
* // OR use construct on first use:
|
|
1231
|
+
*
|
|
1232
|
+
* // Define a type with `name` and `id` members
|
|
1233
|
+
* struct my_category {
|
|
1234
|
+
* static constexpr char const* name{"my category"}; // category name
|
|
1235
|
+
* static constexpr uint32_t id{42}; // category id
|
|
1236
|
+
* };
|
|
1237
|
+
*
|
|
1238
|
+
* // Use construct on first use to name the category id `42`
|
|
1239
|
+
* // with name "my category"
|
|
1240
|
+
* auto& cat = named_category_in<my_domain>::get<my_category>();
|
|
1241
|
+
*
|
|
1242
|
+
* // Range `r` associated with category id `42`
|
|
1243
|
+
* nvtx3::scoped_range r{cat};
|
|
1244
|
+
* \endcode
|
|
1245
|
+
*
|
|
1246
|
+
* `named_category_in<D>`'s association of a name to a category id is local to
|
|
1247
|
+
* the domain specified by the type `D`. An id may have a different name in
|
|
1248
|
+
* another domain.
|
|
1249
|
+
*
|
|
1250
|
+
* @tparam D Type containing `name` member used to identify the `domain` to
|
|
1251
|
+
* which the `named_category_in` belongs. Else, `domain::global` to indicate
|
|
1252
|
+
* that the global NVTX domain should be used.
|
|
1253
|
+
*/
|
|
1254
|
+
template <typename D = domain::global>
|
|
1255
|
+
class named_category_in final : public category
|
|
1256
|
+
{
|
|
1257
|
+
public:
|
|
1258
|
+
# if NVTX3_USE_CHECKED_OVERLOADS_FOR_GET
|
|
1259
|
+
/**
|
|
1260
|
+
* @brief Returns a global instance of a `named_category_in` as a
|
|
1261
|
+
* function-local static.
|
|
1262
|
+
*
|
|
1263
|
+
* Creates a `named_category_in<D>` with name and id specified by the contents
|
|
1264
|
+
* of a type `C`. `C::name` determines the name and `C::id` determines the
|
|
1265
|
+
* category id.
|
|
1266
|
+
*
|
|
1267
|
+
* This function is useful for constructing a named `category` exactly once
|
|
1268
|
+
* and reusing the same instance throughout an application.
|
|
1269
|
+
*
|
|
1270
|
+
* Example:
|
|
1271
|
+
* \code{.cpp}
|
|
1272
|
+
* // Define a type with `name` and `id` members
|
|
1273
|
+
* struct my_category {
|
|
1274
|
+
* static constexpr char const* name{"my category"}; // category name
|
|
1275
|
+
* static constexpr uint32_t id{42}; // category id
|
|
1276
|
+
* };
|
|
1277
|
+
*
|
|
1278
|
+
* // Use construct on first use to name the category id `42`
|
|
1279
|
+
* // with name "my category"
|
|
1280
|
+
* auto& cat = named_category_in<my_domain>::get<my_category>();
|
|
1281
|
+
*
|
|
1282
|
+
* // Range `r` associated with category id `42`
|
|
1283
|
+
* nvtx3::scoped_range r{cat};
|
|
1284
|
+
* \endcode
|
|
1285
|
+
*
|
|
1286
|
+
* Uses the "construct on first use" idiom to safely ensure the `category`
|
|
1287
|
+
* object is initialized exactly once. See
|
|
1288
|
+
* https://isocpp.org/wiki/faq/ctors#static-init-order-on-first-use
|
|
1289
|
+
*
|
|
1290
|
+
* @tparam C Type containing a member `C::name` that resolves to either a
|
|
1291
|
+
* `char const*` or `wchar_t const*` and `C::id`.
|
|
1292
|
+
*/
|
|
1293
|
+
template <
|
|
1294
|
+
typename C,
|
|
1295
|
+
typename std::enable_if<detail::is_c_string<decltype(C::name)>::value && detail::is_uint32<decltype(C::id)>::value,
|
|
1296
|
+
int>::type = 0>
|
|
1297
|
+
static named_category_in const& get() noexcept
|
|
1298
|
+
{
|
|
1299
|
+
static named_category_in const cat(C::id, C::name);
|
|
1300
|
+
return cat;
|
|
1301
|
+
}
|
|
1302
|
+
|
|
1303
|
+
/**
|
|
1304
|
+
* @brief Overload of `named_category_in::get` to provide a clear compile error
|
|
1305
|
+
* when `C` has the required `name` and `id` members, but they are not the
|
|
1306
|
+
* required types. `name` must be directly convertible to `char const*` or
|
|
1307
|
+
* `wchar_t const*`, and `id` must be `uint32_t`.
|
|
1308
|
+
*/
|
|
1309
|
+
template <typename C,
|
|
1310
|
+
typename std::enable_if<!detail::is_c_string<decltype(C::name)>::value
|
|
1311
|
+
|| !detail::is_uint32<decltype(C::id)>::value,
|
|
1312
|
+
int>::type = 0>
|
|
1313
|
+
NVTX3_NO_DISCARD static named_category_in const& get() noexcept
|
|
1314
|
+
{
|
|
1315
|
+
NVTX3_STATIC_ASSERT(detail::is_c_string<decltype(C::name)>::value,
|
|
1316
|
+
"Type used to name an NVTX category must contain a static constexpr member "
|
|
1317
|
+
"called 'name' of type const char* or const wchar_t* -- 'name' member is not "
|
|
1318
|
+
"convertible to either of those types");
|
|
1319
|
+
NVTX3_STATIC_ASSERT(detail::is_uint32<decltype(C::id)>::value,
|
|
1320
|
+
"Type used to name an NVTX category must contain a static constexpr member "
|
|
1321
|
+
"called 'id' of type uint32_t -- 'id' member is the wrong type");
|
|
1322
|
+
static named_category_in const unused;
|
|
1323
|
+
return unused; // Function must compile for static_assert to be triggered
|
|
1324
|
+
}
|
|
1325
|
+
|
|
1326
|
+
/**
|
|
1327
|
+
* @brief Overload of `named_category_in::get` to provide a clear compile error
|
|
1328
|
+
* when `C` does not have the required `name` and `id` members.
|
|
1329
|
+
*/
|
|
1330
|
+
template <typename C,
|
|
1331
|
+
typename std::enable_if<!detail::has_name<C>::value || !detail::has_id<C>::value, int>::type = 0>
|
|
1332
|
+
NVTX3_NO_DISCARD static named_category_in const& get() noexcept
|
|
1333
|
+
{
|
|
1334
|
+
NVTX3_STATIC_ASSERT(detail::has_name<C>::value,
|
|
1335
|
+
"Type used to name an NVTX category must contain a static constexpr member "
|
|
1336
|
+
"called 'name' of type const char* or const wchar_t* -- 'name' member is missing");
|
|
1337
|
+
NVTX3_STATIC_ASSERT(detail::has_id<C>::value,
|
|
1338
|
+
"Type used to name an NVTX category must contain a static constexpr member "
|
|
1339
|
+
"called 'id' of type uint32_t -- 'id' member is missing");
|
|
1340
|
+
static named_category_in const unused;
|
|
1341
|
+
return unused; // Function must compile for static_assert to be triggered
|
|
1342
|
+
}
|
|
1343
|
+
# else
|
|
1344
|
+
template <typename C>
|
|
1345
|
+
NVTX3_NO_DISCARD static named_category_in const& get() noexcept
|
|
1346
|
+
{
|
|
1347
|
+
static named_category_in const cat(C::id, C::name);
|
|
1348
|
+
return cat;
|
|
1349
|
+
}
|
|
1350
|
+
# endif
|
|
1351
|
+
|
|
1352
|
+
private:
|
|
1353
|
+
// Default constructor is only used internally for static_assert(false) cases.
|
|
1354
|
+
named_category_in() noexcept
|
|
1355
|
+
: category{0}
|
|
1356
|
+
{}
|
|
1357
|
+
|
|
1358
|
+
public:
|
|
1359
|
+
/**
|
|
1360
|
+
* @brief Construct a `named_category_in` with the specified `id` and `name`.
|
|
1361
|
+
*
|
|
1362
|
+
* The name `name` will be registered with `id`.
|
|
1363
|
+
*
|
|
1364
|
+
* Every unique value of `id` should only be named once.
|
|
1365
|
+
*
|
|
1366
|
+
* @param[in] id The category id to name
|
|
1367
|
+
* @param[in] name The name to associate with `id`
|
|
1368
|
+
*/
|
|
1369
|
+
named_category_in(id_type id, char const* name) noexcept
|
|
1370
|
+
: category{id}
|
|
1371
|
+
{
|
|
1372
|
+
# ifndef NVTX_DISABLE
|
|
1373
|
+
nvtxDomainNameCategoryA(domain::get<D>(), get_id(), name);
|
|
1374
|
+
# else
|
|
1375
|
+
(void) id;
|
|
1376
|
+
(void) name;
|
|
1377
|
+
# endif
|
|
1378
|
+
}
|
|
1379
|
+
|
|
1380
|
+
/**
|
|
1381
|
+
* @brief Construct a `named_category_in` with the specified `id` and `name`.
|
|
1382
|
+
*
|
|
1383
|
+
* The name `name` will be registered with `id`.
|
|
1384
|
+
*
|
|
1385
|
+
* Every unique value of `id` should only be named once.
|
|
1386
|
+
*
|
|
1387
|
+
* @param[in] id The category id to name
|
|
1388
|
+
* @param[in] name The name to associate with `id`
|
|
1389
|
+
*/
|
|
1390
|
+
named_category_in(id_type id, wchar_t const* name) noexcept
|
|
1391
|
+
: category{id}
|
|
1392
|
+
{
|
|
1393
|
+
# ifndef NVTX_DISABLE
|
|
1394
|
+
nvtxDomainNameCategoryW(domain::get<D>(), get_id(), name);
|
|
1395
|
+
# else
|
|
1396
|
+
(void) id;
|
|
1397
|
+
(void) name;
|
|
1398
|
+
# endif
|
|
1399
|
+
}
|
|
1400
|
+
};
|
|
1401
|
+
|
|
1402
|
+
/**
|
|
1403
|
+
* @brief Alias for a `named_category_in` in the global NVTX domain.
|
|
1404
|
+
*
|
|
1405
|
+
*/
|
|
1406
|
+
using named_category = named_category_in<domain::global>;
|
|
1407
|
+
|
|
1408
|
+
/**
|
|
1409
|
+
* @brief A message registered with NVTX.
|
|
1410
|
+
*
|
|
1411
|
+
* Normally, associating a `message` with an NVTX event requires copying the
|
|
1412
|
+
* contents of the message string. This may cause non-trivial overhead in
|
|
1413
|
+
* highly performance sensitive regions of code.
|
|
1414
|
+
*
|
|
1415
|
+
* Message registration is an optimization to lower the overhead of
|
|
1416
|
+
* associating a message with an NVTX event. Registering a message yields a
|
|
1417
|
+
* handle that is inexpensive to copy that may be used in place of a message
|
|
1418
|
+
* string.
|
|
1419
|
+
*
|
|
1420
|
+
* A particular message should only be registered once and the handle
|
|
1421
|
+
* reused throughout the rest of the application. This can be done by either
|
|
1422
|
+
* explicitly creating static `registered_string_in` objects, or using the
|
|
1423
|
+
* `registered_string_in::get` construct on first use helper (recommended).
|
|
1424
|
+
*
|
|
1425
|
+
* Example:
|
|
1426
|
+
* \code{.cpp}
|
|
1427
|
+
* // Explicitly constructed, static `registered_string` in my_domain:
|
|
1428
|
+
* static registered_string_in<my_domain> static_message{"message"};
|
|
1429
|
+
*
|
|
1430
|
+
* // "message" is associated with the range `r`
|
|
1431
|
+
* nvtx3::scoped_range r{static_message};
|
|
1432
|
+
*
|
|
1433
|
+
* // Or use construct on first use:
|
|
1434
|
+
*
|
|
1435
|
+
* // Define a type with a `message` member that defines the contents of the
|
|
1436
|
+
* // registered string
|
|
1437
|
+
* struct my_message{ static constexpr char const* message{ "my message" }; };
|
|
1438
|
+
*
|
|
1439
|
+
* // Uses construct on first use to register the contents of
|
|
1440
|
+
* // `my_message::message`
|
|
1441
|
+
* auto& msg = registered_string_in<my_domain>::get<my_message>();
|
|
1442
|
+
*
|
|
1443
|
+
* // "my message" is associated with the range `r`
|
|
1444
|
+
* nvtx3::scoped_range r{msg};
|
|
1445
|
+
* \endcode
|
|
1446
|
+
*
|
|
1447
|
+
* `registered_string_in`s are local to a particular domain specified via
|
|
1448
|
+
* the type `D`.
|
|
1449
|
+
*
|
|
1450
|
+
* @tparam D Type containing `name` member used to identify the `domain` to
|
|
1451
|
+
* which the `registered_string_in` belongs. Else, `domain::global` to indicate
|
|
1452
|
+
* that the global NVTX domain should be used.
|
|
1453
|
+
*/
|
|
1454
|
+
template <typename D = domain::global>
|
|
1455
|
+
class registered_string_in
|
|
1456
|
+
{
|
|
1457
|
+
public:
|
|
1458
|
+
# if NVTX3_USE_CHECKED_OVERLOADS_FOR_GET
|
|
1459
|
+
/**
|
|
1460
|
+
* @brief Returns a global instance of a `registered_string_in` as a function
|
|
1461
|
+
* local static.
|
|
1462
|
+
*
|
|
1463
|
+
* Provides a convenient way to register a message with NVTX without having
|
|
1464
|
+
* to explicitly register the message.
|
|
1465
|
+
*
|
|
1466
|
+
* Upon first invocation, constructs a `registered_string_in` whose contents
|
|
1467
|
+
* are specified by `M::message`.
|
|
1468
|
+
*
|
|
1469
|
+
* All future invocations will return a reference to the object constructed
|
|
1470
|
+
* in the first invocation.
|
|
1471
|
+
*
|
|
1472
|
+
* Example:
|
|
1473
|
+
* \code{.cpp}
|
|
1474
|
+
* // Define a type with a `message` member that defines the contents of the
|
|
1475
|
+
* // registered string
|
|
1476
|
+
* struct my_message{ static constexpr char const* message{ "my message" };
|
|
1477
|
+
* };
|
|
1478
|
+
*
|
|
1479
|
+
* // Uses construct on first use to register the contents of
|
|
1480
|
+
* // `my_message::message`
|
|
1481
|
+
* auto& msg = registered_string_in<my_domain>::get<my_message>();
|
|
1482
|
+
*
|
|
1483
|
+
* // "my message" is associated with the range `r`
|
|
1484
|
+
* nvtx3::scoped_range r{msg};
|
|
1485
|
+
* \endcode
|
|
1486
|
+
*
|
|
1487
|
+
* @tparam M Type required to contain a member `M::message` that
|
|
1488
|
+
* resolves to either a `char const*` or `wchar_t const*` used as the
|
|
1489
|
+
* registered string's contents.
|
|
1490
|
+
* @return Reference to a `registered_string_in` associated with the type `M`.
|
|
1491
|
+
*/
|
|
1492
|
+
template <typename M, typename std::enable_if<detail::is_c_string<decltype(M::message)>::value, int>::type = 0>
|
|
1493
|
+
NVTX3_NO_DISCARD static registered_string_in const& get() noexcept
|
|
1494
|
+
{
|
|
1495
|
+
static registered_string_in const regstr(M::message);
|
|
1496
|
+
return regstr;
|
|
1497
|
+
}
|
|
1498
|
+
|
|
1499
|
+
/**
|
|
1500
|
+
* @brief Overload of `registered_string_in::get` to provide a clear compile error
|
|
1501
|
+
* when `M` has a `message` member that is not directly convertible to either
|
|
1502
|
+
* `char const*` or `wchar_t const*`.
|
|
1503
|
+
*/
|
|
1504
|
+
template <typename M, typename std::enable_if<!detail::is_c_string<decltype(M::message)>::value, int>::type = 0>
|
|
1505
|
+
NVTX3_NO_DISCARD static registered_string_in const& get() noexcept
|
|
1506
|
+
{
|
|
1507
|
+
NVTX3_STATIC_ASSERT(detail::always_false<M>::value,
|
|
1508
|
+
"Type used to register an NVTX string must contain a static constexpr member "
|
|
1509
|
+
"called 'message' of type const char* or const wchar_t* -- 'message' member is "
|
|
1510
|
+
"not convertible to either of those types");
|
|
1511
|
+
static registered_string_in const unused;
|
|
1512
|
+
return unused; // Function must compile for static_assert to be triggered
|
|
1513
|
+
}
|
|
1514
|
+
|
|
1515
|
+
/**
|
|
1516
|
+
* @brief Overload of `registered_string_in::get` to provide a clear compile error when
|
|
1517
|
+
* `M` does not have a `message` member.
|
|
1518
|
+
*/
|
|
1519
|
+
template <typename M, typename std::enable_if<!detail::has_message<M>::value, int>::type = 0>
|
|
1520
|
+
NVTX3_NO_DISCARD static registered_string_in const& get() noexcept
|
|
1521
|
+
{
|
|
1522
|
+
NVTX3_STATIC_ASSERT(detail::always_false<M>::value,
|
|
1523
|
+
"Type used to register an NVTX string must contain a static constexpr member "
|
|
1524
|
+
"called 'message' of type const char* or const wchar_t* -- 'message' member "
|
|
1525
|
+
"is missing");
|
|
1526
|
+
static registered_string_in const unused;
|
|
1527
|
+
return unused; // Function must compile for static_assert to be triggered
|
|
1528
|
+
}
|
|
1529
|
+
# else
|
|
1530
|
+
template <typename M>
|
|
1531
|
+
NVTX3_NO_DISCARD static registered_string_in const& get() noexcept
|
|
1532
|
+
{
|
|
1533
|
+
static registered_string_in const regstr(M::message);
|
|
1534
|
+
return regstr;
|
|
1535
|
+
}
|
|
1536
|
+
# endif
|
|
1537
|
+
|
|
1538
|
+
/**
|
|
1539
|
+
* @brief Constructs a `registered_string_in` from the specified `msg` string.
|
|
1540
|
+
*
|
|
1541
|
+
* Registers `msg` with NVTX and associates a handle with the registered
|
|
1542
|
+
* message.
|
|
1543
|
+
*
|
|
1544
|
+
* A particular message should only be registered once and the handle
|
|
1545
|
+
* reused throughout the rest of the application.
|
|
1546
|
+
*
|
|
1547
|
+
* @param msg The contents of the message
|
|
1548
|
+
*/
|
|
1549
|
+
explicit registered_string_in(char const* msg) noexcept
|
|
1550
|
+
: handle_{nvtxDomainRegisterStringA(domain::get<D>(), msg)}
|
|
1551
|
+
{}
|
|
1552
|
+
|
|
1553
|
+
/**
|
|
1554
|
+
* @brief Constructs a `registered_string_in` from the specified `msg` string.
|
|
1555
|
+
*
|
|
1556
|
+
* Registers `msg` with NVTX and associates a handle with the registered
|
|
1557
|
+
* message.
|
|
1558
|
+
*
|
|
1559
|
+
* A particular message should only be registered once and the handle
|
|
1560
|
+
* reused throughout the rest of the application.
|
|
1561
|
+
*
|
|
1562
|
+
* @param msg The contents of the message
|
|
1563
|
+
*/
|
|
1564
|
+
explicit registered_string_in(std::string const& msg) noexcept
|
|
1565
|
+
: registered_string_in{msg.c_str()}
|
|
1566
|
+
{}
|
|
1567
|
+
|
|
1568
|
+
/**
|
|
1569
|
+
* @brief Constructs a `registered_string_in` from the specified `msg` string.
|
|
1570
|
+
*
|
|
1571
|
+
* Registers `msg` with NVTX and associates a handle with the registered
|
|
1572
|
+
* message.
|
|
1573
|
+
*
|
|
1574
|
+
* A particular message should only be registered once and the handle
|
|
1575
|
+
* reused throughout the rest of the application.
|
|
1576
|
+
*
|
|
1577
|
+
* @param msg The contents of the message
|
|
1578
|
+
*/
|
|
1579
|
+
explicit registered_string_in(wchar_t const* msg) noexcept
|
|
1580
|
+
: handle_{nvtxDomainRegisterStringW(domain::get<D>(), msg)}
|
|
1581
|
+
{}
|
|
1582
|
+
|
|
1583
|
+
/**
|
|
1584
|
+
* @brief Constructs a `registered_string_in` from the specified `msg` string.
|
|
1585
|
+
*
|
|
1586
|
+
* Registers `msg` with NVTX and associates a handle with the registered
|
|
1587
|
+
* message.
|
|
1588
|
+
*
|
|
1589
|
+
* A particular message should only be registered once and the handle
|
|
1590
|
+
* reused throughout the rest of the application.
|
|
1591
|
+
*
|
|
1592
|
+
* @param msg The contents of the message
|
|
1593
|
+
*/
|
|
1594
|
+
explicit registered_string_in(std::wstring const& msg) noexcept
|
|
1595
|
+
: registered_string_in{msg.c_str()}
|
|
1596
|
+
{}
|
|
1597
|
+
|
|
1598
|
+
/**
|
|
1599
|
+
* @brief Returns the registered string's handle
|
|
1600
|
+
*
|
|
1601
|
+
*/
|
|
1602
|
+
nvtxStringHandle_t get_handle() const noexcept
|
|
1603
|
+
{
|
|
1604
|
+
return handle_;
|
|
1605
|
+
}
|
|
1606
|
+
|
|
1607
|
+
private:
|
|
1608
|
+
// Default constructor is only used internally for static_assert(false) cases.
|
|
1609
|
+
registered_string_in() noexcept {}
|
|
1610
|
+
|
|
1611
|
+
public:
|
|
1612
|
+
~registered_string_in() = default;
|
|
1613
|
+
registered_string_in(registered_string_in const&) = default;
|
|
1614
|
+
registered_string_in& operator=(registered_string_in const&) = default;
|
|
1615
|
+
registered_string_in(registered_string_in&&) = default;
|
|
1616
|
+
registered_string_in& operator=(registered_string_in&&) = default;
|
|
1617
|
+
|
|
1618
|
+
private:
|
|
1619
|
+
nvtxStringHandle_t handle_{}; ///< The handle returned from
|
|
1620
|
+
///< registering the message with NVTX
|
|
1621
|
+
};
|
|
1622
|
+
|
|
1623
|
+
/**
|
|
1624
|
+
* @brief Alias for a `registered_string_in` in the global NVTX domain.
|
|
1625
|
+
*
|
|
1626
|
+
*/
|
|
1627
|
+
using registered_string = registered_string_in<domain::global>;
|
|
1628
|
+
|
|
1629
|
+
/**
|
|
1630
|
+
* @brief Allows associating a message string with an NVTX event via
|
|
1631
|
+
* its `EventAttribute`s.
|
|
1632
|
+
*
|
|
1633
|
+
* Associating a `message` with an NVTX event through its `event_attributes`
|
|
1634
|
+
* allows for naming events to easily differentiate them from other events.
|
|
1635
|
+
*
|
|
1636
|
+
* Every time an NVTX event is created with an associated `message`, the
|
|
1637
|
+
* contents of the message string must be copied. This may cause non-trivial
|
|
1638
|
+
* overhead in highly performance sensitive sections of code. Use of a
|
|
1639
|
+
* `nvtx3::registered_string` is recommended in these situations.
|
|
1640
|
+
*
|
|
1641
|
+
* Example:
|
|
1642
|
+
* \code{.cpp}
|
|
1643
|
+
* // Creates an `event_attributes` with message "message 0"
|
|
1644
|
+
* nvtx3::event_attributes attr0{nvtx3::message{"message 0"}};
|
|
1645
|
+
*
|
|
1646
|
+
* // `range0` contains message "message 0"
|
|
1647
|
+
* nvtx3::scoped_range range0{attr0};
|
|
1648
|
+
*
|
|
1649
|
+
* // `std::string` and string literals are implicitly assumed to be
|
|
1650
|
+
* // the contents of an `nvtx3::message`
|
|
1651
|
+
* // Creates an `event_attributes` with message "message 1"
|
|
1652
|
+
* nvtx3::event_attributes attr1{"message 1"};
|
|
1653
|
+
*
|
|
1654
|
+
* // `range1` contains message "message 1"
|
|
1655
|
+
* nvtx3::scoped_range range1{attr1};
|
|
1656
|
+
*
|
|
1657
|
+
* // `range2` contains message "message 2"
|
|
1658
|
+
* nvtx3::scoped_range range2{nvtx3::message{"message 2"}};
|
|
1659
|
+
*
|
|
1660
|
+
* // `std::string` and string literals are implicitly assumed to be
|
|
1661
|
+
* // the contents of an `nvtx3::message`
|
|
1662
|
+
* // `range3` contains message "message 3"
|
|
1663
|
+
* nvtx3::scoped_range range3{"message 3"};
|
|
1664
|
+
* \endcode
|
|
1665
|
+
*/
|
|
1666
|
+
class message
|
|
1667
|
+
{
|
|
1668
|
+
public:
|
|
1669
|
+
using value_type = nvtxMessageValue_t;
|
|
1670
|
+
|
|
1671
|
+
/**
|
|
1672
|
+
* @brief Construct a `message` whose contents are specified by `msg`.
|
|
1673
|
+
*
|
|
1674
|
+
* @param msg The contents of the message
|
|
1675
|
+
*/
|
|
1676
|
+
NVTX3_CONSTEXPR_IF_CPP14 message(char const* msg) noexcept
|
|
1677
|
+
: type_{NVTX_MESSAGE_TYPE_ASCII}
|
|
1678
|
+
{
|
|
1679
|
+
value_.ascii = msg;
|
|
1680
|
+
}
|
|
1681
|
+
|
|
1682
|
+
/**
|
|
1683
|
+
* @brief Construct a `message` whose contents are specified by `msg`.
|
|
1684
|
+
*
|
|
1685
|
+
* @param msg The contents of the message
|
|
1686
|
+
*/
|
|
1687
|
+
message(std::string const& msg) noexcept
|
|
1688
|
+
: message{msg.c_str()}
|
|
1689
|
+
{}
|
|
1690
|
+
|
|
1691
|
+
/**
|
|
1692
|
+
* @brief Disallow construction for `std::string` r-value
|
|
1693
|
+
*
|
|
1694
|
+
* `message` is a non-owning type and therefore cannot take ownership of an
|
|
1695
|
+
* r-value. Therefore, constructing from an r-value is disallowed to prevent
|
|
1696
|
+
* a dangling pointer.
|
|
1697
|
+
*
|
|
1698
|
+
*/
|
|
1699
|
+
message(std::string&&) = delete;
|
|
1700
|
+
|
|
1701
|
+
/**
|
|
1702
|
+
* @brief Construct a `message` whose contents are specified by `msg`.
|
|
1703
|
+
*
|
|
1704
|
+
* @param msg The contents of the message
|
|
1705
|
+
*/
|
|
1706
|
+
NVTX3_CONSTEXPR_IF_CPP14 message(wchar_t const* msg) noexcept
|
|
1707
|
+
: type_{NVTX_MESSAGE_TYPE_UNICODE}
|
|
1708
|
+
{
|
|
1709
|
+
value_.unicode = msg;
|
|
1710
|
+
}
|
|
1711
|
+
|
|
1712
|
+
/**
|
|
1713
|
+
* @brief Construct a `message` whose contents are specified by `msg`.
|
|
1714
|
+
*
|
|
1715
|
+
* @param msg The contents of the message
|
|
1716
|
+
*/
|
|
1717
|
+
message(std::wstring const& msg) noexcept
|
|
1718
|
+
: message{msg.c_str()}
|
|
1719
|
+
{}
|
|
1720
|
+
|
|
1721
|
+
/**
|
|
1722
|
+
* @brief Disallow construction for `std::wstring` r-value
|
|
1723
|
+
*
|
|
1724
|
+
* `message` is a non-owning type and therefore cannot take ownership of an
|
|
1725
|
+
* r-value. Therefore, constructing from an r-value is disallowed to prevent
|
|
1726
|
+
* a dangling pointer.
|
|
1727
|
+
*
|
|
1728
|
+
*/
|
|
1729
|
+
message(std::wstring&&) = delete;
|
|
1730
|
+
|
|
1731
|
+
/**
|
|
1732
|
+
* @brief Construct a `message` from a `registered_string_in`.
|
|
1733
|
+
*
|
|
1734
|
+
* @tparam D Type containing `name` member used to identify the `domain`
|
|
1735
|
+
* to which the `registered_string_in` belongs. Else, `domain::global` to
|
|
1736
|
+
* indicate that the global NVTX domain should be used.
|
|
1737
|
+
* @param msg The message that has already been registered with NVTX.
|
|
1738
|
+
*/
|
|
1739
|
+
template <typename D>
|
|
1740
|
+
NVTX3_CONSTEXPR_IF_CPP14 message(registered_string_in<D> const& msg) noexcept
|
|
1741
|
+
: type_{NVTX_MESSAGE_TYPE_REGISTERED}
|
|
1742
|
+
{
|
|
1743
|
+
value_.registered = msg.get_handle();
|
|
1744
|
+
}
|
|
1745
|
+
|
|
1746
|
+
/**
|
|
1747
|
+
* @brief Construct a `message` from NVTX C API type and value.
|
|
1748
|
+
*
|
|
1749
|
+
* @param type nvtxMessageType_t enum value indicating type of the message
|
|
1750
|
+
* @param value nvtxMessageValue_t union containing message
|
|
1751
|
+
*/
|
|
1752
|
+
constexpr message(nvtxMessageType_t const& type, nvtxMessageValue_t const& value) noexcept
|
|
1753
|
+
: type_{type}
|
|
1754
|
+
, value_(value)
|
|
1755
|
+
{}
|
|
1756
|
+
|
|
1757
|
+
/**
|
|
1758
|
+
* @brief Construct a `message` from NVTX C API registered string handle.
|
|
1759
|
+
*
|
|
1760
|
+
* @param handle nvtxStringHandle_t value of registered string handle
|
|
1761
|
+
*/
|
|
1762
|
+
NVTX3_CONSTEXPR_IF_CPP14 message(nvtxStringHandle_t handle) noexcept
|
|
1763
|
+
: type_{NVTX_MESSAGE_TYPE_REGISTERED}
|
|
1764
|
+
{
|
|
1765
|
+
value_.registered = handle;
|
|
1766
|
+
}
|
|
1767
|
+
|
|
1768
|
+
/**
|
|
1769
|
+
* @brief Return the union holding the value of the message.
|
|
1770
|
+
*
|
|
1771
|
+
*/
|
|
1772
|
+
constexpr value_type get_value() const noexcept
|
|
1773
|
+
{
|
|
1774
|
+
return value_;
|
|
1775
|
+
}
|
|
1776
|
+
|
|
1777
|
+
/**
|
|
1778
|
+
* @brief Return the type information about the value the union holds.
|
|
1779
|
+
*
|
|
1780
|
+
*/
|
|
1781
|
+
constexpr nvtxMessageType_t get_type() const noexcept
|
|
1782
|
+
{
|
|
1783
|
+
return type_;
|
|
1784
|
+
}
|
|
1785
|
+
|
|
1786
|
+
private:
|
|
1787
|
+
nvtxMessageType_t type_{}; ///< message type
|
|
1788
|
+
nvtxMessageValue_t value_{}; ///< message contents
|
|
1789
|
+
};
|
|
1790
|
+
|
|
1791
|
+
/**
|
|
1792
|
+
* @brief A numerical value that can be associated with an NVTX event via
|
|
1793
|
+
* its `event_attributes`.
|
|
1794
|
+
*
|
|
1795
|
+
* Example:
|
|
1796
|
+
* \code{.cpp}
|
|
1797
|
+
* // Constructs a payload from the int32_t value 42
|
|
1798
|
+
* nvtx3::event_attributes attr{nvtx3::payload{42}};
|
|
1799
|
+
*
|
|
1800
|
+
* // `range0` will have an int32_t payload of 42
|
|
1801
|
+
* nvtx3::scoped_range range0{attr};
|
|
1802
|
+
*
|
|
1803
|
+
* // range1 has double payload of 3.14
|
|
1804
|
+
* nvtx3::scoped_range range1{nvtx3::payload{3.14}};
|
|
1805
|
+
* \endcode
|
|
1806
|
+
*/
|
|
1807
|
+
class payload
|
|
1808
|
+
{
|
|
1809
|
+
public:
|
|
1810
|
+
using value_type = typename nvtxEventAttributes_v2::payload_t;
|
|
1811
|
+
|
|
1812
|
+
/**
|
|
1813
|
+
* @brief Construct a `payload` from a signed, 8 byte integer.
|
|
1814
|
+
*
|
|
1815
|
+
* @param value Value to use as contents of the payload
|
|
1816
|
+
*/
|
|
1817
|
+
NVTX3_CONSTEXPR_IF_CPP14 explicit payload(int64_t value) noexcept
|
|
1818
|
+
: type_{NVTX_PAYLOAD_TYPE_INT64}
|
|
1819
|
+
, value_{}
|
|
1820
|
+
{
|
|
1821
|
+
value_.llValue = value;
|
|
1822
|
+
}
|
|
1823
|
+
|
|
1824
|
+
/**
|
|
1825
|
+
* @brief Construct a `payload` from a signed, 4 byte integer.
|
|
1826
|
+
*
|
|
1827
|
+
* @param value Value to use as contents of the payload
|
|
1828
|
+
*/
|
|
1829
|
+
NVTX3_CONSTEXPR_IF_CPP14 explicit payload(int32_t value) noexcept
|
|
1830
|
+
: type_{NVTX_PAYLOAD_TYPE_INT32}
|
|
1831
|
+
, value_{}
|
|
1832
|
+
{
|
|
1833
|
+
value_.iValue = value;
|
|
1834
|
+
}
|
|
1835
|
+
|
|
1836
|
+
/**
|
|
1837
|
+
* @brief Construct a `payload` from an unsigned, 8 byte integer.
|
|
1838
|
+
*
|
|
1839
|
+
* @param value Value to use as contents of the payload
|
|
1840
|
+
*/
|
|
1841
|
+
NVTX3_CONSTEXPR_IF_CPP14 explicit payload(uint64_t value) noexcept
|
|
1842
|
+
: type_{NVTX_PAYLOAD_TYPE_UNSIGNED_INT64}
|
|
1843
|
+
, value_{}
|
|
1844
|
+
{
|
|
1845
|
+
value_.ullValue = value;
|
|
1846
|
+
}
|
|
1847
|
+
|
|
1848
|
+
/**
|
|
1849
|
+
* @brief Construct a `payload` from an unsigned, 4 byte integer.
|
|
1850
|
+
*
|
|
1851
|
+
* @param value Value to use as contents of the payload
|
|
1852
|
+
*/
|
|
1853
|
+
NVTX3_CONSTEXPR_IF_CPP14 explicit payload(uint32_t value) noexcept
|
|
1854
|
+
: type_{NVTX_PAYLOAD_TYPE_UNSIGNED_INT32}
|
|
1855
|
+
, value_{}
|
|
1856
|
+
{
|
|
1857
|
+
value_.uiValue = value;
|
|
1858
|
+
}
|
|
1859
|
+
|
|
1860
|
+
/**
|
|
1861
|
+
* @brief Construct a `payload` from a single-precision floating point
|
|
1862
|
+
* value.
|
|
1863
|
+
*
|
|
1864
|
+
* @param value Value to use as contents of the payload
|
|
1865
|
+
*/
|
|
1866
|
+
NVTX3_CONSTEXPR_IF_CPP14 explicit payload(float value) noexcept
|
|
1867
|
+
: type_{NVTX_PAYLOAD_TYPE_FLOAT}
|
|
1868
|
+
, value_{}
|
|
1869
|
+
{
|
|
1870
|
+
value_.fValue = value;
|
|
1871
|
+
}
|
|
1872
|
+
|
|
1873
|
+
/**
|
|
1874
|
+
* @brief Construct a `payload` from a double-precision floating point
|
|
1875
|
+
* value.
|
|
1876
|
+
*
|
|
1877
|
+
* @param value Value to use as contents of the payload
|
|
1878
|
+
*/
|
|
1879
|
+
NVTX3_CONSTEXPR_IF_CPP14 explicit payload(double value) noexcept
|
|
1880
|
+
: type_{NVTX_PAYLOAD_TYPE_DOUBLE}
|
|
1881
|
+
, value_{}
|
|
1882
|
+
{
|
|
1883
|
+
value_.dValue = value;
|
|
1884
|
+
}
|
|
1885
|
+
|
|
1886
|
+
/**
|
|
1887
|
+
* @brief Construct a `payload` from NVTX C API type and value.
|
|
1888
|
+
*
|
|
1889
|
+
* @param type nvtxPayloadType_t enum value indicating type of the payload
|
|
1890
|
+
* @param value nvtxEventAttributes_t::payload_t union containing payload
|
|
1891
|
+
*/
|
|
1892
|
+
constexpr payload(nvtxPayloadType_t const& type, value_type const& value) noexcept
|
|
1893
|
+
: type_{type}
|
|
1894
|
+
, value_(value)
|
|
1895
|
+
{}
|
|
1896
|
+
|
|
1897
|
+
/**
|
|
1898
|
+
* @brief Return the union holding the value of the payload
|
|
1899
|
+
*
|
|
1900
|
+
*/
|
|
1901
|
+
constexpr value_type get_value() const noexcept
|
|
1902
|
+
{
|
|
1903
|
+
return value_;
|
|
1904
|
+
}
|
|
1905
|
+
|
|
1906
|
+
/**
|
|
1907
|
+
* @brief Return the information about the type the union holds.
|
|
1908
|
+
*
|
|
1909
|
+
*/
|
|
1910
|
+
constexpr nvtxPayloadType_t get_type() const noexcept
|
|
1911
|
+
{
|
|
1912
|
+
return type_;
|
|
1913
|
+
}
|
|
1914
|
+
|
|
1915
|
+
private:
|
|
1916
|
+
nvtxPayloadType_t type_; ///< Type of the payload value
|
|
1917
|
+
value_type value_; ///< Union holding the payload value
|
|
1918
|
+
};
|
|
1919
|
+
|
|
1920
|
+
/**
|
|
1921
|
+
* @brief Describes the attributes of a NVTX event.
|
|
1922
|
+
*
|
|
1923
|
+
* NVTX events can be customized via four "attributes":
|
|
1924
|
+
*
|
|
1925
|
+
* - color: color used to visualize the event in tools such as Nsight
|
|
1926
|
+
* Systems. See `color`.
|
|
1927
|
+
* - message: Custom message string. See `message`.
|
|
1928
|
+
* - payload: User-defined numerical value. See `payload`.
|
|
1929
|
+
* - category: Intra-domain grouping. See `category`.
|
|
1930
|
+
*
|
|
1931
|
+
* These component attributes are specified via an `event_attributes` object.
|
|
1932
|
+
* See `nvtx3::color`, `nvtx3::message`, `nvtx3::payload`, and
|
|
1933
|
+
* `nvtx3::category` for how these individual attributes are constructed.
|
|
1934
|
+
*
|
|
1935
|
+
* While it is possible to specify all four attributes, it is common to want
|
|
1936
|
+
* to only specify a subset of attributes and use default values for the
|
|
1937
|
+
* others. For convenience, `event_attributes` can be constructed from any
|
|
1938
|
+
* number of attribute components in any order.
|
|
1939
|
+
*
|
|
1940
|
+
* Example:
|
|
1941
|
+
* \code{.cpp}
|
|
1942
|
+
* // Set message, same as using nvtx3::message{"message"}
|
|
1943
|
+
* event_attributes attr{"message"};
|
|
1944
|
+
*
|
|
1945
|
+
* // Set message and color
|
|
1946
|
+
* event_attributes attr{"message", nvtx3::rgb{127, 255, 0}};
|
|
1947
|
+
*
|
|
1948
|
+
* // Set message, color, payload, category
|
|
1949
|
+
* event_attributes attr{"message",
|
|
1950
|
+
* nvtx3::rgb{127, 255, 0},
|
|
1951
|
+
* nvtx3::payload{42},
|
|
1952
|
+
* nvtx3::category{1}};
|
|
1953
|
+
*
|
|
1954
|
+
* // Same as above -- can use any order of arguments
|
|
1955
|
+
* event_attributes attr{nvtx3::payload{42},
|
|
1956
|
+
* nvtx3::category{1},
|
|
1957
|
+
* "message",
|
|
1958
|
+
* nvtx3::rgb{127, 255, 0}};
|
|
1959
|
+
*
|
|
1960
|
+
* // Multiple arguments of the same type are allowed, but only the first is
|
|
1961
|
+
* // used -- in this example, payload is set to 42:
|
|
1962
|
+
* event_attributes attr{ nvtx3::payload{42}, nvtx3::payload{7} };
|
|
1963
|
+
*
|
|
1964
|
+
* // Range `r` will be customized according to the attributes in `attr`
|
|
1965
|
+
* nvtx3::scoped_range r{attr};
|
|
1966
|
+
*
|
|
1967
|
+
* // For convenience, `event_attributes` constructor arguments may be passed
|
|
1968
|
+
* // to the `scoped_range_in` constructor -- they are forwarded to the
|
|
1969
|
+
* // `event_attributes` constructor
|
|
1970
|
+
* nvtx3::scoped_range r{nvtx3::payload{42}, nvtx3::category{1}, "message"};
|
|
1971
|
+
*
|
|
1972
|
+
* // Using the nvtx3 namespace in a local scope makes the syntax more succinct:
|
|
1973
|
+
* using namespace nvtx3;
|
|
1974
|
+
* scoped_range r{payload{42}, category{1}, "message"};
|
|
1975
|
+
* \endcode
|
|
1976
|
+
*
|
|
1977
|
+
*/
|
|
1978
|
+
class event_attributes
|
|
1979
|
+
{
|
|
1980
|
+
public:
|
|
1981
|
+
using value_type = nvtxEventAttributes_t;
|
|
1982
|
+
|
|
1983
|
+
/**
|
|
1984
|
+
* @brief Default constructor creates an `event_attributes` with no
|
|
1985
|
+
* category, color, payload, nor message.
|
|
1986
|
+
*/
|
|
1987
|
+
constexpr event_attributes() noexcept
|
|
1988
|
+
: attributes_{
|
|
1989
|
+
NVTX_VERSION, // version
|
|
1990
|
+
sizeof(nvtxEventAttributes_t), // size
|
|
1991
|
+
0, // category
|
|
1992
|
+
NVTX_COLOR_UNKNOWN, // color type
|
|
1993
|
+
0, // color value
|
|
1994
|
+
NVTX_PAYLOAD_UNKNOWN, // payload type
|
|
1995
|
+
0, // reserved 4B
|
|
1996
|
+
{0}, // payload value (union)
|
|
1997
|
+
NVTX_MESSAGE_UNKNOWN, // message type
|
|
1998
|
+
{0} // message value (union)
|
|
1999
|
+
}
|
|
2000
|
+
{}
|
|
2001
|
+
|
|
2002
|
+
/**
|
|
2003
|
+
* @brief Variadic constructor where the first argument is a `category`.
|
|
2004
|
+
*
|
|
2005
|
+
* Sets the value of the `EventAttribute`s category based on `c` and
|
|
2006
|
+
* forwards the remaining variadic parameter pack to the next constructor.
|
|
2007
|
+
*
|
|
2008
|
+
*/
|
|
2009
|
+
template <typename... Args>
|
|
2010
|
+
NVTX3_CONSTEXPR_IF_CPP14 explicit event_attributes(category const& c, Args const&... args) noexcept
|
|
2011
|
+
: event_attributes(args...)
|
|
2012
|
+
{
|
|
2013
|
+
attributes_.category = c.get_id();
|
|
2014
|
+
}
|
|
2015
|
+
|
|
2016
|
+
/**
|
|
2017
|
+
* @brief Variadic constructor where the first argument is a `color`.
|
|
2018
|
+
*
|
|
2019
|
+
* Sets the value of the `EventAttribute`s color based on `c` and forwards
|
|
2020
|
+
* the remaining variadic parameter pack to the next constructor.
|
|
2021
|
+
*
|
|
2022
|
+
*/
|
|
2023
|
+
template <typename... Args>
|
|
2024
|
+
NVTX3_CONSTEXPR_IF_CPP14 explicit event_attributes(color const& c, Args const&... args) noexcept
|
|
2025
|
+
: event_attributes(args...)
|
|
2026
|
+
{
|
|
2027
|
+
attributes_.color = c.get_value();
|
|
2028
|
+
attributes_.colorType = c.get_type();
|
|
2029
|
+
}
|
|
2030
|
+
|
|
2031
|
+
/**
|
|
2032
|
+
* @brief Variadic constructor where the first argument is a `payload`.
|
|
2033
|
+
*
|
|
2034
|
+
* Sets the value of the `EventAttribute`s payload based on `p` and forwards
|
|
2035
|
+
* the remaining variadic parameter pack to the next constructor.
|
|
2036
|
+
*
|
|
2037
|
+
*/
|
|
2038
|
+
template <typename... Args>
|
|
2039
|
+
NVTX3_CONSTEXPR_IF_CPP14 explicit event_attributes(payload const& p, Args const&... args) noexcept
|
|
2040
|
+
: event_attributes(args...)
|
|
2041
|
+
{
|
|
2042
|
+
attributes_.payload = p.get_value();
|
|
2043
|
+
attributes_.payloadType = p.get_type();
|
|
2044
|
+
}
|
|
2045
|
+
|
|
2046
|
+
/**
|
|
2047
|
+
* @brief Variadic constructor where the first argument is a `message`.
|
|
2048
|
+
*
|
|
2049
|
+
* Sets the value of the `EventAttribute`s message based on `m` and forwards
|
|
2050
|
+
* the remaining variadic parameter pack to the next constructor.
|
|
2051
|
+
*
|
|
2052
|
+
*/
|
|
2053
|
+
template <typename... Args>
|
|
2054
|
+
NVTX3_CONSTEXPR_IF_CPP14 explicit event_attributes(message const& m, Args const&... args) noexcept
|
|
2055
|
+
: event_attributes(args...)
|
|
2056
|
+
{
|
|
2057
|
+
attributes_.message = m.get_value();
|
|
2058
|
+
attributes_.messageType = m.get_type();
|
|
2059
|
+
}
|
|
2060
|
+
|
|
2061
|
+
~event_attributes() = default;
|
|
2062
|
+
event_attributes(event_attributes const&) = default;
|
|
2063
|
+
event_attributes& operator=(event_attributes const&) = default;
|
|
2064
|
+
event_attributes(event_attributes&&) = default;
|
|
2065
|
+
event_attributes& operator=(event_attributes&&) = default;
|
|
2066
|
+
|
|
2067
|
+
/**
|
|
2068
|
+
* @brief Get raw pointer to underlying NVTX attributes object.
|
|
2069
|
+
*
|
|
2070
|
+
*/
|
|
2071
|
+
constexpr value_type const* get() const noexcept
|
|
2072
|
+
{
|
|
2073
|
+
return &attributes_;
|
|
2074
|
+
}
|
|
2075
|
+
|
|
2076
|
+
private:
|
|
2077
|
+
value_type attributes_{}; ///< The NVTX attributes structure
|
|
2078
|
+
};
|
|
2079
|
+
|
|
2080
|
+
/**
|
|
2081
|
+
* @brief A RAII object for creating a NVTX range local to a thread within a
|
|
2082
|
+
* domain.
|
|
2083
|
+
*
|
|
2084
|
+
* When constructed, begins a nested NVTX range on the calling thread in the
|
|
2085
|
+
* specified domain. Upon destruction, ends the NVTX range.
|
|
2086
|
+
*
|
|
2087
|
+
* Behavior is undefined if a `scoped_range_in` object is
|
|
2088
|
+
* created/destroyed on different threads.
|
|
2089
|
+
*
|
|
2090
|
+
* `scoped_range_in` is neither movable nor copyable.
|
|
2091
|
+
*
|
|
2092
|
+
* `scoped_range_in`s may be nested within other ranges.
|
|
2093
|
+
*
|
|
2094
|
+
* The domain of the range is specified by the template type parameter `D`.
|
|
2095
|
+
* By default, the `domain::global` is used, which scopes the range to the
|
|
2096
|
+
* global NVTX domain. The convenience alias `scoped_range` is provided for
|
|
2097
|
+
* ranges scoped to the global domain.
|
|
2098
|
+
*
|
|
2099
|
+
* A custom domain can be defined by creating a type, `D`, with a static
|
|
2100
|
+
* member `D::name` whose value is used to name the domain associated with
|
|
2101
|
+
* `D`. `D::name` must resolve to either `char const*` or `wchar_t const*`
|
|
2102
|
+
*
|
|
2103
|
+
* Example:
|
|
2104
|
+
* \code{.cpp}
|
|
2105
|
+
* // Define a type `my_domain` with a member `name` used to name the domain
|
|
2106
|
+
* // associated with the type `my_domain`.
|
|
2107
|
+
* struct my_domain{
|
|
2108
|
+
* static constexpr char const* name{"my domain"};
|
|
2109
|
+
* };
|
|
2110
|
+
* \endcode
|
|
2111
|
+
*
|
|
2112
|
+
* Usage:
|
|
2113
|
+
* \code{.cpp}
|
|
2114
|
+
* nvtx3::scoped_range_in<my_domain> r1{"range 1"}; // Range in my domain
|
|
2115
|
+
*
|
|
2116
|
+
* // Three equivalent ways to make a range in the global domain:
|
|
2117
|
+
* nvtx3::scoped_range_in<nvtx3::domain::global> r2{"range 2"};
|
|
2118
|
+
* nvtx3::scoped_range_in<> r3{"range 3"};
|
|
2119
|
+
* nvtx3::scoped_range r4{"range 4"};
|
|
2120
|
+
*
|
|
2121
|
+
* // Create an alias to succinctly make ranges in my domain:
|
|
2122
|
+
* using my_scoped_range = nvtx3::scoped_range_in<my_domain>;
|
|
2123
|
+
*
|
|
2124
|
+
* my_scoped_range r5{"range 5"};
|
|
2125
|
+
* \endcode
|
|
2126
|
+
*/
|
|
2127
|
+
template <class D = domain::global>
|
|
2128
|
+
class NVTX3_MAYBE_UNUSED scoped_range_in
|
|
2129
|
+
{
|
|
2130
|
+
public:
|
|
2131
|
+
/**
|
|
2132
|
+
* @brief Construct a `scoped_range_in` with the specified
|
|
2133
|
+
* `event_attributes`
|
|
2134
|
+
*
|
|
2135
|
+
* Example:
|
|
2136
|
+
* \code{cpp}
|
|
2137
|
+
* nvtx3::event_attributes attr{"msg", nvtx3::rgb{127,255,0}};
|
|
2138
|
+
* nvtx3::scoped_range range{attr}; // Creates a range with message contents
|
|
2139
|
+
* // "msg" and green color
|
|
2140
|
+
* \endcode
|
|
2141
|
+
*
|
|
2142
|
+
* @param[in] attr `event_attributes` that describes the desired attributes
|
|
2143
|
+
* of the range.
|
|
2144
|
+
*/
|
|
2145
|
+
explicit scoped_range_in(event_attributes const& attr) noexcept
|
|
2146
|
+
{
|
|
2147
|
+
# ifndef NVTX_DISABLE
|
|
2148
|
+
nvtxDomainRangePushEx(domain::get<D>(), attr.get());
|
|
2149
|
+
# else
|
|
2150
|
+
(void) attr;
|
|
2151
|
+
# endif
|
|
2152
|
+
}
|
|
2153
|
+
|
|
2154
|
+
/**
|
|
2155
|
+
* @brief Constructs a `scoped_range_in` from the constructor arguments
|
|
2156
|
+
* of an `event_attributes`.
|
|
2157
|
+
*
|
|
2158
|
+
* Forwards the arguments `args...` to construct an
|
|
2159
|
+
* `event_attributes` object. The `event_attributes` object is then
|
|
2160
|
+
* associated with the `scoped_range_in`.
|
|
2161
|
+
*
|
|
2162
|
+
* For more detail, see `event_attributes` documentation.
|
|
2163
|
+
*
|
|
2164
|
+
* Example:
|
|
2165
|
+
* \code{cpp}
|
|
2166
|
+
* // Creates a range with message "message" and green color
|
|
2167
|
+
* nvtx3::scoped_range r{"message", nvtx3::rgb{127,255,0}};
|
|
2168
|
+
* \endcode
|
|
2169
|
+
*
|
|
2170
|
+
* @param[in] args Arguments to used to construct an `event_attributes` associated with this
|
|
2171
|
+
* range.
|
|
2172
|
+
*
|
|
2173
|
+
*/
|
|
2174
|
+
template <typename... Args>
|
|
2175
|
+
explicit scoped_range_in(Args const&... args) noexcept
|
|
2176
|
+
: scoped_range_in{event_attributes{args...}}
|
|
2177
|
+
{}
|
|
2178
|
+
|
|
2179
|
+
/**
|
|
2180
|
+
* @brief Default constructor creates a `scoped_range_in` with no
|
|
2181
|
+
* message, color, payload, nor category.
|
|
2182
|
+
*
|
|
2183
|
+
*/
|
|
2184
|
+
scoped_range_in() noexcept
|
|
2185
|
+
: scoped_range_in{event_attributes{}}
|
|
2186
|
+
{}
|
|
2187
|
+
|
|
2188
|
+
/**
|
|
2189
|
+
* @brief Delete `operator new` to disallow heap allocated objects.
|
|
2190
|
+
*
|
|
2191
|
+
* `scoped_range_in` must follow RAII semantics to guarantee proper push/pop semantics.
|
|
2192
|
+
*
|
|
2193
|
+
*/
|
|
2194
|
+
void* operator new(std::size_t) = delete;
|
|
2195
|
+
|
|
2196
|
+
scoped_range_in(scoped_range_in const&) = delete;
|
|
2197
|
+
scoped_range_in& operator=(scoped_range_in const&) = delete;
|
|
2198
|
+
scoped_range_in(scoped_range_in&&) = delete;
|
|
2199
|
+
scoped_range_in& operator=(scoped_range_in&&) = delete;
|
|
2200
|
+
|
|
2201
|
+
/**
|
|
2202
|
+
* @brief Destroy the scoped_range_in, ending the NVTX range event.
|
|
2203
|
+
*/
|
|
2204
|
+
~scoped_range_in() noexcept
|
|
2205
|
+
{
|
|
2206
|
+
# ifndef NVTX_DISABLE
|
|
2207
|
+
nvtxDomainRangePop(domain::get<D>());
|
|
2208
|
+
# endif
|
|
2209
|
+
}
|
|
2210
|
+
};
|
|
2211
|
+
|
|
2212
|
+
/**
|
|
2213
|
+
* @brief Alias for a `scoped_range_in` in the global NVTX domain.
|
|
2214
|
+
*
|
|
2215
|
+
*/
|
|
2216
|
+
using scoped_range = scoped_range_in<>; // `<>` selects the default domain, `domain::global`
|
|
2217
|
+
|
|
2218
|
+
namespace detail
|
|
2219
|
+
{
|
|
2220
|
+
|
|
2221
|
+
/// @cond internal
|
|
2222
|
+
template <typename D = domain::global>
|
|
2223
|
+
class NVTX3_MAYBE_UNUSED optional_scoped_range_in
|
|
2224
|
+
{
|
|
2225
|
+
public:
|
|
2226
|
+
optional_scoped_range_in() = default;
|
|
2227
|
+
|
|
2228
|
+
void begin(event_attributes const& attr) noexcept
|
|
2229
|
+
{
|
|
2230
|
+
# ifndef NVTX_DISABLE
|
|
2231
|
+
// This class is not meant to be part of the public NVTX C++ API and should
|
|
2232
|
+
// only be used in the `NVTX3_FUNC_RANGE_IF` and `NVTX3_FUNC_RANGE_IF_IN`
|
|
2233
|
+
// macros. However, to prevent developers from misusing this class, make
|
|
2234
|
+
// sure to not start multiple ranges.
|
|
2235
|
+
if (initialized)
|
|
2236
|
+
{
|
|
2237
|
+
return;
|
|
2238
|
+
}
|
|
2239
|
+
|
|
2240
|
+
nvtxDomainRangePushEx(domain::get<D>(), attr.get());
|
|
2241
|
+
initialized = true;
|
|
2242
|
+
# endif
|
|
2243
|
+
}
|
|
2244
|
+
|
|
2245
|
+
~optional_scoped_range_in() noexcept
|
|
2246
|
+
{
|
|
2247
|
+
# ifndef NVTX_DISABLE
|
|
2248
|
+
if (initialized)
|
|
2249
|
+
{
|
|
2250
|
+
nvtxDomainRangePop(domain::get<D>());
|
|
2251
|
+
}
|
|
2252
|
+
# endif
|
|
2253
|
+
}
|
|
2254
|
+
|
|
2255
|
+
void* operator new(std::size_t) = delete;
|
|
2256
|
+
optional_scoped_range_in(optional_scoped_range_in const&) = delete;
|
|
2257
|
+
optional_scoped_range_in& operator=(optional_scoped_range_in const&) = delete;
|
|
2258
|
+
optional_scoped_range_in(optional_scoped_range_in&&) = delete;
|
|
2259
|
+
optional_scoped_range_in& operator=(optional_scoped_range_in&&) = delete;
|
|
2260
|
+
|
|
2261
|
+
private:
|
|
2262
|
+
# ifndef NVTX_DISABLE
|
|
2263
|
+
bool initialized = false;
|
|
2264
|
+
# endif
|
|
2265
|
+
};
|
|
2266
|
+
/// @endcond
|
|
2267
|
+
|
|
2268
|
+
} // namespace detail
|
|
2269
|
+
|
|
2270
|
+
/**
|
|
2271
|
+
* @brief Handle used for correlating explicit range start and end events.
|
|
2272
|
+
*
|
|
2273
|
+
* A handle is "null" if it does not correspond to any range.
|
|
2274
|
+
*
|
|
2275
|
+
*/
|
|
2276
|
+
struct range_handle
|
|
2277
|
+
{
|
|
2278
|
+
/// Type used for the handle's value
|
|
2279
|
+
using value_type = nvtxRangeId_t;
|
|
2280
|
+
|
|
2281
|
+
/**
|
|
2282
|
+
* @brief Construct a `range_handle` from the given id.
|
|
2283
|
+
*
|
|
2284
|
+
*/
|
|
2285
|
+
constexpr explicit range_handle(value_type id) noexcept
|
|
2286
|
+
: _range_id{id}
|
|
2287
|
+
{}
|
|
2288
|
+
|
|
2289
|
+
/**
|
|
2290
|
+
* @brief Constructs a null range handle.
|
|
2291
|
+
*
|
|
2292
|
+
* A null range_handle corresponds to no range. Calling `end_range` on a
|
|
2293
|
+
* null handle is undefined behavior when a tool is active.
|
|
2294
|
+
*
|
|
2295
|
+
*/
|
|
2296
|
+
constexpr range_handle() noexcept = default;
|
|
2297
|
+
|
|
2298
|
+
/**
|
|
2299
|
+
* @brief Checks whether this handle is null
|
|
2300
|
+
*
|
|
2301
|
+
* Provides contextual conversion to `bool`.
|
|
2302
|
+
*
|
|
2303
|
+
* \code{cpp}
|
|
2304
|
+
* range_handle handle{};
|
|
2305
|
+
* if (handle) {...}
|
|
2306
|
+
* \endcode
|
|
2307
|
+
*
|
|
2308
|
+
*/
|
|
2309
|
+
constexpr explicit operator bool() const noexcept
|
|
2310
|
+
{
|
|
2311
|
+
return get_value() != null_range_id;
|
|
2312
|
+
}
|
|
2313
|
+
|
|
2314
|
+
/**
|
|
2315
|
+
* @brief Implicit conversion from `nullptr` constructs a null handle.
|
|
2316
|
+
*
|
|
2317
|
+
* Satisfies the "NullablePointer" requirement to make `range_handle` comparable with `nullptr`.
|
|
2318
|
+
*
|
|
2319
|
+
*/
|
|
2320
|
+
constexpr range_handle(std::nullptr_t) noexcept {}
|
|
2321
|
+
|
|
2322
|
+
/**
|
|
2323
|
+
* @brief Returns the `range_handle`'s value
|
|
2324
|
+
*
|
|
2325
|
+
* @return value_type The handle's value
|
|
2326
|
+
*/
|
|
2327
|
+
constexpr value_type get_value() const noexcept
|
|
2328
|
+
{
|
|
2329
|
+
return _range_id;
|
|
2330
|
+
}
|
|
2331
|
+
|
|
2332
|
+
private:
|
|
2333
|
+
/// Sentinel value for a null handle that corresponds to no range
|
|
2334
|
+
static constexpr value_type null_range_id = nvtxRangeId_t{0};
|
|
2335
|
+
|
|
2336
|
+
value_type _range_id{null_range_id}; ///< The underlying NVTX range id
|
|
2337
|
+
};
|
|
2338
|
+
|
|
2339
|
+
/**
|
|
2340
|
+
* @brief Compares two range_handles for equality
|
|
2341
|
+
*
|
|
2342
|
+
* @param lhs The first range_handle to compare
|
|
2343
|
+
* @param rhs The second range_handle to compare
|
|
2344
|
+
*/
|
|
2345
|
+
// `constexpr` functions are implicitly inline, so the keyword is not repeated.
constexpr bool operator==(range_handle lhs, range_handle rhs) noexcept
{
  // Two handles are equal exactly when they hold the same range id.
  return lhs.get_value() == rhs.get_value();
}
|
|
2349
|
+
|
|
2350
|
+
/**
|
|
2351
|
+
* @brief Compares two range_handles for inequality
|
|
2352
|
+
*
|
|
2353
|
+
* @param lhs The first range_handle to compare
|
|
2354
|
+
* @param rhs The second range_handle to compare
|
|
2355
|
+
*/
|
|
2356
|
+
// `constexpr` functions are implicitly inline, so the keyword is not repeated.
constexpr bool operator!=(range_handle lhs, range_handle rhs) noexcept
{
  // Handles differ exactly when their range ids differ.
  return lhs.get_value() != rhs.get_value();
}
|
|
2360
|
+
|
|
2361
|
+
/**
|
|
2362
|
+
* @brief Manually begin an NVTX range.
|
|
2363
|
+
*
|
|
2364
|
+
* Explicitly begins an NVTX range and returns a unique handle. To end the
|
|
2365
|
+
* range, pass the handle to `end_range_in<D>()`.
|
|
2366
|
+
*
|
|
2367
|
+
* `nvtx3::start_range(...)` is equivalent to `nvtx3::start_range_in<>(...)` and
|
|
2368
|
+
* `nvtx3::start_range_in<nvtx3::domain::global>(...)`.
|
|
2369
|
+
*
|
|
2370
|
+
* `start_range_in/end_range_in` are the most explicit and lowest level APIs
|
|
2371
|
+
* provided for creating ranges. Use of `nvtx3::unique_range_in` should be
|
|
2372
|
+
* preferred unless one is unable to tie the range to the lifetime of an object.
|
|
2373
|
+
*
|
|
2374
|
+
* Example:
|
|
2375
|
+
* \code{.cpp}
|
|
2376
|
+
* nvtx3::event_attributes attr{"msg", nvtx3::rgb{127,255,0}};
|
|
2377
|
+
* // Manually begin a range
|
|
2378
|
+
* nvtx3::range_handle h = nvtx3::start_range_in<my_domain>(attr);
|
|
2379
|
+
* ...
|
|
2380
|
+
* nvtx3::end_range_in<my_domain>(h); // End the range
|
|
2381
|
+
* \endcode
|
|
2382
|
+
*
|
|
2383
|
+
* @tparam D Type containing `name` member used to identify the `domain`
|
|
2384
|
+
* to which the range belongs. Else, `domain::global` to indicate that the
|
|
2385
|
+
* global NVTX domain should be used.
|
|
2386
|
+
* @param[in] attr `event_attributes` that describes the desired attributes
|
|
2387
|
+
* of the range.
|
|
2388
|
+
* @return Unique handle to be passed to `end_range_in` to end the range.
|
|
2389
|
+
*/
|
|
2390
|
+
template <typename D = domain::global>
|
|
2391
|
+
NVTX3_NO_DISCARD inline range_handle start_range_in(event_attributes const& attr) noexcept
|
|
2392
|
+
{
|
|
2393
|
+
# ifndef NVTX_DISABLE
|
|
2394
|
+
return range_handle{nvtxDomainRangeStartEx(domain::get<D>(), attr.get())};
|
|
2395
|
+
# else
|
|
2396
|
+
(void) attr;
|
|
2397
|
+
return {};
|
|
2398
|
+
# endif
|
|
2399
|
+
}
|
|
2400
|
+
|
|
2401
|
+
/**
|
|
2402
|
+
* @brief Manually begin an NVTX range.
|
|
2403
|
+
*
|
|
2404
|
+
* Explicitly begins an NVTX range and returns a unique handle. To end the
|
|
2405
|
+
* range, pass the handle to `end_range_in<D>()`.
|
|
2406
|
+
*
|
|
2407
|
+
* `nvtx3::start_range(...)` is equivalent to `nvtx3::start_range_in<>(...)` and
|
|
2408
|
+
* `nvtx3::start_range_in<nvtx3::domain::global>(...)`.
|
|
2409
|
+
*
|
|
2410
|
+
* `start_range_in/end_range_in` are the most explicit and lowest level APIs
|
|
2411
|
+
* provided for creating ranges. Use of `nvtx3::unique_range_in` should be
|
|
2412
|
+
* preferred unless one is unable to tie the range to the lifetime of an object.
|
|
2413
|
+
*
|
|
2414
|
+
* This overload uses `args...` to construct an `event_attributes` to
|
|
2415
|
+
* associate with the range. For more detail, see `event_attributes`.
|
|
2416
|
+
*
|
|
2417
|
+
* Example:
|
|
2418
|
+
* \code{cpp}
|
|
2419
|
+
* // Manually begin a range
|
|
2420
|
+
* nvtx3::range_handle h = nvtx3::start_range_in<D>("msg", nvtx3::rgb{127,255,0});
|
|
2421
|
+
* ...
|
|
2422
|
+
* nvtx3::end_range_in<D>(h); // Ends the range
|
|
2423
|
+
* \endcode
|
|
2424
|
+
*
|
|
2425
|
+
* @tparam D Type containing `name` member used to identify the `domain`
|
|
2426
|
+
* to which the range belongs. Else, `domain::global` to indicate that the
|
|
2427
|
+
* global NVTX domain should be used.
|
|
2428
|
+
* @param[in] args Variadic parameter pack of the arguments for an `event_attributes`.
|
|
2429
|
+
* @return Unique handle to be passed to `end_range_in` to end the range.
|
|
2430
|
+
*/
|
|
2431
|
+
template <typename D = domain::global, typename... Args>
|
|
2432
|
+
NVTX3_NO_DISCARD inline range_handle start_range_in(Args const&... args) noexcept
|
|
2433
|
+
{
|
|
2434
|
+
# ifndef NVTX_DISABLE
|
|
2435
|
+
return start_range_in<D>(event_attributes{args...});
|
|
2436
|
+
# else
|
|
2437
|
+
return {};
|
|
2438
|
+
# endif
|
|
2439
|
+
}
|
|
2440
|
+
|
|
2441
|
+
/**
|
|
2442
|
+
* @brief Manually begin an NVTX range in the global domain.
|
|
2443
|
+
*
|
|
2444
|
+
* Explicitly begins an NVTX range and returns a unique handle. To end the
|
|
2445
|
+
* range, pass the handle to `end_range()`.
|
|
2446
|
+
*
|
|
2447
|
+
* `nvtx3::start_range(...)` is equivalent to `nvtx3::start_range_in<>(...)` and
|
|
2448
|
+
* `nvtx3::start_range_in<nvtx3::domain::global>(...)`.
|
|
2449
|
+
*
|
|
2450
|
+
* `start_range/end_range` are the most explicit and lowest level APIs
|
|
2451
|
+
* provided for creating ranges. Use of `nvtx3::unique_range` should be
|
|
2452
|
+
* preferred unless one is unable to tie the range to the lifetime of an object.
|
|
2453
|
+
*
|
|
2454
|
+
* Example:
|
|
2455
|
+
* \code{.cpp}
|
|
2456
|
+
* nvtx3::event_attributes attr{"msg", nvtx3::rgb{127,255,0}};
|
|
2457
|
+
* // Manually begin a range
|
|
2458
|
+
* nvtx3::range_handle h = nvtx3::start_range(attr);
|
|
2459
|
+
* ...
|
|
2460
|
+
* nvtx3::end_range(h); // End the range
|
|
2461
|
+
* \endcode
|
|
2462
|
+
*
|
|
2463
|
+
* @param[in] attr `event_attributes` that describes the desired attributes
|
|
2464
|
+
* of the range.
|
|
2465
|
+
* @return Unique handle to be passed to `end_range` to end the range.
|
|
2466
|
+
*/
|
|
2467
|
+
NVTX3_NO_DISCARD inline range_handle start_range(event_attributes const& attr) noexcept
{
#  ifndef NVTX_DISABLE
  // The global-domain form is the domain-generic form with `domain::global`.
  return start_range_in<domain::global>(attr);
#  else
  // NVTX is compiled out: hand back a null handle.
  static_cast<void>(attr);
  return range_handle{};
#  endif
}
|
|
2476
|
+
|
|
2477
|
+
/**
|
|
2478
|
+
* @brief Manually begin an NVTX range in the global domain.
|
|
2479
|
+
*
|
|
2480
|
+
* Explicitly begins an NVTX range and returns a unique handle. To end the
|
|
2481
|
+
* range, pass the handle to `end_range()`.
|
|
2482
|
+
*
|
|
2483
|
+
* `nvtx3::start_range(...)` is equivalent to `nvtx3::start_range_in<>(...)` and
|
|
2484
|
+
* `nvtx3::start_range_in<nvtx3::domain::global>(...)`.
|
|
2485
|
+
*
|
|
2486
|
+
* `start_range_in/end_range_in` are the most explicit and lowest level APIs
|
|
2487
|
+
* provided for creating ranges. Use of `nvtx3::unique_range_in` should be
|
|
2488
|
+
* preferred unless one is unable to tie the range to the lifetime of an object.
|
|
2489
|
+
*
|
|
2490
|
+
* This overload uses `args...` to construct an `event_attributes` to
|
|
2491
|
+
* associate with the range. For more detail, see `event_attributes`.
|
|
2492
|
+
*
|
|
2493
|
+
* Example:
|
|
2494
|
+
* \code{cpp}
|
|
2495
|
+
* // Manually begin a range
|
|
2496
|
+
* nvtx3::range_handle h = nvtx3::start_range("msg", nvtx3::rgb{127,255,0});
|
|
2497
|
+
* ...
|
|
2498
|
+
* nvtx3::end_range(h); // Ends the range
|
|
2499
|
+
* \endcode
|
|
2500
|
+
*
|
|
2501
|
+
* @param[in] args Variadic parameter pack of the arguments for an `event_attributes`.
|
|
2502
|
+
* @return Unique handle to be passed to `end_range` to end the range.
|
|
2503
|
+
*/
|
|
2504
|
+
template <typename... Args>
|
|
2505
|
+
NVTX3_NO_DISCARD inline range_handle start_range(Args const&... args) noexcept
|
|
2506
|
+
{
|
|
2507
|
+
# ifndef NVTX_DISABLE
|
|
2508
|
+
return start_range_in<domain::global>(args...);
|
|
2509
|
+
# else
|
|
2510
|
+
return {};
|
|
2511
|
+
# endif
|
|
2512
|
+
}
|
|
2513
|
+
|
|
2514
|
+
/**
|
|
2515
|
+
* @brief Manually end the range associated with the handle `r` in domain `D`.
|
|
2516
|
+
*
|
|
2517
|
+
* Explicitly ends the NVTX range indicated by the handle `r` returned from a
|
|
2518
|
+
* prior call to `start_range_in<D>`. The range may end on a different thread
|
|
2519
|
+
* from where it began.
|
|
2520
|
+
*
|
|
2521
|
+
* @tparam D Type containing `name` member used to identify the `domain` to
|
|
2522
|
+
* which the range belongs. Else, `domain::global` to indicate that the global
|
|
2523
|
+
* NVTX domain should be used.
|
|
2524
|
+
* @param r Handle to a range started by a prior call to `start_range_in`.
|
|
2525
|
+
*
|
|
2526
|
+
* @warning The domain type specified as template parameter to this function
|
|
2527
|
+
* must be the same that was specified on the associated `start_range_in` call.
|
|
2528
|
+
*/
|
|
2529
|
+
template <typename D = domain::global>
|
|
2530
|
+
inline void end_range_in(range_handle r) noexcept
|
|
2531
|
+
{
|
|
2532
|
+
# ifndef NVTX_DISABLE
|
|
2533
|
+
nvtxDomainRangeEnd(domain::get<D>(), r.get_value());
|
|
2534
|
+
# else
|
|
2535
|
+
(void) r;
|
|
2536
|
+
# endif
|
|
2537
|
+
}
|
|
2538
|
+
|
|
2539
|
+
/**
|
|
2540
|
+
* @brief Manually end the range associated with the handle `r` in the global
|
|
2541
|
+
* domain.
|
|
2542
|
+
*
|
|
2543
|
+
* Explicitly ends the NVTX range indicated by the handle `r` returned from a
|
|
2544
|
+
* prior call to `start_range`. The range may end on a different thread from
|
|
2545
|
+
* where it began.
|
|
2546
|
+
*
|
|
2547
|
+
* @param r Handle to a range started by a prior call to `start_range`.
|
|
2548
|
+
*
|
|
2549
|
+
* @warning `r` must come from `start_range` (or `start_range_in<domain::global>`);
|
|
2550
|
+
* a range started in another domain must be ended with `end_range_in<D>`.
|
|
2551
|
+
*/
|
|
2552
|
+
inline void end_range(range_handle r) noexcept
{
#  ifndef NVTX_DISABLE
  // The global-domain form is the domain-generic form with `domain::global`.
  end_range_in<domain::global>(r);
#  else
  // NVTX is compiled out: nothing to end.
  static_cast<void>(r);
#  endif
}
|
|
2560
|
+
|
|
2561
|
+
/**
|
|
2562
|
+
* @brief A RAII object for creating a NVTX range within a domain that can
|
|
2563
|
+
* be created and destroyed on different threads.
|
|
2564
|
+
*
|
|
2565
|
+
* When constructed, begins a NVTX range in the specified domain. Upon
|
|
2566
|
+
* destruction, ends the NVTX range.
|
|
2567
|
+
*
|
|
2568
|
+
* Similar to `nvtx3::scoped_range_in`, with a few key differences:
|
|
2569
|
+
* - `unique_range` objects can be destroyed in any order whereas `scoped_range` objects must be
|
|
2570
|
+
* destroyed in exact reverse creation order
|
|
2571
|
+
* - `unique_range` can start and end on different threads
|
|
2572
|
+
* - `unique_range` is movable
|
|
2573
|
+
* - `unique_range` objects can be constructed as heap objects
|
|
2574
|
+
*
|
|
2575
|
+
* There is extra overhead associated with `unique_range` constructs and therefore use of
|
|
2576
|
+
* `nvtx3::scoped_range_in` should be preferred.
|
|
2577
|
+
*
|
|
2578
|
+
* @tparam D Type containing `name` member used to identify the `domain`
|
|
2579
|
+
* to which the `unique_range_in` belongs. Else, `domain::global` to
|
|
2580
|
+
* indicate that the global NVTX domain should be used.
|
|
2581
|
+
*/
|
|
2582
|
+
template <typename D = domain::global>
|
|
2583
|
+
class NVTX3_MAYBE_UNUSED unique_range_in
|
|
2584
|
+
{
|
|
2585
|
+
public:
|
|
2586
|
+
/**
|
|
2587
|
+
* @brief Construct a new unique_range_in object with the specified event attributes
|
|
2588
|
+
*
|
|
2589
|
+
* Example:
|
|
2590
|
+
* \code{cpp}
|
|
2591
|
+
* nvtx3::event_attributes attr{"msg", nvtx3::rgb{127,255,0}};
|
|
2592
|
+
* nvtx3::unique_range_in<my_domain> range{attr}; // Creates a range with message contents
|
|
2593
|
+
* // "msg" and green color
|
|
2594
|
+
* \endcode
|
|
2595
|
+
*
|
|
2596
|
+
* @param[in] attr `event_attributes` that describes the desired attributes
|
|
2597
|
+
* of the range.
|
|
2598
|
+
*/
|
|
2599
|
+
explicit unique_range_in(event_attributes const& attr) noexcept
|
|
2600
|
+
: handle_{start_range_in<D>(attr)}
|
|
2601
|
+
{}
|
|
2602
|
+
|
|
2603
|
+
/**
|
|
2604
|
+
* @brief Constructs a `unique_range_in` from the constructor arguments
|
|
2605
|
+
* of an `event_attributes`.
|
|
2606
|
+
*
|
|
2607
|
+
* Forwards the arguments `args...` to construct an
|
|
2608
|
+
* `event_attributes` object. The `event_attributes` object is then
|
|
2609
|
+
* associated with the `unique_range_in`.
|
|
2610
|
+
*
|
|
2611
|
+
* For more detail, see `event_attributes` documentation.
|
|
2612
|
+
*
|
|
2613
|
+
* Example:
|
|
2614
|
+
* \code{.cpp}
|
|
2615
|
+
* // Creates a range with message "message" and green color
|
|
2616
|
+
* nvtx3::unique_range_in<> r{"message", nvtx3::rgb{127,255,0}};
|
|
2617
|
+
* \endcode
|
|
2618
|
+
*
|
|
2619
|
+
* @param[in] args Variadic parameter pack of arguments to construct an `event_attributes`
|
|
2620
|
+
* associated with this range.
|
|
2621
|
+
*/
|
|
2622
|
+
template <typename... Args>
|
|
2623
|
+
explicit unique_range_in(Args const&... args) noexcept
|
|
2624
|
+
: unique_range_in{event_attributes{args...}}
|
|
2625
|
+
{}
|
|
2626
|
+
|
|
2627
|
+
/**
|
|
2628
|
+
* @brief Default constructor creates a `unique_range_in` with no
|
|
2629
|
+
* message, color, payload, nor category.
|
|
2630
|
+
*
|
|
2631
|
+
*/
|
|
2632
|
+
constexpr unique_range_in() noexcept
|
|
2633
|
+
: unique_range_in{event_attributes{}}
|
|
2634
|
+
{}
|
|
2635
|
+
|
|
2636
|
+
/**
|
|
2637
|
+
* @brief Destroy the `unique_range_in` ending the range.
|
|
2638
|
+
*
|
|
2639
|
+
*/
|
|
2640
|
+
~unique_range_in() noexcept = default;
|
|
2641
|
+
|
|
2642
|
+
/**
|
|
2643
|
+
* @brief Move constructor allows taking ownership of the NVTX range from
|
|
2644
|
+
* another `unique_range_in`.
|
|
2645
|
+
*
|
|
2646
|
+
* @param other The range to take ownership of
|
|
2647
|
+
*/
|
|
2648
|
+
unique_range_in(unique_range_in&& other) noexcept = default;
|
|
2649
|
+
|
|
2650
|
+
/**
|
|
2651
|
+
* @brief Move assignment operator allows taking ownership of an NVTX range
|
|
2652
|
+
* from another `unique_range_in`.
|
|
2653
|
+
*
|
|
2654
|
+
* @param other The range to take ownership of
|
|
2655
|
+
*/
|
|
2656
|
+
unique_range_in& operator=(unique_range_in&& other) noexcept = default;
|
|
2657
|
+
|
|
2658
|
+
/// Copy construction is not allowed to prevent multiple objects from owning
|
|
2659
|
+
/// the same range handle
|
|
2660
|
+
unique_range_in(unique_range_in const&) = delete;
|
|
2661
|
+
|
|
2662
|
+
/// Copy assignment is not allowed to prevent multiple objects from owning the
|
|
2663
|
+
/// same range handle
|
|
2664
|
+
unique_range_in& operator=(unique_range_in const&) = delete;
|
|
2665
|
+
|
|
2666
|
+
private:
|
|
2667
|
+
struct end_range_handle
|
|
2668
|
+
{
|
|
2669
|
+
using pointer = range_handle; /// Override the pointer type of the unique_ptr
|
|
2670
|
+
void operator()(range_handle h) const noexcept
|
|
2671
|
+
{
|
|
2672
|
+
end_range_in<D>(h);
|
|
2673
|
+
}
|
|
2674
|
+
};
|
|
2675
|
+
|
|
2676
|
+
/// Range handle used to correlate the start/end of the range
|
|
2677
|
+
std::unique_ptr<range_handle, end_range_handle> handle_;
|
|
2678
|
+
};
|
|
2679
|
+
|
|
2680
|
+
/**
|
|
2681
|
+
* @brief Alias for a `unique_range_in` in the global NVTX domain.
|
|
2682
|
+
*
|
|
2683
|
+
*/
|
|
2684
|
+
using unique_range = unique_range_in<>; // `<>` selects the default domain, `domain::global`
|
|
2685
|
+
|
|
2686
|
+
/**
|
|
2687
|
+
* @brief Annotates an instantaneous point in time with a "marker", using the
|
|
2688
|
+
* attributes specified by `attr`.
|
|
2689
|
+
*
|
|
2690
|
+
* Unlike a "range" which has a beginning and an end, a marker is a single event
|
|
2691
|
+
* in an application, such as detecting a problem:
|
|
2692
|
+
*
|
|
2693
|
+
* \code{.cpp}
|
|
2694
|
+
* bool success = do_operation(...);
|
|
2695
|
+
* if (!success) {
|
|
2696
|
+
* nvtx3::event_attributes attr{"operation failed!", nvtx3::rgb{255,0,0}};
|
|
2697
|
+
* nvtx3::mark_in<my_domain>(attr);
|
|
2698
|
+
* }
|
|
2699
|
+
* \endcode
|
|
2700
|
+
*
|
|
2701
|
+
* Note that nvtx3::mark_in<D> is a function, not a class like scoped_range_in<D>.
|
|
2702
|
+
*
|
|
2703
|
+
* @tparam D Type containing `name` member used to identify the `domain`
|
|
2704
|
+
* to which the mark belongs. Else, `domain::global` to
|
|
2705
|
+
* indicate that the global NVTX domain should be used.
|
|
2706
|
+
* @param[in] attr `event_attributes` that describes the desired attributes
|
|
2707
|
+
* of the mark.
|
|
2708
|
+
*/
|
|
2709
|
+
template <typename D = domain::global>
|
|
2710
|
+
inline void mark_in(event_attributes const& attr) noexcept
|
|
2711
|
+
{
|
|
2712
|
+
# ifndef NVTX_DISABLE
|
|
2713
|
+
nvtxDomainMarkEx(domain::get<D>(), attr.get());
|
|
2714
|
+
# else
|
|
2715
|
+
(void) (attr);
|
|
2716
|
+
# endif
|
|
2717
|
+
}
|
|
2718
|
+
|
|
2719
|
+
/**
|
|
2720
|
+
* @brief Annotates an instantaneous point in time with a "marker", using the
|
|
2721
|
+
* arguments to construct an `event_attributes`.
|
|
2722
|
+
*
|
|
2723
|
+
* Unlike a "range" which has a beginning and an end, a marker is a single event
|
|
2724
|
+
* in an application, such as detecting a problem:
|
|
2725
|
+
*
|
|
2726
|
+
* \code{.cpp}
|
|
2727
|
+
* bool success = do_operation(...);
|
|
2728
|
+
* if (!success) {
|
|
2729
|
+
* nvtx3::mark_in<my_domain>("operation failed!", nvtx3::rgb{255,0,0});
|
|
2730
|
+
* }
|
|
2731
|
+
* \endcode
|
|
2732
|
+
*
|
|
2733
|
+
* Note that nvtx3::mark_in<D> is a function, not a class like scoped_range_in<D>.
|
|
2734
|
+
*
|
|
2735
|
+
* Forwards the arguments `args...` to construct an `event_attributes` object.
|
|
2736
|
+
* The attributes are then associated with the marker. For more detail, see
|
|
2737
|
+
* the `event_attributes` documentation.
|
|
2738
|
+
*
|
|
2739
|
+
* @tparam D Type containing `name` member used to identify the `domain`
|
|
2740
|
+
* to which the mark belongs. Else `domain::global` to
|
|
2741
|
+
* indicate that the global NVTX domain should be used.
|
|
2742
|
+
* @param[in] args Variadic parameter pack of arguments to construct an `event_attributes`
|
|
2743
|
+
* associated with this mark.
|
|
2744
|
+
*
|
|
2745
|
+
*/
|
|
2746
|
+
template <typename D = domain::global, typename... Args>
|
|
2747
|
+
inline void mark_in(Args const&... args) noexcept
|
|
2748
|
+
{
|
|
2749
|
+
# ifndef NVTX_DISABLE
|
|
2750
|
+
mark_in<D>(event_attributes{args...});
|
|
2751
|
+
# endif
|
|
2752
|
+
}
|
|
2753
|
+
|
|
2754
|
+
/**
|
|
2755
|
+
* @brief Annotates an instantaneous point in time with a "marker", using the
|
|
2756
|
+
* attributes specified by `attr`, in the global domain.
|
|
2757
|
+
*
|
|
2758
|
+
* Unlike a "range" which has a beginning and an end, a marker is a single event
|
|
2759
|
+
* in an application, such as detecting a problem:
|
|
2760
|
+
*
|
|
2761
|
+
* \code{.cpp}
|
|
2762
|
+
* bool success = do_operation(...);
|
|
2763
|
+
* if (!success) {
|
|
2764
|
+
* nvtx3::event_attributes attr{"operation failed!", nvtx3::rgb{255,0,0}};
|
|
2765
|
+
* nvtx3::mark(attr);
|
|
2766
|
+
* }
|
|
2767
|
+
* \endcode
|
|
2768
|
+
*
|
|
2769
|
+
* Note that nvtx3::mark is a function, not a class like scoped_range.
|
|
2770
|
+
*
|
|
2771
|
+
* @param[in] attr `event_attributes` that describes the desired attributes
|
|
2772
|
+
* of the mark.
|
|
2773
|
+
*/
|
|
2774
|
+
inline void mark(event_attributes const& attr) noexcept
{
# ifndef NVTX_DISABLE
  // Global-domain shorthand for `mark_in`.
  mark_in<domain::global>(attr);
# else
  // NVTX is compiled out: consume the argument so the no-op build does not
  // report an unused parameter (same treatment as `mark_in`).
  (void) (attr);
# endif
}
|
|
2780
|
+
|
|
2781
|
+
/**
|
|
2782
|
+
* @brief Annotates an instantaneous point in time with a "marker", using the
|
|
2783
|
+
* arguments to construct an `event_attributes`, in the global domain.
|
|
2784
|
+
*
|
|
2785
|
+
* Unlike a "range" which has a beginning and an end, a marker is a single event
|
|
2786
|
+
* in an application, such as detecting a problem:
|
|
2787
|
+
*
|
|
2788
|
+
* \code{.cpp}
|
|
2789
|
+
* bool success = do_operation(...);
|
|
2790
|
+
* if (!success) {
|
|
2791
|
+
* nvtx3::mark("operation failed!", nvtx3::rgb{255,0,0});
|
|
2792
|
+
* }
|
|
2793
|
+
* \endcode
|
|
2794
|
+
*
|
|
2795
|
+
* Note that nvtx3::mark is a function, not a class like scoped_range.
|
|
2796
|
+
*
|
|
2797
|
+
* Forwards the arguments `args...` to construct an `event_attributes` object.
|
|
2798
|
+
* The attributes are then associated with the marker. For more detail, see
|
|
2799
|
+
* the `event_attributes` documentation.
|
|
2800
|
+
*
|
|
2801
|
+
* @param[in] args Variadic parameter pack of arguments to construct an
|
|
2802
|
+
* `event_attributes` associated with this marker.
|
|
2803
|
+
*
|
|
2804
|
+
*/
|
|
2805
|
+
template <typename... Args>
|
|
2806
|
+
inline void mark(Args const&... args) noexcept
|
|
2807
|
+
{
|
|
2808
|
+
# ifndef NVTX_DISABLE
|
|
2809
|
+
mark_in<domain::global>(args...);
|
|
2810
|
+
# endif
|
|
2811
|
+
}
|
|
2812
|
+
|
|
2813
|
+
} // namespace NVTX3_VERSION_NAMESPACE
|
|
2814
|
+
|
|
2815
|
+
} // namespace nvtx3
|
|
2816
|
+
|
|
2817
|
+
# ifndef NVTX_DISABLE
|
|
2818
|
+
/**
|
|
2819
|
+
* @brief Convenience macro for generating a range in the specified `domain`
|
|
2820
|
+
* from the lifetime of a function
|
|
2821
|
+
*
|
|
2822
|
+
* This macro is useful for generating an NVTX range in `domain` from
|
|
2823
|
+
* the entry point of a function to its exit. It is intended to be the first
|
|
2824
|
+
* line of the function.
|
|
2825
|
+
*
|
|
2826
|
+
* Constructs a static `registered_string_in` using the name of the immediately
|
|
2827
|
+
* enclosing function returned by `__func__` and constructs a
|
|
2828
|
+
* `nvtx3::scoped_range_in<D>` using the registered function name as the range's
|
|
2829
|
+
* message.
|
|
2830
|
+
*
|
|
2831
|
+
* Example:
|
|
2832
|
+
* \code{.cpp}
|
|
2833
|
+
* struct my_domain{static constexpr char const* name{"my_domain"};};
|
|
2834
|
+
*
|
|
2835
|
+
* void foo(...) {
|
|
2836
|
+
* NVTX3_FUNC_RANGE_IN(my_domain); // Range begins on entry to foo()
|
|
2837
|
+
* // do stuff
|
|
2838
|
+
* ...
|
|
2839
|
+
* } // Range ends on return from foo()
|
|
2840
|
+
* \endcode
|
|
2841
|
+
*
|
|
2842
|
+
* @param[in] D Type containing `name` member used to identify the
|
|
2843
|
+
* `domain` to which the `registered_string_in` belongs. Else,
|
|
2844
|
+
* `domain::global` to indicate that the global NVTX domain should be used.
|
|
2845
|
+
*/
|
|
2846
|
+
/* Expands to three declarations in the scope where the macro is used: a static
 * `registered_string_in<D>` initialized from `__func__`, a static
 * `event_attributes` built from that string, and a non-static
 * `scoped_range_in<D>` constructed from those attributes. The names
 * `nvtx3_func_name__`, `nvtx3_func_attr__` and `nvtx3_range__` are introduced
 * into that scope. */
# define NVTX3_V1_FUNC_RANGE_IN(D) \
|
|
2847
|
+
static ::nvtx3::v1::registered_string_in<D> const nvtx3_func_name__{__func__}; \
|
|
2848
|
+
static ::nvtx3::v1::event_attributes const nvtx3_func_attr__{nvtx3_func_name__}; \
|
|
2849
|
+
::nvtx3::v1::scoped_range_in<D> const nvtx3_range__{nvtx3_func_attr__};
|
|
2850
|
+
|
|
2851
|
+
/**
|
|
2852
|
+
* @brief Convenience macro for generating a range in the specified `domain`
|
|
2853
|
+
* from the lifetime of a function if the given boolean expression evaluates
|
|
2854
|
+
* to true.
|
|
2855
|
+
*
|
|
2856
|
+
* Similar to `NVTX3_V1_FUNC_RANGE_IN(D)`, the only difference being that
|
|
2857
|
+
* `NVTX3_V1_FUNC_RANGE_IF_IN(D, C)` only generates a range if the given boolean
|
|
2858
|
+
* expression evaluates to true.
|
|
2859
|
+
*
|
|
2860
|
+
* @param[in] D Type containing `name` member used to identify the
|
|
2861
|
+
* `domain` to which the `registered_string_in` belongs. Else,
|
|
2862
|
+
* `domain::global` to indicate that the global NVTX domain should be used.
|
|
2863
|
+
*
|
|
2864
|
+
* @param[in] C Boolean expression used to determine if a range should be
|
|
2865
|
+
* generated.
|
|
2866
|
+
*/
|
|
2867
|
+
/* Expands to a declaration of `optional_nvtx3_range__` (a
 * `detail::optional_scoped_range_in<D>`) in the scope where the macro is used,
 * followed by an `if (C)` block. Only when `C` is true does the block create
 * the static registered name and attributes and call `begin()` on the
 * optional range.
 * NOTE(review): the expansion ends with a bare `if` block, so an `else`
 * written directly after the macro invocation would attach to it. */
# define NVTX3_V1_FUNC_RANGE_IF_IN(D, C) \
|
|
2868
|
+
::nvtx3::v1::detail::optional_scoped_range_in<D> optional_nvtx3_range__; \
|
|
2869
|
+
if (C) \
|
|
2870
|
+
{ \
|
|
2871
|
+
static ::nvtx3::v1::registered_string_in<D> const nvtx3_func_name__{__func__}; \
|
|
2872
|
+
static ::nvtx3::v1::event_attributes const nvtx3_func_attr__{nvtx3_func_name__}; \
|
|
2873
|
+
optional_nvtx3_range__.begin(nvtx3_func_attr__); \
|
|
2874
|
+
}
|
|
2875
|
+
# else
|
|
2876
|
+
/* NVTX_DISABLE is defined: both macros expand to nothing, so no declarations
 * are emitted and the condition expression `C` is never evaluated. */
# define NVTX3_V1_FUNC_RANGE_IN(D)
|
|
2877
|
+
# define NVTX3_V1_FUNC_RANGE_IF_IN(D, C)
|
|
2878
|
+
# endif // NVTX_DISABLE
|
|
2879
|
+
|
|
2880
|
+
/**
|
|
2881
|
+
* @brief Convenience macro for generating a range in the global domain from the
|
|
2882
|
+
* lifetime of a function.
|
|
2883
|
+
*
|
|
2884
|
+
* This macro is useful for generating an NVTX range in the global domain from
|
|
2885
|
+
* the entry point of a function to its exit. It is intended to be the first
|
|
2886
|
+
* line of the function.
|
|
2887
|
+
*
|
|
2888
|
+
* Constructs a static `registered_string_in` using the name of the immediately
|
|
2889
|
+
* enclosing function returned by `__func__` and constructs a
|
|
2890
|
+
* `nvtx3::scoped_range` using the registered function name as the range's
|
|
2891
|
+
* message.
|
|
2892
|
+
*
|
|
2893
|
+
* Example:
|
|
2894
|
+
* \code{.cpp}
|
|
2895
|
+
* void foo(...) {
|
|
2896
|
+
* NVTX3_FUNC_RANGE(); // Range begins on entry to foo()
|
|
2897
|
+
* // do stuff
|
|
2898
|
+
* ...
|
|
2899
|
+
* } // Range ends on return from foo()
|
|
2900
|
+
* \endcode
|
|
2901
|
+
*/
|
|
2902
|
+
/* Global-domain shorthand: expands to NVTX3_V1_FUNC_RANGE_IN with `::nvtx3::v1::domain::global` as `D`. */
# define NVTX3_V1_FUNC_RANGE() NVTX3_V1_FUNC_RANGE_IN(::nvtx3::v1::domain::global)
|
|
2903
|
+
|
|
2904
|
+
/**
|
|
2905
|
+
* @brief Convenience macro for generating a range in the global domain from the
|
|
2906
|
+
* lifetime of a function if the given boolean expression evaluates to true.
|
|
2907
|
+
*
|
|
2908
|
+
* Similar to `NVTX3_V1_FUNC_RANGE()`, the only difference being that
|
|
2909
|
+
* `NVTX3_V1_FUNC_RANGE_IF(C)` only generates a range if the given boolean
|
|
2910
|
+
* expression evaluates to true.
|
|
2911
|
+
*
|
|
2912
|
+
* @param[in] C Boolean expression used to determine if a range should be
|
|
2913
|
+
* generated.
|
|
2914
|
+
*/
|
|
2915
|
+
/* Global-domain shorthand: expands to NVTX3_V1_FUNC_RANGE_IF_IN with `::nvtx3::v1::domain::global` as `D`; `C` is passed through unchanged. */
# define NVTX3_V1_FUNC_RANGE_IF(C) NVTX3_V1_FUNC_RANGE_IF_IN(::nvtx3::v1::domain::global, C)
|
|
2916
|
+
|
|
2917
|
+
/* When inlining this version, versioned macros must have unversioned aliases.
|
|
2918
|
+
* For each NVTX3_Vx_ #define, make an NVTX3_ alias of it here.*/
|
|
2919
|
+
# if defined(NVTX3_INLINE_THIS_VERSION)
|
|
2920
|
+
/* clang format off */
|
|
2921
|
+
/* These aliases are object-like macros: the unversioned name is replaced by
 * the versioned name, and the argument list written at the call site is then
 * consumed by the versioned function-like macro. */
# define NVTX3_FUNC_RANGE NVTX3_V1_FUNC_RANGE
|
|
2922
|
+
# define NVTX3_FUNC_RANGE_IF NVTX3_V1_FUNC_RANGE_IF
|
|
2923
|
+
# define NVTX3_FUNC_RANGE_IN NVTX3_V1_FUNC_RANGE_IN
|
|
2924
|
+
# define NVTX3_FUNC_RANGE_IF_IN NVTX3_V1_FUNC_RANGE_IF_IN
|
|
2925
|
+
/* clang format on */
|
|
2926
|
+
# endif
|
|
2927
|
+
|
|
2928
|
+
#endif // NVTX3_CPP_DEFINITIONS_V1_0
|
|
2929
|
+
|
|
2930
|
+
/* Add functionality for new minor versions here, by copying the above section enclosed
|
|
2931
|
+
* in #ifndef NVTX3_CPP_DEFINITIONS_Vx_y, and incrementing the minor version. This code
|
|
2932
|
+
* is an example of how additions for version 1.2 would look, indented for clarity. Note
|
|
2933
|
+
* that the versioned symbols and macros are always provided, and the unversioned symbols
|
|
2934
|
+
* are only provided if NVTX3_INLINE_THIS_VERSION was defined at the top of this header.
|
|
2935
|
+
*
|
|
2936
|
+
* \code{.cpp}
|
|
2937
|
+
* #ifndef NVTX3_CPP_DEFINITIONS_V1_2
|
|
2938
|
+
* #define NVTX3_CPP_DEFINITIONS_V1_2
|
|
2939
|
+
* namespace nvtx3 {
|
|
2940
|
+
* NVTX3_INLINE_IF_REQUESTED namespace NVTX3_VERSION_NAMESPACE {
|
|
2941
|
+
* class new_class {};
|
|
2942
|
+
* inline void new_function() {}
|
|
2943
|
+
* }
|
|
2944
|
+
* }
|
|
2945
|
+
*
|
|
2946
|
+
* // Macros must have the major version in their names:
|
|
2947
|
+
* #define NVTX3_V1_NEW_MACRO_A() ...
|
|
2948
|
+
* #define NVTX3_V1_NEW_MACRO_B() ...
|
|
2949
|
+
*
|
|
2950
|
+
* // If inlining, make aliases for the macros with the version number omitted
|
|
2951
|
+
* #if defined(NVTX3_INLINE_THIS_VERSION)
|
|
2952
|
+
* #define NVTX3_NEW_MACRO_A NVTX3_V1_NEW_MACRO_A
|
|
2953
|
+
* #define NVTX3_NEW_MACRO_B NVTX3_V1_NEW_MACRO_B
|
|
2954
|
+
* #endif
|
|
2955
|
+
* #endif // NVTX3_CPP_DEFINITIONS_V1_2
|
|
2956
|
+
* \endcode
|
|
2957
|
+
*/
|
|
2958
|
+
|
|
2959
|
+
/* Undefine all temporarily-defined unversioned macros, which would conflict with
|
|
2960
|
+
* subsequent includes of different versions of this header. */
|
|
2961
|
+
#undef NVTX3_CPP_VERSION_MAJOR
|
|
2962
|
+
#undef NVTX3_CPP_VERSION_MINOR
|
|
2963
|
+
#undef NVTX3_CONCAT
|
|
2964
|
+
#undef NVTX3_NAMESPACE_FOR
|
|
2965
|
+
#undef NVTX3_VERSION_NAMESPACE
|
|
2966
|
+
#undef NVTX3_INLINE_IF_REQUESTED
|
|
2967
|
+
#undef NVTX3_CONSTEXPR_IF_CPP14
|
|
2968
|
+
#undef NVTX3_MAYBE_UNUSED
|
|
2969
|
+
#undef NVTX3_NO_DISCARD
|
|
2970
|
+
|
|
2971
|
+
#if defined(NVTX3_INLINE_THIS_VERSION)
|
|
2972
|
+
# undef NVTX3_INLINE_THIS_VERSION
|
|
2973
|
+
#endif
|
|
2974
|
+
|
|
2975
|
+
#if defined(NVTX3_USE_CHECKED_OVERLOADS_FOR_GET_DEFINED_HERE)
|
|
2976
|
+
# undef NVTX3_USE_CHECKED_OVERLOADS_FOR_GET_DEFINED_HERE
|
|
2977
|
+
# undef NVTX3_USE_CHECKED_OVERLOADS_FOR_GET
|
|
2978
|
+
#endif
|
|
2979
|
+
|
|
2980
|
+
#if defined(NVTX3_STATIC_ASSERT_DEFINED_HERE)
|
|
2981
|
+
# undef NVTX3_STATIC_ASSERT_DEFINED_HERE
|
|
2982
|
+
# undef NVTX3_STATIC_ASSERT
|
|
2983
|
+
#endif
|