cuda-cccl 0.3.3__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cuda-cccl might be problematic. Click here for more details.
- cuda/cccl/__init__.py +27 -0
- cuda/cccl/_cuda_version_utils.py +24 -0
- cuda/cccl/cooperative/__init__.py +9 -0
- cuda/cccl/cooperative/experimental/__init__.py +24 -0
- cuda/cccl/headers/__init__.py +7 -0
- cuda/cccl/headers/include/__init__.py +1 -0
- cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +259 -0
- cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +1182 -0
- cuda/cccl/headers/include/cub/agent/agent_for.cuh +81 -0
- cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +709 -0
- cuda/cccl/headers/include/cub/agent/agent_merge.cuh +234 -0
- cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +748 -0
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +786 -0
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +286 -0
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +703 -0
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +555 -0
- cuda/cccl/headers/include/cub/agent/agent_reduce.cuh +619 -0
- cuda/cccl/headers/include/cub/agent/agent_reduce_by_key.cuh +806 -0
- cuda/cccl/headers/include/cub/agent/agent_rle.cuh +1124 -0
- cuda/cccl/headers/include/cub/agent/agent_scan.cuh +589 -0
- cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +474 -0
- cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +289 -0
- cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +1117 -0
- cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +346 -0
- cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +606 -0
- cuda/cccl/headers/include/cub/agent/agent_topk.cuh +764 -0
- cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +631 -0
- cuda/cccl/headers/include/cub/agent/single_pass_scan_operators.cuh +1424 -0
- cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +963 -0
- cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +1227 -0
- cuda/cccl/headers/include/cub/block/block_exchange.cuh +1313 -0
- cuda/cccl/headers/include/cub/block/block_histogram.cuh +424 -0
- cuda/cccl/headers/include/cub/block/block_load.cuh +1264 -0
- cuda/cccl/headers/include/cub/block/block_load_to_shared.cuh +432 -0
- cuda/cccl/headers/include/cub/block/block_merge_sort.cuh +800 -0
- cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +1225 -0
- cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +2196 -0
- cuda/cccl/headers/include/cub/block/block_raking_layout.cuh +150 -0
- cuda/cccl/headers/include/cub/block/block_reduce.cuh +667 -0
- cuda/cccl/headers/include/cub/block/block_run_length_decode.cuh +434 -0
- cuda/cccl/headers/include/cub/block/block_scan.cuh +2315 -0
- cuda/cccl/headers/include/cub/block/block_shuffle.cuh +346 -0
- cuda/cccl/headers/include/cub/block/block_store.cuh +1247 -0
- cuda/cccl/headers/include/cub/block/radix_rank_sort_operations.cuh +624 -0
- cuda/cccl/headers/include/cub/block/specializations/block_histogram_atomic.cuh +86 -0
- cuda/cccl/headers/include/cub/block/specializations/block_histogram_sort.cuh +240 -0
- cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking.cuh +252 -0
- cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking_commutative_only.cuh +238 -0
- cuda/cccl/headers/include/cub/block/specializations/block_reduce_warp_reductions.cuh +281 -0
- cuda/cccl/headers/include/cub/block/specializations/block_scan_raking.cuh +790 -0
- cuda/cccl/headers/include/cub/block/specializations/block_scan_warp_scans.cuh +538 -0
- cuda/cccl/headers/include/cub/config.cuh +53 -0
- cuda/cccl/headers/include/cub/cub.cuh +120 -0
- cuda/cccl/headers/include/cub/detail/array_utils.cuh +78 -0
- cuda/cccl/headers/include/cub/detail/choose_offset.cuh +161 -0
- cuda/cccl/headers/include/cub/detail/detect_cuda_runtime.cuh +74 -0
- cuda/cccl/headers/include/cub/detail/device_double_buffer.cuh +96 -0
- cuda/cccl/headers/include/cub/detail/device_memory_resource.cuh +62 -0
- cuda/cccl/headers/include/cub/detail/fast_modulo_division.cuh +253 -0
- cuda/cccl/headers/include/cub/detail/integer_utils.cuh +88 -0
- cuda/cccl/headers/include/cub/detail/launcher/cuda_driver.cuh +142 -0
- cuda/cccl/headers/include/cub/detail/launcher/cuda_runtime.cuh +100 -0
- cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +114 -0
- cuda/cccl/headers/include/cub/detail/ptx-json/README.md +71 -0
- cuda/cccl/headers/include/cub/detail/ptx-json/array.h +68 -0
- cuda/cccl/headers/include/cub/detail/ptx-json/json.h +62 -0
- cuda/cccl/headers/include/cub/detail/ptx-json/object.h +100 -0
- cuda/cccl/headers/include/cub/detail/ptx-json/string.h +53 -0
- cuda/cccl/headers/include/cub/detail/ptx-json/value.h +95 -0
- cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +63 -0
- cuda/cccl/headers/include/cub/detail/rfa.cuh +731 -0
- cuda/cccl/headers/include/cub/detail/strong_load.cuh +189 -0
- cuda/cccl/headers/include/cub/detail/strong_store.cuh +220 -0
- cuda/cccl/headers/include/cub/detail/temporary_storage.cuh +384 -0
- cuda/cccl/headers/include/cub/detail/type_traits.cuh +187 -0
- cuda/cccl/headers/include/cub/detail/uninitialized_copy.cuh +73 -0
- cuda/cccl/headers/include/cub/detail/unsafe_bitcast.cuh +56 -0
- cuda/cccl/headers/include/cub/device/device_adjacent_difference.cuh +596 -0
- cuda/cccl/headers/include/cub/device/device_copy.cuh +276 -0
- cuda/cccl/headers/include/cub/device/device_for.cuh +1063 -0
- cuda/cccl/headers/include/cub/device/device_histogram.cuh +1509 -0
- cuda/cccl/headers/include/cub/device/device_memcpy.cuh +195 -0
- cuda/cccl/headers/include/cub/device/device_merge.cuh +203 -0
- cuda/cccl/headers/include/cub/device/device_merge_sort.cuh +979 -0
- cuda/cccl/headers/include/cub/device/device_partition.cuh +668 -0
- cuda/cccl/headers/include/cub/device/device_radix_sort.cuh +3437 -0
- cuda/cccl/headers/include/cub/device/device_reduce.cuh +2518 -0
- cuda/cccl/headers/include/cub/device/device_run_length_encode.cuh +370 -0
- cuda/cccl/headers/include/cub/device/device_scan.cuh +2212 -0
- cuda/cccl/headers/include/cub/device/device_segmented_radix_sort.cuh +1496 -0
- cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +1430 -0
- cuda/cccl/headers/include/cub/device/device_segmented_sort.cuh +2811 -0
- cuda/cccl/headers/include/cub/device/device_select.cuh +1228 -0
- cuda/cccl/headers/include/cub/device/device_topk.cuh +511 -0
- cuda/cccl/headers/include/cub/device/device_transform.cuh +668 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_adjacent_difference.cuh +315 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_batch_memcpy.cuh +719 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_common.cuh +43 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_copy_mdspan.cuh +79 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_for.cuh +198 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_histogram.cuh +1046 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +303 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge_sort.cuh +473 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +1744 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +1310 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_by_key.cuh +655 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +531 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh +313 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_rle.cuh +615 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan.cuh +517 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan_by_key.cuh +602 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +975 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_select_if.cuh +842 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +341 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +440 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_three_way_partition.cuh +389 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_topk.cuh +627 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +569 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_unique_by_key.cuh +545 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/for_each.cuh +261 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/histogram.cuh +505 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/merge_sort.cuh +334 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/radix_sort.cuh +803 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/reduce.cuh +583 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/scan.cuh +189 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_reduce.cuh +321 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_sort.cuh +522 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/three_way_partition.cuh +201 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +1028 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/unique_by_key.cuh +176 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +67 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +118 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +60 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +275 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +76 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +126 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +1065 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce.cuh +493 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +942 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +673 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +618 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +1010 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +398 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_select_if.cuh +1588 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +440 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_topk.cuh +85 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +481 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +884 -0
- cuda/cccl/headers/include/cub/grid/grid_even_share.cuh +227 -0
- cuda/cccl/headers/include/cub/grid/grid_mapping.cuh +106 -0
- cuda/cccl/headers/include/cub/grid/grid_queue.cuh +202 -0
- cuda/cccl/headers/include/cub/iterator/arg_index_input_iterator.cuh +254 -0
- cuda/cccl/headers/include/cub/iterator/cache_modified_input_iterator.cuh +259 -0
- cuda/cccl/headers/include/cub/iterator/cache_modified_output_iterator.cuh +250 -0
- cuda/cccl/headers/include/cub/iterator/tex_obj_input_iterator.cuh +320 -0
- cuda/cccl/headers/include/cub/thread/thread_load.cuh +349 -0
- cuda/cccl/headers/include/cub/thread/thread_operators.cuh +688 -0
- cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +548 -0
- cuda/cccl/headers/include/cub/thread/thread_scan.cuh +498 -0
- cuda/cccl/headers/include/cub/thread/thread_search.cuh +199 -0
- cuda/cccl/headers/include/cub/thread/thread_simd.cuh +458 -0
- cuda/cccl/headers/include/cub/thread/thread_sort.cuh +102 -0
- cuda/cccl/headers/include/cub/thread/thread_store.cuh +365 -0
- cuda/cccl/headers/include/cub/util_allocator.cuh +921 -0
- cuda/cccl/headers/include/cub/util_arch.cuh +167 -0
- cuda/cccl/headers/include/cub/util_cpp_dialect.cuh +95 -0
- cuda/cccl/headers/include/cub/util_debug.cuh +207 -0
- cuda/cccl/headers/include/cub/util_device.cuh +800 -0
- cuda/cccl/headers/include/cub/util_macro.cuh +97 -0
- cuda/cccl/headers/include/cub/util_math.cuh +118 -0
- cuda/cccl/headers/include/cub/util_namespace.cuh +176 -0
- cuda/cccl/headers/include/cub/util_policy_wrapper_t.cuh +55 -0
- cuda/cccl/headers/include/cub/util_ptx.cuh +513 -0
- cuda/cccl/headers/include/cub/util_temporary_storage.cuh +122 -0
- cuda/cccl/headers/include/cub/util_type.cuh +1120 -0
- cuda/cccl/headers/include/cub/util_vsmem.cuh +253 -0
- cuda/cccl/headers/include/cub/version.cuh +89 -0
- cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_shfl.cuh +329 -0
- cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_smem.cuh +177 -0
- cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +737 -0
- cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +408 -0
- cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +952 -0
- cuda/cccl/headers/include/cub/warp/specializations/warp_scan_smem.cuh +715 -0
- cuda/cccl/headers/include/cub/warp/warp_exchange.cuh +405 -0
- cuda/cccl/headers/include/cub/warp/warp_load.cuh +614 -0
- cuda/cccl/headers/include/cub/warp/warp_merge_sort.cuh +169 -0
- cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +829 -0
- cuda/cccl/headers/include/cub/warp/warp_scan.cuh +1890 -0
- cuda/cccl/headers/include/cub/warp/warp_store.cuh +521 -0
- cuda/cccl/headers/include/cub/warp/warp_utils.cuh +61 -0
- cuda/cccl/headers/include/cuda/__algorithm/common.h +68 -0
- cuda/cccl/headers/include/cuda/__algorithm/copy.h +196 -0
- cuda/cccl/headers/include/cuda/__algorithm/fill.h +107 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/access_property.h +165 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/access_property_encoding.h +172 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr.h +217 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr_base.h +100 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/apply_access_property.h +83 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/associate_access_property.h +128 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/createpolicy.h +210 -0
- cuda/cccl/headers/include/cuda/__atomic/atomic.h +145 -0
- cuda/cccl/headers/include/cuda/__barrier/async_contract_fulfillment.h +39 -0
- cuda/cccl/headers/include/cuda/__barrier/barrier.h +65 -0
- cuda/cccl/headers/include/cuda/__barrier/barrier_arrive_tx.h +102 -0
- cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +487 -0
- cuda/cccl/headers/include/cuda/__barrier/barrier_expect_tx.h +74 -0
- cuda/cccl/headers/include/cuda/__barrier/barrier_native_handle.h +45 -0
- cuda/cccl/headers/include/cuda/__barrier/barrier_thread_scope.h +60 -0
- cuda/cccl/headers/include/cuda/__bit/bit_reverse.h +171 -0
- cuda/cccl/headers/include/cuda/__bit/bitfield.h +122 -0
- cuda/cccl/headers/include/cuda/__bit/bitmask.h +90 -0
- cuda/cccl/headers/include/cuda/__cccl_config +37 -0
- cuda/cccl/headers/include/cuda/__cmath/ceil_div.h +124 -0
- cuda/cccl/headers/include/cuda/__cmath/fast_modulo_division.h +178 -0
- cuda/cccl/headers/include/cuda/__cmath/ilog.h +195 -0
- cuda/cccl/headers/include/cuda/__cmath/ipow.h +107 -0
- cuda/cccl/headers/include/cuda/__cmath/isqrt.h +80 -0
- cuda/cccl/headers/include/cuda/__cmath/mul_hi.h +146 -0
- cuda/cccl/headers/include/cuda/__cmath/neg.h +47 -0
- cuda/cccl/headers/include/cuda/__cmath/pow2.h +74 -0
- cuda/cccl/headers/include/cuda/__cmath/round_down.h +102 -0
- cuda/cccl/headers/include/cuda/__cmath/round_up.h +104 -0
- cuda/cccl/headers/include/cuda/__cmath/uabs.h +57 -0
- cuda/cccl/headers/include/cuda/__complex/complex.h +238 -0
- cuda/cccl/headers/include/cuda/__complex/get_real_imag.h +89 -0
- cuda/cccl/headers/include/cuda/__complex/traits.h +64 -0
- cuda/cccl/headers/include/cuda/__complex_ +28 -0
- cuda/cccl/headers/include/cuda/__device/all_devices.h +140 -0
- cuda/cccl/headers/include/cuda/__device/arch_id.h +176 -0
- cuda/cccl/headers/include/cuda/__device/arch_traits.h +537 -0
- cuda/cccl/headers/include/cuda/__device/attributes.h +772 -0
- cuda/cccl/headers/include/cuda/__device/compute_capability.h +171 -0
- cuda/cccl/headers/include/cuda/__device/device_ref.h +156 -0
- cuda/cccl/headers/include/cuda/__device/physical_device.h +172 -0
- cuda/cccl/headers/include/cuda/__driver/driver_api.h +835 -0
- cuda/cccl/headers/include/cuda/__event/event.h +171 -0
- cuda/cccl/headers/include/cuda/__event/event_ref.h +157 -0
- cuda/cccl/headers/include/cuda/__event/timed_event.h +120 -0
- cuda/cccl/headers/include/cuda/__execution/determinism.h +91 -0
- cuda/cccl/headers/include/cuda/__execution/output_ordering.h +89 -0
- cuda/cccl/headers/include/cuda/__execution/require.h +75 -0
- cuda/cccl/headers/include/cuda/__execution/tune.h +70 -0
- cuda/cccl/headers/include/cuda/__functional/address_stability.h +131 -0
- cuda/cccl/headers/include/cuda/__functional/for_each_canceled.h +321 -0
- cuda/cccl/headers/include/cuda/__functional/maximum.h +58 -0
- cuda/cccl/headers/include/cuda/__functional/minimum.h +58 -0
- cuda/cccl/headers/include/cuda/__functional/proclaim_return_type.h +108 -0
- cuda/cccl/headers/include/cuda/__fwd/barrier.h +38 -0
- cuda/cccl/headers/include/cuda/__fwd/barrier_native_handle.h +42 -0
- cuda/cccl/headers/include/cuda/__fwd/complex.h +48 -0
- cuda/cccl/headers/include/cuda/__fwd/devices.h +44 -0
- cuda/cccl/headers/include/cuda/__fwd/get_stream.h +38 -0
- cuda/cccl/headers/include/cuda/__fwd/pipeline.h +37 -0
- cuda/cccl/headers/include/cuda/__fwd/zip_iterator.h +58 -0
- cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +315 -0
- cuda/cccl/headers/include/cuda/__iterator/counting_iterator.h +483 -0
- cuda/cccl/headers/include/cuda/__iterator/discard_iterator.h +324 -0
- cuda/cccl/headers/include/cuda/__iterator/permutation_iterator.h +456 -0
- cuda/cccl/headers/include/cuda/__iterator/shuffle_iterator.h +334 -0
- cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +418 -0
- cuda/cccl/headers/include/cuda/__iterator/tabulate_output_iterator.h +367 -0
- cuda/cccl/headers/include/cuda/__iterator/transform_input_output_iterator.h +528 -0
- cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +527 -0
- cuda/cccl/headers/include/cuda/__iterator/transform_output_iterator.h +486 -0
- cuda/cccl/headers/include/cuda/__iterator/zip_common.h +148 -0
- cuda/cccl/headers/include/cuda/__iterator/zip_function.h +112 -0
- cuda/cccl/headers/include/cuda/__iterator/zip_iterator.h +557 -0
- cuda/cccl/headers/include/cuda/__iterator/zip_transform_iterator.h +592 -0
- cuda/cccl/headers/include/cuda/__latch/latch.h +44 -0
- cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +533 -0
- cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +238 -0
- cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +152 -0
- cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +117 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/check_preconditions.h +79 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/completion_mechanism.h +47 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_bulk_shared_global.h +60 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_fallback.h +72 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_shared_global.h +148 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/dispatch_memcpy_async.h +165 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/is_local_smem_barrier.h +53 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async.h +179 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_barrier.h +99 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_tx.h +104 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_completion.h +170 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/try_get_barrier_handle.h +59 -0
- cuda/cccl/headers/include/cuda/__memory/address_space.h +227 -0
- cuda/cccl/headers/include/cuda/__memory/align_down.h +56 -0
- cuda/cccl/headers/include/cuda/__memory/align_up.h +56 -0
- cuda/cccl/headers/include/cuda/__memory/aligned_size.h +61 -0
- cuda/cccl/headers/include/cuda/__memory/check_address.h +111 -0
- cuda/cccl/headers/include/cuda/__memory/discard_memory.h +64 -0
- cuda/cccl/headers/include/cuda/__memory/get_device_address.h +58 -0
- cuda/cccl/headers/include/cuda/__memory/is_aligned.h +47 -0
- cuda/cccl/headers/include/cuda/__memory/ptr_in_range.h +93 -0
- cuda/cccl/headers/include/cuda/__memory/ptr_rebind.h +75 -0
- cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +82 -0
- cuda/cccl/headers/include/cuda/__memory_resource/get_property.h +153 -0
- cuda/cccl/headers/include/cuda/__memory_resource/properties.h +113 -0
- cuda/cccl/headers/include/cuda/__memory_resource/resource.h +125 -0
- cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +652 -0
- cuda/cccl/headers/include/cuda/__numeric/add_overflow.h +306 -0
- cuda/cccl/headers/include/cuda/__numeric/narrow.h +108 -0
- cuda/cccl/headers/include/cuda/__numeric/overflow_cast.h +59 -0
- cuda/cccl/headers/include/cuda/__numeric/overflow_result.h +43 -0
- cuda/cccl/headers/include/cuda/__nvtx/nvtx.h +120 -0
- cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2983 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/barrier_cluster.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/bfind.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/bmsk.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/clusterlaunchcontrol.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk.h +44 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_commit_group.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_tensor.h +45 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_wait_group.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_mbarrier_arrive.h +42 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk.h +60 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk_tensor.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/elect_sync.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/exit.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/fence.h +49 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/barrier_cluster.h +115 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bfind.h +190 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bmsk.h +54 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/clusterlaunchcontrol.h +242 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk.h +197 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_commit_group.h +25 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_multicast.h +54 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor.h +997 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_gather_scatter.h +318 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_multicast.h +671 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_wait_group.h +46 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive.h +26 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive_noinc.h +26 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk.h +1470 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_bf16.h +132 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_f16.h +132 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_tensor.h +601 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/elect_sync.h +36 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/exit.h +25 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence.h +208 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_mbarrier_init.h +31 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_alias.h +25 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async.h +58 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async_generic_sync_restrict.h +64 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_tensormap_generic.h +102 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_sync_restrict.h +64 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/get_sreg.h +949 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/getctarank.h +32 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/ld.h +5542 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive.h +399 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_expect_tx.h +184 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_no_complete.h +34 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_expect_tx.h +102 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_init.h +27 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait.h +143 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait_parity.h +144 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait.h +286 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait_parity.h +290 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_ld_reduce.h +2202 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_red.h +1362 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_st.h +236 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/prmt.h +230 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/red_async.h +460 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shl.h +96 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shr.h +168 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st.h +1490 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_async.h +123 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_bulk.h +31 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_alloc.h +132 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_commit.h +99 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_cp.h +765 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_fence.h +58 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_ld.h +4927 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma.h +4291 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma_ws.h +7110 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_shift.h +42 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_st.h +5063 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_wait.h +56 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_cp_fenceproxy.h +71 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_replace.h +1030 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/trap.h +25 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/get_sreg.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/getctarank.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/ld.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_arrive.h +45 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_expect_tx.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_init.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_wait.h +46 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_ld_reduce.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_red.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_st.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/prmt.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/red_async.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/shfl_sync.h +244 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/shl.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/shr.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/st.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/st_async.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/st_bulk.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_alloc.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_commit.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_cp.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_fence.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_ld.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma_ws.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_shift.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_st.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_wait.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_cp_fenceproxy.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_replace.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/trap.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/pragmas/enable_smem_spilling.h +47 -0
- cuda/cccl/headers/include/cuda/__ptx/ptx_dot_variants.h +230 -0
- cuda/cccl/headers/include/cuda/__ptx/ptx_helper_functions.h +176 -0
- cuda/cccl/headers/include/cuda/__random/feistel_bijection.h +105 -0
- cuda/cccl/headers/include/cuda/__random/random_bijection.h +88 -0
- cuda/cccl/headers/include/cuda/__runtime/ensure_current_context.h +99 -0
- cuda/cccl/headers/include/cuda/__runtime/types.h +41 -0
- cuda/cccl/headers/include/cuda/__semaphore/counting_semaphore.h +53 -0
- cuda/cccl/headers/include/cuda/__stream/get_stream.h +110 -0
- cuda/cccl/headers/include/cuda/__stream/stream.h +141 -0
- cuda/cccl/headers/include/cuda/__stream/stream_ref.h +303 -0
- cuda/cccl/headers/include/cuda/__type_traits/is_floating_point.h +47 -0
- cuda/cccl/headers/include/cuda/__type_traits/is_specialization_of.h +37 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/access.h +88 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/any_cast.h +83 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_base.h +148 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_from.h +96 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_fwd.h +128 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_ptr.h +304 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_ref.h +337 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_value.h +590 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/conversions.h +169 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/dynamic_any_cast.h +107 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/interfaces.h +359 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/iset.h +142 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/overrides.h +64 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/rtti.h +257 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/semiregular.h +322 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/storage.h +79 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/tagged_ptr.h +58 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/virtcall.h +162 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_functions.h +184 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_ptrs.h +80 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_tables.h +155 -0
- cuda/cccl/headers/include/cuda/__utility/basic_any.h +507 -0
- cuda/cccl/headers/include/cuda/__utility/immovable.h +50 -0
- cuda/cccl/headers/include/cuda/__utility/in_range.h +65 -0
- cuda/cccl/headers/include/cuda/__utility/inherit.h +36 -0
- cuda/cccl/headers/include/cuda/__utility/no_init.h +29 -0
- cuda/cccl/headers/include/cuda/__utility/static_for.h +79 -0
- cuda/cccl/headers/include/cuda/__warp/lane_mask.h +326 -0
- cuda/cccl/headers/include/cuda/__warp/warp_match_all.h +65 -0
- cuda/cccl/headers/include/cuda/__warp/warp_shuffle.h +251 -0
- cuda/cccl/headers/include/cuda/access_property +26 -0
- cuda/cccl/headers/include/cuda/algorithm +27 -0
- cuda/cccl/headers/include/cuda/annotated_ptr +29 -0
- cuda/cccl/headers/include/cuda/atomic +27 -0
- cuda/cccl/headers/include/cuda/barrier +267 -0
- cuda/cccl/headers/include/cuda/bit +29 -0
- cuda/cccl/headers/include/cuda/cmath +37 -0
- cuda/cccl/headers/include/cuda/devices +33 -0
- cuda/cccl/headers/include/cuda/discard_memory +32 -0
- cuda/cccl/headers/include/cuda/functional +32 -0
- cuda/cccl/headers/include/cuda/iterator +39 -0
- cuda/cccl/headers/include/cuda/latch +27 -0
- cuda/cccl/headers/include/cuda/mdspan +28 -0
- cuda/cccl/headers/include/cuda/memory +35 -0
- cuda/cccl/headers/include/cuda/memory_resource +35 -0
- cuda/cccl/headers/include/cuda/numeric +29 -0
- cuda/cccl/headers/include/cuda/pipeline +579 -0
- cuda/cccl/headers/include/cuda/ptx +129 -0
- cuda/cccl/headers/include/cuda/semaphore +31 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/adjacent_find.h +59 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/all_of.h +45 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/any_of.h +45 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/binary_search.h +53 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/clamp.h +48 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/comp.h +58 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/comp_ref_type.h +85 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/copy.h +142 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/copy_backward.h +80 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/copy_if.h +47 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/copy_n.h +73 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/count.h +49 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/count_if.h +49 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/equal.h +128 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +101 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/fill.h +58 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/fill_n.h +51 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/find.h +62 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/find_end.h +225 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/find_first_of.h +73 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/find_if.h +46 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/find_if_not.h +46 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/for_each.h +42 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/for_each_n.h +48 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/generate.h +41 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/generate_n.h +46 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/half_positive.h +49 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/in_fun_result.h +55 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +90 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/is_heap.h +50 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/is_heap_until.h +83 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/is_partitioned.h +57 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/is_permutation.h +252 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted.h +49 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted_until.h +68 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/iter_swap.h +82 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/iterator_operations.h +185 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/lexicographical_compare.h +68 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +82 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/make_heap.h +70 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +88 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/max.h +62 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/max_element.h +67 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/merge.h +89 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/min.h +62 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +87 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/minmax.h +66 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +139 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/mismatch.h +83 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/move.h +86 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/move_backward.h +84 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/next_permutation.h +88 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/none_of.h +45 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort.h +102 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +122 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/partition.h +120 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/partition_copy.h +59 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/partition_point.h +61 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/pop_heap.h +93 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/prev_permutation.h +88 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/push_heap.h +100 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/ranges_for_each.h +84 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/ranges_for_each_n.h +68 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/ranges_iterator_concept.h +65 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min.h +98 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min_element.h +68 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/remove.h +55 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy.h +47 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy_if.h +47 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/remove_if.h +56 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/replace.h +45 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy.h +54 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy_if.h +50 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/replace_if.h +45 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/reverse.h +81 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/reverse_copy.h +43 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/rotate.h +261 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/rotate_copy.h +40 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/search.h +185 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/search_n.h +163 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/set_difference.h +95 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/set_intersection.h +122 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/set_symmetric_difference.h +134 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/set_union.h +128 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/shift_left.h +84 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/shift_right.h +144 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/sift_down.h +139 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/sort_heap.h +70 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/swap_ranges.h +78 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/transform.h +59 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/unique.h +76 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/unique_copy.h +155 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_iter.h +95 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_range.h +126 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +83 -0
- cuda/cccl/headers/include/cuda/std/__algorithm_ +26 -0
- cuda/cccl/headers/include/cuda/std/__atomic/api/common.h +192 -0
- cuda/cccl/headers/include/cuda/std/__atomic/api/owned.h +136 -0
- cuda/cccl/headers/include/cuda/std/__atomic/api/reference.h +118 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions/common.h +58 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_local.h +208 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_derived.h +401 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated.h +3971 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated_helper.h +177 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions/host.h +211 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions.h +33 -0
- cuda/cccl/headers/include/cuda/std/__atomic/order.h +159 -0
- cuda/cccl/headers/include/cuda/std/__atomic/platform/msvc_to_builtins.h +654 -0
- cuda/cccl/headers/include/cuda/std/__atomic/platform.h +93 -0
- cuda/cccl/headers/include/cuda/std/__atomic/scopes.h +105 -0
- cuda/cccl/headers/include/cuda/std/__atomic/types/base.h +249 -0
- cuda/cccl/headers/include/cuda/std/__atomic/types/common.h +104 -0
- cuda/cccl/headers/include/cuda/std/__atomic/types/locked.h +225 -0
- cuda/cccl/headers/include/cuda/std/__atomic/types/reference.h +72 -0
- cuda/cccl/headers/include/cuda/std/__atomic/types/small.h +228 -0
- cuda/cccl/headers/include/cuda/std/__atomic/types.h +52 -0
- cuda/cccl/headers/include/cuda/std/__atomic/wait/notify_wait.h +95 -0
- cuda/cccl/headers/include/cuda/std/__atomic/wait/polling.h +65 -0
- cuda/cccl/headers/include/cuda/std/__barrier/barrier.h +227 -0
- cuda/cccl/headers/include/cuda/std/__barrier/empty_completion.h +37 -0
- cuda/cccl/headers/include/cuda/std/__barrier/poll_tester.h +82 -0
- cuda/cccl/headers/include/cuda/std/__bit/bit_cast.h +76 -0
- cuda/cccl/headers/include/cuda/std/__bit/byteswap.h +185 -0
- cuda/cccl/headers/include/cuda/std/__bit/countl.h +174 -0
- cuda/cccl/headers/include/cuda/std/__bit/countr.h +185 -0
- cuda/cccl/headers/include/cuda/std/__bit/endian.h +39 -0
- cuda/cccl/headers/include/cuda/std/__bit/has_single_bit.h +43 -0
- cuda/cccl/headers/include/cuda/std/__bit/integral.h +126 -0
- cuda/cccl/headers/include/cuda/std/__bit/popcount.h +154 -0
- cuda/cccl/headers/include/cuda/std/__bit/reference.h +1272 -0
- cuda/cccl/headers/include/cuda/std/__bit/rotate.h +94 -0
- cuda/cccl/headers/include/cuda/std/__cccl/algorithm_wrapper.h +36 -0
- cuda/cccl/headers/include/cuda/std/__cccl/architecture.h +78 -0
- cuda/cccl/headers/include/cuda/std/__cccl/assert.h +161 -0
- cuda/cccl/headers/include/cuda/std/__cccl/attributes.h +206 -0
- cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +673 -0
- cuda/cccl/headers/include/cuda/std/__cccl/compiler.h +217 -0
- cuda/cccl/headers/include/cuda/std/__cccl/cuda_capabilities.h +51 -0
- cuda/cccl/headers/include/cuda/std/__cccl/cuda_toolkit.h +56 -0
- cuda/cccl/headers/include/cuda/std/__cccl/deprecated.h +88 -0
- cuda/cccl/headers/include/cuda/std/__cccl/diagnostic.h +131 -0
- cuda/cccl/headers/include/cuda/std/__cccl/dialect.h +123 -0
- cuda/cccl/headers/include/cuda/std/__cccl/epilogue.h +344 -0
- cuda/cccl/headers/include/cuda/std/__cccl/exceptions.h +91 -0
- cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +74 -0
- cuda/cccl/headers/include/cuda/std/__cccl/extended_data_types.h +160 -0
- cuda/cccl/headers/include/cuda/std/__cccl/host_std_lib.h +52 -0
- cuda/cccl/headers/include/cuda/std/__cccl/is_non_narrowing_convertible.h +73 -0
- cuda/cccl/headers/include/cuda/std/__cccl/memory_wrapper.h +36 -0
- cuda/cccl/headers/include/cuda/std/__cccl/numeric_wrapper.h +36 -0
- cuda/cccl/headers/include/cuda/std/__cccl/os.h +54 -0
- cuda/cccl/headers/include/cuda/std/__cccl/preprocessor.h +1286 -0
- cuda/cccl/headers/include/cuda/std/__cccl/prologue.h +281 -0
- cuda/cccl/headers/include/cuda/std/__cccl/ptx_isa.h +253 -0
- cuda/cccl/headers/include/cuda/std/__cccl/rtti.h +72 -0
- cuda/cccl/headers/include/cuda/std/__cccl/sequence_access.h +87 -0
- cuda/cccl/headers/include/cuda/std/__cccl/system_header.h +38 -0
- cuda/cccl/headers/include/cuda/std/__cccl/unreachable.h +31 -0
- cuda/cccl/headers/include/cuda/std/__cccl/version.h +26 -0
- cuda/cccl/headers/include/cuda/std/__cccl/visibility.h +171 -0
- cuda/cccl/headers/include/cuda/std/__charconv/chars_format.h +81 -0
- cuda/cccl/headers/include/cuda/std/__charconv/from_chars.h +154 -0
- cuda/cccl/headers/include/cuda/std/__charconv/from_chars_result.h +56 -0
- cuda/cccl/headers/include/cuda/std/__charconv/to_chars.h +148 -0
- cuda/cccl/headers/include/cuda/std/__charconv/to_chars_result.h +56 -0
- cuda/cccl/headers/include/cuda/std/__charconv_ +31 -0
- cuda/cccl/headers/include/cuda/std/__chrono/calendar.h +54 -0
- cuda/cccl/headers/include/cuda/std/__chrono/day.h +162 -0
- cuda/cccl/headers/include/cuda/std/__chrono/duration.h +503 -0
- cuda/cccl/headers/include/cuda/std/__chrono/file_clock.h +55 -0
- cuda/cccl/headers/include/cuda/std/__chrono/high_resolution_clock.h +46 -0
- cuda/cccl/headers/include/cuda/std/__chrono/month.h +187 -0
- cuda/cccl/headers/include/cuda/std/__chrono/steady_clock.h +60 -0
- cuda/cccl/headers/include/cuda/std/__chrono/system_clock.h +80 -0
- cuda/cccl/headers/include/cuda/std/__chrono/time_point.h +259 -0
- cuda/cccl/headers/include/cuda/std/__chrono/year.h +186 -0
- cuda/cccl/headers/include/cuda/std/__cmath/abs.h +127 -0
- cuda/cccl/headers/include/cuda/std/__cmath/copysign.h +88 -0
- cuda/cccl/headers/include/cuda/std/__cmath/error_functions.h +200 -0
- cuda/cccl/headers/include/cuda/std/__cmath/exponential_functions.h +784 -0
- cuda/cccl/headers/include/cuda/std/__cmath/fdim.h +118 -0
- cuda/cccl/headers/include/cuda/std/__cmath/fma.h +125 -0
- cuda/cccl/headers/include/cuda/std/__cmath/fpclassify.h +231 -0
- cuda/cccl/headers/include/cuda/std/__cmath/gamma.h +205 -0
- cuda/cccl/headers/include/cuda/std/__cmath/hyperbolic_functions.h +286 -0
- cuda/cccl/headers/include/cuda/std/__cmath/hypot.h +221 -0
- cuda/cccl/headers/include/cuda/std/__cmath/inverse_hyperbolic_functions.h +286 -0
- cuda/cccl/headers/include/cuda/std/__cmath/inverse_trigonometric_functions.h +371 -0
- cuda/cccl/headers/include/cuda/std/__cmath/isfinite.h +167 -0
- cuda/cccl/headers/include/cuda/std/__cmath/isinf.h +205 -0
- cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +186 -0
- cuda/cccl/headers/include/cuda/std/__cmath/isnormal.h +138 -0
- cuda/cccl/headers/include/cuda/std/__cmath/lerp.h +101 -0
- cuda/cccl/headers/include/cuda/std/__cmath/logarithms.h +534 -0
- cuda/cccl/headers/include/cuda/std/__cmath/min_max.h +287 -0
- cuda/cccl/headers/include/cuda/std/__cmath/modulo.h +208 -0
- cuda/cccl/headers/include/cuda/std/__cmath/nan.h +54 -0
- cuda/cccl/headers/include/cuda/std/__cmath/remainder.h +206 -0
- cuda/cccl/headers/include/cuda/std/__cmath/roots.h +199 -0
- cuda/cccl/headers/include/cuda/std/__cmath/rounding_functions.h +984 -0
- cuda/cccl/headers/include/cuda/std/__cmath/signbit.h +56 -0
- cuda/cccl/headers/include/cuda/std/__cmath/traits.h +238 -0
- cuda/cccl/headers/include/cuda/std/__cmath/trigonometric_functions.h +328 -0
- cuda/cccl/headers/include/cuda/std/__complex/arg.h +84 -0
- cuda/cccl/headers/include/cuda/std/__complex/complex.h +669 -0
- cuda/cccl/headers/include/cuda/std/__complex/exponential_functions.h +411 -0
- cuda/cccl/headers/include/cuda/std/__complex/hyperbolic_functions.h +117 -0
- cuda/cccl/headers/include/cuda/std/__complex/inverse_hyperbolic_functions.h +216 -0
- cuda/cccl/headers/include/cuda/std/__complex/inverse_trigonometric_functions.h +131 -0
- cuda/cccl/headers/include/cuda/std/__complex/literals.h +86 -0
- cuda/cccl/headers/include/cuda/std/__complex/logarithms.h +303 -0
- cuda/cccl/headers/include/cuda/std/__complex/math.h +159 -0
- cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +323 -0
- cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +322 -0
- cuda/cccl/headers/include/cuda/std/__complex/roots.h +214 -0
- cuda/cccl/headers/include/cuda/std/__complex/trigonometric_functions.h +61 -0
- cuda/cccl/headers/include/cuda/std/__complex/tuple.h +107 -0
- cuda/cccl/headers/include/cuda/std/__complex/vector_support.h +130 -0
- cuda/cccl/headers/include/cuda/std/__concepts/arithmetic.h +56 -0
- cuda/cccl/headers/include/cuda/std/__concepts/assignable.h +64 -0
- cuda/cccl/headers/include/cuda/std/__concepts/boolean_testable.h +63 -0
- cuda/cccl/headers/include/cuda/std/__concepts/class_or_enum.h +45 -0
- cuda/cccl/headers/include/cuda/std/__concepts/common_reference_with.h +69 -0
- cuda/cccl/headers/include/cuda/std/__concepts/common_with.h +82 -0
- cuda/cccl/headers/include/cuda/std/__concepts/concept_macros.h +341 -0
- cuda/cccl/headers/include/cuda/std/__concepts/constructible.h +174 -0
- cuda/cccl/headers/include/cuda/std/__concepts/convertible_to.h +70 -0
- cuda/cccl/headers/include/cuda/std/__concepts/copyable.h +60 -0
- cuda/cccl/headers/include/cuda/std/__concepts/derived_from.h +56 -0
- cuda/cccl/headers/include/cuda/std/__concepts/destructible.h +76 -0
- cuda/cccl/headers/include/cuda/std/__concepts/different_from.h +38 -0
- cuda/cccl/headers/include/cuda/std/__concepts/equality_comparable.h +100 -0
- cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +80 -0
- cuda/cccl/headers/include/cuda/std/__concepts/movable.h +58 -0
- cuda/cccl/headers/include/cuda/std/__concepts/predicate.h +54 -0
- cuda/cccl/headers/include/cuda/std/__concepts/regular.h +54 -0
- cuda/cccl/headers/include/cuda/std/__concepts/relation.h +77 -0
- cuda/cccl/headers/include/cuda/std/__concepts/same_as.h +39 -0
- cuda/cccl/headers/include/cuda/std/__concepts/semiregular.h +54 -0
- cuda/cccl/headers/include/cuda/std/__concepts/swappable.h +206 -0
- cuda/cccl/headers/include/cuda/std/__concepts/totally_ordered.h +101 -0
- cuda/cccl/headers/include/cuda/std/__cstddef/byte.h +113 -0
- cuda/cccl/headers/include/cuda/std/__cstddef/types.h +52 -0
- cuda/cccl/headers/include/cuda/std/__cstdlib/abs.h +57 -0
- cuda/cccl/headers/include/cuda/std/__cstdlib/aligned_alloc.h +66 -0
- cuda/cccl/headers/include/cuda/std/__cstdlib/div.h +96 -0
- cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +70 -0
- cuda/cccl/headers/include/cuda/std/__cstring/memcpy.h +61 -0
- cuda/cccl/headers/include/cuda/std/__cstring/memset.h +46 -0
- cuda/cccl/headers/include/cuda/std/__cuda/api_wrapper.h +62 -0
- cuda/cccl/headers/include/cuda/std/__exception/cuda_error.h +139 -0
- cuda/cccl/headers/include/cuda/std/__exception/terminate.h +73 -0
- cuda/cccl/headers/include/cuda/std/__execution/env.h +455 -0
- cuda/cccl/headers/include/cuda/std/__execution/policy.h +88 -0
- cuda/cccl/headers/include/cuda/std/__expected/bad_expected_access.h +127 -0
- cuda/cccl/headers/include/cuda/std/__expected/expected.h +1941 -0
- cuda/cccl/headers/include/cuda/std/__expected/expected_base.h +1050 -0
- cuda/cccl/headers/include/cuda/std/__expected/unexpect.h +37 -0
- cuda/cccl/headers/include/cuda/std/__expected/unexpected.h +165 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/arithmetic.h +56 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/cast.h +812 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/cccl_fp.h +125 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/common_type.h +48 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/constants.h +376 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/conversion_rank_order.h +124 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/cuda_fp_types.h +116 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/decompose.h +69 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/format.h +162 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/fp.h +40 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/mask.h +78 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/native_type.h +81 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/overflow_handler.h +139 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/properties.h +229 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/storage.h +248 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/traits.h +172 -0
- cuda/cccl/headers/include/cuda/std/__format/buffer.h +48 -0
- cuda/cccl/headers/include/cuda/std/__format/concepts.h +69 -0
- cuda/cccl/headers/include/cuda/std/__format/format_arg.h +282 -0
- cuda/cccl/headers/include/cuda/std/__format/format_arg_store.h +279 -0
- cuda/cccl/headers/include/cuda/std/__format/format_args.h +122 -0
- cuda/cccl/headers/include/cuda/std/__format/format_context.h +92 -0
- cuda/cccl/headers/include/cuda/std/__format/format_error.h +76 -0
- cuda/cccl/headers/include/cuda/std/__format/format_integral.h +237 -0
- cuda/cccl/headers/include/cuda/std/__format/format_parse_context.h +124 -0
- cuda/cccl/headers/include/cuda/std/__format/format_spec_parser.h +1230 -0
- cuda/cccl/headers/include/cuda/std/__format/formatter.h +59 -0
- cuda/cccl/headers/include/cuda/std/__format/formatters/bool.h +101 -0
- cuda/cccl/headers/include/cuda/std/__format/formatters/char.h +124 -0
- cuda/cccl/headers/include/cuda/std/__format/formatters/fp.h +101 -0
- cuda/cccl/headers/include/cuda/std/__format/formatters/int.h +174 -0
- cuda/cccl/headers/include/cuda/std/__format/formatters/ptr.h +104 -0
- cuda/cccl/headers/include/cuda/std/__format/formatters/str.h +178 -0
- cuda/cccl/headers/include/cuda/std/__format/output_utils.h +272 -0
- cuda/cccl/headers/include/cuda/std/__format/parse_arg_id.h +138 -0
- cuda/cccl/headers/include/cuda/std/__format_ +45 -0
- cuda/cccl/headers/include/cuda/std/__functional/binary_function.h +63 -0
- cuda/cccl/headers/include/cuda/std/__functional/binary_negate.h +65 -0
- cuda/cccl/headers/include/cuda/std/__functional/bind.h +334 -0
- cuda/cccl/headers/include/cuda/std/__functional/bind_back.h +80 -0
- cuda/cccl/headers/include/cuda/std/__functional/bind_front.h +73 -0
- cuda/cccl/headers/include/cuda/std/__functional/binder1st.h +74 -0
- cuda/cccl/headers/include/cuda/std/__functional/binder2nd.h +74 -0
- cuda/cccl/headers/include/cuda/std/__functional/compose.h +68 -0
- cuda/cccl/headers/include/cuda/std/__functional/default_searcher.h +75 -0
- cuda/cccl/headers/include/cuda/std/__functional/function.h +1275 -0
- cuda/cccl/headers/include/cuda/std/__functional/hash.h +649 -0
- cuda/cccl/headers/include/cuda/std/__functional/identity.h +57 -0
- cuda/cccl/headers/include/cuda/std/__functional/invoke.h +296 -0
- cuda/cccl/headers/include/cuda/std/__functional/is_transparent.h +41 -0
- cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +66 -0
- cuda/cccl/headers/include/cuda/std/__functional/mem_fun_ref.h +211 -0
- cuda/cccl/headers/include/cuda/std/__functional/not_fn.h +120 -0
- cuda/cccl/headers/include/cuda/std/__functional/operations.h +534 -0
- cuda/cccl/headers/include/cuda/std/__functional/perfect_forward.h +128 -0
- cuda/cccl/headers/include/cuda/std/__functional/pointer_to_binary_function.h +64 -0
- cuda/cccl/headers/include/cuda/std/__functional/pointer_to_unary_function.h +63 -0
- cuda/cccl/headers/include/cuda/std/__functional/ranges_operations.h +113 -0
- cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +113 -0
- cuda/cccl/headers/include/cuda/std/__functional/unary_function.h +62 -0
- cuda/cccl/headers/include/cuda/std/__functional/unary_negate.h +65 -0
- cuda/cccl/headers/include/cuda/std/__functional/unwrap_ref.h +56 -0
- cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +262 -0
- cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +53 -0
- cuda/cccl/headers/include/cuda/std/__fwd/array.h +42 -0
- cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +74 -0
- cuda/cccl/headers/include/cuda/std/__fwd/complex.h +75 -0
- cuda/cccl/headers/include/cuda/std/__fwd/expected.h +46 -0
- cuda/cccl/headers/include/cuda/std/__fwd/format.h +84 -0
- cuda/cccl/headers/include/cuda/std/__fwd/fp.h +37 -0
- cuda/cccl/headers/include/cuda/std/__fwd/get.h +123 -0
- cuda/cccl/headers/include/cuda/std/__fwd/hash.h +34 -0
- cuda/cccl/headers/include/cuda/std/__fwd/iterator.h +43 -0
- cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +122 -0
- cuda/cccl/headers/include/cuda/std/__fwd/memory_resource.h +37 -0
- cuda/cccl/headers/include/cuda/std/__fwd/optional.h +39 -0
- cuda/cccl/headers/include/cuda/std/__fwd/pair.h +47 -0
- cuda/cccl/headers/include/cuda/std/__fwd/reference_wrapper.h +34 -0
- cuda/cccl/headers/include/cuda/std/__fwd/span.h +45 -0
- cuda/cccl/headers/include/cuda/std/__fwd/string.h +112 -0
- cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +91 -0
- cuda/cccl/headers/include/cuda/std/__fwd/subrange.h +55 -0
- cuda/cccl/headers/include/cuda/std/__fwd/tuple.h +34 -0
- cuda/cccl/headers/include/cuda/std/__fwd/unexpected.h +40 -0
- cuda/cccl/headers/include/cuda/std/__internal/cpp_dialect.h +44 -0
- cuda/cccl/headers/include/cuda/std/__internal/features.h +72 -0
- cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +143 -0
- cuda/cccl/headers/include/cuda/std/__iterator/access.h +128 -0
- cuda/cccl/headers/include/cuda/std/__iterator/advance.h +228 -0
- cuda/cccl/headers/include/cuda/std/__iterator/back_insert_iterator.h +163 -0
- cuda/cccl/headers/include/cuda/std/__iterator/bounded_iter.h +253 -0
- cuda/cccl/headers/include/cuda/std/__iterator/concepts.h +645 -0
- cuda/cccl/headers/include/cuda/std/__iterator/counted_iterator.h +464 -0
- cuda/cccl/headers/include/cuda/std/__iterator/data.h +61 -0
- cuda/cccl/headers/include/cuda/std/__iterator/default_sentinel.h +36 -0
- cuda/cccl/headers/include/cuda/std/__iterator/distance.h +126 -0
- cuda/cccl/headers/include/cuda/std/__iterator/empty.h +53 -0
- cuda/cccl/headers/include/cuda/std/__iterator/erase_if_container.h +53 -0
- cuda/cccl/headers/include/cuda/std/__iterator/front_insert_iterator.h +99 -0
- cuda/cccl/headers/include/cuda/std/__iterator/incrementable_traits.h +143 -0
- cuda/cccl/headers/include/cuda/std/__iterator/indirectly_comparable.h +55 -0
- cuda/cccl/headers/include/cuda/std/__iterator/insert_iterator.h +107 -0
- cuda/cccl/headers/include/cuda/std/__iterator/istream_iterator.h +146 -0
- cuda/cccl/headers/include/cuda/std/__iterator/istreambuf_iterator.h +161 -0
- cuda/cccl/headers/include/cuda/std/__iterator/iter_move.h +161 -0
- cuda/cccl/headers/include/cuda/std/__iterator/iter_swap.h +163 -0
- cuda/cccl/headers/include/cuda/std/__iterator/iterator.h +44 -0
- cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +847 -0
- cuda/cccl/headers/include/cuda/std/__iterator/mergeable.h +72 -0
- cuda/cccl/headers/include/cuda/std/__iterator/move_iterator.h +432 -0
- cuda/cccl/headers/include/cuda/std/__iterator/move_sentinel.h +73 -0
- cuda/cccl/headers/include/cuda/std/__iterator/next.h +101 -0
- cuda/cccl/headers/include/cuda/std/__iterator/ostream_iterator.h +95 -0
- cuda/cccl/headers/include/cuda/std/__iterator/ostreambuf_iterator.h +100 -0
- cuda/cccl/headers/include/cuda/std/__iterator/permutable.h +54 -0
- cuda/cccl/headers/include/cuda/std/__iterator/prev.h +90 -0
- cuda/cccl/headers/include/cuda/std/__iterator/projected.h +61 -0
- cuda/cccl/headers/include/cuda/std/__iterator/readable_traits.h +156 -0
- cuda/cccl/headers/include/cuda/std/__iterator/reverse_access.h +142 -0
- cuda/cccl/headers/include/cuda/std/__iterator/reverse_iterator.h +371 -0
- cuda/cccl/headers/include/cuda/std/__iterator/size.h +69 -0
- cuda/cccl/headers/include/cuda/std/__iterator/sortable.h +55 -0
- cuda/cccl/headers/include/cuda/std/__iterator/unreachable_sentinel.h +84 -0
- cuda/cccl/headers/include/cuda/std/__iterator/wrap_iter.h +245 -0
- cuda/cccl/headers/include/cuda/std/__latch/latch.h +88 -0
- cuda/cccl/headers/include/cuda/std/__limits/numeric_limits.h +617 -0
- cuda/cccl/headers/include/cuda/std/__limits/numeric_limits_ext.h +753 -0
- cuda/cccl/headers/include/cuda/std/__linalg/conj_if_needed.h +78 -0
- cuda/cccl/headers/include/cuda/std/__linalg/conjugate_transposed.h +54 -0
- cuda/cccl/headers/include/cuda/std/__linalg/conjugated.h +139 -0
- cuda/cccl/headers/include/cuda/std/__linalg/scaled.h +132 -0
- cuda/cccl/headers/include/cuda/std/__linalg/transposed.h +321 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/aligned_accessor.h +97 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/concepts.h +139 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/default_accessor.h +73 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/empty_base.h +352 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +759 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/layout_left.h +314 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/layout_right.h +307 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/layout_stride.h +605 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +512 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_extents.h +193 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_helper.h +189 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_mapping.h +344 -0
- cuda/cccl/headers/include/cuda/std/__memory/addressof.h +67 -0
- cuda/cccl/headers/include/cuda/std/__memory/align.h +67 -0
- cuda/cccl/headers/include/cuda/std/__memory/allocate_at_least.h +81 -0
- cuda/cccl/headers/include/cuda/std/__memory/allocation_guard.h +100 -0
- cuda/cccl/headers/include/cuda/std/__memory/allocator.h +320 -0
- cuda/cccl/headers/include/cuda/std/__memory/allocator_arg_t.h +84 -0
- cuda/cccl/headers/include/cuda/std/__memory/allocator_destructor.h +59 -0
- cuda/cccl/headers/include/cuda/std/__memory/allocator_traits.h +525 -0
- cuda/cccl/headers/include/cuda/std/__memory/assume_aligned.h +60 -0
- cuda/cccl/headers/include/cuda/std/__memory/builtin_new_allocator.h +87 -0
- cuda/cccl/headers/include/cuda/std/__memory/compressed_pair.h +225 -0
- cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +246 -0
- cuda/cccl/headers/include/cuda/std/__memory/destruct_n.h +91 -0
- cuda/cccl/headers/include/cuda/std/__memory/is_sufficiently_aligned.h +46 -0
- cuda/cccl/headers/include/cuda/std/__memory/pointer_traits.h +246 -0
- cuda/cccl/headers/include/cuda/std/__memory/runtime_assume_aligned.h +62 -0
- cuda/cccl/headers/include/cuda/std/__memory/temporary_buffer.h +92 -0
- cuda/cccl/headers/include/cuda/std/__memory/uninitialized_algorithms.h +678 -0
- cuda/cccl/headers/include/cuda/std/__memory/unique_ptr.h +765 -0
- cuda/cccl/headers/include/cuda/std/__memory/uses_allocator.h +54 -0
- cuda/cccl/headers/include/cuda/std/__memory/voidify.h +41 -0
- cuda/cccl/headers/include/cuda/std/__memory_ +34 -0
- cuda/cccl/headers/include/cuda/std/__new/allocate.h +126 -0
- cuda/cccl/headers/include/cuda/std/__new/bad_alloc.h +57 -0
- cuda/cccl/headers/include/cuda/std/__new/launder.h +53 -0
- cuda/cccl/headers/include/cuda/std/__new_ +29 -0
- cuda/cccl/headers/include/cuda/std/__numeric/accumulate.h +56 -0
- cuda/cccl/headers/include/cuda/std/__numeric/adjacent_difference.h +72 -0
- cuda/cccl/headers/include/cuda/std/__numeric/exclusive_scan.h +66 -0
- cuda/cccl/headers/include/cuda/std/__numeric/gcd_lcm.h +78 -0
- cuda/cccl/headers/include/cuda/std/__numeric/inclusive_scan.h +73 -0
- cuda/cccl/headers/include/cuda/std/__numeric/inner_product.h +62 -0
- cuda/cccl/headers/include/cuda/std/__numeric/iota.h +42 -0
- cuda/cccl/headers/include/cuda/std/__numeric/midpoint.h +97 -0
- cuda/cccl/headers/include/cuda/std/__numeric/partial_sum.h +69 -0
- cuda/cccl/headers/include/cuda/std/__numeric/reduce.h +60 -0
- cuda/cccl/headers/include/cuda/std/__numeric/transform_exclusive_scan.h +51 -0
- cuda/cccl/headers/include/cuda/std/__numeric/transform_inclusive_scan.h +65 -0
- cuda/cccl/headers/include/cuda/std/__numeric/transform_reduce.h +72 -0
- cuda/cccl/headers/include/cuda/std/__optional/bad_optional_access.h +74 -0
- cuda/cccl/headers/include/cuda/std/__optional/hash.h +53 -0
- cuda/cccl/headers/include/cuda/std/__optional/make_optional.h +61 -0
- cuda/cccl/headers/include/cuda/std/__optional/nullopt.h +43 -0
- cuda/cccl/headers/include/cuda/std/__optional/optional.h +859 -0
- cuda/cccl/headers/include/cuda/std/__optional/optional_base.h +433 -0
- cuda/cccl/headers/include/cuda/std/__optional/optional_ref.h +324 -0
- cuda/cccl/headers/include/cuda/std/__random/generate_canonical.h +56 -0
- cuda/cccl/headers/include/cuda/std/__random/is_seed_sequence.h +39 -0
- cuda/cccl/headers/include/cuda/std/__random/is_valid.h +106 -0
- cuda/cccl/headers/include/cuda/std/__random/linear_congruential_engine.h +398 -0
- cuda/cccl/headers/include/cuda/std/__random/uniform_int_distribution.h +335 -0
- cuda/cccl/headers/include/cuda/std/__random/uniform_real_distribution.h +183 -0
- cuda/cccl/headers/include/cuda/std/__random_ +29 -0
- cuda/cccl/headers/include/cuda/std/__ranges/access.h +303 -0
- cuda/cccl/headers/include/cuda/std/__ranges/all.h +98 -0
- cuda/cccl/headers/include/cuda/std/__ranges/compressed_movable_box.h +892 -0
- cuda/cccl/headers/include/cuda/std/__ranges/concepts.h +302 -0
- cuda/cccl/headers/include/cuda/std/__ranges/counted.h +90 -0
- cuda/cccl/headers/include/cuda/std/__ranges/dangling.h +54 -0
- cuda/cccl/headers/include/cuda/std/__ranges/data.h +136 -0
- cuda/cccl/headers/include/cuda/std/__ranges/empty.h +109 -0
- cuda/cccl/headers/include/cuda/std/__ranges/empty_view.h +77 -0
- cuda/cccl/headers/include/cuda/std/__ranges/enable_borrowed_range.h +41 -0
- cuda/cccl/headers/include/cuda/std/__ranges/enable_view.h +78 -0
- cuda/cccl/headers/include/cuda/std/__ranges/from_range.h +36 -0
- cuda/cccl/headers/include/cuda/std/__ranges/iota_view.h +266 -0
- cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +410 -0
- cuda/cccl/headers/include/cuda/std/__ranges/owning_view.h +162 -0
- cuda/cccl/headers/include/cuda/std/__ranges/range_adaptor.h +110 -0
- cuda/cccl/headers/include/cuda/std/__ranges/rbegin.h +175 -0
- cuda/cccl/headers/include/cuda/std/__ranges/ref_view.h +121 -0
- cuda/cccl/headers/include/cuda/std/__ranges/rend.h +182 -0
- cuda/cccl/headers/include/cuda/std/__ranges/repeat_view.h +345 -0
- cuda/cccl/headers/include/cuda/std/__ranges/single_view.h +155 -0
- cuda/cccl/headers/include/cuda/std/__ranges/size.h +201 -0
- cuda/cccl/headers/include/cuda/std/__ranges/subrange.h +513 -0
- cuda/cccl/headers/include/cuda/std/__ranges/take_view.h +476 -0
- cuda/cccl/headers/include/cuda/std/__ranges/take_while_view.h +259 -0
- cuda/cccl/headers/include/cuda/std/__ranges/transform_view.h +522 -0
- cuda/cccl/headers/include/cuda/std/__ranges/unwrap_end.h +53 -0
- cuda/cccl/headers/include/cuda/std/__ranges/view_interface.h +183 -0
- cuda/cccl/headers/include/cuda/std/__ranges/views.h +38 -0
- cuda/cccl/headers/include/cuda/std/__semaphore/atomic_semaphore.h +234 -0
- cuda/cccl/headers/include/cuda/std/__semaphore/counting_semaphore.h +51 -0
- cuda/cccl/headers/include/cuda/std/__string/char_traits.h +191 -0
- cuda/cccl/headers/include/cuda/std/__string/constexpr_c_functions.h +581 -0
- cuda/cccl/headers/include/cuda/std/__string/helper_functions.h +296 -0
- cuda/cccl/headers/include/cuda/std/__string/string_view.h +244 -0
- cuda/cccl/headers/include/cuda/std/__string_ +29 -0
- cuda/cccl/headers/include/cuda/std/__system_error/errc.h +51 -0
- cuda/cccl/headers/include/cuda/std/__system_error_ +26 -0
- cuda/cccl/headers/include/cuda/std/__thread/threading_support.h +106 -0
- cuda/cccl/headers/include/cuda/std/__thread/threading_support_cuda.h +47 -0
- cuda/cccl/headers/include/cuda/std/__thread/threading_support_external.h +41 -0
- cuda/cccl/headers/include/cuda/std/__thread/threading_support_pthread.h +143 -0
- cuda/cccl/headers/include/cuda/std/__thread/threading_support_win32.h +87 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/ignore.h +51 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/make_tuple_types.h +120 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/sfinae_helpers.h +260 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/structured_bindings.h +212 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_element.h +70 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_indices.h +44 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +84 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like_ext.h +68 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_size.h +79 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_types.h +35 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/vector_types.h +290 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/add_const.h +40 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/add_cv.h +40 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/add_lvalue_reference.h +62 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/add_pointer.h +65 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/add_rvalue_reference.h +62 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/add_volatile.h +40 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/aligned_storage.h +149 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/aligned_union.h +62 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/alignment_of.h +41 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/always_false.h +35 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/can_extract_key.h +68 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/common_reference.h +262 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/common_type.h +173 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/conditional.h +65 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/conjunction.h +67 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/copy_cv.h +50 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/copy_cvref.h +148 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/decay.h +83 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/dependent_type.h +35 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/disjunction.h +77 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/enable_if.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/extent.h +68 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/fold.h +47 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/has_unique_object_representation.h +46 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/has_virtual_destructor.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/integral_constant.h +62 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_abstract.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_aggregate.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_allocator.h +46 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_arithmetic.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_array.h +62 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_assignable.h +78 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_base_of.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_bounded_array.h +44 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_callable.h +60 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_char_like_type.h +38 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_class.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_compound.h +58 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_const.h +56 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_constant_evaluated.h +51 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_constructible.h +174 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_convertible.h +211 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_assignable.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_constructible.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_core_convertible.h +47 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_corresponding_member.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_default_constructible.h +40 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_destructible.h +115 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_empty.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_enum.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_execution_policy.h +81 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_arithmetic.h +38 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_floating_point.h +79 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_final.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_floating_point.h +53 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_function.h +61 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_fundamental.h +56 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_implicitly_default_constructible.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_integer.h +45 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_integral.h +123 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_layout_compatible.h +45 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_literal_type.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_member_function_pointer.h +79 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_member_object_pointer.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_member_pointer.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_move_assignable.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_move_constructible.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_assignable.h +70 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_constructible.h +84 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_convertible.h +59 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_assignable.h +60 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_constructible.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_default_constructible.h +54 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_destructible.h +82 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_assignable.h +60 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_constructible.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_null_pointer.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_object.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_one_of.h +37 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_pod.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer.h +60 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_base_of.h +84 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_with_class.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_polymorphic.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +121 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_reference.h +95 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_reference_wrapper.h +50 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_referenceable.h +55 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_same.h +88 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_scalar.h +60 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_scoped_enum.h +49 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_signed.h +65 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_signed_integer.h +59 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_standard_layout.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_swappable.h +202 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivial.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_assignable.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_constructible.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_assignable.h +46 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_constructible.h +45 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copyable.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_default_constructible.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_destructible.h +58 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_assignable.h +45 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_constructible.h +44 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_unbounded_array.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_union.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned.h +66 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned_integer.h +59 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_valid_expansion.h +41 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_void.h +55 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_volatile.h +56 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/lazy.h +35 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/make_const_lvalue_ref.h +36 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/make_nbit_int.h +107 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/make_signed.h +140 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/make_unsigned.h +151 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/maybe_const.h +36 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/nat.h +39 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/negation.h +44 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/num_bits.h +122 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/promote.h +163 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/rank.h +60 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/reference_constructs_from_temporary.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/reference_converts_from_temporary.h +56 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_all_extents.h +66 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_const.h +59 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_const_ref.h +37 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_cv.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_cvref.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_extent.h +65 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_pointer.h +73 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_reference.h +72 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_volatile.h +58 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +47 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/type_identity.h +40 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/type_list.h +1067 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/type_set.h +131 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/underlying_type.h +52 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/void_t.h +34 -0
- cuda/cccl/headers/include/cuda/std/__utility/as_const.h +52 -0
- cuda/cccl/headers/include/cuda/std/__utility/auto_cast.h +34 -0
- cuda/cccl/headers/include/cuda/std/__utility/cmp.h +116 -0
- cuda/cccl/headers/include/cuda/std/__utility/convert_to_integral.h +101 -0
- cuda/cccl/headers/include/cuda/std/__utility/declval.h +76 -0
- cuda/cccl/headers/include/cuda/std/__utility/exception_guard.h +161 -0
- cuda/cccl/headers/include/cuda/std/__utility/exchange.h +46 -0
- cuda/cccl/headers/include/cuda/std/__utility/forward.h +59 -0
- cuda/cccl/headers/include/cuda/std/__utility/forward_like.h +55 -0
- cuda/cccl/headers/include/cuda/std/__utility/in_place.h +86 -0
- cuda/cccl/headers/include/cuda/std/__utility/integer_sequence.h +251 -0
- cuda/cccl/headers/include/cuda/std/__utility/monostate.h +99 -0
- cuda/cccl/headers/include/cuda/std/__utility/move.h +74 -0
- cuda/cccl/headers/include/cuda/std/__utility/pair.h +791 -0
- cuda/cccl/headers/include/cuda/std/__utility/piecewise_construct.h +37 -0
- cuda/cccl/headers/include/cuda/std/__utility/pod_tuple.h +527 -0
- cuda/cccl/headers/include/cuda/std/__utility/priority_tag.h +40 -0
- cuda/cccl/headers/include/cuda/std/__utility/rel_ops.h +63 -0
- cuda/cccl/headers/include/cuda/std/__utility/swap.h +64 -0
- cuda/cccl/headers/include/cuda/std/__utility/to_underlying.h +40 -0
- cuda/cccl/headers/include/cuda/std/__utility/typeid.h +421 -0
- cuda/cccl/headers/include/cuda/std/__utility/undefined.h +34 -0
- cuda/cccl/headers/include/cuda/std/__utility/unreachable.h +37 -0
- cuda/cccl/headers/include/cuda/std/array +518 -0
- cuda/cccl/headers/include/cuda/std/atomic +810 -0
- cuda/cccl/headers/include/cuda/std/barrier +42 -0
- cuda/cccl/headers/include/cuda/std/bit +35 -0
- cuda/cccl/headers/include/cuda/std/bitset +994 -0
- cuda/cccl/headers/include/cuda/std/cassert +28 -0
- cuda/cccl/headers/include/cuda/std/ccomplex +15 -0
- cuda/cccl/headers/include/cuda/std/cfloat +59 -0
- cuda/cccl/headers/include/cuda/std/chrono +26 -0
- cuda/cccl/headers/include/cuda/std/climits +61 -0
- cuda/cccl/headers/include/cuda/std/cmath +87 -0
- cuda/cccl/headers/include/cuda/std/complex +50 -0
- cuda/cccl/headers/include/cuda/std/concepts +48 -0
- cuda/cccl/headers/include/cuda/std/cstddef +28 -0
- cuda/cccl/headers/include/cuda/std/cstdint +178 -0
- cuda/cccl/headers/include/cuda/std/cstdlib +30 -0
- cuda/cccl/headers/include/cuda/std/cstring +110 -0
- cuda/cccl/headers/include/cuda/std/ctime +154 -0
- cuda/cccl/headers/include/cuda/std/detail/__config +45 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +207 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/algorithm +1721 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/chrono +2509 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/iosfwd +128 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/stdexcept +120 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/tuple +1365 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +2144 -0
- cuda/cccl/headers/include/cuda/std/execution +29 -0
- cuda/cccl/headers/include/cuda/std/expected +30 -0
- cuda/cccl/headers/include/cuda/std/functional +56 -0
- cuda/cccl/headers/include/cuda/std/initializer_list +44 -0
- cuda/cccl/headers/include/cuda/std/inplace_vector +2170 -0
- cuda/cccl/headers/include/cuda/std/iterator +70 -0
- cuda/cccl/headers/include/cuda/std/latch +34 -0
- cuda/cccl/headers/include/cuda/std/limits +28 -0
- cuda/cccl/headers/include/cuda/std/linalg +30 -0
- cuda/cccl/headers/include/cuda/std/mdspan +38 -0
- cuda/cccl/headers/include/cuda/std/memory +39 -0
- cuda/cccl/headers/include/cuda/std/numbers +346 -0
- cuda/cccl/headers/include/cuda/std/numeric +41 -0
- cuda/cccl/headers/include/cuda/std/optional +31 -0
- cuda/cccl/headers/include/cuda/std/ranges +69 -0
- cuda/cccl/headers/include/cuda/std/ratio +416 -0
- cuda/cccl/headers/include/cuda/std/semaphore +31 -0
- cuda/cccl/headers/include/cuda/std/source_location +83 -0
- cuda/cccl/headers/include/cuda/std/span +628 -0
- cuda/cccl/headers/include/cuda/std/string_view +925 -0
- cuda/cccl/headers/include/cuda/std/tuple +26 -0
- cuda/cccl/headers/include/cuda/std/type_traits +177 -0
- cuda/cccl/headers/include/cuda/std/utility +70 -0
- cuda/cccl/headers/include/cuda/std/variant +25 -0
- cuda/cccl/headers/include/cuda/std/version +240 -0
- cuda/cccl/headers/include/cuda/stream +31 -0
- cuda/cccl/headers/include/cuda/stream_ref +59 -0
- cuda/cccl/headers/include/cuda/type_traits +27 -0
- cuda/cccl/headers/include/cuda/utility +28 -0
- cuda/cccl/headers/include/cuda/version +16 -0
- cuda/cccl/headers/include/cuda/warp +28 -0
- cuda/cccl/headers/include/cuda/work_stealing +26 -0
- cuda/cccl/headers/include/nv/detail/__preprocessor +169 -0
- cuda/cccl/headers/include/nv/detail/__target_macros +718 -0
- cuda/cccl/headers/include/nv/target +240 -0
- cuda/cccl/headers/include/thrust/addressof.h +22 -0
- cuda/cccl/headers/include/thrust/adjacent_difference.h +254 -0
- cuda/cccl/headers/include/thrust/advance.h +57 -0
- cuda/cccl/headers/include/thrust/allocate_unique.h +299 -0
- cuda/cccl/headers/include/thrust/binary_search.h +1910 -0
- cuda/cccl/headers/include/thrust/complex.h +858 -0
- cuda/cccl/headers/include/thrust/copy.h +506 -0
- cuda/cccl/headers/include/thrust/count.h +245 -0
- cuda/cccl/headers/include/thrust/detail/adjacent_difference.inl +95 -0
- cuda/cccl/headers/include/thrust/detail/alignment.h +81 -0
- cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +626 -0
- cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +192 -0
- cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +96 -0
- cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +81 -0
- cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +78 -0
- cuda/cccl/headers/include/thrust/detail/allocator/no_throw_allocator.h +76 -0
- cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +115 -0
- cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +116 -0
- cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +77 -0
- cuda/cccl/headers/include/thrust/detail/allocator_aware_execution_policy.h +99 -0
- cuda/cccl/headers/include/thrust/detail/binary_search.inl +525 -0
- cuda/cccl/headers/include/thrust/detail/caching_allocator.h +47 -0
- cuda/cccl/headers/include/thrust/detail/complex/arithmetic.h +255 -0
- cuda/cccl/headers/include/thrust/detail/complex/c99math.h +64 -0
- cuda/cccl/headers/include/thrust/detail/complex/catrig.h +875 -0
- cuda/cccl/headers/include/thrust/detail/complex/catrigf.h +589 -0
- cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +233 -0
- cuda/cccl/headers/include/thrust/detail/complex/ccoshf.h +161 -0
- cuda/cccl/headers/include/thrust/detail/complex/cexp.h +195 -0
- cuda/cccl/headers/include/thrust/detail/complex/cexpf.h +173 -0
- cuda/cccl/headers/include/thrust/detail/complex/clog.h +223 -0
- cuda/cccl/headers/include/thrust/detail/complex/clogf.h +210 -0
- cuda/cccl/headers/include/thrust/detail/complex/complex.inl +263 -0
- cuda/cccl/headers/include/thrust/detail/complex/cpow.h +50 -0
- cuda/cccl/headers/include/thrust/detail/complex/cproj.h +81 -0
- cuda/cccl/headers/include/thrust/detail/complex/csinh.h +228 -0
- cuda/cccl/headers/include/thrust/detail/complex/csinhf.h +168 -0
- cuda/cccl/headers/include/thrust/detail/complex/csqrt.h +178 -0
- cuda/cccl/headers/include/thrust/detail/complex/csqrtf.h +174 -0
- cuda/cccl/headers/include/thrust/detail/complex/ctanh.h +208 -0
- cuda/cccl/headers/include/thrust/detail/complex/ctanhf.h +133 -0
- cuda/cccl/headers/include/thrust/detail/complex/math_private.h +138 -0
- cuda/cccl/headers/include/thrust/detail/complex/stream.h +73 -0
- cuda/cccl/headers/include/thrust/detail/config/compiler.h +38 -0
- cuda/cccl/headers/include/thrust/detail/config/config.h +43 -0
- cuda/cccl/headers/include/thrust/detail/config/cpp_dialect.h +78 -0
- cuda/cccl/headers/include/thrust/detail/config/device_system.h +55 -0
- cuda/cccl/headers/include/thrust/detail/config/host_system.h +48 -0
- cuda/cccl/headers/include/thrust/detail/config/memory_resource.h +41 -0
- cuda/cccl/headers/include/thrust/detail/config/namespace.h +162 -0
- cuda/cccl/headers/include/thrust/detail/config/simple_defines.h +48 -0
- cuda/cccl/headers/include/thrust/detail/config.h +36 -0
- cuda/cccl/headers/include/thrust/detail/contiguous_storage.h +228 -0
- cuda/cccl/headers/include/thrust/detail/contiguous_storage.inl +273 -0
- cuda/cccl/headers/include/thrust/detail/copy.h +72 -0
- cuda/cccl/headers/include/thrust/detail/copy.inl +129 -0
- cuda/cccl/headers/include/thrust/detail/copy_if.h +62 -0
- cuda/cccl/headers/include/thrust/detail/copy_if.inl +102 -0
- cuda/cccl/headers/include/thrust/detail/count.h +55 -0
- cuda/cccl/headers/include/thrust/detail/count.inl +89 -0
- cuda/cccl/headers/include/thrust/detail/device_ptr.inl +48 -0
- cuda/cccl/headers/include/thrust/detail/equal.inl +93 -0
- cuda/cccl/headers/include/thrust/detail/event_error.h +160 -0
- cuda/cccl/headers/include/thrust/detail/execute_with_allocator.h +81 -0
- cuda/cccl/headers/include/thrust/detail/execute_with_allocator_fwd.h +61 -0
- cuda/cccl/headers/include/thrust/detail/execution_policy.h +120 -0
- cuda/cccl/headers/include/thrust/detail/extrema.inl +184 -0
- cuda/cccl/headers/include/thrust/detail/fill.inl +86 -0
- cuda/cccl/headers/include/thrust/detail/find.inl +113 -0
- cuda/cccl/headers/include/thrust/detail/for_each.inl +84 -0
- cuda/cccl/headers/include/thrust/detail/function.h +49 -0
- cuda/cccl/headers/include/thrust/detail/functional/actor.h +214 -0
- cuda/cccl/headers/include/thrust/detail/functional/operators.h +386 -0
- cuda/cccl/headers/include/thrust/detail/gather.inl +173 -0
- cuda/cccl/headers/include/thrust/detail/generate.inl +86 -0
- cuda/cccl/headers/include/thrust/detail/get_iterator_value.h +62 -0
- cuda/cccl/headers/include/thrust/detail/inner_product.inl +118 -0
- cuda/cccl/headers/include/thrust/detail/internal_functional.h +328 -0
- cuda/cccl/headers/include/thrust/detail/logical.inl +113 -0
- cuda/cccl/headers/include/thrust/detail/malloc_and_free.h +77 -0
- cuda/cccl/headers/include/thrust/detail/malloc_and_free_fwd.h +45 -0
- cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +209 -0
- cuda/cccl/headers/include/thrust/detail/merge.inl +276 -0
- cuda/cccl/headers/include/thrust/detail/mismatch.inl +94 -0
- cuda/cccl/headers/include/thrust/detail/overlapped_copy.h +124 -0
- cuda/cccl/headers/include/thrust/detail/partition.inl +378 -0
- cuda/cccl/headers/include/thrust/detail/pointer.h +309 -0
- cuda/cccl/headers/include/thrust/detail/preprocessor.h +652 -0
- cuda/cccl/headers/include/thrust/detail/random_bijection.h +177 -0
- cuda/cccl/headers/include/thrust/detail/range/head_flags.h +116 -0
- cuda/cccl/headers/include/thrust/detail/range/tail_flags.h +130 -0
- cuda/cccl/headers/include/thrust/detail/raw_pointer_cast.h +52 -0
- cuda/cccl/headers/include/thrust/detail/raw_reference_cast.h +192 -0
- cuda/cccl/headers/include/thrust/detail/reduce.inl +377 -0
- cuda/cccl/headers/include/thrust/detail/reference.h +494 -0
- cuda/cccl/headers/include/thrust/detail/reference_forward_declaration.h +35 -0
- cuda/cccl/headers/include/thrust/detail/remove.inl +213 -0
- cuda/cccl/headers/include/thrust/detail/replace.inl +231 -0
- cuda/cccl/headers/include/thrust/detail/reverse.inl +88 -0
- cuda/cccl/headers/include/thrust/detail/scan.inl +518 -0
- cuda/cccl/headers/include/thrust/detail/scatter.inl +157 -0
- cuda/cccl/headers/include/thrust/detail/seq.h +66 -0
- cuda/cccl/headers/include/thrust/detail/sequence.inl +109 -0
- cuda/cccl/headers/include/thrust/detail/set_operations.inl +981 -0
- cuda/cccl/headers/include/thrust/detail/shuffle.inl +86 -0
- cuda/cccl/headers/include/thrust/detail/sort.inl +373 -0
- cuda/cccl/headers/include/thrust/detail/static_assert.h +58 -0
- cuda/cccl/headers/include/thrust/detail/static_map.h +167 -0
- cuda/cccl/headers/include/thrust/detail/swap_ranges.inl +65 -0
- cuda/cccl/headers/include/thrust/detail/tabulate.inl +62 -0
- cuda/cccl/headers/include/thrust/detail/temporary_array.h +153 -0
- cuda/cccl/headers/include/thrust/detail/temporary_array.inl +120 -0
- cuda/cccl/headers/include/thrust/detail/temporary_buffer.h +81 -0
- cuda/cccl/headers/include/thrust/detail/transform_reduce.inl +69 -0
- cuda/cccl/headers/include/thrust/detail/transform_scan.inl +161 -0
- cuda/cccl/headers/include/thrust/detail/trivial_sequence.h +130 -0
- cuda/cccl/headers/include/thrust/detail/tuple_meta_transform.h +61 -0
- cuda/cccl/headers/include/thrust/detail/type_deduction.h +62 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/has_member_function.h +47 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/has_nested_type.h +43 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/is_call_possible.h +167 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/is_commutative.h +69 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/is_metafunction_defined.h +39 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/is_thrust_pointer.h +59 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/iterator/is_output_iterator.h +46 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/minimum_type.h +89 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/pointer_traits.h +332 -0
- cuda/cccl/headers/include/thrust/detail/type_traits.h +136 -0
- cuda/cccl/headers/include/thrust/detail/uninitialized_copy.inl +90 -0
- cuda/cccl/headers/include/thrust/detail/uninitialized_fill.inl +86 -0
- cuda/cccl/headers/include/thrust/detail/unique.inl +373 -0
- cuda/cccl/headers/include/thrust/detail/use_default.h +34 -0
- cuda/cccl/headers/include/thrust/detail/vector_base.h +613 -0
- cuda/cccl/headers/include/thrust/detail/vector_base.inl +1210 -0
- cuda/cccl/headers/include/thrust/device_allocator.h +134 -0
- cuda/cccl/headers/include/thrust/device_delete.h +74 -0
- cuda/cccl/headers/include/thrust/device_free.h +85 -0
- cuda/cccl/headers/include/thrust/device_make_unique.h +56 -0
- cuda/cccl/headers/include/thrust/device_malloc.h +84 -0
- cuda/cccl/headers/include/thrust/device_malloc_allocator.h +190 -0
- cuda/cccl/headers/include/thrust/device_new.h +112 -0
- cuda/cccl/headers/include/thrust/device_new_allocator.h +179 -0
- cuda/cccl/headers/include/thrust/device_ptr.h +196 -0
- cuda/cccl/headers/include/thrust/device_reference.h +983 -0
- cuda/cccl/headers/include/thrust/device_vector.h +576 -0
- cuda/cccl/headers/include/thrust/distance.h +43 -0
- cuda/cccl/headers/include/thrust/equal.h +247 -0
- cuda/cccl/headers/include/thrust/execution_policy.h +251 -0
- cuda/cccl/headers/include/thrust/extrema.h +657 -0
- cuda/cccl/headers/include/thrust/fill.h +200 -0
- cuda/cccl/headers/include/thrust/find.h +382 -0
- cuda/cccl/headers/include/thrust/for_each.h +261 -0
- cuda/cccl/headers/include/thrust/functional.h +395 -0
- cuda/cccl/headers/include/thrust/gather.h +464 -0
- cuda/cccl/headers/include/thrust/generate.h +193 -0
- cuda/cccl/headers/include/thrust/host_vector.h +576 -0
- cuda/cccl/headers/include/thrust/inner_product.h +264 -0
- cuda/cccl/headers/include/thrust/iterator/constant_iterator.h +221 -0
- cuda/cccl/headers/include/thrust/iterator/counting_iterator.h +335 -0
- cuda/cccl/headers/include/thrust/iterator/detail/any_assign.h +48 -0
- cuda/cccl/headers/include/thrust/iterator/detail/any_system_tag.h +43 -0
- cuda/cccl/headers/include/thrust/iterator/detail/device_system_tag.h +38 -0
- cuda/cccl/headers/include/thrust/iterator/detail/host_system_tag.h +38 -0
- cuda/cccl/headers/include/thrust/iterator/detail/iterator_adaptor_base.h +81 -0
- cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_system.h +60 -0
- cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_traversal.h +65 -0
- cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h +57 -0
- cuda/cccl/headers/include/thrust/iterator/detail/iterator_facade_category.h +182 -0
- cuda/cccl/headers/include/thrust/iterator/detail/minimum_system.h +58 -0
- cuda/cccl/headers/include/thrust/iterator/detail/normal_iterator.h +69 -0
- cuda/cccl/headers/include/thrust/iterator/detail/retag.h +104 -0
- cuda/cccl/headers/include/thrust/iterator/detail/tagged_iterator.h +81 -0
- cuda/cccl/headers/include/thrust/iterator/detail/tuple_of_iterator_references.h +174 -0
- cuda/cccl/headers/include/thrust/iterator/discard_iterator.h +163 -0
- cuda/cccl/headers/include/thrust/iterator/iterator_adaptor.h +251 -0
- cuda/cccl/headers/include/thrust/iterator/iterator_categories.h +211 -0
- cuda/cccl/headers/include/thrust/iterator/iterator_facade.h +659 -0
- cuda/cccl/headers/include/thrust/iterator/iterator_traits.h +334 -0
- cuda/cccl/headers/include/thrust/iterator/iterator_traversal_tags.h +64 -0
- cuda/cccl/headers/include/thrust/iterator/offset_iterator.h +194 -0
- cuda/cccl/headers/include/thrust/iterator/permutation_iterator.h +204 -0
- cuda/cccl/headers/include/thrust/iterator/retag.h +72 -0
- cuda/cccl/headers/include/thrust/iterator/reverse_iterator.h +51 -0
- cuda/cccl/headers/include/thrust/iterator/shuffle_iterator.h +185 -0
- cuda/cccl/headers/include/thrust/iterator/strided_iterator.h +152 -0
- cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +152 -0
- cuda/cccl/headers/include/thrust/iterator/transform_input_output_iterator.h +226 -0
- cuda/cccl/headers/include/thrust/iterator/transform_iterator.h +351 -0
- cuda/cccl/headers/include/thrust/iterator/transform_output_iterator.h +190 -0
- cuda/cccl/headers/include/thrust/iterator/zip_iterator.h +359 -0
- cuda/cccl/headers/include/thrust/logical.h +290 -0
- cuda/cccl/headers/include/thrust/memory.h +299 -0
- cuda/cccl/headers/include/thrust/merge.h +725 -0
- cuda/cccl/headers/include/thrust/mismatch.h +261 -0
- cuda/cccl/headers/include/thrust/mr/allocator.h +229 -0
- cuda/cccl/headers/include/thrust/mr/device_memory_resource.h +41 -0
- cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +528 -0
- cuda/cccl/headers/include/thrust/mr/disjoint_sync_pool.h +118 -0
- cuda/cccl/headers/include/thrust/mr/disjoint_tls_pool.h +67 -0
- cuda/cccl/headers/include/thrust/mr/fancy_pointer_resource.h +67 -0
- cuda/cccl/headers/include/thrust/mr/host_memory_resource.h +38 -0
- cuda/cccl/headers/include/thrust/mr/memory_resource.h +217 -0
- cuda/cccl/headers/include/thrust/mr/new.h +100 -0
- cuda/cccl/headers/include/thrust/mr/polymorphic_adaptor.h +63 -0
- cuda/cccl/headers/include/thrust/mr/pool.h +528 -0
- cuda/cccl/headers/include/thrust/mr/pool_options.h +174 -0
- cuda/cccl/headers/include/thrust/mr/sync_pool.h +114 -0
- cuda/cccl/headers/include/thrust/mr/tls_pool.h +64 -0
- cuda/cccl/headers/include/thrust/mr/universal_memory_resource.h +29 -0
- cuda/cccl/headers/include/thrust/mr/validator.h +56 -0
- cuda/cccl/headers/include/thrust/pair.h +99 -0
- cuda/cccl/headers/include/thrust/partition.h +1391 -0
- cuda/cccl/headers/include/thrust/per_device_resource.h +98 -0
- cuda/cccl/headers/include/thrust/random/detail/discard_block_engine.inl +184 -0
- cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine.inl +155 -0
- cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine_discard.h +104 -0
- cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine.inl +151 -0
- cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h +53 -0
- cuda/cccl/headers/include/thrust/random/detail/mod.h +101 -0
- cuda/cccl/headers/include/thrust/random/detail/normal_distribution.inl +187 -0
- cuda/cccl/headers/include/thrust/random/detail/normal_distribution_base.h +160 -0
- cuda/cccl/headers/include/thrust/random/detail/random_core_access.h +63 -0
- cuda/cccl/headers/include/thrust/random/detail/subtract_with_carry_engine.inl +201 -0
- cuda/cccl/headers/include/thrust/random/detail/uniform_int_distribution.inl +198 -0
- cuda/cccl/headers/include/thrust/random/detail/uniform_real_distribution.inl +200 -0
- cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine.inl +183 -0
- cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine_max.h +187 -0
- cuda/cccl/headers/include/thrust/random/discard_block_engine.h +240 -0
- cuda/cccl/headers/include/thrust/random/linear_congruential_engine.h +289 -0
- cuda/cccl/headers/include/thrust/random/linear_feedback_shift_engine.h +217 -0
- cuda/cccl/headers/include/thrust/random/normal_distribution.h +257 -0
- cuda/cccl/headers/include/thrust/random/subtract_with_carry_engine.h +247 -0
- cuda/cccl/headers/include/thrust/random/uniform_int_distribution.h +261 -0
- cuda/cccl/headers/include/thrust/random/uniform_real_distribution.h +258 -0
- cuda/cccl/headers/include/thrust/random/xor_combine_engine.h +255 -0
- cuda/cccl/headers/include/thrust/random.h +120 -0
- cuda/cccl/headers/include/thrust/reduce.h +1113 -0
- cuda/cccl/headers/include/thrust/remove.h +768 -0
- cuda/cccl/headers/include/thrust/replace.h +826 -0
- cuda/cccl/headers/include/thrust/reverse.h +215 -0
- cuda/cccl/headers/include/thrust/scan.h +1671 -0
- cuda/cccl/headers/include/thrust/scatter.h +446 -0
- cuda/cccl/headers/include/thrust/sequence.h +277 -0
- cuda/cccl/headers/include/thrust/set_operations.h +3026 -0
- cuda/cccl/headers/include/thrust/shuffle.h +182 -0
- cuda/cccl/headers/include/thrust/sort.h +1320 -0
- cuda/cccl/headers/include/thrust/swap.h +147 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/adjacent_difference.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/assign_value.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/binary_search.h +32 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/copy.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/copy_if.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/count.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/equal.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/execution_policy.h +109 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/extrema.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/fill.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/find.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/for_each.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/gather.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/generate.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/get_value.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/inner_product.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/iter_swap.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/logical.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/malloc_and_free.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/memory.inl +60 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/merge.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/mismatch.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/partition.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/per_device_resource.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/reduce.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/reduce_by_key.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/remove.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/replace.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/reverse.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/scan.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/scan_by_key.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/scatter.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/sequence.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/set_operations.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/sort.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/swap_ranges.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/tabulate.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/temporary_buffer.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/transform.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/transform_reduce.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/transform_scan.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_copy.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_fill.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/unique.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/unique_by_key.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/execution_policy.h +63 -0
- cuda/cccl/headers/include/thrust/system/cpp/memory.h +106 -0
- cuda/cccl/headers/include/thrust/system/cpp/memory_resource.h +72 -0
- cuda/cccl/headers/include/thrust/system/cpp/pointer.h +120 -0
- cuda/cccl/headers/include/thrust/system/cpp/vector.h +96 -0
- cuda/cccl/headers/include/thrust/system/cuda/config.h +126 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/adjacent_difference.h +219 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/assign_value.h +124 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/binary_search.h +29 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/cdp_dispatch.h +72 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +273 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/copy_if.h +255 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/core/agent_launcher.h +289 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/core/triple_chevron_launch.h +191 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/core/util.h +593 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/count.h +75 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/cross_system.h +243 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/dispatch.h +233 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/equal.h +64 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/error.inl +96 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/execution_policy.h +264 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/extrema.h +476 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/fill.h +100 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +170 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/for_each.h +83 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/gather.h +91 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/generate.h +60 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/get_value.h +65 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/inner_product.h +75 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/iter_swap.h +80 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/logical.h +29 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/make_unsigned_special.h +61 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/malloc_and_free.h +121 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/memory.inl +57 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/merge.h +228 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +223 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/parallel_for.h +81 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/partition.h +405 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/per_device_resource.h +72 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/reduce.h +785 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/reduce_by_key.h +1001 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/remove.h +107 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/replace.h +122 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/reverse.h +87 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/scan.h +341 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/scan_by_key.h +414 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/scatter.h +91 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/sequence.h +29 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/set_operations.h +1734 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/sort.h +469 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/swap_ranges.h +98 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/tabulate.h +61 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/temporary_buffer.h +132 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/terminate.h +53 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/transform.h +429 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/transform_reduce.h +143 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/transform_scan.h +119 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_copy.h +117 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_fill.h +105 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/unique.h +289 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/unique_by_key.h +310 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/util.h +253 -0
- cuda/cccl/headers/include/thrust/system/cuda/error.h +168 -0
- cuda/cccl/headers/include/thrust/system/cuda/execution_policy.h +15 -0
- cuda/cccl/headers/include/thrust/system/cuda/memory.h +122 -0
- cuda/cccl/headers/include/thrust/system/cuda/memory_resource.h +122 -0
- cuda/cccl/headers/include/thrust/system/cuda/pointer.h +160 -0
- cuda/cccl/headers/include/thrust/system/cuda/vector.h +108 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/adjacent_difference.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/assign_value.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/binary_search.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/copy.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/copy_if.h +52 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/count.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/equal.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/extrema.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/fill.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/find.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/for_each.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/gather.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/generate.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/get_value.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/inner_product.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/iter_swap.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/logical.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/malloc_and_free.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/merge.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/mismatch.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/partition.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/per_device_resource.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/reduce.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/reduce_by_key.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/remove.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/replace.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/reverse.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/scan.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/scan_by_key.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/scatter.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/sequence.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/set_operations.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/sort.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/swap_ranges.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/tabulate.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/temporary_buffer.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/transform.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/transform_reduce.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/transform_scan.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_copy.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_fill.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/unique.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/unique_by_key.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/bad_alloc.h +61 -0
- cuda/cccl/headers/include/thrust/system/detail/errno.h +120 -0
- cuda/cccl/headers/include/thrust/system/detail/error_category.inl +302 -0
- cuda/cccl/headers/include/thrust/system/detail/error_code.inl +173 -0
- cuda/cccl/headers/include/thrust/system/detail/error_condition.inl +121 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.h +53 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.inl +79 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.h +161 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.inl +384 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/copy.h +45 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/copy.inl +64 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.h +58 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.inl +146 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/count.h +48 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/count.inl +84 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/equal.h +49 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/equal.inl +60 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/extrema.h +66 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/extrema.inl +252 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/fill.h +54 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/find.h +49 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/find.inl +137 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/for_each.h +58 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/gather.h +73 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/gather.inl +96 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/generate.h +45 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/generate.inl +63 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.h +60 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.inl +72 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/logical.h +59 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/memory.h +64 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/memory.inl +86 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/merge.h +99 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/merge.inl +148 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.h +49 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.inl +68 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/partition.h +129 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/partition.inl +207 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/per_device_resource.h +43 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/reduce.h +71 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/reduce.inl +100 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.h +83 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.inl +186 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/remove.h +86 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/remove.inl +121 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/replace.h +95 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/replace.inl +175 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/reverse.h +48 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/reverse.inl +67 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.h +63 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.inl +126 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scan.h +72 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scan.inl +85 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.h +126 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.inl +232 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scatter.h +73 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scatter.inl +85 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/select_system.h +104 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/sequence.h +70 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.h +282 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.inl +476 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.h +54 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.inl +125 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/sort.h +113 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/sort.inl +175 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.h +44 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.inl +76 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.h +41 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.inl +54 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/tag.h +47 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.h +54 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.inl +82 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/transform.h +395 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.h +50 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.inl +56 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.h +80 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.inl +113 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.h +45 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.inl +166 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.h +45 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.inl +115 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/unique.h +71 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/unique.inl +113 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.h +81 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.inl +126 -0
- cuda/cccl/headers/include/thrust/system/detail/internal/decompose.h +117 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/adjacent_difference.h +70 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/assign_value.h +42 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/binary_search.h +136 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/copy.h +49 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/copy.inl +119 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/copy_backward.h +49 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/copy_if.h +71 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/count.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/equal.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/execution_policy.h +52 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/extrema.h +110 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/fill.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/find.h +62 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/for_each.h +74 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/gather.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/general_copy.h +123 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/generate.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/get_value.h +43 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/inner_product.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/insertion_sort.h +141 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/iter_swap.h +45 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/logical.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/malloc_and_free.h +50 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/merge.h +75 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/merge.inl +145 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/mismatch.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/partition.h +301 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/per_device_resource.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/reduce.h +64 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/reduce_by_key.h +98 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/remove.h +179 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/replace.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/reverse.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/scan.h +154 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/scan_by_key.h +145 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/scatter.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/sequence.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/set_operations.h +206 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/sort.h +59 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/sort.inl +116 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.h +55 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.inl +356 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.h +48 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.inl +124 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.h +48 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.inl +586 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/swap_ranges.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/tabulate.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/temporary_buffer.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/transform.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/transform_reduce.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/transform_scan.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/trivial_copy.h +58 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_copy.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_fill.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/unique.h +115 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/unique_by_key.h +106 -0
- cuda/cccl/headers/include/thrust/system/detail/system_error.inl +108 -0
- cuda/cccl/headers/include/thrust/system/error_code.h +512 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/adjacent_difference.h +54 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/assign_value.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/binary_search.h +77 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/copy.h +50 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/copy.inl +74 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.h +56 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.inl +59 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/count.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.h +50 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.inl +65 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/equal.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/execution_policy.h +127 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/extrema.h +66 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/fill.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/find.h +53 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/for_each.h +56 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/for_each.inl +87 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/gather.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/generate.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/get_value.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/inner_product.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/iter_swap.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/logical.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/malloc_and_free.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/memory.inl +93 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/merge.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/mismatch.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/partition.h +88 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/partition.inl +102 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/per_device_resource.h +29 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/pragma_omp.h +54 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce.h +54 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce.inl +78 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.h +64 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.inl +65 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.h +59 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.inl +103 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/remove.h +72 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/remove.inl +87 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/replace.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reverse.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/scan.h +73 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/scan.inl +172 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/scan_by_key.h +36 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/scatter.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/sequence.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/set_operations.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/sort.h +60 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/sort.inl +265 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/swap_ranges.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/tabulate.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/temporary_buffer.h +29 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/transform.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/transform_reduce.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/transform_scan.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_copy.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_fill.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/unique.h +60 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/unique.inl +71 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.h +67 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.inl +75 -0
- cuda/cccl/headers/include/thrust/system/omp/execution_policy.h +62 -0
- cuda/cccl/headers/include/thrust/system/omp/memory.h +111 -0
- cuda/cccl/headers/include/thrust/system/omp/memory_resource.h +75 -0
- cuda/cccl/headers/include/thrust/system/omp/pointer.h +124 -0
- cuda/cccl/headers/include/thrust/system/omp/vector.h +99 -0
- cuda/cccl/headers/include/thrust/system/system_error.h +185 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/adjacent_difference.h +54 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/assign_value.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/binary_search.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/copy.h +50 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/copy.inl +73 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.h +47 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.inl +136 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/count.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/equal.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/execution_policy.h +109 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/extrema.h +66 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/fill.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/find.h +49 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.h +51 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.inl +91 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/gather.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/generate.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/get_value.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/inner_product.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/iter_swap.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/logical.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/malloc_and_free.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/memory.inl +94 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/merge.h +77 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/merge.inl +327 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/mismatch.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/partition.h +84 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/partition.inl +98 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/per_device_resource.h +29 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.h +54 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.inl +137 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.h +61 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.inl +400 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_intervals.h +140 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/remove.h +76 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/remove.inl +87 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/replace.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/reverse.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/scan.h +59 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/scan.inl +312 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/scan_by_key.h +33 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/scatter.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/sequence.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/set_operations.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/sort.h +60 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/sort.inl +295 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/swap_ranges.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/tabulate.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/temporary_buffer.h +29 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/transform.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/transform_reduce.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/transform_scan.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_copy.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_fill.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/unique.h +60 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/unique.inl +71 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.h +67 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.inl +75 -0
- cuda/cccl/headers/include/thrust/system/tbb/execution_policy.h +62 -0
- cuda/cccl/headers/include/thrust/system/tbb/memory.h +111 -0
- cuda/cccl/headers/include/thrust/system/tbb/memory_resource.h +75 -0
- cuda/cccl/headers/include/thrust/system/tbb/pointer.h +124 -0
- cuda/cccl/headers/include/thrust/system/tbb/vector.h +99 -0
- cuda/cccl/headers/include/thrust/system_error.h +57 -0
- cuda/cccl/headers/include/thrust/tabulate.h +125 -0
- cuda/cccl/headers/include/thrust/transform.h +1045 -0
- cuda/cccl/headers/include/thrust/transform_reduce.h +190 -0
- cuda/cccl/headers/include/thrust/transform_scan.h +442 -0
- cuda/cccl/headers/include/thrust/tuple.h +139 -0
- cuda/cccl/headers/include/thrust/type_traits/integer_sequence.h +261 -0
- cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +154 -0
- cuda/cccl/headers/include/thrust/type_traits/is_execution_policy.h +65 -0
- cuda/cccl/headers/include/thrust/type_traits/is_operator_less_or_greater_function_object.h +184 -0
- cuda/cccl/headers/include/thrust/type_traits/is_operator_plus_function_object.h +116 -0
- cuda/cccl/headers/include/thrust/type_traits/is_trivially_relocatable.h +336 -0
- cuda/cccl/headers/include/thrust/type_traits/logical_metafunctions.h +42 -0
- cuda/cccl/headers/include/thrust/type_traits/unwrap_contiguous_iterator.h +63 -0
- cuda/cccl/headers/include/thrust/uninitialized_copy.h +300 -0
- cuda/cccl/headers/include/thrust/uninitialized_fill.h +268 -0
- cuda/cccl/headers/include/thrust/unique.h +1088 -0
- cuda/cccl/headers/include/thrust/universal_allocator.h +93 -0
- cuda/cccl/headers/include/thrust/universal_ptr.h +34 -0
- cuda/cccl/headers/include/thrust/universal_vector.h +71 -0
- cuda/cccl/headers/include/thrust/version.h +93 -0
- cuda/cccl/headers/include/thrust/zip_function.h +176 -0
- cuda/cccl/headers/include_paths.py +51 -0
- cuda/cccl/parallel/__init__.py +9 -0
- cuda/cccl/parallel/experimental/__init__.py +24 -0
- cuda/cccl/py.typed +0 -0
- cuda/compute/__init__.py +79 -0
- cuda/compute/_bindings.py +79 -0
- cuda/compute/_bindings.pyi +475 -0
- cuda/compute/_bindings_impl.pyx +2273 -0
- cuda/compute/_caching.py +71 -0
- cuda/compute/_cccl_interop.py +422 -0
- cuda/compute/_utils/__init__.py +0 -0
- cuda/compute/_utils/protocols.py +132 -0
- cuda/compute/_utils/temp_storage_buffer.py +86 -0
- cuda/compute/algorithms/__init__.py +54 -0
- cuda/compute/algorithms/_histogram.py +243 -0
- cuda/compute/algorithms/_merge_sort.py +225 -0
- cuda/compute/algorithms/_radix_sort.py +312 -0
- cuda/compute/algorithms/_reduce.py +182 -0
- cuda/compute/algorithms/_scan.py +331 -0
- cuda/compute/algorithms/_segmented_reduce.py +257 -0
- cuda/compute/algorithms/_three_way_partition.py +261 -0
- cuda/compute/algorithms/_transform.py +329 -0
- cuda/compute/algorithms/_unique_by_key.py +252 -0
- cuda/compute/cccl/.gitkeep +0 -0
- cuda/compute/cu12/_bindings_impl.cp313-win_amd64.pyd +0 -0
- cuda/compute/cu12/cccl/cccl.c.parallel.dll +0 -0
- cuda/compute/cu12/cccl/cccl.c.parallel.lib +0 -0
- cuda/compute/cu13/_bindings_impl.cp313-win_amd64.pyd +0 -0
- cuda/compute/cu13/cccl/cccl.c.parallel.dll +0 -0
- cuda/compute/cu13/cccl/cccl.c.parallel.lib +0 -0
- cuda/compute/iterators/__init__.py +21 -0
- cuda/compute/iterators/_factories.py +219 -0
- cuda/compute/iterators/_iterators.py +817 -0
- cuda/compute/iterators/_zip_iterator.py +199 -0
- cuda/compute/numba_utils.py +53 -0
- cuda/compute/op.py +3 -0
- cuda/compute/struct.py +272 -0
- cuda/compute/typing.py +37 -0
- cuda/coop/__init__.py +8 -0
- cuda/coop/_caching.py +48 -0
- cuda/coop/_common.py +275 -0
- cuda/coop/_nvrtc.py +92 -0
- cuda/coop/_scan_op.py +181 -0
- cuda/coop/_types.py +937 -0
- cuda/coop/_typing.py +107 -0
- cuda/coop/block/__init__.py +39 -0
- cuda/coop/block/_block_exchange.py +251 -0
- cuda/coop/block/_block_load_store.py +215 -0
- cuda/coop/block/_block_merge_sort.py +125 -0
- cuda/coop/block/_block_radix_sort.py +214 -0
- cuda/coop/block/_block_reduce.py +294 -0
- cuda/coop/block/_block_scan.py +983 -0
- cuda/coop/warp/__init__.py +9 -0
- cuda/coop/warp/_warp_merge_sort.py +92 -0
- cuda/coop/warp/_warp_reduce.py +153 -0
- cuda/coop/warp/_warp_scan.py +78 -0
- cuda_cccl-0.3.3.dist-info/METADATA +41 -0
- cuda_cccl-0.3.3.dist-info/RECORD +1968 -0
- cuda_cccl-0.3.3.dist-info/WHEEL +5 -0
- cuda_cccl-0.3.3.dist-info/licenses/LICENSE +1 -0
|
@@ -0,0 +1,1509 @@
|
|
|
1
|
+
/******************************************************************************
|
|
2
|
+
* Copyright (c) 2011, Duane Merrill. All rights reserved.
|
|
3
|
+
* Copyright (c) 2011-2022, NVIDIA CORPORATION. All rights reserved.
|
|
4
|
+
*
|
|
5
|
+
* Redistribution and use in source and binary forms, with or without
|
|
6
|
+
* modification, are permitted provided that the following conditions are met:
|
|
7
|
+
* * Redistributions of source code must retain the above copyright
|
|
8
|
+
* notice, this list of conditions and the following disclaimer.
|
|
9
|
+
* * Redistributions in binary form must reproduce the above copyright
|
|
10
|
+
* notice, this list of conditions and the following disclaimer in the
|
|
11
|
+
* documentation and/or other materials provided with the distribution.
|
|
12
|
+
* * Neither the name of the NVIDIA CORPORATION nor the
|
|
13
|
+
* names of its contributors may be used to endorse or promote products
|
|
14
|
+
* derived from this software without specific prior written permission.
|
|
15
|
+
*
|
|
16
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
17
|
+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
18
|
+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
19
|
+
* ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
|
|
20
|
+
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
21
|
+
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
22
|
+
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
23
|
+
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
24
|
+
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
25
|
+
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
26
|
+
*
|
|
27
|
+
******************************************************************************/
|
|
28
|
+
|
|
29
|
+
//! @file
|
|
30
|
+
//! cub::DeviceHistogram provides device-wide parallel operations for constructing histogram(s) from a sequence of
|
|
31
|
+
//! sample data residing within device-accessible memory.
|
|
32
|
+
|
|
33
|
+
#pragma once
|
|
34
|
+
|
|
35
|
+
#include <cub/config.cuh>
|
|
36
|
+
|
|
37
|
+
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
|
|
38
|
+
# pragma GCC system_header
|
|
39
|
+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
|
|
40
|
+
# pragma clang system_header
|
|
41
|
+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
|
|
42
|
+
# pragma system_header
|
|
43
|
+
#endif // no system header
|
|
44
|
+
|
|
45
|
+
#include <cub/device/dispatch/dispatch_histogram.cuh>
|
|
46
|
+
|
|
47
|
+
#include <cuda/std/__algorithm/copy.h>
|
|
48
|
+
#include <cuda/std/__type_traits/integral_constant.h>
|
|
49
|
+
#include <cuda/std/__type_traits/remove_const.h>
|
|
50
|
+
#include <cuda/std/array>
|
|
51
|
+
#include <cuda/std/limits>
|
|
52
|
+
|
|
53
|
+
CUB_NAMESPACE_BEGIN
|
|
54
|
+
|
|
55
|
+
//! @rst
|
|
56
|
+
//! DeviceHistogram provides device-wide parallel operations for constructing histogram(s) from a sequence of
|
|
57
|
+
//! sample data residing within device-accessible memory.
|
|
58
|
+
//!
|
|
59
|
+
//! Overview
|
|
60
|
+
//! ++++++++++++++++++++++++++
|
|
61
|
+
//!
|
|
62
|
+
//! A `histogram <http://en.wikipedia.org/wiki/Histogram>`_ counts the number of observations that fall into each
|
|
63
|
+
//! of the disjoint categories (known as *bins*).
|
|
64
|
+
//!
|
|
65
|
+
//! Usage Considerations
|
|
66
|
+
//! ++++++++++++++++++++++++++
|
|
67
|
+
//!
|
|
68
|
+
//! @cdp_class{DeviceHistogram}
|
|
69
|
+
//!
|
|
70
|
+
//! @endrst
|
|
71
|
+
struct DeviceHistogram
|
|
72
|
+
{
|
|
73
|
+
//! @name Evenly-segmented bin ranges
|
|
74
|
+
//! @{
|
|
75
|
+
|
|
76
|
+
//! @rst
|
|
77
|
+
//! Computes an intensity histogram from a sequence of data samples using equal-width bins.
|
|
78
|
+
//!
|
|
79
|
+
//! - The number of histogram bins is (``num_levels - 1``)
|
|
80
|
+
//! - All bins comprise the same width of sample values: ``(upper_level - lower_level) / (num_levels - 1)``.
|
|
81
|
+
//! - If the common type of ``SampleT`` and ``LevelT`` is of integral type, the bin for a sample is
|
|
82
|
+
//! computed as ``(sample - lower_level) * (num_levels - 1) / (upper_level - lower_level)``, rounded
|
|
83
|
+
//! down to the nearest whole number. To protect against potential overflows, if the product
|
|
84
|
+
//! ``(upper_level - lower_level) * (num_levels - 1)`` exceeds the number representable by an
|
|
85
|
+
//! ``uint64_t``, the cuda error ``cudaErrorInvalidValue`` is returned. If the common type is 128
|
|
86
|
+
//! bits wide, bin computation will use 128-bit arithmetic and ``cudaErrorInvalidValue`` will only
|
|
87
|
+
//! be returned if bin computation would overflow for 128-bit arithmetic.
|
|
88
|
+
//! - The ranges ``[d_samples, d_samples + num_samples)`` and
|
|
89
|
+
//! ``[d_histogram, d_histogram + num_levels - 1)`` shall not overlap in any way.
|
|
90
|
+
//! - ``cuda::std::common_type<LevelT, SampleT>`` must be valid, and both LevelT and SampleT must be valid
|
|
91
|
+
//! arithmetic types. The common type must be convertible to ``int`` and trivially copyable.
|
|
92
|
+
//! - @devicestorage
|
|
93
|
+
//!
|
|
94
|
+
//! Snippet
|
|
95
|
+
//! +++++++
|
|
96
|
+
//!
|
|
97
|
+
//! The code snippet below illustrates the computation of a six-bin histogram
|
|
98
|
+
//! from a sequence of float samples
|
|
99
|
+
//!
|
|
100
|
+
//! .. code-block:: c++
|
|
101
|
+
//!
|
|
102
|
+
//! #include <cub/cub.cuh> // or equivalently <cub/device/device_histogram.cuh>
|
|
103
|
+
//!
|
|
104
|
+
//! // Declare, allocate, and initialize device-accessible pointers for
|
|
105
|
+
//! // input samples and output histogram
|
|
106
|
+
//! int num_samples; // e.g., 10
|
|
107
|
+
//! float* d_samples; // e.g., [2.2, 6.1, 7.1, 2.9, 3.5, 0.3, 2.9, 2.1, 6.1, 999.5]
|
|
108
|
+
//! int* d_histogram; // e.g., [ -, -, -, -, -, -]
|
|
109
|
+
//! int num_levels; // e.g., 7 (seven level boundaries for six bins)
|
|
110
|
+
//! float lower_level; // e.g., 0.0 (lower sample value boundary of lowest bin)
|
|
111
|
+
//! float upper_level; // e.g., 12.0 (upper sample value boundary of upper bin)
|
|
112
|
+
//! ...
|
|
113
|
+
//!
|
|
114
|
+
//! // Determine temporary device storage requirements
|
|
115
|
+
//! void* d_temp_storage = nullptr;
|
|
116
|
+
//! size_t temp_storage_bytes = 0;
|
|
117
|
+
//! cub::DeviceHistogram::HistogramEven(
|
|
118
|
+
//! d_temp_storage, temp_storage_bytes,
|
|
119
|
+
//! d_samples, d_histogram, num_levels,
|
|
120
|
+
//! lower_level, upper_level, num_samples);
|
|
121
|
+
//!
|
|
122
|
+
//! // Allocate temporary storage
|
|
123
|
+
//! cudaMalloc(&d_temp_storage, temp_storage_bytes);
|
|
124
|
+
//!
|
|
125
|
+
//! // Compute histograms
|
|
126
|
+
//! cub::DeviceHistogram::HistogramEven(
|
|
127
|
+
//! d_temp_storage, temp_storage_bytes,
|
|
128
|
+
//! d_samples, d_histogram, num_levels,
|
|
129
|
+
//! lower_level, upper_level, num_samples);
|
|
130
|
+
//!
|
|
131
|
+
//! // d_histogram <-- [1, 5, 0, 3, 0, 0];
|
|
132
|
+
//!
|
|
133
|
+
//! @endrst
|
|
134
|
+
//!
|
|
135
|
+
//! @tparam SampleIteratorT
|
|
136
|
+
//! **[inferred]** Random-access input iterator type for reading input samples @iterator
|
|
137
|
+
//!
|
|
138
|
+
//! @tparam CounterT
|
|
139
|
+
//! **[inferred]** Integer type for histogram bin counters
|
|
140
|
+
//!
|
|
141
|
+
//! @tparam LevelT
|
|
142
|
+
//! **[inferred]** Type for specifying boundaries (levels)
|
|
143
|
+
//!
|
|
144
|
+
//! @tparam OffsetT
|
|
145
|
+
//! **[inferred]** Signed integer type for sequence offsets, list lengths,
|
|
146
|
+
//! pointer differences, etc. @offset_size1
|
|
147
|
+
//!
|
|
148
|
+
//! @param[in] d_temp_storage
|
|
149
|
+
//! Device-accessible allocation of temporary storage. When `nullptr`, the
|
|
150
|
+
//! required allocation size is written to `temp_storage_bytes` and no
|
|
151
|
+
//! work is done.
|
|
152
|
+
//!
|
|
153
|
+
//! @param[in,out] temp_storage_bytes
|
|
154
|
+
//! Reference to size in bytes of `d_temp_storage` allocation
|
|
155
|
+
//!
|
|
156
|
+
//! @param[in] d_samples
|
|
157
|
+
//! The pointer to the input sequence of data samples.
|
|
158
|
+
//!
|
|
159
|
+
//! @param[out] d_histogram
|
|
160
|
+
//! The pointer to the histogram counter output array of length
|
|
161
|
+
//! `num_levels - 1`.
|
|
162
|
+
//!
|
|
163
|
+
//! @param[in] num_levels
|
|
164
|
+
//! The number of boundaries (levels) for delineating histogram samples.
|
|
165
|
+
//! Implies that the number of bins is `num_levels - 1`.
|
|
166
|
+
//!
|
|
167
|
+
//! @param[in] lower_level
|
|
168
|
+
//! The lower sample value bound (inclusive) for the lowest histogram bin.
|
|
169
|
+
//!
|
|
170
|
+
//! @param[in] upper_level
|
|
171
|
+
//! The upper sample value bound (exclusive) for the highest histogram bin.
|
|
172
|
+
//!
|
|
173
|
+
//! @param[in] num_samples
|
|
174
|
+
//! The number of input samples (i.e., the length of `d_samples`)
|
|
175
|
+
//!
|
|
176
|
+
//! @param[in] stream
|
|
177
|
+
//! @rst
|
|
178
|
+
//! **[optional]** CUDA stream to launch kernels within. Default is stream\ :sub:`0`.
|
|
179
|
+
//! @endrst
|
|
180
|
+
template <typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT>
|
|
181
|
+
CUB_RUNTIME_FUNCTION static cudaError_t HistogramEven(
|
|
182
|
+
void* d_temp_storage,
|
|
183
|
+
size_t& temp_storage_bytes,
|
|
184
|
+
SampleIteratorT d_samples,
|
|
185
|
+
CounterT* d_histogram,
|
|
186
|
+
int num_levels,
|
|
187
|
+
LevelT lower_level,
|
|
188
|
+
LevelT upper_level,
|
|
189
|
+
OffsetT num_samples,
|
|
190
|
+
cudaStream_t stream = 0)
|
|
191
|
+
{
|
|
192
|
+
/// The sample value type of the input iterator
|
|
193
|
+
using SampleT = cub::detail::it_value_t<SampleIteratorT>;
|
|
194
|
+
return MultiHistogramEven<1, 1>(
|
|
195
|
+
d_temp_storage,
|
|
196
|
+
temp_storage_bytes,
|
|
197
|
+
d_samples,
|
|
198
|
+
::cuda::std::array{d_histogram},
|
|
199
|
+
::cuda::std::array{num_levels},
|
|
200
|
+
::cuda::std::array{lower_level},
|
|
201
|
+
::cuda::std::array{upper_level},
|
|
202
|
+
num_samples,
|
|
203
|
+
static_cast<OffsetT>(1),
|
|
204
|
+
sizeof(SampleT) * num_samples,
|
|
205
|
+
stream);
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
//! @rst
|
|
209
|
+
//! Computes an intensity histogram from a sequence of data samples using equal-width bins.
|
|
210
|
+
//!
|
|
211
|
+
//! - A two-dimensional *region of interest* within ``d_samples`` can be specified using
|
|
212
|
+
//! the ``num_row_samples``, ``num_rows``, and ``row_stride_bytes`` parameters.
|
|
213
|
+
//! - The row stride must be a whole multiple of the sample data type
|
|
214
|
+
//! size, i.e., ``(row_stride_bytes % sizeof(SampleT)) == 0``.
|
|
215
|
+
//! - The number of histogram bins is (``num_levels - 1``)
|
|
216
|
+
//! - All bins comprise the same width of sample values: ``(upper_level - lower_level) / (num_levels - 1)``
|
|
217
|
+
//! - If the common type of ``SampleT`` and ``LevelT`` is of integral type, the bin for a sample is
|
|
218
|
+
//! computed as ``(sample - lower_level) * (num_levels - 1) / (upper_level - lower_level)``, rounded
|
|
219
|
+
//! down to the nearest whole number. To protect against potential overflows, if the product
|
|
220
|
+
//! ``(upper_level - lower_level) * (num_levels - 1)`` exceeds the number representable by an
|
|
221
|
+
//! ``uint64_t``, the cuda error ``cudaErrorInvalidValue`` is returned. If the common type is 128
|
|
222
|
+
//! bits wide, bin computation will use 128-bit arithmetic and ``cudaErrorInvalidValue`` will only
|
|
223
|
+
//! be returned if bin computation would overflow for 128-bit arithmetic.
|
|
224
|
+
//! - For a given row ``r`` in ``[0, num_rows)``, let
|
|
225
|
+
//! ``row_begin = d_samples + r * row_stride_bytes / sizeof(SampleT)`` and
|
|
226
|
+
//! ``row_end = row_begin + num_row_samples``. The ranges
|
|
227
|
+
//! ``[row_begin, row_end)`` and ``[d_histogram, d_histogram + num_levels - 1)``
|
|
228
|
+
//! shall not overlap in any way.
|
|
229
|
+
//! - ``cuda::std::common_type<LevelT, SampleT>`` must be valid, and both LevelT
|
|
230
|
+
//! and SampleT must be valid arithmetic types. The common type must be
|
|
231
|
+
//! convertible to ``int`` and trivially copyable.
|
|
232
|
+
//! - @devicestorage
|
|
233
|
+
//!
|
|
234
|
+
//! Snippet
|
|
235
|
+
//! +++++++
|
|
236
|
+
//!
|
|
237
|
+
//! The code snippet below illustrates the computation of a six-bin histogram
|
|
238
|
+
//! from a 2x5 region of interest within a flattened 2x7 array of float samples.
|
|
239
|
+
//!
|
|
240
|
+
//! .. code-block:: c++
|
|
241
|
+
//!
|
|
242
|
+
//! #include <cub/cub.cuh> // or equivalently <cub/device/device_histogram.cuh>
|
|
243
|
+
//!
|
|
244
|
+
//! // Declare, allocate, and initialize device-accessible pointers for
|
|
245
|
+
//! // input samples and output histogram
|
|
246
|
+
//! int num_row_samples; // e.g., 5
|
|
247
|
+
//! int num_rows; // e.g., 2
|
|
248
|
+
//! size_t row_stride_bytes; // e.g., 7 * sizeof(float)
|
|
249
|
+
//! float* d_samples; // e.g., [2.2, 6.1, 7.1, 2.9, 3.5, -, -,
|
|
250
|
+
//! // 0.3, 2.9, 2.1, 6.1, 999.5, -, -]
|
|
251
|
+
//! int* d_histogram; // e.g., [ -, -, -, -, -, -]
|
|
252
|
+
//! int num_levels; // e.g., 7 (seven level boundaries for six bins)
|
|
253
|
+
//! float lower_level; // e.g., 0.0 (lower sample value boundary of lowest bin)
|
|
254
|
+
//! float upper_level; // e.g., 12.0 (upper sample value boundary of upper bin)
|
|
255
|
+
//! ...
|
|
256
|
+
//!
|
|
257
|
+
//! // Determine temporary device storage requirements
|
|
258
|
+
//! void* d_temp_storage = nullptr;
|
|
259
|
+
//! size_t temp_storage_bytes = 0;
|
|
260
|
+
//! cub::DeviceHistogram::HistogramEven(
|
|
261
|
+
//! d_temp_storage, temp_storage_bytes,
|
|
262
|
+
//! d_samples, d_histogram, num_levels, lower_level, upper_level,
|
|
263
|
+
//! num_row_samples, num_rows, row_stride_bytes);
|
|
264
|
+
//!
|
|
265
|
+
//! // Allocate temporary storage
|
|
266
|
+
//! cudaMalloc(&d_temp_storage, temp_storage_bytes);
|
|
267
|
+
//!
|
|
268
|
+
//! // Compute histograms
|
|
269
|
+
//! cub::DeviceHistogram::HistogramEven(
|
|
270
|
+
//! d_temp_storage, temp_storage_bytes,
|
|
271
|
+
//! d_samples, d_histogram, num_levels, lower_level, upper_level,
|
|
272
|
+
//! num_row_samples, num_rows, row_stride_bytes);
|
|
273
|
+
//!
|
|
274
|
+
//! // d_histogram <-- [1, 5, 0, 3, 0, 0];
|
|
275
|
+
//!
|
|
276
|
+
//! @endrst
|
|
277
|
+
//!
|
|
278
|
+
//! @tparam SampleIteratorT
|
|
279
|
+
//! **[inferred]** Random-access input iterator type for reading
|
|
280
|
+
//! input samples. @iterator
|
|
281
|
+
//!
|
|
282
|
+
//! @tparam CounterT
|
|
283
|
+
//! **[inferred]** Integer type for histogram bin counters
|
|
284
|
+
//!
|
|
285
|
+
//! @tparam LevelT
|
|
286
|
+
//! **[inferred]** Type for specifying boundaries (levels)
|
|
287
|
+
//!
|
|
288
|
+
//! @tparam OffsetT
|
|
289
|
+
//! **[inferred]** Signed integer type for sequence offsets, list lengths,
|
|
290
|
+
//! pointer differences, etc. @offset_size1
|
|
291
|
+
//!
|
|
292
|
+
//! @param[in] d_temp_storage
|
|
293
|
+
//! Device-accessible allocation of temporary storage. When `nullptr`, the
|
|
294
|
+
//! required allocation size is written to `temp_storage_bytes` and no
|
|
295
|
+
//! work is done.
|
|
296
|
+
//!
|
|
297
|
+
//! @param[in,out] temp_storage_bytes
|
|
298
|
+
//! Reference to size in bytes of `d_temp_storage` allocation
|
|
299
|
+
//!
|
|
300
|
+
//! @param[in] d_samples
|
|
301
|
+
//! The pointer to the input sequence of data samples.
|
|
302
|
+
//!
|
|
303
|
+
//! @param[out] d_histogram
|
|
304
|
+
//! The pointer to the histogram counter output array of
|
|
305
|
+
//! length `num_levels - 1`.
|
|
306
|
+
//!
|
|
307
|
+
//! @param[in] num_levels
|
|
308
|
+
//! The number of boundaries (levels) for delineating histogram samples.
|
|
309
|
+
//! Implies that the number of bins is `num_levels - 1`.
|
|
310
|
+
//!
|
|
311
|
+
//! @param[in] lower_level
|
|
312
|
+
//! The lower sample value bound (inclusive) for the lowest histogram bin.
|
|
313
|
+
//!
|
|
314
|
+
//! @param[in] upper_level
|
|
315
|
+
//! The upper sample value bound (exclusive) for the highest histogram bin.
|
|
316
|
+
//!
|
|
317
|
+
//! @param[in] num_row_samples
|
|
318
|
+
//! The number of data samples per row in the region of interest
|
|
319
|
+
//!
|
|
320
|
+
//! @param[in] num_rows
|
|
321
|
+
//! The number of rows in the region of interest
|
|
322
|
+
//!
|
|
323
|
+
//! @param[in] row_stride_bytes
|
|
324
|
+
//! The number of bytes between starts of consecutive rows in
|
|
325
|
+
//! the region of interest
|
|
326
|
+
//!
|
|
327
|
+
//! @param[in] stream
|
|
328
|
+
//! @rst
|
|
329
|
+
//! **[optional]** CUDA stream to launch kernels within. Default is stream\ :sub:`0`.
|
|
330
|
+
//! @endrst
|
|
331
|
+
template <typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT>
|
|
332
|
+
CUB_RUNTIME_FUNCTION static cudaError_t HistogramEven(
|
|
333
|
+
void* d_temp_storage,
|
|
334
|
+
size_t& temp_storage_bytes,
|
|
335
|
+
SampleIteratorT d_samples,
|
|
336
|
+
CounterT* d_histogram,
|
|
337
|
+
int num_levels,
|
|
338
|
+
LevelT lower_level,
|
|
339
|
+
LevelT upper_level,
|
|
340
|
+
OffsetT num_row_samples,
|
|
341
|
+
OffsetT num_rows,
|
|
342
|
+
size_t row_stride_bytes,
|
|
343
|
+
cudaStream_t stream = 0)
|
|
344
|
+
{
|
|
345
|
+
return MultiHistogramEven<1, 1>(
|
|
346
|
+
d_temp_storage,
|
|
347
|
+
temp_storage_bytes,
|
|
348
|
+
d_samples,
|
|
349
|
+
::cuda::std::array{d_histogram},
|
|
350
|
+
::cuda::std::array{num_levels},
|
|
351
|
+
::cuda::std::array{lower_level},
|
|
352
|
+
::cuda::std::array{upper_level},
|
|
353
|
+
num_row_samples,
|
|
354
|
+
num_rows,
|
|
355
|
+
row_stride_bytes,
|
|
356
|
+
stream);
|
|
357
|
+
}
|
|
358
|
+
|
|
359
|
+
//! @rst
|
|
360
|
+
//! Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using
|
|
361
|
+
//! equal-width bins.
|
|
362
|
+
//!
|
|
363
|
+
//! - The input is a sequence of *pixel* structures, where each pixel comprises
|
|
364
|
+
//! a record of ``NUM_CHANNELS`` consecutive data samples
|
|
365
|
+
//! (e.g., an *RGBA* pixel).
|
|
366
|
+
//! - ``NUM_CHANNELS`` can be up to 4.
|
|
367
|
+
//! - Of the ``NUM_CHANNELS`` specified, the function will only compute
|
|
368
|
+
//! histograms for the first ``NUM_ACTIVE_CHANNELS``
|
|
369
|
+
//! (e.g., only *RGB* histograms from *RGBA* pixel samples).
|
|
370
|
+
//! - The number of histogram bins for channel\ :sub:`i` is ``num_levels[i] - 1``.
|
|
371
|
+
//! - For channel\ :sub:`i`, the range of values for all histogram bins have the same width:
|
|
372
|
+
//! ``(upper_level[i] - lower_level[i]) / (num_levels[i] - 1)``
|
|
373
|
+
//! - If the common type of sample and level is of integral type, the bin for a sample is
|
|
374
|
+
//! computed as ``(sample - lower_level[i]) * (num_levels[i] - 1) / (upper_level[i] - lower_level[i])``, rounded down
|
|
375
|
+
//! to the nearest whole number. To protect against potential overflows, if, for any channel ``i``, the product
|
|
376
|
+
//! ``(upper_level[i] - lower_level[i]) * (num_levels[i] - 1)`` exceeds the number representable by an ``uint64_t``,
|
|
377
|
+
//! the cuda error ``cudaErrorInvalidValue`` is returned. If the common type is 128 bits wide, bin computation
|
|
378
|
+
//! will use 128-bit arithmetic and ``cudaErrorInvalidValue`` will only be returned if bin
|
|
379
|
+
//! computation would overflow for 128-bit arithmetic.
|
|
380
|
+
//! - For a given channel ``c`` in ``[0, NUM_ACTIVE_CHANNELS)``, the ranges
|
|
381
|
+
//! ``[d_samples, d_samples + NUM_CHANNELS * num_pixels)`` and
|
|
382
|
+
//! ``[d_histogram[c], d_histogram[c] + num_levels[c] - 1)`` shall not overlap in any way.
|
|
383
|
+
//! - ``cuda::std::common_type<LevelT, SampleT>`` must be valid, and both LevelT
|
|
384
|
+
//! and SampleT must be valid arithmetic types.
|
|
385
|
+
//! The common type must be convertible to ``int`` and trivially copyable.
|
|
386
|
+
//! - @devicestorage
|
|
387
|
+
//!
|
|
388
|
+
//! Snippet
|
|
389
|
+
//! +++++++
|
|
390
|
+
//!
|
|
391
|
+
//! The code snippet below illustrates the computation of three 256-bin *RGB* histograms
|
|
392
|
+
//! from a quad-channel sequence of *RGBA* pixels (8 bits per channel per pixel)
|
|
393
|
+
//!
|
|
394
|
+
//! .. code-block:: c++
|
|
395
|
+
//!
|
|
396
|
+
//! #include <cub/cub.cuh> // or equivalently <cub/device/device_histogram.cuh>
|
|
397
|
+
//!
|
|
398
|
+
//! // Declare, allocate, and initialize device-accessible pointers for
|
|
399
|
+
//! // input samples and output histograms
|
|
400
|
+
//! int num_pixels; // e.g., 5
|
|
401
|
+
//! unsigned char* d_samples; // e.g., [(2, 6, 7, 5), (3, 0, 2, 1), (7, 0, 6, 2),
|
|
402
|
+
//! // (0, 6, 7, 5), (3, 0, 2, 6)]
|
|
403
|
+
//! int* d_histogram[3]; // e.g., three device pointers to three device buffers,
|
|
404
|
+
//! // each allocated with 256 integer counters
|
|
405
|
+
//! int num_levels[3]; // e.g., {257, 257, 257};
|
|
406
|
+
//! unsigned int lower_level[3]; // e.g., {0, 0, 0};
|
|
407
|
+
//! unsigned int upper_level[3]; // e.g., {256, 256, 256};
|
|
408
|
+
//! ...
|
|
409
|
+
//!
|
|
410
|
+
//! // Determine temporary device storage requirements
|
|
411
|
+
//! void* d_temp_storage = nullptr;
|
|
412
|
+
//! size_t temp_storage_bytes = 0;
|
|
413
|
+
//! cub::DeviceHistogram::MultiHistogramEven<4, 3>(
|
|
414
|
+
//! d_temp_storage, temp_storage_bytes,
|
|
415
|
+
//! d_samples, d_histogram, num_levels,
|
|
416
|
+
//! lower_level, upper_level, num_pixels);
|
|
417
|
+
//!
|
|
418
|
+
//! // Allocate temporary storage
|
|
419
|
+
//! cudaMalloc(&d_temp_storage, temp_storage_bytes);
|
|
420
|
+
//!
|
|
421
|
+
//! // Compute histograms
|
|
422
|
+
//! cub::DeviceHistogram::MultiHistogramEven<4, 3>(
|
|
423
|
+
//! d_temp_storage, temp_storage_bytes,
|
|
424
|
+
//! d_samples, d_histogram, num_levels,
|
|
425
|
+
//! lower_level, upper_level, num_pixels);
|
|
426
|
+
//!
|
|
427
|
+
//! // d_histogram <-- [ [1, 0, 1, 2, 0, 0, 0, 1, 0, 0, 0, ..., 0],
|
|
428
|
+
//! // [3, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, ..., 0],
|
|
429
|
+
//! // [0, 0, 2, 0, 0, 0, 1, 2, 0, 0, 0, ..., 0] ]
|
|
430
|
+
//!
|
|
431
|
+
//! @endrst
|
|
432
|
+
//!
|
|
433
|
+
//! @tparam NUM_CHANNELS
|
|
434
|
+
//! Number of channels interleaved in the input data (may be greater than
|
|
435
|
+
//! the number of channels being actively histogrammed)
|
|
436
|
+
//!
|
|
437
|
+
//! @tparam NUM_ACTIVE_CHANNELS
|
|
438
|
+
//! **[inferred]** Number of channels actively being histogrammed
|
|
439
|
+
//!
|
|
440
|
+
//! @tparam SampleIteratorT
|
|
441
|
+
//! **[inferred]** Random-access input iterator type for reading
|
|
442
|
+
//! input samples. @iterator
|
|
443
|
+
//!
|
|
444
|
+
//! @tparam CounterT
|
|
445
|
+
//! **[inferred]** Integer type for histogram bin counters
|
|
446
|
+
//!
|
|
447
|
+
//! @tparam LevelT
|
|
448
|
+
//! **[inferred]** Type for specifying boundaries (levels)
|
|
449
|
+
//!
|
|
450
|
+
//! @tparam OffsetT
|
|
451
|
+
//! **[inferred]** Signed integer type for sequence offsets, list lengths,
|
|
452
|
+
//! pointer differences, etc. @offset_size1
|
|
453
|
+
//!
|
|
454
|
+
//! @param[in] d_temp_storage
|
|
455
|
+
//! Device-accessible allocation of temporary storage. When `nullptr`, the
|
|
456
|
+
//! required allocation size is written to `temp_storage_bytes` and no
|
|
457
|
+
//! work is done.
|
|
458
|
+
//!
|
|
459
|
+
//! @param[in,out] temp_storage_bytes
|
|
460
|
+
//! Reference to size in bytes of `d_temp_storage` allocation
|
|
461
|
+
//!
|
|
462
|
+
//! @param[in] d_samples
|
|
463
|
+
//! The pointer to the multi-channel input sequence of data samples.
|
|
464
|
+
//! The samples from different channels are assumed to be interleaved
|
|
465
|
+
//! (e.g., an array of 32-bit pixels where each pixel consists of four
|
|
466
|
+
//! *RGBA* 8-bit samples).
|
|
467
|
+
//!
|
|
468
|
+
//! @param[out] d_histogram
|
|
469
|
+
//! @rst
|
|
470
|
+
//! The pointers to the histogram counter output arrays, one for each active
|
|
471
|
+
//! channel. For channel\ :sub:`i`, the allocation length of
|
|
472
|
+
//! ``d_histogram[i]`` should be ``num_levels[i] - 1``.
|
|
473
|
+
//! @endrst
|
|
474
|
+
//!
|
|
475
|
+
//! @param[in] num_levels
|
|
476
|
+
//! @rst
|
|
477
|
+
//! The number of boundaries (levels) for delineating histogram samples in each active channel.
|
|
478
|
+
//! Implies that the number of bins for channel\ :sub:`i` is ``num_levels[i] - 1``.
|
|
479
|
+
//! @endrst
|
|
480
|
+
//!
|
|
481
|
+
//! @param[in] lower_level
|
|
482
|
+
//! The lower sample value bound (inclusive) for the lowest histogram bin in each active channel.
|
|
483
|
+
//!
|
|
484
|
+
//! @param[in] upper_level
|
|
485
|
+
//! The upper sample value bound (exclusive) for the highest histogram bin in each active channel.
|
|
486
|
+
//!
|
|
487
|
+
//! @param[in] num_pixels
|
|
488
|
+
//! The number of multi-channel pixels (i.e., the length of `d_samples` divided by `NUM_CHANNELS`)
|
|
489
|
+
//!
|
|
490
|
+
//! @param[in] stream
|
|
491
|
+
//! @rst
|
|
492
|
+
//! **[optional]** CUDA stream to launch kernels within. Default is stream\ :sub:`0`.
|
|
493
|
+
//! @endrst
|
|
494
|
+
template <int NUM_CHANNELS,
|
|
495
|
+
int NUM_ACTIVE_CHANNELS,
|
|
496
|
+
typename SampleIteratorT,
|
|
497
|
+
typename CounterT,
|
|
498
|
+
typename LevelT,
|
|
499
|
+
typename OffsetT>
|
|
500
|
+
CUB_RUNTIME_FUNCTION static cudaError_t MultiHistogramEven(
|
|
501
|
+
void* d_temp_storage,
|
|
502
|
+
size_t& temp_storage_bytes,
|
|
503
|
+
SampleIteratorT d_samples,
|
|
504
|
+
::cuda::std::array<CounterT*, NUM_ACTIVE_CHANNELS> d_histogram,
|
|
505
|
+
::cuda::std::array<int, NUM_ACTIVE_CHANNELS> num_levels,
|
|
506
|
+
::cuda::std::array<LevelT, NUM_ACTIVE_CHANNELS> lower_level,
|
|
507
|
+
::cuda::std::array<LevelT, NUM_ACTIVE_CHANNELS> upper_level,
|
|
508
|
+
OffsetT num_pixels,
|
|
509
|
+
cudaStream_t stream = 0)
|
|
510
|
+
{
|
|
511
|
+
/// The sample value type of the input iterator
|
|
512
|
+
using SampleT = cub::detail::it_value_t<SampleIteratorT>;
|
|
513
|
+
|
|
514
|
+
return MultiHistogramEven<NUM_CHANNELS, NUM_ACTIVE_CHANNELS>(
|
|
515
|
+
d_temp_storage,
|
|
516
|
+
temp_storage_bytes,
|
|
517
|
+
d_samples,
|
|
518
|
+
d_histogram,
|
|
519
|
+
num_levels,
|
|
520
|
+
lower_level,
|
|
521
|
+
upper_level,
|
|
522
|
+
num_pixels,
|
|
523
|
+
static_cast<OffsetT>(1),
|
|
524
|
+
sizeof(SampleT) * NUM_CHANNELS * num_pixels,
|
|
525
|
+
stream);
|
|
526
|
+
}
|
|
527
|
+
|
|
528
|
+
private:
|
|
529
|
+
template <size_t N, typename T>
|
|
530
|
+
_CCCL_HOST_DEVICE static auto to_array(T* ptr)
|
|
531
|
+
{
|
|
532
|
+
::cuda::std::array<::cuda::std::remove_const_t<T>, N> a{};
|
|
533
|
+
::cuda::std::copy(ptr, ptr + N, a.begin());
|
|
534
|
+
return a;
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
public:
|
|
538
|
+
//! Deprecated [Since 3.0]
|
|
539
|
+
template <int NUM_CHANNELS,
|
|
540
|
+
int NUM_ACTIVE_CHANNELS,
|
|
541
|
+
typename SampleIteratorT,
|
|
542
|
+
typename CounterT,
|
|
543
|
+
typename LevelT,
|
|
544
|
+
typename OffsetT>
|
|
545
|
+
CCCL_DEPRECATED_BECAUSE("Prefer the new overload taking cuda::std::arrays") CUB_RUNTIME_FUNCTION static cudaError_t
|
|
546
|
+
MultiHistogramEven(
|
|
547
|
+
void* d_temp_storage,
|
|
548
|
+
size_t& temp_storage_bytes,
|
|
549
|
+
SampleIteratorT d_samples,
|
|
550
|
+
CounterT* d_histogram[NUM_ACTIVE_CHANNELS],
|
|
551
|
+
const int num_levels[NUM_ACTIVE_CHANNELS],
|
|
552
|
+
const LevelT lower_level[NUM_ACTIVE_CHANNELS],
|
|
553
|
+
const LevelT upper_level[NUM_ACTIVE_CHANNELS],
|
|
554
|
+
OffsetT num_pixels,
|
|
555
|
+
cudaStream_t stream = 0)
|
|
556
|
+
{
|
|
557
|
+
/// The sample value type of the input iterator
|
|
558
|
+
using SampleT = cub::detail::it_value_t<SampleIteratorT>;
|
|
559
|
+
return MultiHistogramEven<NUM_CHANNELS, NUM_ACTIVE_CHANNELS>(
|
|
560
|
+
d_temp_storage,
|
|
561
|
+
temp_storage_bytes,
|
|
562
|
+
d_samples,
|
|
563
|
+
to_array<NUM_ACTIVE_CHANNELS>(d_histogram),
|
|
564
|
+
to_array<NUM_ACTIVE_CHANNELS>(num_levels),
|
|
565
|
+
to_array<NUM_ACTIVE_CHANNELS>(lower_level),
|
|
566
|
+
to_array<NUM_ACTIVE_CHANNELS>(upper_level),
|
|
567
|
+
num_pixels,
|
|
568
|
+
stream);
|
|
569
|
+
}
|
|
570
|
+
|
|
571
|
+
//! @rst
|
|
572
|
+
//! Computes per-channel intensity histograms from a sequence of
|
|
573
|
+
//! multi-channel "pixel" data samples using equal-width bins.
|
|
574
|
+
//!
|
|
575
|
+
//! - The input is a sequence of *pixel* structures, where each pixel
|
|
576
|
+
//! comprises a record of ``NUM_CHANNELS`` consecutive data samples (e.g., an *RGBA* pixel).
|
|
577
|
+
//! - ``NUM_CHANNELS`` can be up to 4.
|
|
578
|
+
//! - Of the ``NUM_CHANNELS`` specified, the function will only compute
|
|
579
|
+
//! histograms for the first ``NUM_ACTIVE_CHANNELS`` (e.g., only *RGB*
|
|
580
|
+
//! histograms from *RGBA* pixel samples).
|
|
581
|
+
//! - A two-dimensional *region of interest* within ``d_samples`` can be
|
|
582
|
+
//! specified using the ``num_row_pixels``, ``num_rows``, and ``row_stride_bytes`` parameters.
|
|
583
|
+
//! - The row stride must be a whole multiple of the sample data type
|
|
584
|
+
//! size, i.e., ``(row_stride_bytes % sizeof(SampleT)) == 0``.
|
|
585
|
+
//! - The number of histogram bins for channel\ :sub:`i` is ``num_levels[i] - 1``.
|
|
586
|
+
//! - For channel\ :sub:`i`, the range of values for all histogram bins have the same width:
|
|
587
|
+
//! ``(upper_level[i] - lower_level[i]) / (num_levels[i] - 1)``
|
|
588
|
+
//! - If the common type of sample and level is of integral type, the bin for a sample is
|
|
589
|
+
//! computed as ``(sample - lower_level[i]) * (num_levels[i] - 1) / (upper_level[i] - lower_level[i])``,
|
|
590
|
+
//! rounded down to the nearest whole number. To protect against potential overflows, if, for any channel ``i``,
|
|
591
|
+
//! the product ``(upper_level[i] - lower_level[i]) * (num_levels[i] - 1)`` exceeds the number representable by
|
|
592
|
+
//! a ``uint64_t``, the CUDA error ``cudaErrorInvalidValue`` is returned.
|
|
593
|
+
//! If the common type is 128 bits wide, bin computation will use 128-bit arithmetic and ``cudaErrorInvalidValue``
|
|
594
|
+
//! will only be returned if bin computation would overflow for 128-bit arithmetic.
|
|
595
|
+
//! - For a given row ``r`` in ``[0, num_rows)``, and sample ``s`` in
|
|
596
|
+
//! ``[0, num_row_pixels)``, let
|
|
597
|
+
//! ``row_begin = d_samples + r * row_stride_bytes / sizeof(SampleT)``,
|
|
598
|
+
//! ``sample_begin = row_begin + s * NUM_CHANNELS``, and
|
|
599
|
+
//! ``sample_end = sample_begin + NUM_ACTIVE_CHANNELS``. For a given channel ``c`` in
|
|
600
|
+
//! ``[0, NUM_ACTIVE_CHANNELS)``, the ranges
|
|
601
|
+
//! ``[sample_begin, sample_end)`` and
|
|
602
|
+
//! ``[d_histogram[c], d_histogram[c] + num_levels[c] - 1)`` shall not overlap in any way.
|
|
603
|
+
//! - ``cuda::std::common_type<LevelT, SampleT>`` must be valid, and both LevelT
|
|
604
|
+
//! and SampleT must be valid arithmetic types. The common type must be
|
|
605
|
+
//! convertible to ``int`` and trivially copyable.
|
|
606
|
+
//! - @devicestorage
|
|
607
|
+
//!
|
|
608
|
+
//! Snippet
|
|
609
|
+
//! +++++++
|
|
610
|
+
//!
|
|
611
|
+
//! The code snippet below illustrates the computation of three 256-bin
|
|
612
|
+
//! *RGB* histograms from a 2x3 region of interest within a flattened 2x4
|
|
613
|
+
//! array of quad-channel *RGBA* pixels (8 bits per channel per pixel).
|
|
614
|
+
//!
|
|
615
|
+
//! .. code-block:: c++
|
|
616
|
+
//!
|
|
617
|
+
//! #include <cub/cub.cuh> // or equivalently <cub/device/device_histogram.cuh>
|
|
618
|
+
//!
|
|
619
|
+
//! // Declare, allocate, and initialize device-accessible pointers for input
|
|
620
|
+
//! // samples and output histograms
|
|
621
|
+
//! int num_row_pixels; // e.g., 3
|
|
622
|
+
//! int num_rows; // e.g., 2
|
|
623
|
+
//! size_t row_stride_bytes; // e.g., 4 * sizeof(unsigned char) * NUM_CHANNELS
|
|
624
|
+
//! unsigned char* d_samples; // e.g., [(2, 6, 7, 5), (3, 0, 2, 1), (7, 0, 6, 2), (-, -, -, -),
|
|
625
|
+
//! // (0, 6, 7, 5), (3, 0, 2, 6), (1, 1, 1, 1), (-, -, -, -)]
|
|
626
|
+
//! int* d_histogram[3]; // e.g., three device pointers to three device buffers,
|
|
627
|
+
//! // each allocated with 256 integer counters
|
|
628
|
+
//! int num_levels[3]; // e.g., {257, 257, 257};
|
|
629
|
+
//! unsigned int lower_level[3]; // e.g., {0, 0, 0};
|
|
630
|
+
//! unsigned int upper_level[3]; // e.g., {256, 256, 256};
|
|
631
|
+
//! ...
|
|
632
|
+
//!
|
|
633
|
+
//! // Determine temporary device storage requirements
|
|
634
|
+
//! void* d_temp_storage = nullptr;
|
|
635
|
+
//! size_t temp_storage_bytes = 0;
|
|
636
|
+
//! cub::DeviceHistogram::MultiHistogramEven<4, 3>(
|
|
637
|
+
//! d_temp_storage, temp_storage_bytes,
|
|
638
|
+
//! d_samples, d_histogram, num_levels, lower_level, upper_level,
|
|
639
|
+
//! num_row_pixels, num_rows, row_stride_bytes);
|
|
640
|
+
//!
|
|
641
|
+
//! // Allocate temporary storage
|
|
642
|
+
//! cudaMalloc(&d_temp_storage, temp_storage_bytes);
|
|
643
|
+
//!
|
|
644
|
+
//! // Compute histograms
|
|
645
|
+
//! cub::DeviceHistogram::MultiHistogramEven<4, 3>(
|
|
646
|
+
//! d_temp_storage, temp_storage_bytes,
|
|
647
|
+
//! d_samples, d_histogram, num_levels, lower_level, upper_level,
|
|
648
|
+
//! num_row_pixels, num_rows, row_stride_bytes);
|
|
649
|
+
//!
|
|
650
|
+
//! // d_histogram <-- [ [1, 1, 1, 2, 0, 0, 0, 1, 0, 0, 0, ..., 0],
|
|
651
|
+
//! // [3, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, ..., 0],
|
|
652
|
+
//! // [0, 1, 2, 0, 0, 0, 1, 2, 0, 0, 0, ..., 0] ]
|
|
653
|
+
//!
|
|
654
|
+
//! @endrst
|
|
655
|
+
//!
|
|
656
|
+
//! @tparam NUM_CHANNELS
|
|
657
|
+
//! Number of channels interleaved in the input data (may be greater than
|
|
658
|
+
//! the number of channels being actively histogrammed)
|
|
659
|
+
//!
|
|
660
|
+
//! @tparam NUM_ACTIVE_CHANNELS
|
|
661
|
+
//! **[inferred]** Number of channels actively being histogrammed
|
|
662
|
+
//!
|
|
663
|
+
//! @tparam SampleIteratorT
|
|
664
|
+
//! **[inferred]** Random-access input iterator type for reading input
|
|
665
|
+
//! samples. @iterator
|
|
666
|
+
//!
|
|
667
|
+
//! @tparam CounterT
|
|
668
|
+
//! **[inferred]** Integer type for histogram bin counters
|
|
669
|
+
//!
|
|
670
|
+
//! @tparam LevelT
|
|
671
|
+
//! **[inferred]** Type for specifying boundaries (levels)
|
|
672
|
+
//!
|
|
673
|
+
//! @tparam OffsetT
|
|
674
|
+
//! **[inferred]** Signed integer type for sequence offsets, list lengths,
|
|
675
|
+
//! pointer differences, etc. @offset_size1
|
|
676
|
+
//!
|
|
677
|
+
//! @param[in] d_temp_storage
|
|
678
|
+
//! Device-accessible allocation of temporary storage. When `nullptr`, the
|
|
679
|
+
//! required allocation size is written to `temp_storage_bytes` and no
|
|
680
|
+
//! work is done.
|
|
681
|
+
//!
|
|
682
|
+
//! @param[in,out] temp_storage_bytes
|
|
683
|
+
//! Reference to size in bytes of `d_temp_storage` allocation
|
|
684
|
+
//!
|
|
685
|
+
//! @param[in] d_samples
|
|
686
|
+
//! The pointer to the multi-channel input sequence of data samples. The
|
|
687
|
+
//! samples from different channels are assumed to be interleaved (e.g.,
|
|
688
|
+
//! an array of 32-bit pixels where each pixel consists of four
|
|
689
|
+
//! *RGBA* 8-bit samples).
|
|
690
|
+
//!
|
|
691
|
+
//! @param[out] d_histogram
|
|
692
|
+
//! @rst
|
|
693
|
+
//! The pointers to the histogram counter output arrays, one for each
|
|
694
|
+
//! active channel. For channel\ :sub:`i`, the allocation length
|
|
695
|
+
//! of ``d_histogram[i]`` should be ``num_levels[i] - 1``.
|
|
696
|
+
//! @endrst
|
|
697
|
+
//!
|
|
698
|
+
//! @param[in] num_levels
|
|
699
|
+
//! @rst
|
|
700
|
+
//! The number of boundaries (levels) for delineating histogram samples in each active channel.
|
|
701
|
+
//! Implies that the number of bins for channel\ :sub:`i` is ``num_levels[i] - 1``.
|
|
702
|
+
//! @endrst
|
|
703
|
+
//!
|
|
704
|
+
//! @param[in] lower_level
|
|
705
|
+
//! The lower sample value bound (inclusive) for the lowest histogram bin in each active channel.
|
|
706
|
+
//!
|
|
707
|
+
//! @param[in] upper_level
|
|
708
|
+
//! The upper sample value bound (exclusive) for the highest histogram bin in each active channel.
|
|
709
|
+
//!
|
|
710
|
+
//! @param[in] num_row_pixels
|
|
711
|
+
//! The number of multi-channel pixels per row in the region of interest
|
|
712
|
+
//!
|
|
713
|
+
//! @param[in] num_rows
|
|
714
|
+
//! The number of rows in the region of interest
|
|
715
|
+
//!
|
|
716
|
+
//! @param[in] row_stride_bytes
|
|
717
|
+
//! The number of bytes between starts of consecutive rows in the region of
|
|
718
|
+
//! interest
|
|
719
|
+
//!
|
|
720
|
+
//! @param[in] stream
|
|
721
|
+
//! @rst
|
|
722
|
+
//! **[optional]** CUDA stream to launch kernels within. Default is stream\ :sub:`0`.
|
|
723
|
+
//! @endrst
|
|
724
|
+
template <int NUM_CHANNELS,
|
|
725
|
+
int NUM_ACTIVE_CHANNELS,
|
|
726
|
+
typename SampleIteratorT,
|
|
727
|
+
typename CounterT,
|
|
728
|
+
typename LevelT,
|
|
729
|
+
typename OffsetT>
|
|
730
|
+
CUB_RUNTIME_FUNCTION static cudaError_t MultiHistogramEven(
|
|
731
|
+
void* d_temp_storage,
|
|
732
|
+
size_t& temp_storage_bytes,
|
|
733
|
+
SampleIteratorT d_samples,
|
|
734
|
+
::cuda::std::array<CounterT*, NUM_ACTIVE_CHANNELS> d_histogram,
|
|
735
|
+
::cuda::std::array<int, NUM_ACTIVE_CHANNELS> num_levels,
|
|
736
|
+
::cuda::std::array<LevelT, NUM_ACTIVE_CHANNELS> lower_level,
|
|
737
|
+
::cuda::std::array<LevelT, NUM_ACTIVE_CHANNELS> upper_level,
|
|
738
|
+
OffsetT num_row_pixels,
|
|
739
|
+
OffsetT num_rows,
|
|
740
|
+
size_t row_stride_bytes,
|
|
741
|
+
cudaStream_t stream = 0)
|
|
742
|
+
{
|
|
743
|
+
_CCCL_NVTX_RANGE_SCOPE_IF(d_temp_storage, "cub::DeviceHistogram::MultiHistogramEven");
|
|
744
|
+
|
|
745
|
+
/// The sample value type of the input iterator
|
|
746
|
+
using SampleT = cub::detail::it_value_t<SampleIteratorT>;
|
|
747
|
+
::cuda::std::bool_constant<sizeof(SampleT) == 1> is_byte_sample;
|
|
748
|
+
|
|
749
|
+
if constexpr (sizeof(OffsetT) > sizeof(int))
|
|
750
|
+
{
|
|
751
|
+
if ((unsigned long long) (num_rows * row_stride_bytes) < (unsigned long long) INT_MAX)
|
|
752
|
+
{
|
|
753
|
+
// Down-convert OffsetT data type
|
|
754
|
+
return DispatchHistogram<NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, LevelT, int>::DispatchEven(
|
|
755
|
+
d_temp_storage,
|
|
756
|
+
temp_storage_bytes,
|
|
757
|
+
d_samples,
|
|
758
|
+
d_histogram,
|
|
759
|
+
num_levels,
|
|
760
|
+
lower_level,
|
|
761
|
+
upper_level,
|
|
762
|
+
(int) num_row_pixels,
|
|
763
|
+
(int) num_rows,
|
|
764
|
+
(int) (row_stride_bytes / sizeof(SampleT)),
|
|
765
|
+
stream,
|
|
766
|
+
is_byte_sample);
|
|
767
|
+
}
|
|
768
|
+
}
|
|
769
|
+
|
|
770
|
+
return DispatchHistogram<NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, LevelT, OffsetT>::DispatchEven(
|
|
771
|
+
d_temp_storage,
|
|
772
|
+
temp_storage_bytes,
|
|
773
|
+
d_samples,
|
|
774
|
+
d_histogram,
|
|
775
|
+
num_levels,
|
|
776
|
+
lower_level,
|
|
777
|
+
upper_level,
|
|
778
|
+
num_row_pixels,
|
|
779
|
+
num_rows,
|
|
780
|
+
(OffsetT) (row_stride_bytes / sizeof(SampleT)),
|
|
781
|
+
stream,
|
|
782
|
+
is_byte_sample);
|
|
783
|
+
}
|
|
784
|
+
|
|
785
|
+
//! Deprecated [Since 3.0]
|
|
786
|
+
template <int NUM_CHANNELS,
|
|
787
|
+
int NUM_ACTIVE_CHANNELS,
|
|
788
|
+
typename SampleIteratorT,
|
|
789
|
+
typename CounterT,
|
|
790
|
+
typename LevelT,
|
|
791
|
+
typename OffsetT>
|
|
792
|
+
CCCL_DEPRECATED_BECAUSE("Prefer the new overload taking cuda::std::arrays") CUB_RUNTIME_FUNCTION static cudaError_t
|
|
793
|
+
MultiHistogramEven(
|
|
794
|
+
void* d_temp_storage,
|
|
795
|
+
size_t& temp_storage_bytes,
|
|
796
|
+
SampleIteratorT d_samples,
|
|
797
|
+
CounterT* d_histogram[NUM_ACTIVE_CHANNELS],
|
|
798
|
+
const int num_levels[NUM_ACTIVE_CHANNELS],
|
|
799
|
+
const LevelT lower_level[NUM_ACTIVE_CHANNELS],
|
|
800
|
+
const LevelT upper_level[NUM_ACTIVE_CHANNELS],
|
|
801
|
+
OffsetT num_row_pixels,
|
|
802
|
+
OffsetT num_rows,
|
|
803
|
+
size_t row_stride_bytes,
|
|
804
|
+
cudaStream_t stream = 0)
|
|
805
|
+
{
|
|
806
|
+
return MultiHistogramEven<NUM_CHANNELS, NUM_ACTIVE_CHANNELS>(
|
|
807
|
+
d_temp_storage,
|
|
808
|
+
temp_storage_bytes,
|
|
809
|
+
d_samples,
|
|
810
|
+
to_array<NUM_ACTIVE_CHANNELS>(d_histogram),
|
|
811
|
+
to_array<NUM_ACTIVE_CHANNELS>(num_levels),
|
|
812
|
+
to_array<NUM_ACTIVE_CHANNELS>(lower_level),
|
|
813
|
+
to_array<NUM_ACTIVE_CHANNELS>(upper_level),
|
|
814
|
+
num_row_pixels,
|
|
815
|
+
num_rows,
|
|
816
|
+
row_stride_bytes,
|
|
817
|
+
stream);
|
|
818
|
+
}
|
|
819
|
+
|
|
820
|
+
//! @} end member group
|
|
821
|
+
//! @name Custom bin ranges
|
|
822
|
+
//! @{
|
|
823
|
+
|
|
824
|
+
//! @rst
|
|
825
|
+
//! Computes an intensity histogram from a sequence of data samples using the specified bin boundary levels.
|
|
826
|
+
//!
|
|
827
|
+
//! - The number of histogram bins is (``num_levels - 1``)
|
|
828
|
+
//! - The value range for bin\ :sub:`i` is ``[level[i], level[i+1])``
|
|
829
|
+
//! - The range ``[d_histogram, d_histogram + num_levels - 1)`` shall not
|
|
830
|
+
//! overlap ``[d_samples, d_samples + num_samples)`` nor
|
|
831
|
+
//! ``[d_levels, d_levels + num_levels)`` in any way. The ranges
|
|
832
|
+
//! ``[d_levels, d_levels + num_levels)`` and
|
|
833
|
+
//! ``[d_samples, d_samples + num_samples)`` may overlap.
|
|
834
|
+
//! - @devicestorage
|
|
835
|
+
//!
|
|
836
|
+
//! Snippet
|
|
837
|
+
//! +++++++
|
|
838
|
+
//!
|
|
839
|
+
//! The code snippet below illustrates the computation of a six-bin histogram
|
|
840
|
+
//! from a sequence of float samples
|
|
841
|
+
//!
|
|
842
|
+
//! .. code-block:: c++
|
|
843
|
+
//!
|
|
844
|
+
//! #include <cub/cub.cuh> // or equivalently <cub/device/device_histogram.cuh>
|
|
845
|
+
//!
|
|
846
|
+
//! // Declare, allocate, and initialize device-accessible pointers for input
|
|
847
|
+
//! // samples and output histogram
|
|
848
|
+
//! int num_samples; // e.g., 10
|
|
849
|
+
//! float* d_samples; // e.g., [2.2, 6.0, 7.1, 2.9, 3.5, 0.3, 2.9, 2.0, 6.1, 999.5]
|
|
850
|
+
//! int* d_histogram; // e.g., [ -, -, -, -, -, -]
|
|
851
|
+
//! int num_levels; // e.g., 7 (seven level boundaries for six bins)
|
|
852
|
+
//! float* d_levels; // e.g., [0.0, 2.0, 4.0, 6.0, 8.0, 12.0, 16.0]
|
|
853
|
+
//! ...
|
|
854
|
+
//!
|
|
855
|
+
//! // Determine temporary device storage requirements
|
|
856
|
+
//! void* d_temp_storage = nullptr;
|
|
857
|
+
//! size_t temp_storage_bytes = 0;
|
|
858
|
+
//! cub::DeviceHistogram::HistogramRange(
|
|
859
|
+
//! d_temp_storage, temp_storage_bytes,
|
|
860
|
+
//! d_samples, d_histogram, num_levels, d_levels, num_samples);
|
|
861
|
+
//!
|
|
862
|
+
//! // Allocate temporary storage
|
|
863
|
+
//! cudaMalloc(&d_temp_storage, temp_storage_bytes);
|
|
864
|
+
//!
|
|
865
|
+
//! // Compute histograms
|
|
866
|
+
//! cub::DeviceHistogram::HistogramRange(
|
|
867
|
+
//! d_temp_storage, temp_storage_bytes,
|
|
868
|
+
//! d_samples, d_histogram, num_levels, d_levels, num_samples);
|
|
869
|
+
//!
|
|
870
|
+
//! // d_histogram <-- [1, 5, 0, 3, 0, 0];
|
|
871
|
+
//!
|
|
872
|
+
//! @endrst
|
|
873
|
+
//!
|
|
874
|
+
//! @tparam SampleIteratorT
|
|
875
|
+
//! **[inferred]** Random-access input iterator type for reading
|
|
876
|
+
//! input samples. @iterator
|
|
877
|
+
//!
|
|
878
|
+
//! @tparam CounterT
|
|
879
|
+
//! **[inferred]** Integer type for histogram bin counters
|
|
880
|
+
//!
|
|
881
|
+
//! @tparam LevelT
|
|
882
|
+
//! **[inferred]** Type for specifying boundaries (levels)
|
|
883
|
+
//!
|
|
884
|
+
//! @tparam OffsetT
|
|
885
|
+
//! **[inferred]** Signed integer type for sequence offsets, list lengths,
|
|
886
|
+
//! pointer differences, etc. @offset_size1
|
|
887
|
+
//!
|
|
888
|
+
//! @param[in] d_temp_storage
|
|
889
|
+
//! Device-accessible allocation of temporary storage. When `nullptr`, the
|
|
890
|
+
//! required allocation size is written to `temp_storage_bytes` and no work
|
|
891
|
+
//! is done.
|
|
892
|
+
//!
|
|
893
|
+
//! @param[in,out] temp_storage_bytes
|
|
894
|
+
//! Reference to size in bytes of `d_temp_storage` allocation
|
|
895
|
+
//!
|
|
896
|
+
//! @param[in] d_samples
|
|
897
|
+
//! The pointer to the input sequence of data samples.
|
|
898
|
+
//!
|
|
899
|
+
//! @param[out] d_histogram
|
|
900
|
+
//! The pointer to the histogram counter output array of length
|
|
901
|
+
//! `num_levels - 1`.
|
|
902
|
+
//!
|
|
903
|
+
//! @param[in] num_levels
|
|
904
|
+
//! The number of boundaries (levels) for delineating histogram samples.
|
|
905
|
+
//! Implies that the number of bins is `num_levels - 1`.
|
|
906
|
+
//!
|
|
907
|
+
//! @param[in] d_levels
|
|
908
|
+
//! The pointer to the array of boundaries (levels). Bin ranges are defined
|
|
909
|
+
//! by consecutive boundary pairings: lower sample value boundaries are
|
|
910
|
+
//! inclusive and upper sample value boundaries are exclusive.
|
|
911
|
+
//!
|
|
912
|
+
//! @param[in] num_samples
|
|
913
|
+
//! The number of input data samples (i.e., the length of `d_samples`)
|
|
914
|
+
//!
|
|
915
|
+
//! @param[in] stream
|
|
916
|
+
//! @rst
|
|
917
|
+
//! **[optional]** CUDA stream to launch kernels within. Default is stream\ :sub:`0`.
|
|
918
|
+
//! @endrst
|
|
919
|
+
template <typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT>
|
|
920
|
+
CUB_RUNTIME_FUNCTION static cudaError_t HistogramRange(
|
|
921
|
+
void* d_temp_storage,
|
|
922
|
+
size_t& temp_storage_bytes,
|
|
923
|
+
SampleIteratorT d_samples,
|
|
924
|
+
CounterT* d_histogram,
|
|
925
|
+
int num_levels,
|
|
926
|
+
const LevelT* d_levels,
|
|
927
|
+
OffsetT num_samples,
|
|
928
|
+
cudaStream_t stream = 0)
|
|
929
|
+
{
|
|
930
|
+
/// The sample value type of the input iterator
|
|
931
|
+
using SampleT = cub::detail::it_value_t<SampleIteratorT>;
|
|
932
|
+
return MultiHistogramRange<1, 1>(
|
|
933
|
+
d_temp_storage,
|
|
934
|
+
temp_storage_bytes,
|
|
935
|
+
d_samples,
|
|
936
|
+
::cuda::std::array{d_histogram},
|
|
937
|
+
::cuda::std::array{num_levels},
|
|
938
|
+
::cuda::std::array{d_levels},
|
|
939
|
+
num_samples,
|
|
940
|
+
(OffsetT) 1,
|
|
941
|
+
(size_t) (sizeof(SampleT) * num_samples),
|
|
942
|
+
stream);
|
|
943
|
+
}
|
|
944
|
+
|
|
945
|
+
//! @rst
|
|
946
|
+
//! Computes an intensity histogram from a sequence of data samples using the specified bin boundary levels.
|
|
947
|
+
//!
|
|
948
|
+
//! - A two-dimensional *region of interest* within ``d_samples`` can be
|
|
949
|
+
//! specified using the ``num_row_samples``, ``num_rows``, and ``row_stride_bytes`` parameters.
|
|
950
|
+
//! - The row stride must be a whole multiple of the sample data type
|
|
951
|
+
//! size, i.e., ``(row_stride_bytes % sizeof(SampleT)) == 0``.
|
|
952
|
+
//! - The number of histogram bins is (``num_levels - 1``)
|
|
953
|
+
//! - The value range for bin\ :sub:`i` is ``[level[i], level[i+1])``
|
|
954
|
+
//! - For a given row ``r`` in ``[0, num_rows)``, let
|
|
955
|
+
//! ``row_begin = d_samples + r * row_stride_bytes / sizeof(SampleT)`` and
|
|
956
|
+
//! ``row_end = row_begin + num_row_samples``. The range
|
|
957
|
+
//! ``[d_histogram, d_histogram + num_levels - 1)`` shall not overlap
|
|
958
|
+
//! ``[row_begin, row_end)`` nor ``[d_levels, d_levels + num_levels)``.
|
|
959
|
+
//! The ranges ``[d_levels, d_levels + num_levels)`` and ``[row_begin, row_end)`` may overlap.
|
|
960
|
+
//! - @devicestorage
|
|
961
|
+
//!
|
|
962
|
+
//! Snippet
|
|
963
|
+
//! +++++++
|
|
964
|
+
//!
|
|
965
|
+
//! The code snippet below illustrates the computation of a six-bin histogram
|
|
966
|
+
//! from a 2x5 region of interest within a flattened 2x7 array of float samples.
|
|
967
|
+
//!
|
|
968
|
+
//! .. code-block:: c++
|
|
969
|
+
//!
|
|
970
|
+
//! #include <cub/cub.cuh> // or equivalently <cub/device/device_histogram.cuh>
|
|
971
|
+
//!
|
|
972
|
+
//! // Declare, allocate, and initialize device-accessible pointers for input samples and
|
|
973
|
+
//! // output histogram
|
|
974
|
+
//! int num_row_samples; // e.g., 5
|
|
975
|
+
//! int num_rows; // e.g., 2;
|
|
976
|
+
//! int row_stride_bytes; // e.g., 7 * sizeof(float)
|
|
977
|
+
//! float* d_samples; // e.g., [2.2, 6.0, 7.1, 2.9, 3.5, -, -,
|
|
978
|
+
//! // 0.3, 2.9, 2.0, 6.1, 999.5, -, -]
|
|
979
|
+
//! int* d_histogram; // e.g., [ -, -, -, -, -, -]
|
|
980
|
+
//! int num_levels; // e.g., 7 (seven level boundaries for six bins)
|
|
981
|
+
//! float *d_levels; // e.g., [0.0, 2.0, 4.0, 6.0, 8.0, 12.0, 16.0]
|
|
982
|
+
//! ...
|
|
983
|
+
//!
|
|
984
|
+
//! // Determine temporary device storage requirements
|
|
985
|
+
//! void* d_temp_storage = nullptr;
|
|
986
|
+
//! size_t temp_storage_bytes = 0;
|
|
987
|
+
//! cub::DeviceHistogram::HistogramRange(
|
|
988
|
+
//! d_temp_storage, temp_storage_bytes,
|
|
989
|
+
//! d_samples, d_histogram, num_levels, d_levels,
|
|
990
|
+
//! num_row_samples, num_rows, row_stride_bytes);
|
|
991
|
+
//!
|
|
992
|
+
//! // Allocate temporary storage
|
|
993
|
+
//! cudaMalloc(&d_temp_storage, temp_storage_bytes);
|
|
994
|
+
//!
|
|
995
|
+
//! // Compute histograms
|
|
996
|
+
//! cub::DeviceHistogram::HistogramRange(
|
|
997
|
+
//! d_temp_storage, temp_storage_bytes,
|
|
998
|
+
//! d_samples, d_histogram, num_levels, d_levels,
|
|
999
|
+
//! num_row_samples, num_rows, row_stride_bytes);
|
|
1000
|
+
//!
|
|
1001
|
+
//! // d_histogram <-- [1, 5, 0, 3, 0, 0];
|
|
1002
|
+
//!
|
|
1003
|
+
//! @endrst
|
|
1004
|
+
//!
|
|
1005
|
+
//! @tparam SampleIteratorT
|
|
1006
|
+
//! **[inferred]** Random-access input iterator type for reading
|
|
1007
|
+
//! input samples. @iterator
|
|
1008
|
+
//!
|
|
1009
|
+
//! @tparam CounterT
|
|
1010
|
+
//! **[inferred]** Integer type for histogram bin counters
|
|
1011
|
+
//!
|
|
1012
|
+
//! @tparam LevelT
|
|
1013
|
+
//! **[inferred]** Type for specifying boundaries (levels)
|
|
1014
|
+
//!
|
|
1015
|
+
//! @tparam OffsetT
|
|
1016
|
+
//! **[inferred]** Signed integer type for sequence offsets, list lengths,
|
|
1017
|
+
//! pointer differences, etc. @offset_size1
|
|
1018
|
+
//!
|
|
1019
|
+
//! @param[in] d_temp_storage
|
|
1020
|
+
//! Device-accessible allocation of temporary storage. When `nullptr`, the
|
|
1021
|
+
//! required allocation size is written to `temp_storage_bytes` and no
|
|
1022
|
+
//! work is done.
|
|
1023
|
+
//!
|
|
1024
|
+
//! @param[in,out] temp_storage_bytes
|
|
1025
|
+
//! Reference to size in bytes of `d_temp_storage` allocation
|
|
1026
|
+
//!
|
|
1027
|
+
//! @param[in] d_samples
|
|
1028
|
+
//! The pointer to the input sequence of data samples.
|
|
1029
|
+
//!
|
|
1030
|
+
//! @param[out] d_histogram
|
|
1031
|
+
//! The pointer to the histogram counter output array of length
|
|
1032
|
+
//! `num_levels - 1`.
|
|
1033
|
+
//!
|
|
1034
|
+
//! @param[in] num_levels
|
|
1035
|
+
//! The number of boundaries (levels) for delineating histogram samples.
|
|
1036
|
+
//! Implies that the number of bins is `num_levels - 1`.
|
|
1037
|
+
//!
|
|
1038
|
+
//! @param[in] d_levels
|
|
1039
|
+
//! The pointer to the array of boundaries (levels). Bin ranges are defined
|
|
1040
|
+
//! by consecutive boundary pairings: lower sample value boundaries are
|
|
1041
|
+
//! inclusive and upper sample value boundaries are exclusive.
|
|
1042
|
+
//!
|
|
1043
|
+
//! @param[in] num_row_samples
|
|
1044
|
+
//! The number of data samples per row in the region of interest
|
|
1045
|
+
//!
|
|
1046
|
+
//! @param[in] num_rows
|
|
1047
|
+
//! The number of rows in the region of interest
|
|
1048
|
+
//!
|
|
1049
|
+
//! @param[in] row_stride_bytes
|
|
1050
|
+
//! The number of bytes between starts of consecutive rows in the region
|
|
1051
|
+
//! of interest
|
|
1052
|
+
//!
|
|
1053
|
+
//! @param[in] stream
|
|
1054
|
+
//! @rst
|
|
1055
|
+
//! **[optional]** CUDA stream to launch kernels within. Default is stream\ :sub:`0`.
|
|
1056
|
+
//! @endrst
|
|
1057
|
+
template <typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT>
|
|
1058
|
+
CUB_RUNTIME_FUNCTION static cudaError_t HistogramRange(
|
|
1059
|
+
void* d_temp_storage,
|
|
1060
|
+
size_t& temp_storage_bytes,
|
|
1061
|
+
SampleIteratorT d_samples,
|
|
1062
|
+
CounterT* d_histogram,
|
|
1063
|
+
int num_levels,
|
|
1064
|
+
const LevelT* d_levels,
|
|
1065
|
+
OffsetT num_row_samples,
|
|
1066
|
+
OffsetT num_rows,
|
|
1067
|
+
size_t row_stride_bytes,
|
|
1068
|
+
cudaStream_t stream = 0)
|
|
1069
|
+
{
|
|
1070
|
+
return MultiHistogramRange<1, 1>(
|
|
1071
|
+
d_temp_storage,
|
|
1072
|
+
temp_storage_bytes,
|
|
1073
|
+
d_samples,
|
|
1074
|
+
::cuda::std::array{d_histogram},
|
|
1075
|
+
::cuda::std::array{num_levels},
|
|
1076
|
+
::cuda::std::array{d_levels},
|
|
1077
|
+
num_row_samples,
|
|
1078
|
+
num_rows,
|
|
1079
|
+
row_stride_bytes,
|
|
1080
|
+
stream);
|
|
1081
|
+
}
|
|
1082
|
+
|
|
1083
|
+
//! @rst
|
|
1084
|
+
//! Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples
|
|
1085
|
+
//! using the specified bin boundary levels.
|
|
1086
|
+
//!
|
|
1087
|
+
//! - The input is a sequence of *pixel* structures, where each pixel
|
|
1088
|
+
//! comprises a record of ``NUM_CHANNELS`` consecutive data samples (e.g., an *RGBA* pixel).
|
|
1089
|
+
//! - ``NUM_CHANNELS`` can be up to 4.
|
|
1090
|
+
//! - Of the ``NUM_CHANNELS`` specified, the function will only compute
|
|
1091
|
+
//! histograms for the first ``NUM_ACTIVE_CHANNELS`` (e.g., *RGB* histograms from *RGBA* pixel samples).
|
|
1092
|
+
//! - The number of histogram bins for channel\ :sub:`i` is ``num_levels[i] - 1``.
|
|
1093
|
+
//! - For channel\ :sub:`i`, bin ``j`` covers the half-open sample-value range
|
|
1094
|
+
//! ``[d_levels[i][j], d_levels[i][j + 1])``; bin widths are set by the supplied levels and need not be equal.
|
|
1095
|
+
//! - For given channels ``c1`` and ``c2`` in ``[0, NUM_ACTIVE_CHANNELS)``, the
|
|
1096
|
+
//! range ``[d_histogram[c1], d_histogram[c1] + num_levels[c1] - 1)`` shall
|
|
1097
|
+
//! not overlap ``[d_samples, d_samples + NUM_CHANNELS * num_pixels)`` nor
|
|
1098
|
+
//! ``[d_levels[c2], d_levels[c2] + num_levels[c2])`` in any way.
|
|
1099
|
+
//! The ranges ``[d_levels[c2], d_levels[c2] + num_levels[c2])`` and
|
|
1100
|
+
//! ``[d_samples, d_samples + NUM_CHANNELS * num_pixels)`` may overlap.
|
|
1101
|
+
//! - @devicestorage
|
|
1102
|
+
//!
|
|
1103
|
+
//! Snippet
|
|
1104
|
+
//! +++++++
|
|
1105
|
+
//!
|
|
1106
|
+
//! The code snippet below illustrates the computation of three 4-bin *RGB*
|
|
1107
|
+
//! histograms from a quad-channel sequence of *RGBA* pixels
|
|
1108
|
+
//! (8 bits per channel per pixel)
|
|
1109
|
+
//!
|
|
1110
|
+
//! .. code-block:: c++
|
|
1111
|
+
//!
|
|
1112
|
+
//! #include <cub/cub.cuh> // or equivalently <cub/device/device_histogram.cuh>
|
|
1113
|
+
//!
|
|
1114
|
+
//! // Declare, allocate, and initialize device-accessible pointers for
|
|
1115
|
+
//! // input samples and output histograms
|
|
1116
|
+
//! int num_pixels; // e.g., 5
|
|
1117
|
+
//! unsigned char *d_samples; // e.g., [(2, 6, 7, 5),(3, 0, 2, 1),(7, 0, 6, 2),
|
|
1118
|
+
//! // (0, 6, 7, 5),(3, 0, 2, 6)]
|
|
1119
|
+
//! unsigned int *d_histogram[3]; // e.g., [[ -, -, -, -],[ -, -, -, -],[ -, -, -, -]];
|
|
1120
|
+
//! int num_levels[3]; // e.g., {5, 5, 5};
|
|
1121
|
+
//! unsigned int *d_levels[3]; // e.g., [ [0, 2, 4, 6, 8],
|
|
1122
|
+
//! // [0, 2, 4, 6, 8],
|
|
1123
|
+
//! // [0, 2, 4, 6, 8] ];
|
|
1124
|
+
//! ...
|
|
1125
|
+
//!
|
|
1126
|
+
//! // Determine temporary device storage requirements
|
|
1127
|
+
//! void* d_temp_storage = nullptr;
|
|
1128
|
+
//! size_t temp_storage_bytes = 0;
|
|
1129
|
+
//! cub::DeviceHistogram::MultiHistogramRange<4, 3>(
|
|
1130
|
+
//! d_temp_storage, temp_storage_bytes,
|
|
1131
|
+
//! d_samples, d_histogram, num_levels, d_levels, num_pixels);
|
|
1132
|
+
//!
|
|
1133
|
+
//! // Allocate temporary storage
|
|
1134
|
+
//! cudaMalloc(&d_temp_storage, temp_storage_bytes);
|
|
1135
|
+
//!
|
|
1136
|
+
//! // Compute histograms
|
|
1137
|
+
//! cub::DeviceHistogram::MultiHistogramRange<4, 3>(
|
|
1138
|
+
//! d_temp_storage, temp_storage_bytes,
|
|
1139
|
+
//! d_samples, d_histogram, num_levels, d_levels, num_pixels);
|
|
1140
|
+
//!
|
|
1141
|
+
//! // d_histogram <-- [ [1, 3, 0, 1],
|
|
1142
|
+
//! // [3, 0, 0, 2],
|
|
1143
|
+
//! // [0, 2, 0, 3] ]
|
|
1144
|
+
//!
|
|
1145
|
+
//! @endrst
|
|
1146
|
+
//!
|
|
1147
|
+
//! @tparam NUM_CHANNELS
|
|
1148
|
+
//! Number of channels interleaved in the input data (may be greater than
|
|
1149
|
+
//! the number of channels being actively histogrammed)
|
|
1150
|
+
//!
|
|
1151
|
+
//! @tparam NUM_ACTIVE_CHANNELS
|
|
1152
|
+
//! **[inferred]** Number of channels actively being histogrammed
|
|
1153
|
+
//!
|
|
1154
|
+
//! @tparam SampleIteratorT
|
|
1155
|
+
//! **[inferred]** Random-access input iterator type for reading
|
|
1156
|
+
//! input samples. @iterator
|
|
1157
|
+
//!
|
|
1158
|
+
//! @tparam CounterT
|
|
1159
|
+
//! **[inferred]** Integer type for histogram bin counters
|
|
1160
|
+
//!
|
|
1161
|
+
//! @tparam LevelT
|
|
1162
|
+
//! **[inferred]** Type for specifying boundaries (levels)
|
|
1163
|
+
//!
|
|
1164
|
+
//! @tparam OffsetT
|
|
1165
|
+
//! **[inferred]** Signed integer type for sequence offsets, list lengths,
|
|
1166
|
+
//! pointer differences, etc. @offset_size1
|
|
1167
|
+
//!
|
|
1168
|
+
//! @param[in] d_temp_storage
|
|
1169
|
+
//! Device-accessible allocation of temporary storage. When `nullptr`, the
|
|
1170
|
+
//! required allocation size is written to `temp_storage_bytes` and no
|
|
1171
|
+
//! work is done.
|
|
1172
|
+
//!
|
|
1173
|
+
//! @param[in,out] temp_storage_bytes
|
|
1174
|
+
//! Reference to size in bytes of `d_temp_storage` allocation
|
|
1175
|
+
//!
|
|
1176
|
+
//! @param[in] d_samples
|
|
1177
|
+
//! The pointer to the multi-channel input sequence of data samples.
|
|
1178
|
+
//! The samples from different channels are assumed to be interleaved (e.g.,
|
|
1179
|
+
//! an array of 32-bit pixels where each pixel consists of four *RGBA*
|
|
1180
|
+
//! 8-bit samples).
|
|
1181
|
+
//!
|
|
1182
|
+
//! @param[out] d_histogram
|
|
1183
|
+
//! @rst
|
|
1184
|
+
//! The pointers to the histogram counter output arrays, one for each active
|
|
1185
|
+
//! channel. For channel\ :sub:`i`, the allocation length of
|
|
1186
|
+
//! ``d_histogram[i]`` should be ``num_levels[i] - 1``.
|
|
1187
|
+
//! @endrst
|
|
1188
|
+
//!
|
|
1189
|
+
//! @param[in] num_levels
|
|
1190
|
+
//! @rst
|
|
1191
|
+
//! The number of boundaries (levels) for delineating histogram samples in
|
|
1192
|
+
//! each active channel. Implies that the number of bins for
|
|
1193
|
+
//! channel\ :sub:`i` is ``num_levels[i] - 1``.
|
|
1194
|
+
//! @endrst
|
|
1195
|
+
//!
|
|
1196
|
+
//! @param[in] d_levels
|
|
1197
|
+
//! The pointers to the arrays of boundaries (levels), one for each active
|
|
1198
|
+
//! channel. Bin ranges are defined by consecutive boundary pairings: lower
|
|
1199
|
+
//! sample value boundaries are inclusive and upper sample value boundaries
|
|
1200
|
+
//! are exclusive.
|
|
1201
|
+
//!
|
|
1202
|
+
//! @param[in] num_pixels
|
|
1203
|
+
//! The number of multi-channel pixels (i.e., the length of `d_samples / NUM_CHANNELS`)
|
|
1204
|
+
//!
|
|
1205
|
+
//! @param[in] stream
|
|
1206
|
+
//! @rst
|
|
1207
|
+
//! **[optional]** CUDA stream to launch kernels within. Default is stream\ :sub:`0`.
|
|
1208
|
+
//! @endrst
|
|
1209
|
+
template <int NUM_CHANNELS,
|
|
1210
|
+
int NUM_ACTIVE_CHANNELS,
|
|
1211
|
+
typename SampleIteratorT,
|
|
1212
|
+
typename CounterT,
|
|
1213
|
+
typename LevelT,
|
|
1214
|
+
typename OffsetT>
|
|
1215
|
+
CUB_RUNTIME_FUNCTION static cudaError_t MultiHistogramRange(
|
|
1216
|
+
void* d_temp_storage,
|
|
1217
|
+
size_t& temp_storage_bytes,
|
|
1218
|
+
SampleIteratorT d_samples,
|
|
1219
|
+
::cuda::std::array<CounterT*, NUM_ACTIVE_CHANNELS> d_histogram,
|
|
1220
|
+
::cuda::std::array<int, NUM_ACTIVE_CHANNELS> num_levels,
|
|
1221
|
+
::cuda::std::array<const LevelT*, NUM_ACTIVE_CHANNELS> d_levels,
|
|
1222
|
+
OffsetT num_pixels,
|
|
1223
|
+
cudaStream_t stream = 0)
|
|
1224
|
+
{
|
|
1225
|
+
/// The sample value type of the input iterator
|
|
1226
|
+
using SampleT = cub::detail::it_value_t<SampleIteratorT>;
|
|
1227
|
+
|
|
1228
|
+
return MultiHistogramRange<NUM_CHANNELS, NUM_ACTIVE_CHANNELS>(
|
|
1229
|
+
d_temp_storage,
|
|
1230
|
+
temp_storage_bytes,
|
|
1231
|
+
d_samples,
|
|
1232
|
+
d_histogram,
|
|
1233
|
+
num_levels,
|
|
1234
|
+
d_levels,
|
|
1235
|
+
num_pixels,
|
|
1236
|
+
(OffsetT) 1,
|
|
1237
|
+
(size_t) (sizeof(SampleT) * NUM_CHANNELS * num_pixels),
|
|
1238
|
+
stream);
|
|
1239
|
+
}
|
|
1240
|
+
|
|
1241
|
+
//! Deprecate [Since 3.0]
|
|
1242
|
+
template <int NUM_CHANNELS,
|
|
1243
|
+
int NUM_ACTIVE_CHANNELS,
|
|
1244
|
+
typename SampleIteratorT,
|
|
1245
|
+
typename CounterT,
|
|
1246
|
+
typename LevelT,
|
|
1247
|
+
typename OffsetT>
|
|
1248
|
+
CCCL_DEPRECATED_BECAUSE("Prefer the new overload taking cuda::std::arrays") CUB_RUNTIME_FUNCTION static cudaError_t
|
|
1249
|
+
MultiHistogramRange(
|
|
1250
|
+
void* d_temp_storage,
|
|
1251
|
+
size_t& temp_storage_bytes,
|
|
1252
|
+
SampleIteratorT d_samples,
|
|
1253
|
+
CounterT* d_histogram[NUM_ACTIVE_CHANNELS],
|
|
1254
|
+
const int num_levels[NUM_ACTIVE_CHANNELS],
|
|
1255
|
+
const LevelT* const d_levels[NUM_ACTIVE_CHANNELS],
|
|
1256
|
+
OffsetT num_pixels,
|
|
1257
|
+
cudaStream_t stream = 0)
|
|
1258
|
+
{
|
|
1259
|
+
return MultiHistogramRange<NUM_CHANNELS, NUM_ACTIVE_CHANNELS>(
|
|
1260
|
+
d_temp_storage,
|
|
1261
|
+
temp_storage_bytes,
|
|
1262
|
+
d_samples,
|
|
1263
|
+
to_array<NUM_ACTIVE_CHANNELS>(d_histogram),
|
|
1264
|
+
to_array<NUM_ACTIVE_CHANNELS>(num_levels),
|
|
1265
|
+
to_array<NUM_ACTIVE_CHANNELS>(d_levels),
|
|
1266
|
+
num_pixels,
|
|
1267
|
+
stream);
|
|
1268
|
+
}
|
|
1269
|
+
|
|
1270
|
+
//! @rst
|
|
1271
|
+
//! Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using
|
|
1272
|
+
//! the specified bin boundary levels.
|
|
1273
|
+
//!
|
|
1274
|
+
//! - The input is a sequence of *pixel* structures, where each pixel comprises
|
|
1275
|
+
//! a record of ``NUM_CHANNELS`` consecutive data samples (e.g., an *RGBA* pixel).
|
|
1276
|
+
//! - ``NUM_CHANNELS`` can be up to 4.
|
|
1277
|
+
//! - Of the ``NUM_CHANNELS`` specified, the function will only compute
|
|
1278
|
+
//! histograms for the first ``NUM_ACTIVE_CHANNELS`` (e.g., *RGB* histograms from *RGBA* pixel samples).
|
|
1279
|
+
//! - A two-dimensional *region of interest* within ``d_samples`` can be
|
|
1280
|
+
//! specified using the ``num_row_samples``, ``num_rows``, and ``row_stride_bytes`` parameters.
|
|
1281
|
+
//! - The row stride must be a whole multiple of the sample data type
|
|
1282
|
+
//! size, i.e., ``(row_stride_bytes % sizeof(SampleT)) == 0``.
|
|
1283
|
+
//! - The number of histogram bins for channel\ :sub:`i` is ``num_levels[i] - 1``.
|
|
1284
|
+
//! - For channel\ :sub:`i`, bin ``j`` covers the half-open sample-value range
|
|
1285
|
+
//! ``[d_levels[i][j], d_levels[i][j + 1])``; bin widths are set by the supplied levels and need not be equal.
|
|
1286
|
+
//! - For a given row ``r`` in ``[0, num_rows)``, and sample ``s`` in ``[0, num_row_pixels)``, let
|
|
1287
|
+
//! ``row_begin = d_samples + r * row_stride_bytes / sizeof(SampleT)``,
|
|
1288
|
+
//! ``sample_begin = row_begin + s * NUM_CHANNELS``, and
|
|
1289
|
+
//! ``sample_end = sample_begin + NUM_ACTIVE_CHANNELS``. For given channels
|
|
1290
|
+
//! ``c1`` and ``c2`` in ``[0, NUM_ACTIVE_CHANNELS)``, the range
|
|
1291
|
+
//! ``[d_histogram[c1], d_histogram[c1] + num_levels[c1] - 1)`` shall not overlap
|
|
1292
|
+
//! ``[sample_begin, sample_end)`` nor
|
|
1293
|
+
//! ``[d_levels[c2], d_levels[c2] + num_levels[c2])`` in any way. The ranges
|
|
1294
|
+
//! ``[d_levels[c2], d_levels[c2] + num_levels[c2])`` and
|
|
1295
|
+
//! ``[sample_begin, sample_end)`` may overlap.
|
|
1296
|
+
//! - @devicestorage
|
|
1297
|
+
//!
|
|
1298
|
+
//! Snippet
|
|
1299
|
+
//! +++++++
|
|
1300
|
+
//!
|
|
1301
|
+
//! The code snippet below illustrates the computation of three 4-bin *RGB*
|
|
1302
|
+
//! histograms from a 2x3 region of interest within a flattened 2x4 array
|
|
1303
|
+
//! of quad-channel *RGBA* pixels (8 bits per channel per pixel).
|
|
1304
|
+
//!
|
|
1305
|
+
//!
|
|
1306
|
+
//! .. code-block:: c++
|
|
1307
|
+
//!
|
|
1308
|
+
//! #include <cub/cub.cuh> // or equivalently <cub/device/device_histogram.cuh>
|
|
1309
|
+
//!
|
|
1310
|
+
//! // Declare, allocate, and initialize device-accessible pointers for input
|
|
1311
|
+
//! // samples and output histograms
|
|
1312
|
+
//! int num_row_pixels; // e.g., 3
|
|
1313
|
+
//! int num_rows; // e.g., 2
|
|
1314
|
+
//! size_t row_stride_bytes; // e.g., 4 * sizeof(unsigned char) * NUM_CHANNELS
|
|
1315
|
+
//! unsigned char* d_samples; // e.g., [(2, 6, 7, 5),(3, 0, 2, 1),(1, 1, 1, 1),(-, -, -, -),
|
|
1316
|
+
//! // (7, 0, 6, 2),(0, 6, 7, 5),(3, 0, 2, 6),(-, -, -, -)]
|
|
1317
|
+
//! int* d_histogram[3]; // e.g., [[ -, -, -, -],[ -, -, -, -],[ -, -, -, -]];
|
|
1318
|
+
//! int num_levels[3]; // e.g., {5, 5, 5};
|
|
1319
|
+
//! unsigned int* d_levels[3]; // e.g., [ [0, 2, 4, 6, 8],
|
|
1320
|
+
//! // [0, 2, 4, 6, 8],
|
|
1321
|
+
//! // [0, 2, 4, 6, 8] ];
|
|
1322
|
+
//! ...
|
|
1323
|
+
//!
|
|
1324
|
+
//! // Determine temporary device storage requirements
|
|
1325
|
+
//! void* d_temp_storage = nullptr;
|
|
1326
|
+
//! size_t temp_storage_bytes = 0;
|
|
1327
|
+
//! cub::DeviceHistogram::MultiHistogramRange<4, 3>(
|
|
1328
|
+
//! d_temp_storage, temp_storage_bytes,
|
|
1329
|
+
//! d_samples, d_histogram, num_levels, d_levels,
|
|
1330
|
+
//! num_row_pixels, num_rows, row_stride_bytes);
|
|
1331
|
+
//!
|
|
1332
|
+
//! // Allocate temporary storage
|
|
1333
|
+
//! cudaMalloc(&d_temp_storage, temp_storage_bytes);
|
|
1334
|
+
//!
|
|
1335
|
+
//! // Compute histograms
|
|
1336
|
+
//! cub::DeviceHistogram::MultiHistogramRange<4, 3>(
|
|
1337
|
+
//! d_temp_storage, temp_storage_bytes,
|
|
1338
|
+
//! d_samples, d_histogram, num_levels,
|
|
1339
|
+
//! d_levels, num_row_pixels, num_rows, row_stride_bytes);
|
|
1340
|
+
//!
|
|
1341
|
+
//! // d_histogram <-- [ [2, 3, 0, 1],
|
|
1342
|
+
//! // [4, 0, 0, 2],
|
|
1343
|
+
//! // [1, 2, 0, 3] ]
|
|
1344
|
+
//!
|
|
1345
|
+
//! @endrst
|
|
1346
|
+
//!
|
|
1347
|
+
//! @tparam NUM_CHANNELS
|
|
1348
|
+
//! Number of channels interleaved in the input data (may be greater than
|
|
1349
|
+
//! the number of channels being actively histogrammed)
|
|
1350
|
+
//!
|
|
1351
|
+
//! @tparam NUM_ACTIVE_CHANNELS
|
|
1352
|
+
//! **[inferred]** Number of channels actively being histogrammed
|
|
1353
|
+
//!
|
|
1354
|
+
//! @tparam SampleIteratorT
|
|
1355
|
+
//! **[inferred]** Random-access input iterator type for reading input
|
|
1356
|
+
//! samples. @iterator
|
|
1357
|
+
//!
|
|
1358
|
+
//! @tparam CounterT
|
|
1359
|
+
//! **[inferred]** Integer type for histogram bin counters
|
|
1360
|
+
//!
|
|
1361
|
+
//! @tparam LevelT
|
|
1362
|
+
//! **[inferred]** Type for specifying boundaries (levels)
|
|
1363
|
+
//!
|
|
1364
|
+
//! @tparam OffsetT
|
|
1365
|
+
//! **[inferred]** Signed integer type for sequence offsets, list lengths,
|
|
1366
|
+
//! pointer differences, etc. @offset_size1
|
|
1367
|
+
//!
|
|
1368
|
+
//! @param[in] d_temp_storage
|
|
1369
|
+
//! Device-accessible allocation of temporary storage. When `nullptr`, the
|
|
1370
|
+
//! required allocation size is written to `temp_storage_bytes` and no work is done.
|
|
1371
|
+
//!
|
|
1372
|
+
//! @param[in,out] temp_storage_bytes
|
|
1373
|
+
//! Reference to size in bytes of `d_temp_storage` allocation
|
|
1374
|
+
//!
|
|
1375
|
+
//! @param[in] d_samples
|
|
1376
|
+
//! The pointer to the multi-channel input sequence of data samples. The
|
|
1377
|
+
//! samples from different channels are assumed to be interleaved (e.g., an
|
|
1378
|
+
//! array of 32-bit pixels where each pixel consists of four
|
|
1379
|
+
//! *RGBA* 8-bit samples).
|
|
1380
|
+
//!
|
|
1381
|
+
//! @param[out] d_histogram
|
|
1382
|
+
//! @rst
|
|
1383
|
+
//! The pointers to the histogram counter output arrays, one for each active
|
|
1384
|
+
//! channel. For channel\ :sub:`i`, the allocation length of
|
|
1385
|
+
//! ``d_histogram[i]`` should be ``num_levels[i] - 1``.
|
|
1386
|
+
//! @endrst
|
|
1387
|
+
//!
|
|
1388
|
+
//! @param[in] num_levels
|
|
1389
|
+
//! @rst
|
|
1390
|
+
//! The number of boundaries (levels) for delineating histogram samples in
|
|
1391
|
+
//! each active channel. Implies that the number of bins for
|
|
1392
|
+
//! channel\ :sub:`i` is ``num_levels[i] - 1``.
|
|
1393
|
+
//! @endrst
|
|
1394
|
+
//!
|
|
1395
|
+
//! @param[in] d_levels
|
|
1396
|
+
//! The pointers to the arrays of boundaries (levels), one for each active
|
|
1397
|
+
//! channel. Bin ranges are defined by consecutive boundary pairings: lower
|
|
1398
|
+
//! sample value boundaries are inclusive and upper sample value boundaries
|
|
1399
|
+
//! are exclusive.
|
|
1400
|
+
//!
|
|
1401
|
+
//! @param[in] num_row_pixels
|
|
1402
|
+
//! The number of multi-channel pixels per row in the region of interest
|
|
1403
|
+
//!
|
|
1404
|
+
//! @param[in] num_rows
|
|
1405
|
+
//! The number of rows in the region of interest
|
|
1406
|
+
//!
|
|
1407
|
+
//! @param[in] row_stride_bytes
|
|
1408
|
+
//! The number of bytes between starts of consecutive rows in the
|
|
1409
|
+
//! region of interest
|
|
1410
|
+
//!
|
|
1411
|
+
//! @param[in] stream
|
|
1412
|
+
//! @rst
|
|
1413
|
+
//! **[optional]** CUDA stream to launch kernels within. Default is stream\ :sub:`0`.
|
|
1414
|
+
//! @endrst
|
|
1415
|
+
template <int NUM_CHANNELS,
|
|
1416
|
+
int NUM_ACTIVE_CHANNELS,
|
|
1417
|
+
typename SampleIteratorT,
|
|
1418
|
+
typename CounterT,
|
|
1419
|
+
typename LevelT,
|
|
1420
|
+
typename OffsetT>
|
|
1421
|
+
CUB_RUNTIME_FUNCTION static cudaError_t MultiHistogramRange(
|
|
1422
|
+
void* d_temp_storage,
|
|
1423
|
+
size_t& temp_storage_bytes,
|
|
1424
|
+
SampleIteratorT d_samples,
|
|
1425
|
+
::cuda::std::array<CounterT*, NUM_ACTIVE_CHANNELS> d_histogram,
|
|
1426
|
+
::cuda::std::array<int, NUM_ACTIVE_CHANNELS> num_levels,
|
|
1427
|
+
::cuda::std::array<const LevelT*, NUM_ACTIVE_CHANNELS> d_levels,
|
|
1428
|
+
OffsetT num_row_pixels,
|
|
1429
|
+
OffsetT num_rows,
|
|
1430
|
+
size_t row_stride_bytes,
|
|
1431
|
+
cudaStream_t stream = 0)
|
|
1432
|
+
{
|
|
1433
|
+
_CCCL_NVTX_RANGE_SCOPE_IF(d_temp_storage, "cub::DeviceHistogram::MultiHistogramRange");
|
|
1434
|
+
|
|
1435
|
+
/// The sample value type of the input iterator
|
|
1436
|
+
using SampleT = cub::detail::it_value_t<SampleIteratorT>;
|
|
1437
|
+
::cuda::std::bool_constant<sizeof(SampleT) == 1> is_byte_sample;
|
|
1438
|
+
|
|
1439
|
+
if constexpr (sizeof(OffsetT) > sizeof(int))
|
|
1440
|
+
{
|
|
1441
|
+
if ((unsigned long long) (num_rows * row_stride_bytes) < (unsigned long long) INT_MAX)
|
|
1442
|
+
{
|
|
1443
|
+
// Down-convert OffsetT data type
|
|
1444
|
+
return DispatchHistogram<NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, LevelT, int>::DispatchRange(
|
|
1445
|
+
d_temp_storage,
|
|
1446
|
+
temp_storage_bytes,
|
|
1447
|
+
d_samples,
|
|
1448
|
+
d_histogram,
|
|
1449
|
+
num_levels,
|
|
1450
|
+
d_levels,
|
|
1451
|
+
(int) num_row_pixels,
|
|
1452
|
+
(int) num_rows,
|
|
1453
|
+
(int) (row_stride_bytes / sizeof(SampleT)),
|
|
1454
|
+
stream,
|
|
1455
|
+
is_byte_sample);
|
|
1456
|
+
}
|
|
1457
|
+
}
|
|
1458
|
+
|
|
1459
|
+
return DispatchHistogram<NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, LevelT, OffsetT>::DispatchRange(
|
|
1460
|
+
d_temp_storage,
|
|
1461
|
+
temp_storage_bytes,
|
|
1462
|
+
d_samples,
|
|
1463
|
+
d_histogram,
|
|
1464
|
+
num_levels,
|
|
1465
|
+
d_levels,
|
|
1466
|
+
num_row_pixels,
|
|
1467
|
+
num_rows,
|
|
1468
|
+
(OffsetT) (row_stride_bytes / sizeof(SampleT)),
|
|
1469
|
+
stream,
|
|
1470
|
+
is_byte_sample);
|
|
1471
|
+
}
|
|
1472
|
+
|
|
1473
|
+
//! Deprecate [Since 3.0]
|
|
1474
|
+
template <int NUM_CHANNELS,
|
|
1475
|
+
int NUM_ACTIVE_CHANNELS,
|
|
1476
|
+
typename SampleIteratorT,
|
|
1477
|
+
typename CounterT,
|
|
1478
|
+
typename LevelT,
|
|
1479
|
+
typename OffsetT>
|
|
1480
|
+
CCCL_DEPRECATED_BECAUSE("Prefer the new overload taking cuda::std::arrays") CUB_RUNTIME_FUNCTION static cudaError_t
|
|
1481
|
+
MultiHistogramRange(
|
|
1482
|
+
void* d_temp_storage,
|
|
1483
|
+
size_t& temp_storage_bytes,
|
|
1484
|
+
SampleIteratorT d_samples,
|
|
1485
|
+
CounterT* d_histogram[NUM_ACTIVE_CHANNELS],
|
|
1486
|
+
const int num_levels[NUM_ACTIVE_CHANNELS],
|
|
1487
|
+
const LevelT* const d_levels[NUM_ACTIVE_CHANNELS],
|
|
1488
|
+
OffsetT num_row_pixels,
|
|
1489
|
+
OffsetT num_rows,
|
|
1490
|
+
size_t row_stride_bytes,
|
|
1491
|
+
cudaStream_t stream = 0)
|
|
1492
|
+
{
|
|
1493
|
+
return MultiHistogramRange<NUM_CHANNELS, NUM_ACTIVE_CHANNELS>(
|
|
1494
|
+
d_temp_storage,
|
|
1495
|
+
temp_storage_bytes,
|
|
1496
|
+
d_samples,
|
|
1497
|
+
to_array<NUM_ACTIVE_CHANNELS>(d_histogram),
|
|
1498
|
+
to_array<NUM_ACTIVE_CHANNELS>(num_levels),
|
|
1499
|
+
to_array<NUM_ACTIVE_CHANNELS>(d_levels),
|
|
1500
|
+
num_row_pixels,
|
|
1501
|
+
num_rows,
|
|
1502
|
+
row_stride_bytes,
|
|
1503
|
+
stream);
|
|
1504
|
+
}
|
|
1505
|
+
|
|
1506
|
+
//@} end member group
|
|
1507
|
+
};
|
|
1508
|
+
|
|
1509
|
+
CUB_NAMESPACE_END
|