cuda-cccl 0.3.3__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cuda-cccl might be problematic. Click here for more details.
- cuda/cccl/__init__.py +27 -0
- cuda/cccl/_cuda_version_utils.py +24 -0
- cuda/cccl/cooperative/__init__.py +9 -0
- cuda/cccl/cooperative/experimental/__init__.py +24 -0
- cuda/cccl/headers/__init__.py +7 -0
- cuda/cccl/headers/include/__init__.py +1 -0
- cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +259 -0
- cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +1182 -0
- cuda/cccl/headers/include/cub/agent/agent_for.cuh +81 -0
- cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +709 -0
- cuda/cccl/headers/include/cub/agent/agent_merge.cuh +234 -0
- cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +748 -0
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +786 -0
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +286 -0
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +703 -0
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +555 -0
- cuda/cccl/headers/include/cub/agent/agent_reduce.cuh +619 -0
- cuda/cccl/headers/include/cub/agent/agent_reduce_by_key.cuh +806 -0
- cuda/cccl/headers/include/cub/agent/agent_rle.cuh +1124 -0
- cuda/cccl/headers/include/cub/agent/agent_scan.cuh +589 -0
- cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +474 -0
- cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +289 -0
- cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +1117 -0
- cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +346 -0
- cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +606 -0
- cuda/cccl/headers/include/cub/agent/agent_topk.cuh +764 -0
- cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +631 -0
- cuda/cccl/headers/include/cub/agent/single_pass_scan_operators.cuh +1424 -0
- cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +963 -0
- cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +1227 -0
- cuda/cccl/headers/include/cub/block/block_exchange.cuh +1313 -0
- cuda/cccl/headers/include/cub/block/block_histogram.cuh +424 -0
- cuda/cccl/headers/include/cub/block/block_load.cuh +1264 -0
- cuda/cccl/headers/include/cub/block/block_load_to_shared.cuh +432 -0
- cuda/cccl/headers/include/cub/block/block_merge_sort.cuh +800 -0
- cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +1225 -0
- cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +2196 -0
- cuda/cccl/headers/include/cub/block/block_raking_layout.cuh +150 -0
- cuda/cccl/headers/include/cub/block/block_reduce.cuh +667 -0
- cuda/cccl/headers/include/cub/block/block_run_length_decode.cuh +434 -0
- cuda/cccl/headers/include/cub/block/block_scan.cuh +2315 -0
- cuda/cccl/headers/include/cub/block/block_shuffle.cuh +346 -0
- cuda/cccl/headers/include/cub/block/block_store.cuh +1247 -0
- cuda/cccl/headers/include/cub/block/radix_rank_sort_operations.cuh +624 -0
- cuda/cccl/headers/include/cub/block/specializations/block_histogram_atomic.cuh +86 -0
- cuda/cccl/headers/include/cub/block/specializations/block_histogram_sort.cuh +240 -0
- cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking.cuh +252 -0
- cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking_commutative_only.cuh +238 -0
- cuda/cccl/headers/include/cub/block/specializations/block_reduce_warp_reductions.cuh +281 -0
- cuda/cccl/headers/include/cub/block/specializations/block_scan_raking.cuh +790 -0
- cuda/cccl/headers/include/cub/block/specializations/block_scan_warp_scans.cuh +538 -0
- cuda/cccl/headers/include/cub/config.cuh +53 -0
- cuda/cccl/headers/include/cub/cub.cuh +120 -0
- cuda/cccl/headers/include/cub/detail/array_utils.cuh +78 -0
- cuda/cccl/headers/include/cub/detail/choose_offset.cuh +161 -0
- cuda/cccl/headers/include/cub/detail/detect_cuda_runtime.cuh +74 -0
- cuda/cccl/headers/include/cub/detail/device_double_buffer.cuh +96 -0
- cuda/cccl/headers/include/cub/detail/device_memory_resource.cuh +62 -0
- cuda/cccl/headers/include/cub/detail/fast_modulo_division.cuh +253 -0
- cuda/cccl/headers/include/cub/detail/integer_utils.cuh +88 -0
- cuda/cccl/headers/include/cub/detail/launcher/cuda_driver.cuh +142 -0
- cuda/cccl/headers/include/cub/detail/launcher/cuda_runtime.cuh +100 -0
- cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +114 -0
- cuda/cccl/headers/include/cub/detail/ptx-json/README.md +71 -0
- cuda/cccl/headers/include/cub/detail/ptx-json/array.h +68 -0
- cuda/cccl/headers/include/cub/detail/ptx-json/json.h +62 -0
- cuda/cccl/headers/include/cub/detail/ptx-json/object.h +100 -0
- cuda/cccl/headers/include/cub/detail/ptx-json/string.h +53 -0
- cuda/cccl/headers/include/cub/detail/ptx-json/value.h +95 -0
- cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +63 -0
- cuda/cccl/headers/include/cub/detail/rfa.cuh +731 -0
- cuda/cccl/headers/include/cub/detail/strong_load.cuh +189 -0
- cuda/cccl/headers/include/cub/detail/strong_store.cuh +220 -0
- cuda/cccl/headers/include/cub/detail/temporary_storage.cuh +384 -0
- cuda/cccl/headers/include/cub/detail/type_traits.cuh +187 -0
- cuda/cccl/headers/include/cub/detail/uninitialized_copy.cuh +73 -0
- cuda/cccl/headers/include/cub/detail/unsafe_bitcast.cuh +56 -0
- cuda/cccl/headers/include/cub/device/device_adjacent_difference.cuh +596 -0
- cuda/cccl/headers/include/cub/device/device_copy.cuh +276 -0
- cuda/cccl/headers/include/cub/device/device_for.cuh +1063 -0
- cuda/cccl/headers/include/cub/device/device_histogram.cuh +1509 -0
- cuda/cccl/headers/include/cub/device/device_memcpy.cuh +195 -0
- cuda/cccl/headers/include/cub/device/device_merge.cuh +203 -0
- cuda/cccl/headers/include/cub/device/device_merge_sort.cuh +979 -0
- cuda/cccl/headers/include/cub/device/device_partition.cuh +668 -0
- cuda/cccl/headers/include/cub/device/device_radix_sort.cuh +3437 -0
- cuda/cccl/headers/include/cub/device/device_reduce.cuh +2518 -0
- cuda/cccl/headers/include/cub/device/device_run_length_encode.cuh +370 -0
- cuda/cccl/headers/include/cub/device/device_scan.cuh +2212 -0
- cuda/cccl/headers/include/cub/device/device_segmented_radix_sort.cuh +1496 -0
- cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +1430 -0
- cuda/cccl/headers/include/cub/device/device_segmented_sort.cuh +2811 -0
- cuda/cccl/headers/include/cub/device/device_select.cuh +1228 -0
- cuda/cccl/headers/include/cub/device/device_topk.cuh +511 -0
- cuda/cccl/headers/include/cub/device/device_transform.cuh +668 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_adjacent_difference.cuh +315 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_batch_memcpy.cuh +719 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_common.cuh +43 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_copy_mdspan.cuh +79 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_for.cuh +198 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_histogram.cuh +1046 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +303 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge_sort.cuh +473 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +1744 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +1310 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_by_key.cuh +655 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +531 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh +313 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_rle.cuh +615 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan.cuh +517 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan_by_key.cuh +602 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +975 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_select_if.cuh +842 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +341 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +440 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_three_way_partition.cuh +389 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_topk.cuh +627 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +569 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_unique_by_key.cuh +545 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/for_each.cuh +261 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/histogram.cuh +505 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/merge_sort.cuh +334 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/radix_sort.cuh +803 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/reduce.cuh +583 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/scan.cuh +189 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_reduce.cuh +321 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_sort.cuh +522 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/three_way_partition.cuh +201 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +1028 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/unique_by_key.cuh +176 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +67 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +118 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +60 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +275 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +76 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +126 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +1065 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce.cuh +493 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +942 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +673 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +618 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +1010 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +398 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_select_if.cuh +1588 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +440 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_topk.cuh +85 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +481 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +884 -0
- cuda/cccl/headers/include/cub/grid/grid_even_share.cuh +227 -0
- cuda/cccl/headers/include/cub/grid/grid_mapping.cuh +106 -0
- cuda/cccl/headers/include/cub/grid/grid_queue.cuh +202 -0
- cuda/cccl/headers/include/cub/iterator/arg_index_input_iterator.cuh +254 -0
- cuda/cccl/headers/include/cub/iterator/cache_modified_input_iterator.cuh +259 -0
- cuda/cccl/headers/include/cub/iterator/cache_modified_output_iterator.cuh +250 -0
- cuda/cccl/headers/include/cub/iterator/tex_obj_input_iterator.cuh +320 -0
- cuda/cccl/headers/include/cub/thread/thread_load.cuh +349 -0
- cuda/cccl/headers/include/cub/thread/thread_operators.cuh +688 -0
- cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +548 -0
- cuda/cccl/headers/include/cub/thread/thread_scan.cuh +498 -0
- cuda/cccl/headers/include/cub/thread/thread_search.cuh +199 -0
- cuda/cccl/headers/include/cub/thread/thread_simd.cuh +458 -0
- cuda/cccl/headers/include/cub/thread/thread_sort.cuh +102 -0
- cuda/cccl/headers/include/cub/thread/thread_store.cuh +365 -0
- cuda/cccl/headers/include/cub/util_allocator.cuh +921 -0
- cuda/cccl/headers/include/cub/util_arch.cuh +167 -0
- cuda/cccl/headers/include/cub/util_cpp_dialect.cuh +95 -0
- cuda/cccl/headers/include/cub/util_debug.cuh +207 -0
- cuda/cccl/headers/include/cub/util_device.cuh +800 -0
- cuda/cccl/headers/include/cub/util_macro.cuh +97 -0
- cuda/cccl/headers/include/cub/util_math.cuh +118 -0
- cuda/cccl/headers/include/cub/util_namespace.cuh +176 -0
- cuda/cccl/headers/include/cub/util_policy_wrapper_t.cuh +55 -0
- cuda/cccl/headers/include/cub/util_ptx.cuh +513 -0
- cuda/cccl/headers/include/cub/util_temporary_storage.cuh +122 -0
- cuda/cccl/headers/include/cub/util_type.cuh +1120 -0
- cuda/cccl/headers/include/cub/util_vsmem.cuh +253 -0
- cuda/cccl/headers/include/cub/version.cuh +89 -0
- cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_shfl.cuh +329 -0
- cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_smem.cuh +177 -0
- cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +737 -0
- cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +408 -0
- cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +952 -0
- cuda/cccl/headers/include/cub/warp/specializations/warp_scan_smem.cuh +715 -0
- cuda/cccl/headers/include/cub/warp/warp_exchange.cuh +405 -0
- cuda/cccl/headers/include/cub/warp/warp_load.cuh +614 -0
- cuda/cccl/headers/include/cub/warp/warp_merge_sort.cuh +169 -0
- cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +829 -0
- cuda/cccl/headers/include/cub/warp/warp_scan.cuh +1890 -0
- cuda/cccl/headers/include/cub/warp/warp_store.cuh +521 -0
- cuda/cccl/headers/include/cub/warp/warp_utils.cuh +61 -0
- cuda/cccl/headers/include/cuda/__algorithm/common.h +68 -0
- cuda/cccl/headers/include/cuda/__algorithm/copy.h +196 -0
- cuda/cccl/headers/include/cuda/__algorithm/fill.h +107 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/access_property.h +165 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/access_property_encoding.h +172 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr.h +217 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr_base.h +100 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/apply_access_property.h +83 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/associate_access_property.h +128 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/createpolicy.h +210 -0
- cuda/cccl/headers/include/cuda/__atomic/atomic.h +145 -0
- cuda/cccl/headers/include/cuda/__barrier/async_contract_fulfillment.h +39 -0
- cuda/cccl/headers/include/cuda/__barrier/barrier.h +65 -0
- cuda/cccl/headers/include/cuda/__barrier/barrier_arrive_tx.h +102 -0
- cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +487 -0
- cuda/cccl/headers/include/cuda/__barrier/barrier_expect_tx.h +74 -0
- cuda/cccl/headers/include/cuda/__barrier/barrier_native_handle.h +45 -0
- cuda/cccl/headers/include/cuda/__barrier/barrier_thread_scope.h +60 -0
- cuda/cccl/headers/include/cuda/__bit/bit_reverse.h +171 -0
- cuda/cccl/headers/include/cuda/__bit/bitfield.h +122 -0
- cuda/cccl/headers/include/cuda/__bit/bitmask.h +90 -0
- cuda/cccl/headers/include/cuda/__cccl_config +37 -0
- cuda/cccl/headers/include/cuda/__cmath/ceil_div.h +124 -0
- cuda/cccl/headers/include/cuda/__cmath/fast_modulo_division.h +178 -0
- cuda/cccl/headers/include/cuda/__cmath/ilog.h +195 -0
- cuda/cccl/headers/include/cuda/__cmath/ipow.h +107 -0
- cuda/cccl/headers/include/cuda/__cmath/isqrt.h +80 -0
- cuda/cccl/headers/include/cuda/__cmath/mul_hi.h +146 -0
- cuda/cccl/headers/include/cuda/__cmath/neg.h +47 -0
- cuda/cccl/headers/include/cuda/__cmath/pow2.h +74 -0
- cuda/cccl/headers/include/cuda/__cmath/round_down.h +102 -0
- cuda/cccl/headers/include/cuda/__cmath/round_up.h +104 -0
- cuda/cccl/headers/include/cuda/__cmath/uabs.h +57 -0
- cuda/cccl/headers/include/cuda/__complex/complex.h +238 -0
- cuda/cccl/headers/include/cuda/__complex/get_real_imag.h +89 -0
- cuda/cccl/headers/include/cuda/__complex/traits.h +64 -0
- cuda/cccl/headers/include/cuda/__complex_ +28 -0
- cuda/cccl/headers/include/cuda/__device/all_devices.h +140 -0
- cuda/cccl/headers/include/cuda/__device/arch_id.h +176 -0
- cuda/cccl/headers/include/cuda/__device/arch_traits.h +537 -0
- cuda/cccl/headers/include/cuda/__device/attributes.h +772 -0
- cuda/cccl/headers/include/cuda/__device/compute_capability.h +171 -0
- cuda/cccl/headers/include/cuda/__device/device_ref.h +156 -0
- cuda/cccl/headers/include/cuda/__device/physical_device.h +172 -0
- cuda/cccl/headers/include/cuda/__driver/driver_api.h +835 -0
- cuda/cccl/headers/include/cuda/__event/event.h +171 -0
- cuda/cccl/headers/include/cuda/__event/event_ref.h +157 -0
- cuda/cccl/headers/include/cuda/__event/timed_event.h +120 -0
- cuda/cccl/headers/include/cuda/__execution/determinism.h +91 -0
- cuda/cccl/headers/include/cuda/__execution/output_ordering.h +89 -0
- cuda/cccl/headers/include/cuda/__execution/require.h +75 -0
- cuda/cccl/headers/include/cuda/__execution/tune.h +70 -0
- cuda/cccl/headers/include/cuda/__functional/address_stability.h +131 -0
- cuda/cccl/headers/include/cuda/__functional/for_each_canceled.h +321 -0
- cuda/cccl/headers/include/cuda/__functional/maximum.h +58 -0
- cuda/cccl/headers/include/cuda/__functional/minimum.h +58 -0
- cuda/cccl/headers/include/cuda/__functional/proclaim_return_type.h +108 -0
- cuda/cccl/headers/include/cuda/__fwd/barrier.h +38 -0
- cuda/cccl/headers/include/cuda/__fwd/barrier_native_handle.h +42 -0
- cuda/cccl/headers/include/cuda/__fwd/complex.h +48 -0
- cuda/cccl/headers/include/cuda/__fwd/devices.h +44 -0
- cuda/cccl/headers/include/cuda/__fwd/get_stream.h +38 -0
- cuda/cccl/headers/include/cuda/__fwd/pipeline.h +37 -0
- cuda/cccl/headers/include/cuda/__fwd/zip_iterator.h +58 -0
- cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +315 -0
- cuda/cccl/headers/include/cuda/__iterator/counting_iterator.h +483 -0
- cuda/cccl/headers/include/cuda/__iterator/discard_iterator.h +324 -0
- cuda/cccl/headers/include/cuda/__iterator/permutation_iterator.h +456 -0
- cuda/cccl/headers/include/cuda/__iterator/shuffle_iterator.h +334 -0
- cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +418 -0
- cuda/cccl/headers/include/cuda/__iterator/tabulate_output_iterator.h +367 -0
- cuda/cccl/headers/include/cuda/__iterator/transform_input_output_iterator.h +528 -0
- cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +527 -0
- cuda/cccl/headers/include/cuda/__iterator/transform_output_iterator.h +486 -0
- cuda/cccl/headers/include/cuda/__iterator/zip_common.h +148 -0
- cuda/cccl/headers/include/cuda/__iterator/zip_function.h +112 -0
- cuda/cccl/headers/include/cuda/__iterator/zip_iterator.h +557 -0
- cuda/cccl/headers/include/cuda/__iterator/zip_transform_iterator.h +592 -0
- cuda/cccl/headers/include/cuda/__latch/latch.h +44 -0
- cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +533 -0
- cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +238 -0
- cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +152 -0
- cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +117 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/check_preconditions.h +79 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/completion_mechanism.h +47 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_bulk_shared_global.h +60 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_fallback.h +72 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_shared_global.h +148 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/dispatch_memcpy_async.h +165 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/is_local_smem_barrier.h +53 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async.h +179 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_barrier.h +99 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_tx.h +104 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_completion.h +170 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/try_get_barrier_handle.h +59 -0
- cuda/cccl/headers/include/cuda/__memory/address_space.h +227 -0
- cuda/cccl/headers/include/cuda/__memory/align_down.h +56 -0
- cuda/cccl/headers/include/cuda/__memory/align_up.h +56 -0
- cuda/cccl/headers/include/cuda/__memory/aligned_size.h +61 -0
- cuda/cccl/headers/include/cuda/__memory/check_address.h +111 -0
- cuda/cccl/headers/include/cuda/__memory/discard_memory.h +64 -0
- cuda/cccl/headers/include/cuda/__memory/get_device_address.h +58 -0
- cuda/cccl/headers/include/cuda/__memory/is_aligned.h +47 -0
- cuda/cccl/headers/include/cuda/__memory/ptr_in_range.h +93 -0
- cuda/cccl/headers/include/cuda/__memory/ptr_rebind.h +75 -0
- cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +82 -0
- cuda/cccl/headers/include/cuda/__memory_resource/get_property.h +153 -0
- cuda/cccl/headers/include/cuda/__memory_resource/properties.h +113 -0
- cuda/cccl/headers/include/cuda/__memory_resource/resource.h +125 -0
- cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +652 -0
- cuda/cccl/headers/include/cuda/__numeric/add_overflow.h +306 -0
- cuda/cccl/headers/include/cuda/__numeric/narrow.h +108 -0
- cuda/cccl/headers/include/cuda/__numeric/overflow_cast.h +59 -0
- cuda/cccl/headers/include/cuda/__numeric/overflow_result.h +43 -0
- cuda/cccl/headers/include/cuda/__nvtx/nvtx.h +120 -0
- cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2983 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/barrier_cluster.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/bfind.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/bmsk.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/clusterlaunchcontrol.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk.h +44 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_commit_group.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_tensor.h +45 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_wait_group.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_mbarrier_arrive.h +42 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk.h +60 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk_tensor.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/elect_sync.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/exit.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/fence.h +49 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/barrier_cluster.h +115 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bfind.h +190 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bmsk.h +54 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/clusterlaunchcontrol.h +242 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk.h +197 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_commit_group.h +25 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_multicast.h +54 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor.h +997 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_gather_scatter.h +318 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_multicast.h +671 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_wait_group.h +46 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive.h +26 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive_noinc.h +26 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk.h +1470 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_bf16.h +132 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_f16.h +132 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_tensor.h +601 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/elect_sync.h +36 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/exit.h +25 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence.h +208 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_mbarrier_init.h +31 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_alias.h +25 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async.h +58 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async_generic_sync_restrict.h +64 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_tensormap_generic.h +102 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_sync_restrict.h +64 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/get_sreg.h +949 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/getctarank.h +32 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/ld.h +5542 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive.h +399 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_expect_tx.h +184 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_no_complete.h +34 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_expect_tx.h +102 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_init.h +27 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait.h +143 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait_parity.h +144 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait.h +286 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait_parity.h +290 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_ld_reduce.h +2202 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_red.h +1362 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_st.h +236 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/prmt.h +230 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/red_async.h +460 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shl.h +96 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shr.h +168 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st.h +1490 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_async.h +123 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_bulk.h +31 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_alloc.h +132 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_commit.h +99 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_cp.h +765 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_fence.h +58 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_ld.h +4927 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma.h +4291 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma_ws.h +7110 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_shift.h +42 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_st.h +5063 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_wait.h +56 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_cp_fenceproxy.h +71 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_replace.h +1030 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/trap.h +25 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/get_sreg.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/getctarank.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/ld.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_arrive.h +45 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_expect_tx.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_init.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_wait.h +46 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_ld_reduce.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_red.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_st.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/prmt.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/red_async.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/shfl_sync.h +244 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/shl.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/shr.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/st.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/st_async.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/st_bulk.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_alloc.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_commit.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_cp.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_fence.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_ld.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma_ws.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_shift.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_st.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_wait.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_cp_fenceproxy.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_replace.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/trap.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/pragmas/enable_smem_spilling.h +47 -0
- cuda/cccl/headers/include/cuda/__ptx/ptx_dot_variants.h +230 -0
- cuda/cccl/headers/include/cuda/__ptx/ptx_helper_functions.h +176 -0
- cuda/cccl/headers/include/cuda/__random/feistel_bijection.h +105 -0
- cuda/cccl/headers/include/cuda/__random/random_bijection.h +88 -0
- cuda/cccl/headers/include/cuda/__runtime/ensure_current_context.h +99 -0
- cuda/cccl/headers/include/cuda/__runtime/types.h +41 -0
- cuda/cccl/headers/include/cuda/__semaphore/counting_semaphore.h +53 -0
- cuda/cccl/headers/include/cuda/__stream/get_stream.h +110 -0
- cuda/cccl/headers/include/cuda/__stream/stream.h +141 -0
- cuda/cccl/headers/include/cuda/__stream/stream_ref.h +303 -0
- cuda/cccl/headers/include/cuda/__type_traits/is_floating_point.h +47 -0
- cuda/cccl/headers/include/cuda/__type_traits/is_specialization_of.h +37 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/access.h +88 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/any_cast.h +83 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_base.h +148 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_from.h +96 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_fwd.h +128 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_ptr.h +304 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_ref.h +337 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_value.h +590 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/conversions.h +169 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/dynamic_any_cast.h +107 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/interfaces.h +359 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/iset.h +142 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/overrides.h +64 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/rtti.h +257 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/semiregular.h +322 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/storage.h +79 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/tagged_ptr.h +58 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/virtcall.h +162 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_functions.h +184 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_ptrs.h +80 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_tables.h +155 -0
- cuda/cccl/headers/include/cuda/__utility/basic_any.h +507 -0
- cuda/cccl/headers/include/cuda/__utility/immovable.h +50 -0
- cuda/cccl/headers/include/cuda/__utility/in_range.h +65 -0
- cuda/cccl/headers/include/cuda/__utility/inherit.h +36 -0
- cuda/cccl/headers/include/cuda/__utility/no_init.h +29 -0
- cuda/cccl/headers/include/cuda/__utility/static_for.h +79 -0
- cuda/cccl/headers/include/cuda/__warp/lane_mask.h +326 -0
- cuda/cccl/headers/include/cuda/__warp/warp_match_all.h +65 -0
- cuda/cccl/headers/include/cuda/__warp/warp_shuffle.h +251 -0
- cuda/cccl/headers/include/cuda/access_property +26 -0
- cuda/cccl/headers/include/cuda/algorithm +27 -0
- cuda/cccl/headers/include/cuda/annotated_ptr +29 -0
- cuda/cccl/headers/include/cuda/atomic +27 -0
- cuda/cccl/headers/include/cuda/barrier +267 -0
- cuda/cccl/headers/include/cuda/bit +29 -0
- cuda/cccl/headers/include/cuda/cmath +37 -0
- cuda/cccl/headers/include/cuda/devices +33 -0
- cuda/cccl/headers/include/cuda/discard_memory +32 -0
- cuda/cccl/headers/include/cuda/functional +32 -0
- cuda/cccl/headers/include/cuda/iterator +39 -0
- cuda/cccl/headers/include/cuda/latch +27 -0
- cuda/cccl/headers/include/cuda/mdspan +28 -0
- cuda/cccl/headers/include/cuda/memory +35 -0
- cuda/cccl/headers/include/cuda/memory_resource +35 -0
- cuda/cccl/headers/include/cuda/numeric +29 -0
- cuda/cccl/headers/include/cuda/pipeline +579 -0
- cuda/cccl/headers/include/cuda/ptx +129 -0
- cuda/cccl/headers/include/cuda/semaphore +31 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/adjacent_find.h +59 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/all_of.h +45 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/any_of.h +45 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/binary_search.h +53 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/clamp.h +48 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/comp.h +58 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/comp_ref_type.h +85 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/copy.h +142 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/copy_backward.h +80 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/copy_if.h +47 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/copy_n.h +73 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/count.h +49 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/count_if.h +49 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/equal.h +128 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +101 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/fill.h +58 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/fill_n.h +51 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/find.h +62 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/find_end.h +225 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/find_first_of.h +73 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/find_if.h +46 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/find_if_not.h +46 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/for_each.h +42 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/for_each_n.h +48 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/generate.h +41 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/generate_n.h +46 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/half_positive.h +49 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/in_fun_result.h +55 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +90 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/is_heap.h +50 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/is_heap_until.h +83 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/is_partitioned.h +57 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/is_permutation.h +252 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted.h +49 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted_until.h +68 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/iter_swap.h +82 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/iterator_operations.h +185 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/lexicographical_compare.h +68 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +82 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/make_heap.h +70 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +88 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/max.h +62 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/max_element.h +67 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/merge.h +89 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/min.h +62 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +87 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/minmax.h +66 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +139 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/mismatch.h +83 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/move.h +86 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/move_backward.h +84 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/next_permutation.h +88 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/none_of.h +45 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort.h +102 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +122 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/partition.h +120 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/partition_copy.h +59 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/partition_point.h +61 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/pop_heap.h +93 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/prev_permutation.h +88 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/push_heap.h +100 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/ranges_for_each.h +84 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/ranges_for_each_n.h +68 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/ranges_iterator_concept.h +65 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min.h +98 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min_element.h +68 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/remove.h +55 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy.h +47 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy_if.h +47 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/remove_if.h +56 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/replace.h +45 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy.h +54 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy_if.h +50 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/replace_if.h +45 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/reverse.h +81 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/reverse_copy.h +43 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/rotate.h +261 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/rotate_copy.h +40 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/search.h +185 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/search_n.h +163 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/set_difference.h +95 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/set_intersection.h +122 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/set_symmetric_difference.h +134 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/set_union.h +128 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/shift_left.h +84 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/shift_right.h +144 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/sift_down.h +139 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/sort_heap.h +70 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/swap_ranges.h +78 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/transform.h +59 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/unique.h +76 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/unique_copy.h +155 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_iter.h +95 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_range.h +126 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +83 -0
- cuda/cccl/headers/include/cuda/std/__algorithm_ +26 -0
- cuda/cccl/headers/include/cuda/std/__atomic/api/common.h +192 -0
- cuda/cccl/headers/include/cuda/std/__atomic/api/owned.h +136 -0
- cuda/cccl/headers/include/cuda/std/__atomic/api/reference.h +118 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions/common.h +58 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_local.h +208 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_derived.h +401 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated.h +3971 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated_helper.h +177 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions/host.h +211 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions.h +33 -0
- cuda/cccl/headers/include/cuda/std/__atomic/order.h +159 -0
- cuda/cccl/headers/include/cuda/std/__atomic/platform/msvc_to_builtins.h +654 -0
- cuda/cccl/headers/include/cuda/std/__atomic/platform.h +93 -0
- cuda/cccl/headers/include/cuda/std/__atomic/scopes.h +105 -0
- cuda/cccl/headers/include/cuda/std/__atomic/types/base.h +249 -0
- cuda/cccl/headers/include/cuda/std/__atomic/types/common.h +104 -0
- cuda/cccl/headers/include/cuda/std/__atomic/types/locked.h +225 -0
- cuda/cccl/headers/include/cuda/std/__atomic/types/reference.h +72 -0
- cuda/cccl/headers/include/cuda/std/__atomic/types/small.h +228 -0
- cuda/cccl/headers/include/cuda/std/__atomic/types.h +52 -0
- cuda/cccl/headers/include/cuda/std/__atomic/wait/notify_wait.h +95 -0
- cuda/cccl/headers/include/cuda/std/__atomic/wait/polling.h +65 -0
- cuda/cccl/headers/include/cuda/std/__barrier/barrier.h +227 -0
- cuda/cccl/headers/include/cuda/std/__barrier/empty_completion.h +37 -0
- cuda/cccl/headers/include/cuda/std/__barrier/poll_tester.h +82 -0
- cuda/cccl/headers/include/cuda/std/__bit/bit_cast.h +76 -0
- cuda/cccl/headers/include/cuda/std/__bit/byteswap.h +185 -0
- cuda/cccl/headers/include/cuda/std/__bit/countl.h +174 -0
- cuda/cccl/headers/include/cuda/std/__bit/countr.h +185 -0
- cuda/cccl/headers/include/cuda/std/__bit/endian.h +39 -0
- cuda/cccl/headers/include/cuda/std/__bit/has_single_bit.h +43 -0
- cuda/cccl/headers/include/cuda/std/__bit/integral.h +126 -0
- cuda/cccl/headers/include/cuda/std/__bit/popcount.h +154 -0
- cuda/cccl/headers/include/cuda/std/__bit/reference.h +1272 -0
- cuda/cccl/headers/include/cuda/std/__bit/rotate.h +94 -0
- cuda/cccl/headers/include/cuda/std/__cccl/algorithm_wrapper.h +36 -0
- cuda/cccl/headers/include/cuda/std/__cccl/architecture.h +78 -0
- cuda/cccl/headers/include/cuda/std/__cccl/assert.h +161 -0
- cuda/cccl/headers/include/cuda/std/__cccl/attributes.h +206 -0
- cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +673 -0
- cuda/cccl/headers/include/cuda/std/__cccl/compiler.h +217 -0
- cuda/cccl/headers/include/cuda/std/__cccl/cuda_capabilities.h +51 -0
- cuda/cccl/headers/include/cuda/std/__cccl/cuda_toolkit.h +56 -0
- cuda/cccl/headers/include/cuda/std/__cccl/deprecated.h +88 -0
- cuda/cccl/headers/include/cuda/std/__cccl/diagnostic.h +131 -0
- cuda/cccl/headers/include/cuda/std/__cccl/dialect.h +123 -0
- cuda/cccl/headers/include/cuda/std/__cccl/epilogue.h +344 -0
- cuda/cccl/headers/include/cuda/std/__cccl/exceptions.h +91 -0
- cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +74 -0
- cuda/cccl/headers/include/cuda/std/__cccl/extended_data_types.h +160 -0
- cuda/cccl/headers/include/cuda/std/__cccl/host_std_lib.h +52 -0
- cuda/cccl/headers/include/cuda/std/__cccl/is_non_narrowing_convertible.h +73 -0
- cuda/cccl/headers/include/cuda/std/__cccl/memory_wrapper.h +36 -0
- cuda/cccl/headers/include/cuda/std/__cccl/numeric_wrapper.h +36 -0
- cuda/cccl/headers/include/cuda/std/__cccl/os.h +54 -0
- cuda/cccl/headers/include/cuda/std/__cccl/preprocessor.h +1286 -0
- cuda/cccl/headers/include/cuda/std/__cccl/prologue.h +281 -0
- cuda/cccl/headers/include/cuda/std/__cccl/ptx_isa.h +253 -0
- cuda/cccl/headers/include/cuda/std/__cccl/rtti.h +72 -0
- cuda/cccl/headers/include/cuda/std/__cccl/sequence_access.h +87 -0
- cuda/cccl/headers/include/cuda/std/__cccl/system_header.h +38 -0
- cuda/cccl/headers/include/cuda/std/__cccl/unreachable.h +31 -0
- cuda/cccl/headers/include/cuda/std/__cccl/version.h +26 -0
- cuda/cccl/headers/include/cuda/std/__cccl/visibility.h +171 -0
- cuda/cccl/headers/include/cuda/std/__charconv/chars_format.h +81 -0
- cuda/cccl/headers/include/cuda/std/__charconv/from_chars.h +154 -0
- cuda/cccl/headers/include/cuda/std/__charconv/from_chars_result.h +56 -0
- cuda/cccl/headers/include/cuda/std/__charconv/to_chars.h +148 -0
- cuda/cccl/headers/include/cuda/std/__charconv/to_chars_result.h +56 -0
- cuda/cccl/headers/include/cuda/std/__charconv_ +31 -0
- cuda/cccl/headers/include/cuda/std/__chrono/calendar.h +54 -0
- cuda/cccl/headers/include/cuda/std/__chrono/day.h +162 -0
- cuda/cccl/headers/include/cuda/std/__chrono/duration.h +503 -0
- cuda/cccl/headers/include/cuda/std/__chrono/file_clock.h +55 -0
- cuda/cccl/headers/include/cuda/std/__chrono/high_resolution_clock.h +46 -0
- cuda/cccl/headers/include/cuda/std/__chrono/month.h +187 -0
- cuda/cccl/headers/include/cuda/std/__chrono/steady_clock.h +60 -0
- cuda/cccl/headers/include/cuda/std/__chrono/system_clock.h +80 -0
- cuda/cccl/headers/include/cuda/std/__chrono/time_point.h +259 -0
- cuda/cccl/headers/include/cuda/std/__chrono/year.h +186 -0
- cuda/cccl/headers/include/cuda/std/__cmath/abs.h +127 -0
- cuda/cccl/headers/include/cuda/std/__cmath/copysign.h +88 -0
- cuda/cccl/headers/include/cuda/std/__cmath/error_functions.h +200 -0
- cuda/cccl/headers/include/cuda/std/__cmath/exponential_functions.h +784 -0
- cuda/cccl/headers/include/cuda/std/__cmath/fdim.h +118 -0
- cuda/cccl/headers/include/cuda/std/__cmath/fma.h +125 -0
- cuda/cccl/headers/include/cuda/std/__cmath/fpclassify.h +231 -0
- cuda/cccl/headers/include/cuda/std/__cmath/gamma.h +205 -0
- cuda/cccl/headers/include/cuda/std/__cmath/hyperbolic_functions.h +286 -0
- cuda/cccl/headers/include/cuda/std/__cmath/hypot.h +221 -0
- cuda/cccl/headers/include/cuda/std/__cmath/inverse_hyperbolic_functions.h +286 -0
- cuda/cccl/headers/include/cuda/std/__cmath/inverse_trigonometric_functions.h +371 -0
- cuda/cccl/headers/include/cuda/std/__cmath/isfinite.h +167 -0
- cuda/cccl/headers/include/cuda/std/__cmath/isinf.h +205 -0
- cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +186 -0
- cuda/cccl/headers/include/cuda/std/__cmath/isnormal.h +138 -0
- cuda/cccl/headers/include/cuda/std/__cmath/lerp.h +101 -0
- cuda/cccl/headers/include/cuda/std/__cmath/logarithms.h +534 -0
- cuda/cccl/headers/include/cuda/std/__cmath/min_max.h +287 -0
- cuda/cccl/headers/include/cuda/std/__cmath/modulo.h +208 -0
- cuda/cccl/headers/include/cuda/std/__cmath/nan.h +54 -0
- cuda/cccl/headers/include/cuda/std/__cmath/remainder.h +206 -0
- cuda/cccl/headers/include/cuda/std/__cmath/roots.h +199 -0
- cuda/cccl/headers/include/cuda/std/__cmath/rounding_functions.h +984 -0
- cuda/cccl/headers/include/cuda/std/__cmath/signbit.h +56 -0
- cuda/cccl/headers/include/cuda/std/__cmath/traits.h +238 -0
- cuda/cccl/headers/include/cuda/std/__cmath/trigonometric_functions.h +328 -0
- cuda/cccl/headers/include/cuda/std/__complex/arg.h +84 -0
- cuda/cccl/headers/include/cuda/std/__complex/complex.h +669 -0
- cuda/cccl/headers/include/cuda/std/__complex/exponential_functions.h +411 -0
- cuda/cccl/headers/include/cuda/std/__complex/hyperbolic_functions.h +117 -0
- cuda/cccl/headers/include/cuda/std/__complex/inverse_hyperbolic_functions.h +216 -0
- cuda/cccl/headers/include/cuda/std/__complex/inverse_trigonometric_functions.h +131 -0
- cuda/cccl/headers/include/cuda/std/__complex/literals.h +86 -0
- cuda/cccl/headers/include/cuda/std/__complex/logarithms.h +303 -0
- cuda/cccl/headers/include/cuda/std/__complex/math.h +159 -0
- cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +323 -0
- cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +322 -0
- cuda/cccl/headers/include/cuda/std/__complex/roots.h +214 -0
- cuda/cccl/headers/include/cuda/std/__complex/trigonometric_functions.h +61 -0
- cuda/cccl/headers/include/cuda/std/__complex/tuple.h +107 -0
- cuda/cccl/headers/include/cuda/std/__complex/vector_support.h +130 -0
- cuda/cccl/headers/include/cuda/std/__concepts/arithmetic.h +56 -0
- cuda/cccl/headers/include/cuda/std/__concepts/assignable.h +64 -0
- cuda/cccl/headers/include/cuda/std/__concepts/boolean_testable.h +63 -0
- cuda/cccl/headers/include/cuda/std/__concepts/class_or_enum.h +45 -0
- cuda/cccl/headers/include/cuda/std/__concepts/common_reference_with.h +69 -0
- cuda/cccl/headers/include/cuda/std/__concepts/common_with.h +82 -0
- cuda/cccl/headers/include/cuda/std/__concepts/concept_macros.h +341 -0
- cuda/cccl/headers/include/cuda/std/__concepts/constructible.h +174 -0
- cuda/cccl/headers/include/cuda/std/__concepts/convertible_to.h +70 -0
- cuda/cccl/headers/include/cuda/std/__concepts/copyable.h +60 -0
- cuda/cccl/headers/include/cuda/std/__concepts/derived_from.h +56 -0
- cuda/cccl/headers/include/cuda/std/__concepts/destructible.h +76 -0
- cuda/cccl/headers/include/cuda/std/__concepts/different_from.h +38 -0
- cuda/cccl/headers/include/cuda/std/__concepts/equality_comparable.h +100 -0
- cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +80 -0
- cuda/cccl/headers/include/cuda/std/__concepts/movable.h +58 -0
- cuda/cccl/headers/include/cuda/std/__concepts/predicate.h +54 -0
- cuda/cccl/headers/include/cuda/std/__concepts/regular.h +54 -0
- cuda/cccl/headers/include/cuda/std/__concepts/relation.h +77 -0
- cuda/cccl/headers/include/cuda/std/__concepts/same_as.h +39 -0
- cuda/cccl/headers/include/cuda/std/__concepts/semiregular.h +54 -0
- cuda/cccl/headers/include/cuda/std/__concepts/swappable.h +206 -0
- cuda/cccl/headers/include/cuda/std/__concepts/totally_ordered.h +101 -0
- cuda/cccl/headers/include/cuda/std/__cstddef/byte.h +113 -0
- cuda/cccl/headers/include/cuda/std/__cstddef/types.h +52 -0
- cuda/cccl/headers/include/cuda/std/__cstdlib/abs.h +57 -0
- cuda/cccl/headers/include/cuda/std/__cstdlib/aligned_alloc.h +66 -0
- cuda/cccl/headers/include/cuda/std/__cstdlib/div.h +96 -0
- cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +70 -0
- cuda/cccl/headers/include/cuda/std/__cstring/memcpy.h +61 -0
- cuda/cccl/headers/include/cuda/std/__cstring/memset.h +46 -0
- cuda/cccl/headers/include/cuda/std/__cuda/api_wrapper.h +62 -0
- cuda/cccl/headers/include/cuda/std/__exception/cuda_error.h +139 -0
- cuda/cccl/headers/include/cuda/std/__exception/terminate.h +73 -0
- cuda/cccl/headers/include/cuda/std/__execution/env.h +455 -0
- cuda/cccl/headers/include/cuda/std/__execution/policy.h +88 -0
- cuda/cccl/headers/include/cuda/std/__expected/bad_expected_access.h +127 -0
- cuda/cccl/headers/include/cuda/std/__expected/expected.h +1941 -0
- cuda/cccl/headers/include/cuda/std/__expected/expected_base.h +1050 -0
- cuda/cccl/headers/include/cuda/std/__expected/unexpect.h +37 -0
- cuda/cccl/headers/include/cuda/std/__expected/unexpected.h +165 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/arithmetic.h +56 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/cast.h +812 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/cccl_fp.h +125 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/common_type.h +48 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/constants.h +376 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/conversion_rank_order.h +124 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/cuda_fp_types.h +116 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/decompose.h +69 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/format.h +162 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/fp.h +40 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/mask.h +78 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/native_type.h +81 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/overflow_handler.h +139 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/properties.h +229 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/storage.h +248 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/traits.h +172 -0
- cuda/cccl/headers/include/cuda/std/__format/buffer.h +48 -0
- cuda/cccl/headers/include/cuda/std/__format/concepts.h +69 -0
- cuda/cccl/headers/include/cuda/std/__format/format_arg.h +282 -0
- cuda/cccl/headers/include/cuda/std/__format/format_arg_store.h +279 -0
- cuda/cccl/headers/include/cuda/std/__format/format_args.h +122 -0
- cuda/cccl/headers/include/cuda/std/__format/format_context.h +92 -0
- cuda/cccl/headers/include/cuda/std/__format/format_error.h +76 -0
- cuda/cccl/headers/include/cuda/std/__format/format_integral.h +237 -0
- cuda/cccl/headers/include/cuda/std/__format/format_parse_context.h +124 -0
- cuda/cccl/headers/include/cuda/std/__format/format_spec_parser.h +1230 -0
- cuda/cccl/headers/include/cuda/std/__format/formatter.h +59 -0
- cuda/cccl/headers/include/cuda/std/__format/formatters/bool.h +101 -0
- cuda/cccl/headers/include/cuda/std/__format/formatters/char.h +124 -0
- cuda/cccl/headers/include/cuda/std/__format/formatters/fp.h +101 -0
- cuda/cccl/headers/include/cuda/std/__format/formatters/int.h +174 -0
- cuda/cccl/headers/include/cuda/std/__format/formatters/ptr.h +104 -0
- cuda/cccl/headers/include/cuda/std/__format/formatters/str.h +178 -0
- cuda/cccl/headers/include/cuda/std/__format/output_utils.h +272 -0
- cuda/cccl/headers/include/cuda/std/__format/parse_arg_id.h +138 -0
- cuda/cccl/headers/include/cuda/std/__format_ +45 -0
- cuda/cccl/headers/include/cuda/std/__functional/binary_function.h +63 -0
- cuda/cccl/headers/include/cuda/std/__functional/binary_negate.h +65 -0
- cuda/cccl/headers/include/cuda/std/__functional/bind.h +334 -0
- cuda/cccl/headers/include/cuda/std/__functional/bind_back.h +80 -0
- cuda/cccl/headers/include/cuda/std/__functional/bind_front.h +73 -0
- cuda/cccl/headers/include/cuda/std/__functional/binder1st.h +74 -0
- cuda/cccl/headers/include/cuda/std/__functional/binder2nd.h +74 -0
- cuda/cccl/headers/include/cuda/std/__functional/compose.h +68 -0
- cuda/cccl/headers/include/cuda/std/__functional/default_searcher.h +75 -0
- cuda/cccl/headers/include/cuda/std/__functional/function.h +1275 -0
- cuda/cccl/headers/include/cuda/std/__functional/hash.h +649 -0
- cuda/cccl/headers/include/cuda/std/__functional/identity.h +57 -0
- cuda/cccl/headers/include/cuda/std/__functional/invoke.h +296 -0
- cuda/cccl/headers/include/cuda/std/__functional/is_transparent.h +41 -0
- cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +66 -0
- cuda/cccl/headers/include/cuda/std/__functional/mem_fun_ref.h +211 -0
- cuda/cccl/headers/include/cuda/std/__functional/not_fn.h +120 -0
- cuda/cccl/headers/include/cuda/std/__functional/operations.h +534 -0
- cuda/cccl/headers/include/cuda/std/__functional/perfect_forward.h +128 -0
- cuda/cccl/headers/include/cuda/std/__functional/pointer_to_binary_function.h +64 -0
- cuda/cccl/headers/include/cuda/std/__functional/pointer_to_unary_function.h +63 -0
- cuda/cccl/headers/include/cuda/std/__functional/ranges_operations.h +113 -0
- cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +113 -0
- cuda/cccl/headers/include/cuda/std/__functional/unary_function.h +62 -0
- cuda/cccl/headers/include/cuda/std/__functional/unary_negate.h +65 -0
- cuda/cccl/headers/include/cuda/std/__functional/unwrap_ref.h +56 -0
- cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +262 -0
- cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +53 -0
- cuda/cccl/headers/include/cuda/std/__fwd/array.h +42 -0
- cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +74 -0
- cuda/cccl/headers/include/cuda/std/__fwd/complex.h +75 -0
- cuda/cccl/headers/include/cuda/std/__fwd/expected.h +46 -0
- cuda/cccl/headers/include/cuda/std/__fwd/format.h +84 -0
- cuda/cccl/headers/include/cuda/std/__fwd/fp.h +37 -0
- cuda/cccl/headers/include/cuda/std/__fwd/get.h +123 -0
- cuda/cccl/headers/include/cuda/std/__fwd/hash.h +34 -0
- cuda/cccl/headers/include/cuda/std/__fwd/iterator.h +43 -0
- cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +122 -0
- cuda/cccl/headers/include/cuda/std/__fwd/memory_resource.h +37 -0
- cuda/cccl/headers/include/cuda/std/__fwd/optional.h +39 -0
- cuda/cccl/headers/include/cuda/std/__fwd/pair.h +47 -0
- cuda/cccl/headers/include/cuda/std/__fwd/reference_wrapper.h +34 -0
- cuda/cccl/headers/include/cuda/std/__fwd/span.h +45 -0
- cuda/cccl/headers/include/cuda/std/__fwd/string.h +112 -0
- cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +91 -0
- cuda/cccl/headers/include/cuda/std/__fwd/subrange.h +55 -0
- cuda/cccl/headers/include/cuda/std/__fwd/tuple.h +34 -0
- cuda/cccl/headers/include/cuda/std/__fwd/unexpected.h +40 -0
- cuda/cccl/headers/include/cuda/std/__internal/cpp_dialect.h +44 -0
- cuda/cccl/headers/include/cuda/std/__internal/features.h +72 -0
- cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +143 -0
- cuda/cccl/headers/include/cuda/std/__iterator/access.h +128 -0
- cuda/cccl/headers/include/cuda/std/__iterator/advance.h +228 -0
- cuda/cccl/headers/include/cuda/std/__iterator/back_insert_iterator.h +163 -0
- cuda/cccl/headers/include/cuda/std/__iterator/bounded_iter.h +253 -0
- cuda/cccl/headers/include/cuda/std/__iterator/concepts.h +645 -0
- cuda/cccl/headers/include/cuda/std/__iterator/counted_iterator.h +464 -0
- cuda/cccl/headers/include/cuda/std/__iterator/data.h +61 -0
- cuda/cccl/headers/include/cuda/std/__iterator/default_sentinel.h +36 -0
- cuda/cccl/headers/include/cuda/std/__iterator/distance.h +126 -0
- cuda/cccl/headers/include/cuda/std/__iterator/empty.h +53 -0
- cuda/cccl/headers/include/cuda/std/__iterator/erase_if_container.h +53 -0
- cuda/cccl/headers/include/cuda/std/__iterator/front_insert_iterator.h +99 -0
- cuda/cccl/headers/include/cuda/std/__iterator/incrementable_traits.h +143 -0
- cuda/cccl/headers/include/cuda/std/__iterator/indirectly_comparable.h +55 -0
- cuda/cccl/headers/include/cuda/std/__iterator/insert_iterator.h +107 -0
- cuda/cccl/headers/include/cuda/std/__iterator/istream_iterator.h +146 -0
- cuda/cccl/headers/include/cuda/std/__iterator/istreambuf_iterator.h +161 -0
- cuda/cccl/headers/include/cuda/std/__iterator/iter_move.h +161 -0
- cuda/cccl/headers/include/cuda/std/__iterator/iter_swap.h +163 -0
- cuda/cccl/headers/include/cuda/std/__iterator/iterator.h +44 -0
- cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +847 -0
- cuda/cccl/headers/include/cuda/std/__iterator/mergeable.h +72 -0
- cuda/cccl/headers/include/cuda/std/__iterator/move_iterator.h +432 -0
- cuda/cccl/headers/include/cuda/std/__iterator/move_sentinel.h +73 -0
- cuda/cccl/headers/include/cuda/std/__iterator/next.h +101 -0
- cuda/cccl/headers/include/cuda/std/__iterator/ostream_iterator.h +95 -0
- cuda/cccl/headers/include/cuda/std/__iterator/ostreambuf_iterator.h +100 -0
- cuda/cccl/headers/include/cuda/std/__iterator/permutable.h +54 -0
- cuda/cccl/headers/include/cuda/std/__iterator/prev.h +90 -0
- cuda/cccl/headers/include/cuda/std/__iterator/projected.h +61 -0
- cuda/cccl/headers/include/cuda/std/__iterator/readable_traits.h +156 -0
- cuda/cccl/headers/include/cuda/std/__iterator/reverse_access.h +142 -0
- cuda/cccl/headers/include/cuda/std/__iterator/reverse_iterator.h +371 -0
- cuda/cccl/headers/include/cuda/std/__iterator/size.h +69 -0
- cuda/cccl/headers/include/cuda/std/__iterator/sortable.h +55 -0
- cuda/cccl/headers/include/cuda/std/__iterator/unreachable_sentinel.h +84 -0
- cuda/cccl/headers/include/cuda/std/__iterator/wrap_iter.h +245 -0
- cuda/cccl/headers/include/cuda/std/__latch/latch.h +88 -0
- cuda/cccl/headers/include/cuda/std/__limits/numeric_limits.h +617 -0
- cuda/cccl/headers/include/cuda/std/__limits/numeric_limits_ext.h +753 -0
- cuda/cccl/headers/include/cuda/std/__linalg/conj_if_needed.h +78 -0
- cuda/cccl/headers/include/cuda/std/__linalg/conjugate_transposed.h +54 -0
- cuda/cccl/headers/include/cuda/std/__linalg/conjugated.h +139 -0
- cuda/cccl/headers/include/cuda/std/__linalg/scaled.h +132 -0
- cuda/cccl/headers/include/cuda/std/__linalg/transposed.h +321 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/aligned_accessor.h +97 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/concepts.h +139 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/default_accessor.h +73 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/empty_base.h +352 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +759 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/layout_left.h +314 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/layout_right.h +307 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/layout_stride.h +605 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +512 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_extents.h +193 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_helper.h +189 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_mapping.h +344 -0
- cuda/cccl/headers/include/cuda/std/__memory/addressof.h +67 -0
- cuda/cccl/headers/include/cuda/std/__memory/align.h +67 -0
- cuda/cccl/headers/include/cuda/std/__memory/allocate_at_least.h +81 -0
- cuda/cccl/headers/include/cuda/std/__memory/allocation_guard.h +100 -0
- cuda/cccl/headers/include/cuda/std/__memory/allocator.h +320 -0
- cuda/cccl/headers/include/cuda/std/__memory/allocator_arg_t.h +84 -0
- cuda/cccl/headers/include/cuda/std/__memory/allocator_destructor.h +59 -0
- cuda/cccl/headers/include/cuda/std/__memory/allocator_traits.h +525 -0
- cuda/cccl/headers/include/cuda/std/__memory/assume_aligned.h +60 -0
- cuda/cccl/headers/include/cuda/std/__memory/builtin_new_allocator.h +87 -0
- cuda/cccl/headers/include/cuda/std/__memory/compressed_pair.h +225 -0
- cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +246 -0
- cuda/cccl/headers/include/cuda/std/__memory/destruct_n.h +91 -0
- cuda/cccl/headers/include/cuda/std/__memory/is_sufficiently_aligned.h +46 -0
- cuda/cccl/headers/include/cuda/std/__memory/pointer_traits.h +246 -0
- cuda/cccl/headers/include/cuda/std/__memory/runtime_assume_aligned.h +62 -0
- cuda/cccl/headers/include/cuda/std/__memory/temporary_buffer.h +92 -0
- cuda/cccl/headers/include/cuda/std/__memory/uninitialized_algorithms.h +678 -0
- cuda/cccl/headers/include/cuda/std/__memory/unique_ptr.h +765 -0
- cuda/cccl/headers/include/cuda/std/__memory/uses_allocator.h +54 -0
- cuda/cccl/headers/include/cuda/std/__memory/voidify.h +41 -0
- cuda/cccl/headers/include/cuda/std/__memory_ +34 -0
- cuda/cccl/headers/include/cuda/std/__new/allocate.h +126 -0
- cuda/cccl/headers/include/cuda/std/__new/bad_alloc.h +57 -0
- cuda/cccl/headers/include/cuda/std/__new/launder.h +53 -0
- cuda/cccl/headers/include/cuda/std/__new_ +29 -0
- cuda/cccl/headers/include/cuda/std/__numeric/accumulate.h +56 -0
- cuda/cccl/headers/include/cuda/std/__numeric/adjacent_difference.h +72 -0
- cuda/cccl/headers/include/cuda/std/__numeric/exclusive_scan.h +66 -0
- cuda/cccl/headers/include/cuda/std/__numeric/gcd_lcm.h +78 -0
- cuda/cccl/headers/include/cuda/std/__numeric/inclusive_scan.h +73 -0
- cuda/cccl/headers/include/cuda/std/__numeric/inner_product.h +62 -0
- cuda/cccl/headers/include/cuda/std/__numeric/iota.h +42 -0
- cuda/cccl/headers/include/cuda/std/__numeric/midpoint.h +97 -0
- cuda/cccl/headers/include/cuda/std/__numeric/partial_sum.h +69 -0
- cuda/cccl/headers/include/cuda/std/__numeric/reduce.h +60 -0
- cuda/cccl/headers/include/cuda/std/__numeric/transform_exclusive_scan.h +51 -0
- cuda/cccl/headers/include/cuda/std/__numeric/transform_inclusive_scan.h +65 -0
- cuda/cccl/headers/include/cuda/std/__numeric/transform_reduce.h +72 -0
- cuda/cccl/headers/include/cuda/std/__optional/bad_optional_access.h +74 -0
- cuda/cccl/headers/include/cuda/std/__optional/hash.h +53 -0
- cuda/cccl/headers/include/cuda/std/__optional/make_optional.h +61 -0
- cuda/cccl/headers/include/cuda/std/__optional/nullopt.h +43 -0
- cuda/cccl/headers/include/cuda/std/__optional/optional.h +859 -0
- cuda/cccl/headers/include/cuda/std/__optional/optional_base.h +433 -0
- cuda/cccl/headers/include/cuda/std/__optional/optional_ref.h +324 -0
- cuda/cccl/headers/include/cuda/std/__random/generate_canonical.h +56 -0
- cuda/cccl/headers/include/cuda/std/__random/is_seed_sequence.h +39 -0
- cuda/cccl/headers/include/cuda/std/__random/is_valid.h +106 -0
- cuda/cccl/headers/include/cuda/std/__random/linear_congruential_engine.h +398 -0
- cuda/cccl/headers/include/cuda/std/__random/uniform_int_distribution.h +335 -0
- cuda/cccl/headers/include/cuda/std/__random/uniform_real_distribution.h +183 -0
- cuda/cccl/headers/include/cuda/std/__random_ +29 -0
- cuda/cccl/headers/include/cuda/std/__ranges/access.h +303 -0
- cuda/cccl/headers/include/cuda/std/__ranges/all.h +98 -0
- cuda/cccl/headers/include/cuda/std/__ranges/compressed_movable_box.h +892 -0
- cuda/cccl/headers/include/cuda/std/__ranges/concepts.h +302 -0
- cuda/cccl/headers/include/cuda/std/__ranges/counted.h +90 -0
- cuda/cccl/headers/include/cuda/std/__ranges/dangling.h +54 -0
- cuda/cccl/headers/include/cuda/std/__ranges/data.h +136 -0
- cuda/cccl/headers/include/cuda/std/__ranges/empty.h +109 -0
- cuda/cccl/headers/include/cuda/std/__ranges/empty_view.h +77 -0
- cuda/cccl/headers/include/cuda/std/__ranges/enable_borrowed_range.h +41 -0
- cuda/cccl/headers/include/cuda/std/__ranges/enable_view.h +78 -0
- cuda/cccl/headers/include/cuda/std/__ranges/from_range.h +36 -0
- cuda/cccl/headers/include/cuda/std/__ranges/iota_view.h +266 -0
- cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +410 -0
- cuda/cccl/headers/include/cuda/std/__ranges/owning_view.h +162 -0
- cuda/cccl/headers/include/cuda/std/__ranges/range_adaptor.h +110 -0
- cuda/cccl/headers/include/cuda/std/__ranges/rbegin.h +175 -0
- cuda/cccl/headers/include/cuda/std/__ranges/ref_view.h +121 -0
- cuda/cccl/headers/include/cuda/std/__ranges/rend.h +182 -0
- cuda/cccl/headers/include/cuda/std/__ranges/repeat_view.h +345 -0
- cuda/cccl/headers/include/cuda/std/__ranges/single_view.h +155 -0
- cuda/cccl/headers/include/cuda/std/__ranges/size.h +201 -0
- cuda/cccl/headers/include/cuda/std/__ranges/subrange.h +513 -0
- cuda/cccl/headers/include/cuda/std/__ranges/take_view.h +476 -0
- cuda/cccl/headers/include/cuda/std/__ranges/take_while_view.h +259 -0
- cuda/cccl/headers/include/cuda/std/__ranges/transform_view.h +522 -0
- cuda/cccl/headers/include/cuda/std/__ranges/unwrap_end.h +53 -0
- cuda/cccl/headers/include/cuda/std/__ranges/view_interface.h +183 -0
- cuda/cccl/headers/include/cuda/std/__ranges/views.h +38 -0
- cuda/cccl/headers/include/cuda/std/__semaphore/atomic_semaphore.h +234 -0
- cuda/cccl/headers/include/cuda/std/__semaphore/counting_semaphore.h +51 -0
- cuda/cccl/headers/include/cuda/std/__string/char_traits.h +191 -0
- cuda/cccl/headers/include/cuda/std/__string/constexpr_c_functions.h +581 -0
- cuda/cccl/headers/include/cuda/std/__string/helper_functions.h +296 -0
- cuda/cccl/headers/include/cuda/std/__string/string_view.h +244 -0
- cuda/cccl/headers/include/cuda/std/__string_ +29 -0
- cuda/cccl/headers/include/cuda/std/__system_error/errc.h +51 -0
- cuda/cccl/headers/include/cuda/std/__system_error_ +26 -0
- cuda/cccl/headers/include/cuda/std/__thread/threading_support.h +106 -0
- cuda/cccl/headers/include/cuda/std/__thread/threading_support_cuda.h +47 -0
- cuda/cccl/headers/include/cuda/std/__thread/threading_support_external.h +41 -0
- cuda/cccl/headers/include/cuda/std/__thread/threading_support_pthread.h +143 -0
- cuda/cccl/headers/include/cuda/std/__thread/threading_support_win32.h +87 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/ignore.h +51 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/make_tuple_types.h +120 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/sfinae_helpers.h +260 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/structured_bindings.h +212 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_element.h +70 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_indices.h +44 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +84 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like_ext.h +68 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_size.h +79 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_types.h +35 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/vector_types.h +290 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/add_const.h +40 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/add_cv.h +40 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/add_lvalue_reference.h +62 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/add_pointer.h +65 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/add_rvalue_reference.h +62 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/add_volatile.h +40 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/aligned_storage.h +149 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/aligned_union.h +62 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/alignment_of.h +41 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/always_false.h +35 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/can_extract_key.h +68 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/common_reference.h +262 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/common_type.h +173 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/conditional.h +65 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/conjunction.h +67 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/copy_cv.h +50 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/copy_cvref.h +148 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/decay.h +83 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/dependent_type.h +35 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/disjunction.h +77 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/enable_if.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/extent.h +68 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/fold.h +47 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/has_unique_object_representation.h +46 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/has_virtual_destructor.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/integral_constant.h +62 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_abstract.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_aggregate.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_allocator.h +46 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_arithmetic.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_array.h +62 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_assignable.h +78 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_base_of.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_bounded_array.h +44 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_callable.h +60 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_char_like_type.h +38 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_class.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_compound.h +58 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_const.h +56 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_constant_evaluated.h +51 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_constructible.h +174 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_convertible.h +211 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_assignable.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_constructible.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_core_convertible.h +47 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_corresponding_member.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_default_constructible.h +40 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_destructible.h +115 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_empty.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_enum.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_execution_policy.h +81 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_arithmetic.h +38 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_floating_point.h +79 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_final.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_floating_point.h +53 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_function.h +61 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_fundamental.h +56 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_implicitly_default_constructible.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_integer.h +45 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_integral.h +123 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_layout_compatible.h +45 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_literal_type.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_member_function_pointer.h +79 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_member_object_pointer.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_member_pointer.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_move_assignable.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_move_constructible.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_assignable.h +70 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_constructible.h +84 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_convertible.h +59 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_assignable.h +60 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_constructible.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_default_constructible.h +54 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_destructible.h +82 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_assignable.h +60 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_constructible.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_null_pointer.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_object.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_one_of.h +37 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_pod.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer.h +60 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_base_of.h +84 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_with_class.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_polymorphic.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +121 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_reference.h +95 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_reference_wrapper.h +50 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_referenceable.h +55 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_same.h +88 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_scalar.h +60 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_scoped_enum.h +49 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_signed.h +65 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_signed_integer.h +59 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_standard_layout.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_swappable.h +202 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivial.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_assignable.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_constructible.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_assignable.h +46 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_constructible.h +45 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copyable.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_default_constructible.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_destructible.h +58 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_assignable.h +45 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_constructible.h +44 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_unbounded_array.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_union.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned.h +66 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned_integer.h +59 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_valid_expansion.h +41 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_void.h +55 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_volatile.h +56 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/lazy.h +35 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/make_const_lvalue_ref.h +36 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/make_nbit_int.h +107 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/make_signed.h +140 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/make_unsigned.h +151 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/maybe_const.h +36 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/nat.h +39 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/negation.h +44 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/num_bits.h +122 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/promote.h +163 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/rank.h +60 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/reference_constructs_from_temporary.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/reference_converts_from_temporary.h +56 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_all_extents.h +66 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_const.h +59 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_const_ref.h +37 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_cv.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_cvref.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_extent.h +65 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_pointer.h +73 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_reference.h +72 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_volatile.h +58 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +47 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/type_identity.h +40 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/type_list.h +1067 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/type_set.h +131 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/underlying_type.h +52 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/void_t.h +34 -0
- cuda/cccl/headers/include/cuda/std/__utility/as_const.h +52 -0
- cuda/cccl/headers/include/cuda/std/__utility/auto_cast.h +34 -0
- cuda/cccl/headers/include/cuda/std/__utility/cmp.h +116 -0
- cuda/cccl/headers/include/cuda/std/__utility/convert_to_integral.h +101 -0
- cuda/cccl/headers/include/cuda/std/__utility/declval.h +76 -0
- cuda/cccl/headers/include/cuda/std/__utility/exception_guard.h +161 -0
- cuda/cccl/headers/include/cuda/std/__utility/exchange.h +46 -0
- cuda/cccl/headers/include/cuda/std/__utility/forward.h +59 -0
- cuda/cccl/headers/include/cuda/std/__utility/forward_like.h +55 -0
- cuda/cccl/headers/include/cuda/std/__utility/in_place.h +86 -0
- cuda/cccl/headers/include/cuda/std/__utility/integer_sequence.h +251 -0
- cuda/cccl/headers/include/cuda/std/__utility/monostate.h +99 -0
- cuda/cccl/headers/include/cuda/std/__utility/move.h +74 -0
- cuda/cccl/headers/include/cuda/std/__utility/pair.h +791 -0
- cuda/cccl/headers/include/cuda/std/__utility/piecewise_construct.h +37 -0
- cuda/cccl/headers/include/cuda/std/__utility/pod_tuple.h +527 -0
- cuda/cccl/headers/include/cuda/std/__utility/priority_tag.h +40 -0
- cuda/cccl/headers/include/cuda/std/__utility/rel_ops.h +63 -0
- cuda/cccl/headers/include/cuda/std/__utility/swap.h +64 -0
- cuda/cccl/headers/include/cuda/std/__utility/to_underlying.h +40 -0
- cuda/cccl/headers/include/cuda/std/__utility/typeid.h +421 -0
- cuda/cccl/headers/include/cuda/std/__utility/undefined.h +34 -0
- cuda/cccl/headers/include/cuda/std/__utility/unreachable.h +37 -0
- cuda/cccl/headers/include/cuda/std/array +518 -0
- cuda/cccl/headers/include/cuda/std/atomic +810 -0
- cuda/cccl/headers/include/cuda/std/barrier +42 -0
- cuda/cccl/headers/include/cuda/std/bit +35 -0
- cuda/cccl/headers/include/cuda/std/bitset +994 -0
- cuda/cccl/headers/include/cuda/std/cassert +28 -0
- cuda/cccl/headers/include/cuda/std/ccomplex +15 -0
- cuda/cccl/headers/include/cuda/std/cfloat +59 -0
- cuda/cccl/headers/include/cuda/std/chrono +26 -0
- cuda/cccl/headers/include/cuda/std/climits +61 -0
- cuda/cccl/headers/include/cuda/std/cmath +87 -0
- cuda/cccl/headers/include/cuda/std/complex +50 -0
- cuda/cccl/headers/include/cuda/std/concepts +48 -0
- cuda/cccl/headers/include/cuda/std/cstddef +28 -0
- cuda/cccl/headers/include/cuda/std/cstdint +178 -0
- cuda/cccl/headers/include/cuda/std/cstdlib +30 -0
- cuda/cccl/headers/include/cuda/std/cstring +110 -0
- cuda/cccl/headers/include/cuda/std/ctime +154 -0
- cuda/cccl/headers/include/cuda/std/detail/__config +45 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +207 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/algorithm +1721 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/chrono +2509 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/iosfwd +128 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/stdexcept +120 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/tuple +1365 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +2144 -0
- cuda/cccl/headers/include/cuda/std/execution +29 -0
- cuda/cccl/headers/include/cuda/std/expected +30 -0
- cuda/cccl/headers/include/cuda/std/functional +56 -0
- cuda/cccl/headers/include/cuda/std/initializer_list +44 -0
- cuda/cccl/headers/include/cuda/std/inplace_vector +2170 -0
- cuda/cccl/headers/include/cuda/std/iterator +70 -0
- cuda/cccl/headers/include/cuda/std/latch +34 -0
- cuda/cccl/headers/include/cuda/std/limits +28 -0
- cuda/cccl/headers/include/cuda/std/linalg +30 -0
- cuda/cccl/headers/include/cuda/std/mdspan +38 -0
- cuda/cccl/headers/include/cuda/std/memory +39 -0
- cuda/cccl/headers/include/cuda/std/numbers +346 -0
- cuda/cccl/headers/include/cuda/std/numeric +41 -0
- cuda/cccl/headers/include/cuda/std/optional +31 -0
- cuda/cccl/headers/include/cuda/std/ranges +69 -0
- cuda/cccl/headers/include/cuda/std/ratio +416 -0
- cuda/cccl/headers/include/cuda/std/semaphore +31 -0
- cuda/cccl/headers/include/cuda/std/source_location +83 -0
- cuda/cccl/headers/include/cuda/std/span +628 -0
- cuda/cccl/headers/include/cuda/std/string_view +925 -0
- cuda/cccl/headers/include/cuda/std/tuple +26 -0
- cuda/cccl/headers/include/cuda/std/type_traits +177 -0
- cuda/cccl/headers/include/cuda/std/utility +70 -0
- cuda/cccl/headers/include/cuda/std/variant +25 -0
- cuda/cccl/headers/include/cuda/std/version +240 -0
- cuda/cccl/headers/include/cuda/stream +31 -0
- cuda/cccl/headers/include/cuda/stream_ref +59 -0
- cuda/cccl/headers/include/cuda/type_traits +27 -0
- cuda/cccl/headers/include/cuda/utility +28 -0
- cuda/cccl/headers/include/cuda/version +16 -0
- cuda/cccl/headers/include/cuda/warp +28 -0
- cuda/cccl/headers/include/cuda/work_stealing +26 -0
- cuda/cccl/headers/include/nv/detail/__preprocessor +169 -0
- cuda/cccl/headers/include/nv/detail/__target_macros +718 -0
- cuda/cccl/headers/include/nv/target +240 -0
- cuda/cccl/headers/include/thrust/addressof.h +22 -0
- cuda/cccl/headers/include/thrust/adjacent_difference.h +254 -0
- cuda/cccl/headers/include/thrust/advance.h +57 -0
- cuda/cccl/headers/include/thrust/allocate_unique.h +299 -0
- cuda/cccl/headers/include/thrust/binary_search.h +1910 -0
- cuda/cccl/headers/include/thrust/complex.h +858 -0
- cuda/cccl/headers/include/thrust/copy.h +506 -0
- cuda/cccl/headers/include/thrust/count.h +245 -0
- cuda/cccl/headers/include/thrust/detail/adjacent_difference.inl +95 -0
- cuda/cccl/headers/include/thrust/detail/alignment.h +81 -0
- cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +626 -0
- cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +192 -0
- cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +96 -0
- cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +81 -0
- cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +78 -0
- cuda/cccl/headers/include/thrust/detail/allocator/no_throw_allocator.h +76 -0
- cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +115 -0
- cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +116 -0
- cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +77 -0
- cuda/cccl/headers/include/thrust/detail/allocator_aware_execution_policy.h +99 -0
- cuda/cccl/headers/include/thrust/detail/binary_search.inl +525 -0
- cuda/cccl/headers/include/thrust/detail/caching_allocator.h +47 -0
- cuda/cccl/headers/include/thrust/detail/complex/arithmetic.h +255 -0
- cuda/cccl/headers/include/thrust/detail/complex/c99math.h +64 -0
- cuda/cccl/headers/include/thrust/detail/complex/catrig.h +875 -0
- cuda/cccl/headers/include/thrust/detail/complex/catrigf.h +589 -0
- cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +233 -0
- cuda/cccl/headers/include/thrust/detail/complex/ccoshf.h +161 -0
- cuda/cccl/headers/include/thrust/detail/complex/cexp.h +195 -0
- cuda/cccl/headers/include/thrust/detail/complex/cexpf.h +173 -0
- cuda/cccl/headers/include/thrust/detail/complex/clog.h +223 -0
- cuda/cccl/headers/include/thrust/detail/complex/clogf.h +210 -0
- cuda/cccl/headers/include/thrust/detail/complex/complex.inl +263 -0
- cuda/cccl/headers/include/thrust/detail/complex/cpow.h +50 -0
- cuda/cccl/headers/include/thrust/detail/complex/cproj.h +81 -0
- cuda/cccl/headers/include/thrust/detail/complex/csinh.h +228 -0
- cuda/cccl/headers/include/thrust/detail/complex/csinhf.h +168 -0
- cuda/cccl/headers/include/thrust/detail/complex/csqrt.h +178 -0
- cuda/cccl/headers/include/thrust/detail/complex/csqrtf.h +174 -0
- cuda/cccl/headers/include/thrust/detail/complex/ctanh.h +208 -0
- cuda/cccl/headers/include/thrust/detail/complex/ctanhf.h +133 -0
- cuda/cccl/headers/include/thrust/detail/complex/math_private.h +138 -0
- cuda/cccl/headers/include/thrust/detail/complex/stream.h +73 -0
- cuda/cccl/headers/include/thrust/detail/config/compiler.h +38 -0
- cuda/cccl/headers/include/thrust/detail/config/config.h +43 -0
- cuda/cccl/headers/include/thrust/detail/config/cpp_dialect.h +78 -0
- cuda/cccl/headers/include/thrust/detail/config/device_system.h +55 -0
- cuda/cccl/headers/include/thrust/detail/config/host_system.h +48 -0
- cuda/cccl/headers/include/thrust/detail/config/memory_resource.h +41 -0
- cuda/cccl/headers/include/thrust/detail/config/namespace.h +162 -0
- cuda/cccl/headers/include/thrust/detail/config/simple_defines.h +48 -0
- cuda/cccl/headers/include/thrust/detail/config.h +36 -0
- cuda/cccl/headers/include/thrust/detail/contiguous_storage.h +228 -0
- cuda/cccl/headers/include/thrust/detail/contiguous_storage.inl +273 -0
- cuda/cccl/headers/include/thrust/detail/copy.h +72 -0
- cuda/cccl/headers/include/thrust/detail/copy.inl +129 -0
- cuda/cccl/headers/include/thrust/detail/copy_if.h +62 -0
- cuda/cccl/headers/include/thrust/detail/copy_if.inl +102 -0
- cuda/cccl/headers/include/thrust/detail/count.h +55 -0
- cuda/cccl/headers/include/thrust/detail/count.inl +89 -0
- cuda/cccl/headers/include/thrust/detail/device_ptr.inl +48 -0
- cuda/cccl/headers/include/thrust/detail/equal.inl +93 -0
- cuda/cccl/headers/include/thrust/detail/event_error.h +160 -0
- cuda/cccl/headers/include/thrust/detail/execute_with_allocator.h +81 -0
- cuda/cccl/headers/include/thrust/detail/execute_with_allocator_fwd.h +61 -0
- cuda/cccl/headers/include/thrust/detail/execution_policy.h +120 -0
- cuda/cccl/headers/include/thrust/detail/extrema.inl +184 -0
- cuda/cccl/headers/include/thrust/detail/fill.inl +86 -0
- cuda/cccl/headers/include/thrust/detail/find.inl +113 -0
- cuda/cccl/headers/include/thrust/detail/for_each.inl +84 -0
- cuda/cccl/headers/include/thrust/detail/function.h +49 -0
- cuda/cccl/headers/include/thrust/detail/functional/actor.h +214 -0
- cuda/cccl/headers/include/thrust/detail/functional/operators.h +386 -0
- cuda/cccl/headers/include/thrust/detail/gather.inl +173 -0
- cuda/cccl/headers/include/thrust/detail/generate.inl +86 -0
- cuda/cccl/headers/include/thrust/detail/get_iterator_value.h +62 -0
- cuda/cccl/headers/include/thrust/detail/inner_product.inl +118 -0
- cuda/cccl/headers/include/thrust/detail/internal_functional.h +328 -0
- cuda/cccl/headers/include/thrust/detail/logical.inl +113 -0
- cuda/cccl/headers/include/thrust/detail/malloc_and_free.h +77 -0
- cuda/cccl/headers/include/thrust/detail/malloc_and_free_fwd.h +45 -0
- cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +209 -0
- cuda/cccl/headers/include/thrust/detail/merge.inl +276 -0
- cuda/cccl/headers/include/thrust/detail/mismatch.inl +94 -0
- cuda/cccl/headers/include/thrust/detail/overlapped_copy.h +124 -0
- cuda/cccl/headers/include/thrust/detail/partition.inl +378 -0
- cuda/cccl/headers/include/thrust/detail/pointer.h +309 -0
- cuda/cccl/headers/include/thrust/detail/preprocessor.h +652 -0
- cuda/cccl/headers/include/thrust/detail/random_bijection.h +177 -0
- cuda/cccl/headers/include/thrust/detail/range/head_flags.h +116 -0
- cuda/cccl/headers/include/thrust/detail/range/tail_flags.h +130 -0
- cuda/cccl/headers/include/thrust/detail/raw_pointer_cast.h +52 -0
- cuda/cccl/headers/include/thrust/detail/raw_reference_cast.h +192 -0
- cuda/cccl/headers/include/thrust/detail/reduce.inl +377 -0
- cuda/cccl/headers/include/thrust/detail/reference.h +494 -0
- cuda/cccl/headers/include/thrust/detail/reference_forward_declaration.h +35 -0
- cuda/cccl/headers/include/thrust/detail/remove.inl +213 -0
- cuda/cccl/headers/include/thrust/detail/replace.inl +231 -0
- cuda/cccl/headers/include/thrust/detail/reverse.inl +88 -0
- cuda/cccl/headers/include/thrust/detail/scan.inl +518 -0
- cuda/cccl/headers/include/thrust/detail/scatter.inl +157 -0
- cuda/cccl/headers/include/thrust/detail/seq.h +66 -0
- cuda/cccl/headers/include/thrust/detail/sequence.inl +109 -0
- cuda/cccl/headers/include/thrust/detail/set_operations.inl +981 -0
- cuda/cccl/headers/include/thrust/detail/shuffle.inl +86 -0
- cuda/cccl/headers/include/thrust/detail/sort.inl +373 -0
- cuda/cccl/headers/include/thrust/detail/static_assert.h +58 -0
- cuda/cccl/headers/include/thrust/detail/static_map.h +167 -0
- cuda/cccl/headers/include/thrust/detail/swap_ranges.inl +65 -0
- cuda/cccl/headers/include/thrust/detail/tabulate.inl +62 -0
- cuda/cccl/headers/include/thrust/detail/temporary_array.h +153 -0
- cuda/cccl/headers/include/thrust/detail/temporary_array.inl +120 -0
- cuda/cccl/headers/include/thrust/detail/temporary_buffer.h +81 -0
- cuda/cccl/headers/include/thrust/detail/transform_reduce.inl +69 -0
- cuda/cccl/headers/include/thrust/detail/transform_scan.inl +161 -0
- cuda/cccl/headers/include/thrust/detail/trivial_sequence.h +130 -0
- cuda/cccl/headers/include/thrust/detail/tuple_meta_transform.h +61 -0
- cuda/cccl/headers/include/thrust/detail/type_deduction.h +62 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/has_member_function.h +47 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/has_nested_type.h +43 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/is_call_possible.h +167 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/is_commutative.h +69 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/is_metafunction_defined.h +39 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/is_thrust_pointer.h +59 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/iterator/is_output_iterator.h +46 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/minimum_type.h +89 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/pointer_traits.h +332 -0
- cuda/cccl/headers/include/thrust/detail/type_traits.h +136 -0
- cuda/cccl/headers/include/thrust/detail/uninitialized_copy.inl +90 -0
- cuda/cccl/headers/include/thrust/detail/uninitialized_fill.inl +86 -0
- cuda/cccl/headers/include/thrust/detail/unique.inl +373 -0
- cuda/cccl/headers/include/thrust/detail/use_default.h +34 -0
- cuda/cccl/headers/include/thrust/detail/vector_base.h +613 -0
- cuda/cccl/headers/include/thrust/detail/vector_base.inl +1210 -0
- cuda/cccl/headers/include/thrust/device_allocator.h +134 -0
- cuda/cccl/headers/include/thrust/device_delete.h +74 -0
- cuda/cccl/headers/include/thrust/device_free.h +85 -0
- cuda/cccl/headers/include/thrust/device_make_unique.h +56 -0
- cuda/cccl/headers/include/thrust/device_malloc.h +84 -0
- cuda/cccl/headers/include/thrust/device_malloc_allocator.h +190 -0
- cuda/cccl/headers/include/thrust/device_new.h +112 -0
- cuda/cccl/headers/include/thrust/device_new_allocator.h +179 -0
- cuda/cccl/headers/include/thrust/device_ptr.h +196 -0
- cuda/cccl/headers/include/thrust/device_reference.h +983 -0
- cuda/cccl/headers/include/thrust/device_vector.h +576 -0
- cuda/cccl/headers/include/thrust/distance.h +43 -0
- cuda/cccl/headers/include/thrust/equal.h +247 -0
- cuda/cccl/headers/include/thrust/execution_policy.h +251 -0
- cuda/cccl/headers/include/thrust/extrema.h +657 -0
- cuda/cccl/headers/include/thrust/fill.h +200 -0
- cuda/cccl/headers/include/thrust/find.h +382 -0
- cuda/cccl/headers/include/thrust/for_each.h +261 -0
- cuda/cccl/headers/include/thrust/functional.h +395 -0
- cuda/cccl/headers/include/thrust/gather.h +464 -0
- cuda/cccl/headers/include/thrust/generate.h +193 -0
- cuda/cccl/headers/include/thrust/host_vector.h +576 -0
- cuda/cccl/headers/include/thrust/inner_product.h +264 -0
- cuda/cccl/headers/include/thrust/iterator/constant_iterator.h +221 -0
- cuda/cccl/headers/include/thrust/iterator/counting_iterator.h +335 -0
- cuda/cccl/headers/include/thrust/iterator/detail/any_assign.h +48 -0
- cuda/cccl/headers/include/thrust/iterator/detail/any_system_tag.h +43 -0
- cuda/cccl/headers/include/thrust/iterator/detail/device_system_tag.h +38 -0
- cuda/cccl/headers/include/thrust/iterator/detail/host_system_tag.h +38 -0
- cuda/cccl/headers/include/thrust/iterator/detail/iterator_adaptor_base.h +81 -0
- cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_system.h +60 -0
- cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_traversal.h +65 -0
- cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h +57 -0
- cuda/cccl/headers/include/thrust/iterator/detail/iterator_facade_category.h +182 -0
- cuda/cccl/headers/include/thrust/iterator/detail/minimum_system.h +58 -0
- cuda/cccl/headers/include/thrust/iterator/detail/normal_iterator.h +69 -0
- cuda/cccl/headers/include/thrust/iterator/detail/retag.h +104 -0
- cuda/cccl/headers/include/thrust/iterator/detail/tagged_iterator.h +81 -0
- cuda/cccl/headers/include/thrust/iterator/detail/tuple_of_iterator_references.h +174 -0
- cuda/cccl/headers/include/thrust/iterator/discard_iterator.h +163 -0
- cuda/cccl/headers/include/thrust/iterator/iterator_adaptor.h +251 -0
- cuda/cccl/headers/include/thrust/iterator/iterator_categories.h +211 -0
- cuda/cccl/headers/include/thrust/iterator/iterator_facade.h +659 -0
- cuda/cccl/headers/include/thrust/iterator/iterator_traits.h +334 -0
- cuda/cccl/headers/include/thrust/iterator/iterator_traversal_tags.h +64 -0
- cuda/cccl/headers/include/thrust/iterator/offset_iterator.h +194 -0
- cuda/cccl/headers/include/thrust/iterator/permutation_iterator.h +204 -0
- cuda/cccl/headers/include/thrust/iterator/retag.h +72 -0
- cuda/cccl/headers/include/thrust/iterator/reverse_iterator.h +51 -0
- cuda/cccl/headers/include/thrust/iterator/shuffle_iterator.h +185 -0
- cuda/cccl/headers/include/thrust/iterator/strided_iterator.h +152 -0
- cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +152 -0
- cuda/cccl/headers/include/thrust/iterator/transform_input_output_iterator.h +226 -0
- cuda/cccl/headers/include/thrust/iterator/transform_iterator.h +351 -0
- cuda/cccl/headers/include/thrust/iterator/transform_output_iterator.h +190 -0
- cuda/cccl/headers/include/thrust/iterator/zip_iterator.h +359 -0
- cuda/cccl/headers/include/thrust/logical.h +290 -0
- cuda/cccl/headers/include/thrust/memory.h +299 -0
- cuda/cccl/headers/include/thrust/merge.h +725 -0
- cuda/cccl/headers/include/thrust/mismatch.h +261 -0
- cuda/cccl/headers/include/thrust/mr/allocator.h +229 -0
- cuda/cccl/headers/include/thrust/mr/device_memory_resource.h +41 -0
- cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +528 -0
- cuda/cccl/headers/include/thrust/mr/disjoint_sync_pool.h +118 -0
- cuda/cccl/headers/include/thrust/mr/disjoint_tls_pool.h +67 -0
- cuda/cccl/headers/include/thrust/mr/fancy_pointer_resource.h +67 -0
- cuda/cccl/headers/include/thrust/mr/host_memory_resource.h +38 -0
- cuda/cccl/headers/include/thrust/mr/memory_resource.h +217 -0
- cuda/cccl/headers/include/thrust/mr/new.h +100 -0
- cuda/cccl/headers/include/thrust/mr/polymorphic_adaptor.h +63 -0
- cuda/cccl/headers/include/thrust/mr/pool.h +528 -0
- cuda/cccl/headers/include/thrust/mr/pool_options.h +174 -0
- cuda/cccl/headers/include/thrust/mr/sync_pool.h +114 -0
- cuda/cccl/headers/include/thrust/mr/tls_pool.h +64 -0
- cuda/cccl/headers/include/thrust/mr/universal_memory_resource.h +29 -0
- cuda/cccl/headers/include/thrust/mr/validator.h +56 -0
- cuda/cccl/headers/include/thrust/pair.h +99 -0
- cuda/cccl/headers/include/thrust/partition.h +1391 -0
- cuda/cccl/headers/include/thrust/per_device_resource.h +98 -0
- cuda/cccl/headers/include/thrust/random/detail/discard_block_engine.inl +184 -0
- cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine.inl +155 -0
- cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine_discard.h +104 -0
- cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine.inl +151 -0
- cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h +53 -0
- cuda/cccl/headers/include/thrust/random/detail/mod.h +101 -0
- cuda/cccl/headers/include/thrust/random/detail/normal_distribution.inl +187 -0
- cuda/cccl/headers/include/thrust/random/detail/normal_distribution_base.h +160 -0
- cuda/cccl/headers/include/thrust/random/detail/random_core_access.h +63 -0
- cuda/cccl/headers/include/thrust/random/detail/subtract_with_carry_engine.inl +201 -0
- cuda/cccl/headers/include/thrust/random/detail/uniform_int_distribution.inl +198 -0
- cuda/cccl/headers/include/thrust/random/detail/uniform_real_distribution.inl +200 -0
- cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine.inl +183 -0
- cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine_max.h +187 -0
- cuda/cccl/headers/include/thrust/random/discard_block_engine.h +240 -0
- cuda/cccl/headers/include/thrust/random/linear_congruential_engine.h +289 -0
- cuda/cccl/headers/include/thrust/random/linear_feedback_shift_engine.h +217 -0
- cuda/cccl/headers/include/thrust/random/normal_distribution.h +257 -0
- cuda/cccl/headers/include/thrust/random/subtract_with_carry_engine.h +247 -0
- cuda/cccl/headers/include/thrust/random/uniform_int_distribution.h +261 -0
- cuda/cccl/headers/include/thrust/random/uniform_real_distribution.h +258 -0
- cuda/cccl/headers/include/thrust/random/xor_combine_engine.h +255 -0
- cuda/cccl/headers/include/thrust/random.h +120 -0
- cuda/cccl/headers/include/thrust/reduce.h +1113 -0
- cuda/cccl/headers/include/thrust/remove.h +768 -0
- cuda/cccl/headers/include/thrust/replace.h +826 -0
- cuda/cccl/headers/include/thrust/reverse.h +215 -0
- cuda/cccl/headers/include/thrust/scan.h +1671 -0
- cuda/cccl/headers/include/thrust/scatter.h +446 -0
- cuda/cccl/headers/include/thrust/sequence.h +277 -0
- cuda/cccl/headers/include/thrust/set_operations.h +3026 -0
- cuda/cccl/headers/include/thrust/shuffle.h +182 -0
- cuda/cccl/headers/include/thrust/sort.h +1320 -0
- cuda/cccl/headers/include/thrust/swap.h +147 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/adjacent_difference.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/assign_value.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/binary_search.h +32 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/copy.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/copy_if.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/count.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/equal.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/execution_policy.h +109 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/extrema.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/fill.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/find.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/for_each.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/gather.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/generate.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/get_value.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/inner_product.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/iter_swap.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/logical.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/malloc_and_free.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/memory.inl +60 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/merge.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/mismatch.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/partition.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/per_device_resource.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/reduce.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/reduce_by_key.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/remove.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/replace.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/reverse.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/scan.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/scan_by_key.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/scatter.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/sequence.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/set_operations.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/sort.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/swap_ranges.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/tabulate.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/temporary_buffer.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/transform.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/transform_reduce.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/transform_scan.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_copy.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_fill.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/unique.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/unique_by_key.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/execution_policy.h +63 -0
- cuda/cccl/headers/include/thrust/system/cpp/memory.h +106 -0
- cuda/cccl/headers/include/thrust/system/cpp/memory_resource.h +72 -0
- cuda/cccl/headers/include/thrust/system/cpp/pointer.h +120 -0
- cuda/cccl/headers/include/thrust/system/cpp/vector.h +96 -0
- cuda/cccl/headers/include/thrust/system/cuda/config.h +126 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/adjacent_difference.h +219 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/assign_value.h +124 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/binary_search.h +29 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/cdp_dispatch.h +72 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +273 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/copy_if.h +255 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/core/agent_launcher.h +289 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/core/triple_chevron_launch.h +191 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/core/util.h +593 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/count.h +75 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/cross_system.h +243 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/dispatch.h +233 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/equal.h +64 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/error.inl +96 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/execution_policy.h +264 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/extrema.h +476 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/fill.h +100 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +170 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/for_each.h +83 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/gather.h +91 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/generate.h +60 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/get_value.h +65 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/inner_product.h +75 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/iter_swap.h +80 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/logical.h +29 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/make_unsigned_special.h +61 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/malloc_and_free.h +121 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/memory.inl +57 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/merge.h +228 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +223 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/parallel_for.h +81 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/partition.h +405 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/per_device_resource.h +72 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/reduce.h +785 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/reduce_by_key.h +1001 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/remove.h +107 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/replace.h +122 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/reverse.h +87 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/scan.h +341 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/scan_by_key.h +414 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/scatter.h +91 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/sequence.h +29 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/set_operations.h +1734 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/sort.h +469 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/swap_ranges.h +98 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/tabulate.h +61 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/temporary_buffer.h +132 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/terminate.h +53 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/transform.h +429 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/transform_reduce.h +143 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/transform_scan.h +119 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_copy.h +117 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_fill.h +105 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/unique.h +289 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/unique_by_key.h +310 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/util.h +253 -0
- cuda/cccl/headers/include/thrust/system/cuda/error.h +168 -0
- cuda/cccl/headers/include/thrust/system/cuda/execution_policy.h +15 -0
- cuda/cccl/headers/include/thrust/system/cuda/memory.h +122 -0
- cuda/cccl/headers/include/thrust/system/cuda/memory_resource.h +122 -0
- cuda/cccl/headers/include/thrust/system/cuda/pointer.h +160 -0
- cuda/cccl/headers/include/thrust/system/cuda/vector.h +108 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/adjacent_difference.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/assign_value.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/binary_search.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/copy.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/copy_if.h +52 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/count.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/equal.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/extrema.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/fill.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/find.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/for_each.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/gather.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/generate.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/get_value.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/inner_product.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/iter_swap.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/logical.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/malloc_and_free.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/merge.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/mismatch.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/partition.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/per_device_resource.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/reduce.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/reduce_by_key.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/remove.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/replace.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/reverse.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/scan.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/scan_by_key.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/scatter.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/sequence.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/set_operations.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/sort.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/swap_ranges.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/tabulate.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/temporary_buffer.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/transform.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/transform_reduce.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/transform_scan.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_copy.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_fill.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/unique.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/unique_by_key.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/bad_alloc.h +61 -0
- cuda/cccl/headers/include/thrust/system/detail/errno.h +120 -0
- cuda/cccl/headers/include/thrust/system/detail/error_category.inl +302 -0
- cuda/cccl/headers/include/thrust/system/detail/error_code.inl +173 -0
- cuda/cccl/headers/include/thrust/system/detail/error_condition.inl +121 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.h +53 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.inl +79 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.h +161 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.inl +384 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/copy.h +45 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/copy.inl +64 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.h +58 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.inl +146 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/count.h +48 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/count.inl +84 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/equal.h +49 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/equal.inl +60 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/extrema.h +66 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/extrema.inl +252 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/fill.h +54 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/find.h +49 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/find.inl +137 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/for_each.h +58 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/gather.h +73 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/gather.inl +96 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/generate.h +45 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/generate.inl +63 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.h +60 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.inl +72 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/logical.h +59 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/memory.h +64 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/memory.inl +86 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/merge.h +99 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/merge.inl +148 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.h +49 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.inl +68 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/partition.h +129 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/partition.inl +207 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/per_device_resource.h +43 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/reduce.h +71 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/reduce.inl +100 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.h +83 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.inl +186 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/remove.h +86 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/remove.inl +121 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/replace.h +95 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/replace.inl +175 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/reverse.h +48 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/reverse.inl +67 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.h +63 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.inl +126 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scan.h +72 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scan.inl +85 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.h +126 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.inl +232 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scatter.h +73 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scatter.inl +85 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/select_system.h +104 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/sequence.h +70 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.h +282 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.inl +476 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.h +54 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.inl +125 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/sort.h +113 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/sort.inl +175 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.h +44 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.inl +76 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.h +41 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.inl +54 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/tag.h +47 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.h +54 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.inl +82 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/transform.h +395 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.h +50 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.inl +56 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.h +80 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.inl +113 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.h +45 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.inl +166 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.h +45 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.inl +115 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/unique.h +71 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/unique.inl +113 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.h +81 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.inl +126 -0
- cuda/cccl/headers/include/thrust/system/detail/internal/decompose.h +117 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/adjacent_difference.h +70 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/assign_value.h +42 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/binary_search.h +136 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/copy.h +49 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/copy.inl +119 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/copy_backward.h +49 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/copy_if.h +71 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/count.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/equal.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/execution_policy.h +52 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/extrema.h +110 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/fill.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/find.h +62 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/for_each.h +74 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/gather.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/general_copy.h +123 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/generate.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/get_value.h +43 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/inner_product.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/insertion_sort.h +141 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/iter_swap.h +45 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/logical.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/malloc_and_free.h +50 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/merge.h +75 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/merge.inl +145 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/mismatch.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/partition.h +301 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/per_device_resource.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/reduce.h +64 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/reduce_by_key.h +98 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/remove.h +179 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/replace.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/reverse.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/scan.h +154 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/scan_by_key.h +145 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/scatter.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/sequence.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/set_operations.h +206 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/sort.h +59 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/sort.inl +116 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.h +55 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.inl +356 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.h +48 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.inl +124 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.h +48 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.inl +586 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/swap_ranges.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/tabulate.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/temporary_buffer.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/transform.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/transform_reduce.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/transform_scan.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/trivial_copy.h +58 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_copy.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_fill.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/unique.h +115 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/unique_by_key.h +106 -0
- cuda/cccl/headers/include/thrust/system/detail/system_error.inl +108 -0
- cuda/cccl/headers/include/thrust/system/error_code.h +512 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/adjacent_difference.h +54 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/assign_value.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/binary_search.h +77 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/copy.h +50 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/copy.inl +74 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.h +56 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.inl +59 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/count.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.h +50 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.inl +65 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/equal.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/execution_policy.h +127 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/extrema.h +66 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/fill.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/find.h +53 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/for_each.h +56 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/for_each.inl +87 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/gather.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/generate.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/get_value.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/inner_product.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/iter_swap.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/logical.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/malloc_and_free.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/memory.inl +93 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/merge.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/mismatch.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/partition.h +88 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/partition.inl +102 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/per_device_resource.h +29 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/pragma_omp.h +54 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce.h +54 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce.inl +78 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.h +64 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.inl +65 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.h +59 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.inl +103 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/remove.h +72 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/remove.inl +87 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/replace.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reverse.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/scan.h +73 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/scan.inl +172 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/scan_by_key.h +36 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/scatter.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/sequence.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/set_operations.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/sort.h +60 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/sort.inl +265 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/swap_ranges.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/tabulate.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/temporary_buffer.h +29 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/transform.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/transform_reduce.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/transform_scan.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_copy.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_fill.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/unique.h +60 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/unique.inl +71 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.h +67 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.inl +75 -0
- cuda/cccl/headers/include/thrust/system/omp/execution_policy.h +62 -0
- cuda/cccl/headers/include/thrust/system/omp/memory.h +111 -0
- cuda/cccl/headers/include/thrust/system/omp/memory_resource.h +75 -0
- cuda/cccl/headers/include/thrust/system/omp/pointer.h +124 -0
- cuda/cccl/headers/include/thrust/system/omp/vector.h +99 -0
- cuda/cccl/headers/include/thrust/system/system_error.h +185 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/adjacent_difference.h +54 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/assign_value.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/binary_search.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/copy.h +50 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/copy.inl +73 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.h +47 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.inl +136 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/count.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/equal.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/execution_policy.h +109 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/extrema.h +66 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/fill.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/find.h +49 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.h +51 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.inl +91 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/gather.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/generate.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/get_value.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/inner_product.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/iter_swap.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/logical.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/malloc_and_free.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/memory.inl +94 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/merge.h +77 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/merge.inl +327 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/mismatch.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/partition.h +84 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/partition.inl +98 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/per_device_resource.h +29 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.h +54 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.inl +137 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.h +61 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.inl +400 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_intervals.h +140 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/remove.h +76 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/remove.inl +87 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/replace.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/reverse.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/scan.h +59 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/scan.inl +312 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/scan_by_key.h +33 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/scatter.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/sequence.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/set_operations.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/sort.h +60 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/sort.inl +295 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/swap_ranges.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/tabulate.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/temporary_buffer.h +29 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/transform.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/transform_reduce.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/transform_scan.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_copy.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_fill.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/unique.h +60 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/unique.inl +71 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.h +67 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.inl +75 -0
- cuda/cccl/headers/include/thrust/system/tbb/execution_policy.h +62 -0
- cuda/cccl/headers/include/thrust/system/tbb/memory.h +111 -0
- cuda/cccl/headers/include/thrust/system/tbb/memory_resource.h +75 -0
- cuda/cccl/headers/include/thrust/system/tbb/pointer.h +124 -0
- cuda/cccl/headers/include/thrust/system/tbb/vector.h +99 -0
- cuda/cccl/headers/include/thrust/system_error.h +57 -0
- cuda/cccl/headers/include/thrust/tabulate.h +125 -0
- cuda/cccl/headers/include/thrust/transform.h +1045 -0
- cuda/cccl/headers/include/thrust/transform_reduce.h +190 -0
- cuda/cccl/headers/include/thrust/transform_scan.h +442 -0
- cuda/cccl/headers/include/thrust/tuple.h +139 -0
- cuda/cccl/headers/include/thrust/type_traits/integer_sequence.h +261 -0
- cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +154 -0
- cuda/cccl/headers/include/thrust/type_traits/is_execution_policy.h +65 -0
- cuda/cccl/headers/include/thrust/type_traits/is_operator_less_or_greater_function_object.h +184 -0
- cuda/cccl/headers/include/thrust/type_traits/is_operator_plus_function_object.h +116 -0
- cuda/cccl/headers/include/thrust/type_traits/is_trivially_relocatable.h +336 -0
- cuda/cccl/headers/include/thrust/type_traits/logical_metafunctions.h +42 -0
- cuda/cccl/headers/include/thrust/type_traits/unwrap_contiguous_iterator.h +63 -0
- cuda/cccl/headers/include/thrust/uninitialized_copy.h +300 -0
- cuda/cccl/headers/include/thrust/uninitialized_fill.h +268 -0
- cuda/cccl/headers/include/thrust/unique.h +1088 -0
- cuda/cccl/headers/include/thrust/universal_allocator.h +93 -0
- cuda/cccl/headers/include/thrust/universal_ptr.h +34 -0
- cuda/cccl/headers/include/thrust/universal_vector.h +71 -0
- cuda/cccl/headers/include/thrust/version.h +93 -0
- cuda/cccl/headers/include/thrust/zip_function.h +176 -0
- cuda/cccl/headers/include_paths.py +51 -0
- cuda/cccl/parallel/__init__.py +9 -0
- cuda/cccl/parallel/experimental/__init__.py +24 -0
- cuda/cccl/py.typed +0 -0
- cuda/compute/__init__.py +79 -0
- cuda/compute/_bindings.py +79 -0
- cuda/compute/_bindings.pyi +475 -0
- cuda/compute/_bindings_impl.pyx +2273 -0
- cuda/compute/_caching.py +71 -0
- cuda/compute/_cccl_interop.py +422 -0
- cuda/compute/_utils/__init__.py +0 -0
- cuda/compute/_utils/protocols.py +132 -0
- cuda/compute/_utils/temp_storage_buffer.py +86 -0
- cuda/compute/algorithms/__init__.py +54 -0
- cuda/compute/algorithms/_histogram.py +243 -0
- cuda/compute/algorithms/_merge_sort.py +225 -0
- cuda/compute/algorithms/_radix_sort.py +312 -0
- cuda/compute/algorithms/_reduce.py +182 -0
- cuda/compute/algorithms/_scan.py +331 -0
- cuda/compute/algorithms/_segmented_reduce.py +257 -0
- cuda/compute/algorithms/_three_way_partition.py +261 -0
- cuda/compute/algorithms/_transform.py +329 -0
- cuda/compute/algorithms/_unique_by_key.py +252 -0
- cuda/compute/cccl/.gitkeep +0 -0
- cuda/compute/cu12/_bindings_impl.cp313-win_amd64.pyd +0 -0
- cuda/compute/cu12/cccl/cccl.c.parallel.dll +0 -0
- cuda/compute/cu12/cccl/cccl.c.parallel.lib +0 -0
- cuda/compute/cu13/_bindings_impl.cp313-win_amd64.pyd +0 -0
- cuda/compute/cu13/cccl/cccl.c.parallel.dll +0 -0
- cuda/compute/cu13/cccl/cccl.c.parallel.lib +0 -0
- cuda/compute/iterators/__init__.py +21 -0
- cuda/compute/iterators/_factories.py +219 -0
- cuda/compute/iterators/_iterators.py +817 -0
- cuda/compute/iterators/_zip_iterator.py +199 -0
- cuda/compute/numba_utils.py +53 -0
- cuda/compute/op.py +3 -0
- cuda/compute/struct.py +272 -0
- cuda/compute/typing.py +37 -0
- cuda/coop/__init__.py +8 -0
- cuda/coop/_caching.py +48 -0
- cuda/coop/_common.py +275 -0
- cuda/coop/_nvrtc.py +92 -0
- cuda/coop/_scan_op.py +181 -0
- cuda/coop/_types.py +937 -0
- cuda/coop/_typing.py +107 -0
- cuda/coop/block/__init__.py +39 -0
- cuda/coop/block/_block_exchange.py +251 -0
- cuda/coop/block/_block_load_store.py +215 -0
- cuda/coop/block/_block_merge_sort.py +125 -0
- cuda/coop/block/_block_radix_sort.py +214 -0
- cuda/coop/block/_block_reduce.py +294 -0
- cuda/coop/block/_block_scan.py +983 -0
- cuda/coop/warp/__init__.py +9 -0
- cuda/coop/warp/_warp_merge_sort.py +92 -0
- cuda/coop/warp/_warp_reduce.py +153 -0
- cuda/coop/warp/_warp_scan.py +78 -0
- cuda_cccl-0.3.3.dist-info/METADATA +41 -0
- cuda_cccl-0.3.3.dist-info/RECORD +1968 -0
- cuda_cccl-0.3.3.dist-info/WHEEL +5 -0
- cuda_cccl-0.3.3.dist-info/licenses/LICENSE +1 -0
|
@@ -0,0 +1,1671 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2008-2013 NVIDIA Corporation
|
|
3
|
+
*
|
|
4
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
* you may not use this file except in compliance with the License.
|
|
6
|
+
* You may obtain a copy of the License at
|
|
7
|
+
*
|
|
8
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
*
|
|
10
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
* See the License for the specific language governing permissions and
|
|
14
|
+
* limitations under the License.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
/*! \file scan.h
|
|
18
|
+
* \brief Functions for computing prefix sums
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
#pragma once
|
|
22
|
+
|
|
23
|
+
#include <thrust/detail/config.h>
|
|
24
|
+
|
|
25
|
+
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
|
|
26
|
+
# pragma GCC system_header
|
|
27
|
+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
|
|
28
|
+
# pragma clang system_header
|
|
29
|
+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
|
|
30
|
+
# pragma system_header
|
|
31
|
+
#endif // no system header
|
|
32
|
+
#include <thrust/detail/execution_policy.h>
|
|
33
|
+
|
|
34
|
+
THRUST_NAMESPACE_BEGIN
|
|
35
|
+
|
|
36
|
+
/*! \addtogroup algorithms
|
|
37
|
+
*/
|
|
38
|
+
|
|
39
|
+
/*! \addtogroup prefixsums Prefix Sums
|
|
40
|
+
* \ingroup algorithms
|
|
41
|
+
* \{
|
|
42
|
+
*/
|
|
43
|
+
|
|
44
|
+
/*! \p inclusive_scan computes an inclusive prefix sum operation. The
|
|
45
|
+
* term 'inclusive' means that each result includes the corresponding
|
|
46
|
+
* input operand in the partial sum. More precisely, <tt>*first</tt> is
|
|
47
|
+
* assigned to <tt>*result</tt> and the sum of <tt>*first</tt> and
|
|
48
|
+
* <tt>*(first + 1)</tt> is assigned to <tt>*(result + 1)</tt>, and so on.
|
|
49
|
+
* This version of \p inclusive_scan assumes plus as the associative operator.
|
|
50
|
+
* When the input and output sequences are the same, the scan is performed
|
|
51
|
+
* in-place.
|
|
52
|
+
*
|
|
53
|
+
* \p inclusive_scan is similar to \c std::partial_sum in the STL. The primary
|
|
54
|
+
* difference between the two functions is that \c std::partial_sum guarantees
|
|
55
|
+
* a serial summation order, while \p inclusive_scan requires associativity of
|
|
56
|
+
* the binary operation to parallelize the prefix sum.
|
|
57
|
+
*
|
|
58
|
+
* Results are not deterministic for pseudo-associative operators (e.g.,
|
|
59
|
+
* addition of floating-point types). Results for pseudo-associative
|
|
60
|
+
* operators may vary from run to run.
|
|
61
|
+
*
|
|
62
|
+
* The algorithm's execution is parallelized as determined by \p exec.
|
|
63
|
+
*
|
|
64
|
+
* \param exec The execution policy to use for parallelization.
|
|
65
|
+
* \param first The beginning of the input sequence.
|
|
66
|
+
* \param last The end of the input sequence.
|
|
67
|
+
* \param result The beginning of the output sequence.
|
|
68
|
+
* \return The end of the output sequence.
|
|
69
|
+
*
|
|
70
|
+
* \tparam DerivedPolicy The name of the derived execution policy.
|
|
71
|
+
* \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
|
|
72
|
+
* Iterator</a> and \c InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type.
|
|
73
|
+
* \tparam OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output
|
|
74
|
+
* Iterator</a>, and if \c x and \c y are objects of \c OutputIterator's \c value_type, then <tt>x + y</tt> is defined.
|
|
75
|
+
* If \c T is \c OutputIterator's \c value_type, then <tt>T(0)</tt> is defined.
|
|
76
|
+
*
|
|
77
|
+
* \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range
|
|
78
|
+
* <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
|
|
79
|
+
*
|
|
80
|
+
* The following code snippet demonstrates how to use \p inclusive_scan to compute an in-place
|
|
81
|
+
* prefix sum using the \p thrust::host execution policy for parallelization:
|
|
82
|
+
*
|
|
83
|
+
* \code
|
|
84
|
+
* #include <thrust/scan.h>
|
|
85
|
+
* #include <thrust/execution_policy.h>
|
|
86
|
+
* ...
|
|
87
|
+
*
|
|
88
|
+
* int data[6] = {1, 0, 2, 2, 1, 3};
|
|
89
|
+
*
|
|
90
|
+
* thrust::inclusive_scan(thrust::host, data, data + 6, data); // in-place scan
|
|
91
|
+
*
|
|
92
|
+
* // data is now {1, 1, 3, 5, 6, 9}
|
|
93
|
+
* \endcode
|
|
94
|
+
*
|
|
95
|
+
* \see https://en.cppreference.com/w/cpp/algorithm/partial_sum
|
|
96
|
+
*
|
|
97
|
+
*/
|
|
98
|
+
template <typename DerivedPolicy, typename InputIterator, typename OutputIterator>
|
|
99
|
+
_CCCL_HOST_DEVICE OutputIterator inclusive_scan(
|
|
100
|
+
const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
|
|
101
|
+
InputIterator first,
|
|
102
|
+
InputIterator last,
|
|
103
|
+
OutputIterator result);
|
|
104
|
+
|
|
105
|
+
/*! \p inclusive_scan computes an inclusive prefix sum operation. The
|
|
106
|
+
* term 'inclusive' means that each result includes the corresponding
|
|
107
|
+
* input operand in the partial sum. More precisely, <tt>*first</tt> is
|
|
108
|
+
* assigned to <tt>*result</tt> and the sum of <tt>*first</tt> and
|
|
109
|
+
* <tt>*(first + 1)</tt> is assigned to <tt>*(result + 1)</tt>, and so on.
|
|
110
|
+
* This version of \p inclusive_scan assumes plus as the associative operator.
|
|
111
|
+
* When the input and output sequences are the same, the scan is performed
|
|
112
|
+
* in-place.
|
|
113
|
+
*
|
|
114
|
+
* \p inclusive_scan is similar to \c std::partial_sum in the STL. The primary
|
|
115
|
+
* difference between the two functions is that \c std::partial_sum guarantees
|
|
116
|
+
* a serial summation order, while \p inclusive_scan requires associativity of
|
|
117
|
+
* the binary operation to parallelize the prefix sum.
|
|
118
|
+
*
|
|
119
|
+
* Results are not deterministic for pseudo-associative operators (e.g.,
|
|
120
|
+
* addition of floating-point types). Results for pseudo-associative
|
|
121
|
+
* operators may vary from run to run.
|
|
122
|
+
*
|
|
123
|
+
* \param first The beginning of the input sequence.
|
|
124
|
+
* \param last The end of the input sequence.
|
|
125
|
+
* \param result The beginning of the output sequence.
|
|
126
|
+
* \return The end of the output sequence.
|
|
127
|
+
*
|
|
128
|
+
* \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
|
|
129
|
+
* Iterator</a> and \c InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type.
|
|
130
|
+
* \tparam OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output
|
|
131
|
+
* Iterator</a>, and if \c x and \c y are objects of \c OutputIterator's \c value_type, then <tt>x + y</tt> is defined.
|
|
132
|
+
* If \c T is \c OutputIterator's \c value_type, then <tt>T(0)</tt> is defined.
|
|
133
|
+
*
|
|
134
|
+
* \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range
|
|
135
|
+
* <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
|
|
136
|
+
*
|
|
137
|
+
* The following code snippet demonstrates how to use \p inclusive_scan to compute an in-place prefix sum:
|
|
138
|
+
*
|
|
139
|
+
* \code
|
|
140
|
+
* #include <thrust/scan.h>
|
|
141
|
+
*
|
|
142
|
+
* int data[6] = {1, 0, 2, 2, 1, 3};
|
|
143
|
+
*
|
|
144
|
+
* thrust::inclusive_scan(data, data + 6, data); // in-place scan
|
|
145
|
+
*
|
|
146
|
+
* // data is now {1, 1, 3, 5, 6, 9}
|
|
147
|
+
* \endcode
|
|
148
|
+
*
|
|
149
|
+
* \see https://en.cppreference.com/w/cpp/algorithm/partial_sum
|
|
150
|
+
*
|
|
151
|
+
*/
|
|
152
|
+
template <typename InputIterator, typename OutputIterator>
|
|
153
|
+
OutputIterator inclusive_scan(InputIterator first, InputIterator last, OutputIterator result);
|
|
154
|
+
|
|
155
|
+
/*! \p inclusive_scan computes an inclusive prefix sum operation. The
|
|
156
|
+
* term 'inclusive' means that each result includes the corresponding
|
|
157
|
+
* input operand in the partial sum. When the input and output sequences
|
|
158
|
+
* are the same, the scan is performed in-place.
|
|
159
|
+
*
|
|
160
|
+
* \p inclusive_scan is similar to \c std::partial_sum in the STL. The primary
|
|
161
|
+
* difference between the two functions is that \c std::partial_sum guarantees
|
|
162
|
+
* a serial summation order, while \p inclusive_scan requires associativity of
|
|
163
|
+
* the binary operation to parallelize the prefix sum.
|
|
164
|
+
*
|
|
165
|
+
* Results are not deterministic for pseudo-associative operators (e.g.,
|
|
166
|
+
* addition of floating-point types). Results for pseudo-associative
|
|
167
|
+
* operators may vary from run to run.
|
|
168
|
+
*
|
|
169
|
+
* The algorithm's execution is parallelized as determined by \p exec.
|
|
170
|
+
*
|
|
171
|
+
* \param exec The execution policy to use for parallelization.
|
|
172
|
+
* \param first The beginning of the input sequence.
|
|
173
|
+
* \param last The end of the input sequence.
|
|
174
|
+
* \param result The beginning of the output sequence.
|
|
175
|
+
* \param binary_op The associative operator used to 'sum' values.
|
|
176
|
+
* \return The end of the output sequence.
|
|
177
|
+
*
|
|
178
|
+
* \tparam DerivedPolicy The name of the derived execution policy.
|
|
179
|
+
* \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
|
|
180
|
+
* Iterator</a> and \c InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type.
|
|
181
|
+
* \tparam OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>
|
|
182
|
+
* and \c OutputIterator's \c value_type is convertible to
|
|
183
|
+
* both \c AssociativeOperator's first and second argument type.
|
|
184
|
+
* \tparam AssociativeOperator The function's return type must be convertible to \c OutputIterator's \c value_type.
|
|
185
|
+
*
|
|
186
|
+
* \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range
|
|
187
|
+
* <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
|
|
188
|
+
*
|
|
189
|
+
* The following code snippet demonstrates how to use \p inclusive_scan to compute an in-place
|
|
190
|
+
* prefix sum using the \p thrust::host execution policy for parallelization:
|
|
191
|
+
*
|
|
192
|
+
* \code
|
|
193
|
+
* int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8};
|
|
194
|
+
*
|
|
195
|
+
* ::cuda::maximum<int> binary_op;
|
|
196
|
+
*
|
|
197
|
+
* thrust::inclusive_scan(thrust::host, data, data + 10, data, binary_op); // in-place scan
|
|
198
|
+
*
|
|
199
|
+
* // data is now {-5, 0, 2, 2, 2, 4, 4, 4, 4, 8}
|
|
200
|
+
* \endcode
|
|
201
|
+
*
|
|
202
|
+
* \see https://en.cppreference.com/w/cpp/algorithm/partial_sum
|
|
203
|
+
*/
|
|
204
|
+
template <typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename AssociativeOperator>
|
|
205
|
+
_CCCL_HOST_DEVICE OutputIterator inclusive_scan(
|
|
206
|
+
const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
|
|
207
|
+
InputIterator first,
|
|
208
|
+
InputIterator last,
|
|
209
|
+
OutputIterator result,
|
|
210
|
+
AssociativeOperator binary_op);
|
|
211
|
+
|
|
212
|
+
/*! \p inclusive_scan computes an inclusive prefix sum operation. The
|
|
213
|
+
* term 'inclusive' means that each result includes the corresponding
|
|
214
|
+
* input operand in the partial sum. When the input and output sequences
|
|
215
|
+
* are the same, the scan is performed in-place.
|
|
216
|
+
*
|
|
217
|
+
* \p inclusive_scan is similar to \c std::partial_sum in the STL. The primary
|
|
218
|
+
* difference between the two functions is that \c std::partial_sum guarantees
|
|
219
|
+
* a serial summation order, while \p inclusive_scan requires associativity of
|
|
220
|
+
* the binary operation to parallelize the prefix sum.
|
|
221
|
+
*
|
|
222
|
+
* Results are not deterministic for pseudo-associative operators (e.g.,
|
|
223
|
+
* addition of floating-point types). Results for pseudo-associative
|
|
224
|
+
* operators may vary from run to run.
|
|
225
|
+
*
|
|
226
|
+
* \param first The beginning of the input sequence.
|
|
227
|
+
* \param last The end of the input sequence.
|
|
228
|
+
* \param result The beginning of the output sequence.
|
|
229
|
+
* \param binary_op The associative operator used to 'sum' values.
|
|
230
|
+
* \return The end of the output sequence.
|
|
231
|
+
*
|
|
232
|
+
* \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
|
|
233
|
+
* Iterator</a> and \c InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type.
|
|
234
|
+
* \tparam OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>
|
|
235
|
+
* and \c OutputIterator's \c value_type is convertible to
|
|
236
|
+
* both \c AssociativeOperator's first and second argument type.
|
|
237
|
+
* \tparam AssociativeOperator The function's return type must be convertible to \c OutputIterator's \c value_type.
|
|
238
|
+
*
|
|
239
|
+
* \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range
|
|
240
|
+
* <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
|
|
241
|
+
*
|
|
242
|
+
* The following code snippet demonstrates how to use \p inclusive_scan with a user-supplied associative operator:
|
|
243
|
+
*
|
|
244
|
+
* \code
|
|
245
|
+
* int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8};
|
|
246
|
+
*
|
|
247
|
+
* ::cuda::maximum<int> binary_op;
|
|
248
|
+
*
|
|
249
|
+
* thrust::inclusive_scan(data, data + 10, data, binary_op); // in-place scan
|
|
250
|
+
*
|
|
251
|
+
* // data is now {-5, 0, 2, 2, 2, 4, 4, 4, 4, 8}
|
|
252
|
+
* \endcode
|
|
253
|
+
*
|
|
254
|
+
* \see https://en.cppreference.com/w/cpp/algorithm/partial_sum
|
|
255
|
+
*/
|
|
256
|
+
template <typename InputIterator, typename OutputIterator, typename AssociativeOperator>
|
|
257
|
+
OutputIterator
|
|
258
|
+
inclusive_scan(InputIterator first, InputIterator last, OutputIterator result, AssociativeOperator binary_op);
|
|
259
|
+
|
|
260
|
+
/*! \p inclusive_scan computes an inclusive prefix sum operation. The
|
|
261
|
+
* term 'inclusive' means that each result includes the corresponding
|
|
262
|
+
* input operand in the partial sum. More precisely,
|
|
263
|
+
* <tt>binary_op(init, \*first)</tt> is assigned to <tt>*result</tt>
|
|
264
|
+
* and so on. This version of \p inclusive_scan requires both an associative
|
|
265
|
+
* operator and an initial value \p init. When the input and
|
|
266
|
+
* output sequences are the same, the scan is performed in-place.
|
|
267
|
+
*
|
|
268
|
+
* Results are not deterministic for pseudo-associative operators (e.g.,
|
|
269
|
+
* addition of floating-point types). Results for pseudo-associative
|
|
270
|
+
* operators may vary from run to run.
|
|
271
|
+
*
|
|
272
|
+
* The algorithm's execution is parallelized as determined by \p exec.
|
|
273
|
+
*
|
|
274
|
+
* \param exec The execution policy to use for parallelization.
|
|
275
|
+
* \param first The beginning of the input sequence.
|
|
276
|
+
* \param last The end of the input sequence.
|
|
277
|
+
* \param result The beginning of the output sequence.
|
|
278
|
+
* \param init The initial value.
|
|
279
|
+
* \param binary_op The associative operator used to 'sum' values.
|
|
280
|
+
* \return The end of the output sequence.
|
|
281
|
+
*
|
|
282
|
+
* \tparam DerivedPolicy The name of the derived execution policy.
|
|
283
|
+
* \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
|
|
284
|
+
* Iterator</a> and \c InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type.
|
|
285
|
+
* \tparam OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>
|
|
286
|
+
* and \c OutputIterator's \c value_type is convertible to both \c AssociativeOperator's first and second argument type.
|
|
287
|
+
* \tparam T is convertible to \c OutputIterator's \c value_type.
|
|
288
|
+
* \tparam AssociativeOperator The function's return type must be convertible to \c OutputIterator's \c value_type.
|
|
289
|
+
*
|
|
290
|
+
* \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range <tt>[result, result + (last -
|
|
291
|
+
* first))</tt> shall not overlap otherwise.
|
|
292
|
+
*
|
|
293
|
+
* The following code snippet demonstrates how to use \p inclusive_scan with initial value to compute an in-place
|
|
294
|
+
* prefix sum using the \p thrust::host execution policy for parallelization:
|
|
295
|
+
*
|
|
296
|
+
* \code
|
|
297
|
+
* int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8};
|
|
298
|
+
* thrust::inclusive_scan(thrust::host, data, data + 10, data, 1, ::cuda::maximum<>{}); // in-place scan
|
|
299
|
+
* // data is now {1, 1, 2, 2, 2, 4, 4, 4, 4, 8}
|
|
300
|
+
* \endcode
|
|
301
|
+
*
|
|
302
|
+
* \see https://en.cppreference.com/w/cpp/algorithm/partial_sum
|
|
303
|
+
*/
|
|
304
|
+
template <typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename T, typename AssociativeOperator>
|
|
305
|
+
_CCCL_HOST_DEVICE OutputIterator inclusive_scan(
|
|
306
|
+
const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
|
|
307
|
+
InputIterator first,
|
|
308
|
+
InputIterator last,
|
|
309
|
+
OutputIterator result,
|
|
310
|
+
T init,
|
|
311
|
+
AssociativeOperator binary_op);
|
|
312
|
+
|
|
313
|
+
/*! \p inclusive_scan computes an inclusive prefix sum operation. The
|
|
314
|
+
* term 'inclusive' means that each result includes the corresponding
|
|
315
|
+
* input operand in the partial sum. More precisely,
|
|
316
|
+
* <tt>binary_op(init, \*first)</tt> is assigned to <tt>*result</tt>
|
|
317
|
+
* and so on. This version of \p inclusive_scan requires both an associative
|
|
318
|
+
* operator and an initial value \p init. When the input and
|
|
319
|
+
* output sequences are the same, the scan is performed in-place.
|
|
320
|
+
*
|
|
321
|
+
* Results are not deterministic for pseudo-associative operators (e.g.,
|
|
322
|
+
* addition of floating-point types). Results for pseudo-associative
|
|
323
|
+
* operators may vary from run to run.
|
|
324
|
+
*
|
|
325
|
+
* \param first The beginning of the input sequence.
|
|
326
|
+
* \param last The end of the input sequence.
|
|
327
|
+
* \param result The beginning of the output sequence.
|
|
328
|
+
* \param init The initial value.
|
|
329
|
+
* \param binary_op The associative operator used to 'sum' values.
|
|
330
|
+
* \return The end of the output sequence.
|
|
331
|
+
*
|
|
332
|
+
* \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
|
|
333
|
+
* Iterator</a> and \c InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type.
|
|
334
|
+
* \tparam OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>
|
|
335
|
+
* and \c OutputIterator's \c value_type is convertible to both \c AssociativeOperator's first and second argument type.
|
|
336
|
+
* \tparam T is convertible to \c OutputIterator's \c value_type.
|
|
337
|
+
* \tparam AssociativeOperator The function's return type must be convertible to \c OutputIterator's \c value_type.
|
|
338
|
+
*
|
|
339
|
+
* \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range <tt>[result, result + (last -
|
|
340
|
+
* first))</tt> shall not overlap otherwise.
|
|
341
|
+
*
|
|
342
|
+
* The following code snippet demonstrates how to use \p inclusive_scan with initial value:
|
|
343
|
+
*
|
|
344
|
+
* \code
|
|
345
|
+
* int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8};
|
|
346
|
+
* ::cuda::maximum<int> binary_op;
|
|
347
|
+
* thrust::inclusive_scan(data, data + 10, data, 1, binary_op); // in-place scan
|
|
348
|
+
* // data is now {1, 1, 2, 2, 2, 4, 4, 4, 4, 8}
|
|
349
|
+
* \endcode
|
|
350
|
+
*
|
|
351
|
+
* \see https://en.cppreference.com/w/cpp/algorithm/partial_sum
|
|
352
|
+
*/
|
|
353
|
+
template <typename InputIterator, typename OutputIterator, typename T, typename AssociativeOperator>
|
|
354
|
+
OutputIterator
|
|
355
|
+
inclusive_scan(InputIterator first, InputIterator last, OutputIterator result, T init, AssociativeOperator binary_op);
|
|
356
|
+
|
|
357
|
+
/*! \p exclusive_scan computes an exclusive prefix sum operation. The
|
|
358
|
+
* term 'exclusive' means that each result does not include the
|
|
359
|
+
* corresponding input operand in the partial sum. More precisely,
|
|
360
|
+
* <tt>0</tt> is assigned to <tt>*result</tt> and the sum of
|
|
361
|
+
* <tt>0</tt> and <tt>*first</tt> is assigned to <tt>*(result + 1)</tt>,
|
|
362
|
+
* and so on. This version of \p exclusive_scan assumes plus as the
|
|
363
|
+
* associative operator and \c 0 as the initial value. When the input and
|
|
364
|
+
* output sequences are the same, the scan is performed in-place.
|
|
365
|
+
*
|
|
366
|
+
* Results are not deterministic for pseudo-associative operators (e.g.,
|
|
367
|
+
* addition of floating-point types). Results for pseudo-associative
|
|
368
|
+
* operators may vary from run to run.
|
|
369
|
+
*
|
|
370
|
+
* The algorithm's execution is parallelized as determined by \p exec.
|
|
371
|
+
*
|
|
372
|
+
* \param exec The execution policy to use for parallelization.
|
|
373
|
+
* \param first The beginning of the input sequence.
|
|
374
|
+
* \param last The end of the input sequence.
|
|
375
|
+
* \param result The beginning of the output sequence.
|
|
376
|
+
* \return The end of the output sequence.
|
|
377
|
+
*
|
|
378
|
+
* \tparam DerivedPolicy The name of the derived execution policy.
|
|
379
|
+
* \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
|
|
380
|
+
* Iterator</a> and \c InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type. \tparam
|
|
381
|
+
* OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output
|
|
382
|
+
* Iterator</a>, and if \c x and \c y are objects of \c OutputIterator's \c value_type, then <tt>x + y</tt> is defined.
|
|
383
|
+
* If \c T is \c OutputIterator's \c value_type, then <tt>T(0)</tt> is defined.
|
|
384
|
+
*
|
|
385
|
+
* \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range
|
|
386
|
+
* <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
|
|
387
|
+
*
|
|
388
|
+
* The following code snippet demonstrates how to use \p exclusive_scan to compute an in-place
|
|
389
|
+
* prefix sum using the \p thrust::host execution policy for parallelization:
|
|
390
|
+
*
|
|
391
|
+
* \code
|
|
392
|
+
* #include <thrust/scan.h>
|
|
393
|
+
* #include <thrust/execution_policy.h>
|
|
394
|
+
* ...
|
|
395
|
+
*
|
|
396
|
+
* int data[6] = {1, 0, 2, 2, 1, 3};
|
|
397
|
+
*
|
|
398
|
+
* thrust::exclusive_scan(thrust::host, data, data + 6, data); // in-place scan
|
|
399
|
+
*
|
|
400
|
+
* // data is now {0, 1, 1, 3, 5, 6}
|
|
401
|
+
* \endcode
|
|
402
|
+
*
|
|
403
|
+
* \see https://en.cppreference.com/w/cpp/algorithm/partial_sum
|
|
404
|
+
*/
|
|
405
|
+
template <typename DerivedPolicy, typename InputIterator, typename OutputIterator>
|
|
406
|
+
_CCCL_HOST_DEVICE OutputIterator exclusive_scan(
|
|
407
|
+
const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
|
|
408
|
+
InputIterator first,
|
|
409
|
+
InputIterator last,
|
|
410
|
+
OutputIterator result);
|
|
411
|
+
|
|
412
|
+
/*! \p exclusive_scan computes an exclusive prefix sum operation. The
|
|
413
|
+
* term 'exclusive' means that each result does not include the
|
|
414
|
+
* corresponding input operand in the partial sum. More precisely,
|
|
415
|
+
* <tt>0</tt> is assigned to <tt>*result</tt> and the sum of
|
|
416
|
+
* <tt>0</tt> and <tt>*first</tt> is assigned to <tt>*(result + 1)</tt>,
|
|
417
|
+
* and so on. This version of \p exclusive_scan assumes plus as the
|
|
418
|
+
* associative operator and \c 0 as the initial value. When the input and
|
|
419
|
+
* output sequences are the same, the scan is performed in-place.
|
|
420
|
+
*
|
|
421
|
+
* Results are not deterministic for pseudo-associative operators (e.g.,
|
|
422
|
+
* addition of floating-point types). Results for pseudo-associative
|
|
423
|
+
* operators may vary from run to run.
|
|
424
|
+
*
|
|
425
|
+
* \param first The beginning of the input sequence.
|
|
426
|
+
* \param last The end of the input sequence.
|
|
427
|
+
* \param result The beginning of the output sequence.
|
|
428
|
+
* \return The end of the output sequence.
|
|
429
|
+
*
|
|
430
|
+
* \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
|
|
431
|
+
* Iterator</a> and \c InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type. \tparam
|
|
432
|
+
* OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output
|
|
433
|
+
* Iterator</a>, and if \c x and \c y are objects of \c OutputIterator's \c value_type, then <tt>x + y</tt> is defined.
|
|
434
|
+
* If \c T is \c OutputIterator's \c value_type, then <tt>T(0)</tt> is defined.
|
|
435
|
+
*
|
|
436
|
+
* \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range
|
|
437
|
+
* <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
|
|
438
|
+
*
|
|
439
|
+
* The following code snippet demonstrates how to use \p exclusive_scan:
|
|
440
|
+
*
|
|
441
|
+
* \code
|
|
442
|
+
* #include <thrust/scan.h>
|
|
443
|
+
*
|
|
444
|
+
* int data[6] = {1, 0, 2, 2, 1, 3};
|
|
445
|
+
*
|
|
446
|
+
* thrust::exclusive_scan(data, data + 6, data); // in-place scan
|
|
447
|
+
*
|
|
448
|
+
* // data is now {0, 1, 1, 3, 5, 6}
|
|
449
|
+
* \endcode
|
|
450
|
+
*
|
|
451
|
+
* \see https://en.cppreference.com/w/cpp/algorithm/partial_sum
|
|
452
|
+
*/
|
|
453
|
+
template <typename InputIterator, typename OutputIterator>
|
|
454
|
+
OutputIterator exclusive_scan(InputIterator first, InputIterator last, OutputIterator result);
|
|
455
|
+
|
|
456
|
+
/*! \p exclusive_scan computes an exclusive prefix sum operation. The
|
|
457
|
+
* term 'exclusive' means that each result does not include the
|
|
458
|
+
* corresponding input operand in the partial sum. More precisely,
|
|
459
|
+
* \p init is assigned to <tt>*result</tt> and the sum of \p init and
|
|
460
|
+
* <tt>*first</tt> is assigned to <tt>*(result + 1)</tt>, and so on.
|
|
461
|
+
* This version of \p exclusive_scan assumes plus as the associative
|
|
462
|
+
* operator but requires an initial value \p init. When the input and
|
|
463
|
+
* output sequences are the same, the scan is performed in-place.
|
|
464
|
+
*
|
|
465
|
+
* Results are not deterministic for pseudo-associative operators (e.g.,
|
|
466
|
+
* addition of floating-point types). Results for pseudo-associative
|
|
467
|
+
* operators may vary from run to run.
|
|
468
|
+
*
|
|
469
|
+
* The algorithm's execution is parallelized as determined by \p exec.
|
|
470
|
+
*
|
|
471
|
+
* \param exec The execution policy to use for parallelization.
|
|
472
|
+
* \param first The beginning of the input sequence.
|
|
473
|
+
* \param last The end of the input sequence.
|
|
474
|
+
* \param result The beginning of the output sequence.
|
|
475
|
+
* \param init The initial value.
|
|
476
|
+
* \return The end of the output sequence.
|
|
477
|
+
*
|
|
478
|
+
* \tparam DerivedPolicy The name of the derived execution policy.
|
|
479
|
+
* \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
|
|
480
|
+
* Iterator</a> and \c InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type. \tparam
|
|
481
|
+
* OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output
|
|
482
|
+
* Iterator</a>, and if \c x and \c y are objects of \c OutputIterator's \c value_type, then <tt>x + y</tt> is defined.
|
|
483
|
+
* \tparam T is convertible to \c OutputIterator's \c value_type.
|
|
484
|
+
*
|
|
485
|
+
* \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range
|
|
486
|
+
* <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
|
|
487
|
+
*
|
|
488
|
+
* The following code snippet demonstrates how to use \p exclusive_scan to compute an in-place
|
|
489
|
+
* prefix sum using the \p thrust::host execution policy for parallelization:
|
|
490
|
+
*
|
|
491
|
+
* \code
|
|
492
|
+
* #include <thrust/scan.h>
|
|
493
|
+
* #include <thrust/execution_policy.h>
|
|
494
|
+
*
|
|
495
|
+
* int data[6] = {1, 0, 2, 2, 1, 3};
|
|
496
|
+
*
|
|
497
|
+
* thrust::exclusive_scan(thrust::host, data, data + 6, data, 4); // in-place scan
|
|
498
|
+
*
|
|
499
|
+
* // data is now {4, 5, 5, 7, 9, 10}
|
|
500
|
+
* \endcode
|
|
501
|
+
*
|
|
502
|
+
* \see https://en.cppreference.com/w/cpp/algorithm/partial_sum
|
|
503
|
+
*/
|
|
504
|
+
template <typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename T>
|
|
505
|
+
_CCCL_HOST_DEVICE OutputIterator exclusive_scan(
|
|
506
|
+
const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
|
|
507
|
+
InputIterator first,
|
|
508
|
+
InputIterator last,
|
|
509
|
+
OutputIterator result,
|
|
510
|
+
T init);
|
|
511
|
+
|
|
512
|
+
/*! \p exclusive_scan computes an exclusive prefix sum operation. The
|
|
513
|
+
* term 'exclusive' means that each result does not include the
|
|
514
|
+
* corresponding input operand in the partial sum. More precisely,
|
|
515
|
+
* \p init is assigned to <tt>*result</tt> and the sum of \p init and
|
|
516
|
+
* <tt>*first</tt> is assigned to <tt>*(result + 1)</tt>, and so on.
|
|
517
|
+
* This version of \p exclusive_scan assumes plus as the associative
|
|
518
|
+
* operator but requires an initial value \p init. When the input and
|
|
519
|
+
* output sequences are the same, the scan is performed in-place.
|
|
520
|
+
*
|
|
521
|
+
* Results are not deterministic for pseudo-associative operators (e.g.,
|
|
522
|
+
* addition of floating-point types). Results for pseudo-associative
|
|
523
|
+
* operators may vary from run to run.
|
|
524
|
+
*
|
|
525
|
+
* \param first The beginning of the input sequence.
|
|
526
|
+
* \param last The end of the input sequence.
|
|
527
|
+
* \param result The beginning of the output sequence.
|
|
528
|
+
* \param init The initial value.
|
|
529
|
+
* \return The end of the output sequence.
|
|
530
|
+
*
|
|
531
|
+
* \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
|
|
532
|
+
* Iterator</a> and \c InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type. \tparam
|
|
533
|
+
* OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output
|
|
534
|
+
* Iterator</a>, and if \c x and \c y are objects of \c OutputIterator's \c value_type, then <tt>x + y</tt> is defined.
|
|
535
|
+
* \tparam T is convertible to \c OutputIterator's \c value_type.
|
|
536
|
+
*
|
|
537
|
+
* \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range
|
|
538
|
+
* <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
|
|
539
|
+
*
|
|
540
|
+
* The following code snippet demonstrates how to use \p exclusive_scan:
|
|
541
|
+
*
|
|
542
|
+
* \code
|
|
543
|
+
* #include <thrust/scan.h>
|
|
544
|
+
*
|
|
545
|
+
* int data[6] = {1, 0, 2, 2, 1, 3};
|
|
546
|
+
*
|
|
547
|
+
* thrust::exclusive_scan(data, data + 6, data, 4); // in-place scan
|
|
548
|
+
*
|
|
549
|
+
* // data is now {4, 5, 5, 7, 9, 10}
|
|
550
|
+
* \endcode
|
|
551
|
+
*
|
|
552
|
+
* \see https://en.cppreference.com/w/cpp/algorithm/partial_sum
|
|
553
|
+
*/
|
|
554
|
+
template <typename InputIterator, typename OutputIterator, typename T>
|
|
555
|
+
OutputIterator exclusive_scan(InputIterator first, InputIterator last, OutputIterator result, T init);
|
|
556
|
+
|
|
557
|
+
/*! \p exclusive_scan computes an exclusive prefix sum operation. The
|
|
558
|
+
* term 'exclusive' means that each result does not include the
|
|
559
|
+
* corresponding input operand in the partial sum. More precisely,
|
|
560
|
+
* \p init is assigned to <tt>\*result</tt> and the value
|
|
561
|
+
* <tt>binary_op(init, \*first)</tt> is assigned to <tt>\*(result + 1)</tt>,
|
|
562
|
+
* and so on. This version of the function requires both an associative
|
|
563
|
+
* operator and an initial value \p init. When the input and output
|
|
564
|
+
* sequences are the same, the scan is performed in-place.
|
|
565
|
+
*
|
|
566
|
+
* Results are not deterministic for pseudo-associative operators (e.g.,
|
|
567
|
+
* addition of floating-point types). Results for pseudo-associative
|
|
568
|
+
* operators may vary from run to run.
|
|
569
|
+
*
|
|
570
|
+
* The algorithm's execution is parallelized as determined by \p exec.
|
|
571
|
+
*
|
|
572
|
+
* \param exec The execution policy to use for parallelization.
|
|
573
|
+
* \param first The beginning of the input sequence.
|
|
574
|
+
* \param last The end of the input sequence.
|
|
575
|
+
* \param result The beginning of the output sequence.
|
|
576
|
+
* \param init The initial value.
|
|
577
|
+
* \param binary_op The associative operator used to 'sum' values.
|
|
578
|
+
* \return The end of the output sequence.
|
|
579
|
+
*
|
|
580
|
+
* \tparam DerivedPolicy The name of the derived execution policy.
|
|
581
|
+
* \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
|
|
582
|
+
* Iterator</a> and \c InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type. \tparam
|
|
583
|
+
* OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>
|
|
584
|
+
* and \c OutputIterator's \c value_type is convertible to
|
|
585
|
+
* both \c AssociativeOperator's first and second argument type.
|
|
586
|
+
* \tparam T is convertible to \c OutputIterator's \c value_type.
|
|
587
|
+
* \tparam AssociativeOperator The function's return type must be convertible to \c OutputIterator's \c value_type.
|
|
588
|
+
*
|
|
589
|
+
* \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range
|
|
590
|
+
* <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
|
|
591
|
+
*
|
|
592
|
+
* The following code snippet demonstrates how to use \p exclusive_scan to compute an in-place
|
|
593
|
+
* prefix sum using the \p thrust::host execution policy for parallelization:
|
|
594
|
+
*
|
|
595
|
+
* \code
|
|
596
|
+
* #include <thrust/scan.h>
|
|
597
|
+
* #include <thrust/functional.h>
|
|
598
|
+
* #include <thrust/execution_policy.h>
|
|
599
|
+
* ...
|
|
600
|
+
*
|
|
601
|
+
* int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8};
|
|
602
|
+
*
|
|
603
|
+
* ::cuda::maximum<int> binary_op;
|
|
604
|
+
*
|
|
605
|
+
* thrust::exclusive_scan(thrust::host, data, data + 10, data, 1, binary_op); // in-place scan
|
|
606
|
+
*
|
|
607
|
+
* // data is now {1, 1, 1, 2, 2, 2, 4, 4, 4, 4 }
|
|
608
|
+
* \endcode
|
|
609
|
+
*
|
|
610
|
+
* \see https://en.cppreference.com/w/cpp/algorithm/partial_sum
|
|
611
|
+
*/
|
|
612
|
+
template <typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename T, typename AssociativeOperator>
|
|
613
|
+
_CCCL_HOST_DEVICE OutputIterator exclusive_scan(
|
|
614
|
+
const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
|
|
615
|
+
InputIterator first,
|
|
616
|
+
InputIterator last,
|
|
617
|
+
OutputIterator result,
|
|
618
|
+
T init,
|
|
619
|
+
AssociativeOperator binary_op);
|
|
620
|
+
|
|
621
|
+
/*! \p exclusive_scan computes an exclusive prefix sum operation. The
|
|
622
|
+
* term 'exclusive' means that each result does not include the
|
|
623
|
+
* corresponding input operand in the partial sum. More precisely,
|
|
624
|
+
* \p init is assigned to <tt>\*result</tt> and the value
|
|
625
|
+
* <tt>binary_op(init, \*first)</tt> is assigned to <tt>\*(result + 1)</tt>,
|
|
626
|
+
* and so on. This version of the function requires both an associative
|
|
627
|
+
* operator and an initial value \p init. When the input and output
|
|
628
|
+
* sequences are the same, the scan is performed in-place.
|
|
629
|
+
*
|
|
630
|
+
* Results are not deterministic for pseudo-associative operators (e.g.,
|
|
631
|
+
* addition of floating-point types). Results for pseudo-associative
|
|
632
|
+
* operators may vary from run to run.
|
|
633
|
+
*
|
|
634
|
+
* \param first The beginning of the input sequence.
|
|
635
|
+
* \param last The end of the input sequence.
|
|
636
|
+
* \param result The beginning of the output sequence.
|
|
637
|
+
* \param init The initial value.
|
|
638
|
+
* \param binary_op The associative operator used to 'sum' values.
|
|
639
|
+
* \return The end of the output sequence.
|
|
640
|
+
*
|
|
641
|
+
* \tparam InputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
|
|
642
|
+
* Iterator</a> and \c InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type. \tparam
|
|
643
|
+
* OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>
|
|
644
|
+
* and \c OutputIterator's \c value_type is convertible to
|
|
645
|
+
* both \c AssociativeOperator's first and second argument type.
|
|
646
|
+
* \tparam T is convertible to \c OutputIterator's \c value_type.
|
|
647
|
+
* \tparam AssociativeOperator The function's return type must be convertible to \c OutputIterator's \c value_type.
|
|
648
|
+
*
|
|
649
|
+
* \pre \p first may equal \p result but the range <tt>[first, last)</tt> and the range
|
|
650
|
+
* <tt>[result, result + (last - first))</tt> shall not overlap otherwise.
|
|
651
|
+
*
|
|
652
|
+
* The following code snippet demonstrates how to use \p exclusive_scan:
|
|
653
|
+
*
|
|
654
|
+
* \code
|
|
655
|
+
* #include <thrust/scan.h>
|
|
656
|
+
* #include <thrust/functional.h>
|
|
657
|
+
*
|
|
658
|
+
* int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8};
|
|
659
|
+
*
|
|
660
|
+
* ::cuda::maximum<int> binary_op;
|
|
661
|
+
*
|
|
662
|
+
* thrust::exclusive_scan(data, data + 10, data, 1, binary_op); // in-place scan
|
|
663
|
+
*
|
|
664
|
+
* // data is now {1, 1, 1, 2, 2, 2, 4, 4, 4, 4 }
|
|
665
|
+
* \endcode
|
|
666
|
+
*
|
|
667
|
+
* \see https://en.cppreference.com/w/cpp/algorithm/partial_sum
|
|
668
|
+
*/
|
|
669
|
+
template <typename InputIterator, typename OutputIterator, typename T, typename AssociativeOperator>
|
|
670
|
+
OutputIterator
|
|
671
|
+
exclusive_scan(InputIterator first, InputIterator last, OutputIterator result, T init, AssociativeOperator binary_op);
|
|
672
|
+
|
|
673
|
+
/*! \addtogroup segmentedprefixsums Segmented Prefix Sums
|
|
674
|
+
* \ingroup prefixsums
|
|
675
|
+
* \{
|
|
676
|
+
*/
|
|
677
|
+
|
|
678
|
+
/*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix
|
|
679
|
+
* sum operation. The term 'inclusive' means that each result includes
|
|
680
|
+
* the corresponding input operand in the partial sum. The term 'segmented'
|
|
681
|
+
* means that the partial sums are broken into distinct segments. In other
|
|
682
|
+
* words, within each segment a separate inclusive scan operation is computed.
|
|
683
|
+
* Refer to the code sample below for example usage.
|
|
684
|
+
*
|
|
685
|
+
* This version of \p inclusive_scan_by_key assumes \c equal_to as the binary
|
|
686
|
+
* predicate used to compare adjacent keys. Specifically, consecutive iterators
|
|
687
|
+
* <tt>i</tt> and <tt>i+1</tt> in the range <tt>[first1, last1)</tt>
|
|
688
|
+
* belong to the same segment if <tt>*i == *(i+1)</tt>, and belong to
|
|
689
|
+
* different segments otherwise.
|
|
690
|
+
*
|
|
691
|
+
* This version of \p inclusive_scan_by_key assumes \c plus as the associative
|
|
692
|
+
* operator used to perform the prefix sum. When the input and output sequences
|
|
693
|
+
* are the same, the scan is performed in-place.
|
|
694
|
+
*
|
|
695
|
+
* Results are not deterministic for pseudo-associative operators (e.g.,
|
|
696
|
+
* addition of floating-point types). Results for pseudo-associative
|
|
697
|
+
* operators may vary from run to run.
|
|
698
|
+
*
|
|
699
|
+
* The algorithm's execution is parallelized as determined by \p exec.
|
|
700
|
+
*
|
|
701
|
+
* \param exec The execution policy to use for parallelization.
|
|
702
|
+
* \param first1 The beginning of the key sequence.
|
|
703
|
+
* \param last1 The end of the key sequence.
|
|
704
|
+
* \param first2 The beginning of the input value sequence.
|
|
705
|
+
* \param result The beginning of the output value sequence.
|
|
706
|
+
* \return The end of the output sequence.
|
|
707
|
+
*
|
|
708
|
+
* \tparam DerivedPolicy The name of the derived execution policy.
|
|
709
|
+
* \tparam InputIterator1 is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
|
|
710
|
+
* Iterator</a> \tparam InputIterator2 is a model of <a
|
|
711
|
+
* href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input Iterator</a> and \c InputIterator2's \c
|
|
712
|
+
* value_type is convertible to \c OutputIterator's \c value_type. \tparam OutputIterator is a model of <a
|
|
713
|
+
* href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>, and if \c x and \c y are
|
|
714
|
+
* objects of \c OutputIterator's \c value_type, then <tt>x + y</tt> is defined.
|
|
715
|
+
*
|
|
716
|
+
* \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
|
|
717
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
718
|
+
* \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
|
|
719
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
720
|
+
*
|
|
721
|
+
* The following code snippet demonstrates how to use \p inclusive_scan_by_key using the \p thrust::host
|
|
722
|
+
* execution policy for parallelization:
|
|
723
|
+
*
|
|
724
|
+
* \code
|
|
725
|
+
* #include <thrust/scan.h>
|
|
726
|
+
* #include <thrust/execution_policy.h>
|
|
727
|
+
* ...
|
|
728
|
+
*
|
|
729
|
+
* int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
|
730
|
+
* int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
|
|
731
|
+
*
|
|
732
|
+
* thrust::inclusive_scan_by_key(thrust::host, keys, keys + 10, data, data); // in-place scan
|
|
733
|
+
*
|
|
734
|
+
* // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4};
|
|
735
|
+
* \endcode
|
|
736
|
+
*
|
|
737
|
+
* \see inclusive_scan
|
|
738
|
+
* \see exclusive_scan_by_key
|
|
739
|
+
*
|
|
740
|
+
*/
|
|
741
|
+
template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator>
|
|
742
|
+
_CCCL_HOST_DEVICE OutputIterator inclusive_scan_by_key(
|
|
743
|
+
const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
|
|
744
|
+
InputIterator1 first1,
|
|
745
|
+
InputIterator1 last1,
|
|
746
|
+
InputIterator2 first2,
|
|
747
|
+
OutputIterator result);
|
|
748
|
+
|
|
749
|
+
/*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix
|
|
750
|
+
* sum operation. The term 'inclusive' means that each result includes
|
|
751
|
+
* the corresponding input operand in the partial sum. The term 'segmented'
|
|
752
|
+
* means that the partial sums are broken into distinct segments. In other
|
|
753
|
+
* words, within each segment a separate inclusive scan operation is computed.
|
|
754
|
+
* Refer to the code sample below for example usage.
|
|
755
|
+
*
|
|
756
|
+
* This version of \p inclusive_scan_by_key assumes \c equal_to as the binary
|
|
757
|
+
* predicate used to compare adjacent keys. Specifically, consecutive iterators
|
|
758
|
+
* <tt>i</tt> and <tt>i+1</tt> in the range <tt>[first1, last1)</tt>
|
|
759
|
+
* belong to the same segment if <tt>*i == *(i+1)</tt>, and belong to
|
|
760
|
+
* different segments otherwise.
|
|
761
|
+
*
|
|
762
|
+
* This version of \p inclusive_scan_by_key assumes \c plus as the associative
|
|
763
|
+
* operator used to perform the prefix sum. When the input and output sequences
|
|
764
|
+
* are the same, the scan is performed in-place.
|
|
765
|
+
*
|
|
766
|
+
* Results are not deterministic for pseudo-associative operators (e.g.,
|
|
767
|
+
* addition of floating-point types). Results for pseudo-associative
|
|
768
|
+
* operators may vary from run to run.
|
|
769
|
+
*
|
|
770
|
+
* \param first1 The beginning of the key sequence.
|
|
771
|
+
* \param last1 The end of the key sequence.
|
|
772
|
+
* \param first2 The beginning of the input value sequence.
|
|
773
|
+
* \param result The beginning of the output value sequence.
|
|
774
|
+
* \return The end of the output sequence.
|
|
775
|
+
*
|
|
776
|
+
* \tparam InputIterator1 is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
|
|
777
|
+
* Iterator</a> \tparam InputIterator2 is a model of <a
|
|
778
|
+
* href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input Iterator</a> and \c InputIterator2's \c
|
|
779
|
+
* value_type is convertible to \c OutputIterator's \c value_type. \tparam OutputIterator is a model of <a
|
|
780
|
+
* href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>, and if \c x and \c y are
|
|
781
|
+
* objects of \c OutputIterator's \c value_type, then <tt>x + y</tt> is defined.
|
|
782
|
+
*
|
|
783
|
+
* \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
|
|
784
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
785
|
+
* \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
|
|
786
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
787
|
+
*
|
|
788
|
+
* The following code snippet demonstrates how to use \p inclusive_scan_by_key:
|
|
789
|
+
*
|
|
790
|
+
* \code
|
|
791
|
+
* #include <thrust/scan.h>
|
|
792
|
+
*
|
|
793
|
+
* int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
|
794
|
+
* int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
|
|
795
|
+
*
|
|
796
|
+
* thrust::inclusive_scan_by_key(keys, keys + 10, data, data); // in-place scan
|
|
797
|
+
*
|
|
798
|
+
* // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4};
|
|
799
|
+
* \endcode
|
|
800
|
+
*
|
|
801
|
+
* \see inclusive_scan
|
|
802
|
+
* \see exclusive_scan_by_key
|
|
803
|
+
*
|
|
804
|
+
*/
|
|
805
|
+
template <typename InputIterator1, typename InputIterator2, typename OutputIterator>
|
|
806
|
+
OutputIterator
|
|
807
|
+
inclusive_scan_by_key(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result);
|
|
808
|
+
|
|
809
|
+
/*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix
|
|
810
|
+
* sum operation. The term 'inclusive' means that each result includes
|
|
811
|
+
* the corresponding input operand in the partial sum. The term 'segmented'
|
|
812
|
+
* means that the partial sums are broken into distinct segments. In other
|
|
813
|
+
* words, within each segment a separate inclusive scan operation is computed.
|
|
814
|
+
* Refer to the code sample below for example usage.
|
|
815
|
+
*
|
|
816
|
+
* This version of \p inclusive_scan_by_key uses the binary predicate
|
|
817
|
+
* \p binary_pred to compare adjacent keys. Specifically, consecutive iterators
|
|
818
|
+
* <tt>i</tt> and <tt>i+1</tt> in the range <tt>[first1, last1)</tt>
|
|
819
|
+
* belong to the same segment if <tt>binary_pred(*i, *(i+1))</tt> is true, and belong to
|
|
820
|
+
* different segments otherwise.
|
|
821
|
+
*
|
|
822
|
+
* This version of \p inclusive_scan_by_key assumes \c plus as the associative
|
|
823
|
+
* operator used to perform the prefix sum. When the input and output sequences
|
|
824
|
+
* are the same, the scan is performed in-place.
|
|
825
|
+
*
|
|
826
|
+
* Results are not deterministic for pseudo-associative operators (e.g.,
|
|
827
|
+
* addition of floating-point types). Results for pseudo-associative
|
|
828
|
+
* operators may vary from run to run.
|
|
829
|
+
*
|
|
830
|
+
* The algorithm's execution is parallelized as determined by \p exec.
|
|
831
|
+
*
|
|
832
|
+
* \param exec The execution policy to use for parallelization.
|
|
833
|
+
* \param first1 The beginning of the key sequence.
|
|
834
|
+
* \param last1 The end of the key sequence.
|
|
835
|
+
* \param first2 The beginning of the input value sequence.
|
|
836
|
+
* \param result The beginning of the output value sequence.
|
|
837
|
+
* \param binary_pred The binary predicate used to determine equality of keys.
|
|
838
|
+
* \return The end of the output sequence.
|
|
839
|
+
*
|
|
840
|
+
* \tparam DerivedPolicy The name of the derived execution policy.
|
|
841
|
+
* \tparam InputIterator1 is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
|
|
842
|
+
* Iterator</a> \tparam InputIterator2 is a model of <a
|
|
843
|
+
* href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input Iterator</a> and \c InputIterator2's \c
|
|
844
|
+
* value_type is convertible to \c OutputIterator's \c value_type. \tparam OutputIterator is a model of <a
|
|
845
|
+
* href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>, and if \c x and \c y are
|
|
846
|
+
* objects of \c OutputIterator's \c value_type, then <tt>x + y</tt> is defined. \tparam BinaryPredicate is a
|
|
847
|
+
* model of <a href="https://en.cppreference.com/w/cpp/named_req/BinaryPredicate">Binary Predicate</a>.
|
|
848
|
+
*
|
|
849
|
+
* \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
|
|
850
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
851
|
+
* \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
|
|
852
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
853
|
+
*
|
|
854
|
+
* The following code snippet demonstrates how to use \p inclusive_scan_by_key using the \p thrust::host
|
|
855
|
+
* execution policy for parallelization:
|
|
856
|
+
*
|
|
857
|
+
* \code
|
|
858
|
+
* #include <thrust/scan.h>
|
|
859
|
+
* #include <thrust/functional.h>
|
|
860
|
+
* #include <thrust/execution_policy.h>
|
|
861
|
+
* ...
|
|
862
|
+
*
|
|
863
|
+
* int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
|
864
|
+
* int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
|
|
865
|
+
*
|
|
866
|
+
* ::cuda::std::equal_to<int> binary_pred;
|
|
867
|
+
*
|
|
868
|
+
* thrust::inclusive_scan_by_key(thrust::host, keys, keys + 10, data, data, binary_pred); // in-place scan
|
|
869
|
+
*
|
|
870
|
+
* // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4};
|
|
871
|
+
* \endcode
|
|
872
|
+
*
|
|
873
|
+
* \see inclusive_scan
|
|
874
|
+
* \see exclusive_scan_by_key
|
|
875
|
+
*
|
|
876
|
+
*/
|
|
877
|
+
template <typename DerivedPolicy,
|
|
878
|
+
typename InputIterator1,
|
|
879
|
+
typename InputIterator2,
|
|
880
|
+
typename OutputIterator,
|
|
881
|
+
typename BinaryPredicate>
|
|
882
|
+
_CCCL_HOST_DEVICE OutputIterator inclusive_scan_by_key(
|
|
883
|
+
const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
|
|
884
|
+
InputIterator1 first1,
|
|
885
|
+
InputIterator1 last1,
|
|
886
|
+
InputIterator2 first2,
|
|
887
|
+
OutputIterator result,
|
|
888
|
+
BinaryPredicate binary_pred);
|
|
889
|
+
|
|
890
|
+
/*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix
|
|
891
|
+
* sum operation. The term 'inclusive' means that each result includes
|
|
892
|
+
* the corresponding input operand in the partial sum. The term 'segmented'
|
|
893
|
+
* means that the partial sums are broken into distinct segments. In other
|
|
894
|
+
* words, within each segment a separate inclusive scan operation is computed.
|
|
895
|
+
* Refer to the code sample below for example usage.
|
|
896
|
+
*
|
|
897
|
+
* This version of \p inclusive_scan_by_key uses the binary predicate
|
|
898
|
+
* \c binary_pred to compare adjacent keys. Specifically, consecutive iterators
|
|
899
|
+
* <tt>i</tt> and <tt>i+1</tt> in the range <tt>[first1, last1)</tt>
|
|
900
|
+
* belong to the same segment if <tt>binary_pred(*i, *(i+1))</tt> is true, and belong to
|
|
901
|
+
* different segments otherwise.
|
|
902
|
+
*
|
|
903
|
+
* This version of \p inclusive_scan_by_key assumes \c plus as the associative
|
|
904
|
+
* operator used to perform the prefix sum. When the input and output sequences
|
|
905
|
+
* are the same, the scan is performed in-place.
|
|
906
|
+
*
|
|
907
|
+
* Results are not deterministic for pseudo-associative operators (e.g.,
|
|
908
|
+
* addition of floating-point types). Results for pseudo-associative
|
|
909
|
+
* operators may vary from run to run.
|
|
910
|
+
*
|
|
911
|
+
* \param first1 The beginning of the key sequence.
|
|
912
|
+
* \param last1 The end of the key sequence.
|
|
913
|
+
* \param first2 The beginning of the input value sequence.
|
|
914
|
+
* \param result The beginning of the output value sequence.
|
|
915
|
+
* \param binary_pred The binary predicate used to determine equality of keys.
|
|
916
|
+
* \return The end of the output sequence.
|
|
917
|
+
*
|
|
918
|
+
* \tparam InputIterator1 is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
|
|
919
|
+
* Iterator</a> \tparam InputIterator2 is a model of <a
|
|
920
|
+
* href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input Iterator</a> and \c InputIterator2's \c
|
|
921
|
+
* value_type is convertible to \c OutputIterator's \c value_type. \tparam OutputIterator is a model of <a
|
|
922
|
+
* href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>, and if \c x and \c y are
|
|
923
|
+
* objects of \c OutputIterator's \c value_type, then <tt>x + y</tt> is defined. \tparam BinaryPredicate is a
|
|
924
|
+
* model of <a href="https://en.cppreference.com/w/cpp/named_req/BinaryPredicate">Binary Predicate</a>.
|
|
925
|
+
*
|
|
926
|
+
* \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
|
|
927
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
928
|
+
* \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
|
|
929
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
930
|
+
*
|
|
931
|
+
* The following code snippet demonstrates how to use \p inclusive_scan_by_key:
|
|
932
|
+
*
|
|
933
|
+
* \code
|
|
934
|
+
* #include <thrust/scan.h>
|
|
935
|
+
* #include <thrust/functional.h>
|
|
936
|
+
*
|
|
937
|
+
* int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
|
938
|
+
* int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
|
|
939
|
+
*
|
|
940
|
+
* ::cuda::std::equal_to<int> binary_pred;
|
|
941
|
+
*
|
|
942
|
+
* thrust::inclusive_scan_by_key(keys, keys + 10, data, data, binary_pred); // in-place scan
|
|
943
|
+
*
|
|
944
|
+
* // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4};
|
|
945
|
+
* \endcode
|
|
946
|
+
*
|
|
947
|
+
* \see inclusive_scan
|
|
948
|
+
* \see exclusive_scan_by_key
|
|
949
|
+
*
|
|
950
|
+
*/
|
|
951
|
+
template <typename InputIterator1, typename InputIterator2, typename OutputIterator, typename BinaryPredicate>
|
|
952
|
+
OutputIterator inclusive_scan_by_key(
|
|
953
|
+
InputIterator1 first1,
|
|
954
|
+
InputIterator1 last1,
|
|
955
|
+
InputIterator2 first2,
|
|
956
|
+
OutputIterator result,
|
|
957
|
+
BinaryPredicate binary_pred);
|
|
958
|
+
|
|
959
|
+
/*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix
|
|
960
|
+
* sum operation. The term 'inclusive' means that each result includes
|
|
961
|
+
* the corresponding input operand in the partial sum. The term 'segmented'
|
|
962
|
+
* means that the partial sums are broken into distinct segments. In other
|
|
963
|
+
* words, within each segment a separate inclusive scan operation is computed.
|
|
964
|
+
* Refer to the code sample below for example usage.
|
|
965
|
+
*
|
|
966
|
+
* This version of \p inclusive_scan_by_key uses the binary predicate
|
|
967
|
+
* \c binary_pred to compare adjacent keys. Specifically, consecutive iterators
|
|
968
|
+
* <tt>i</tt> and <tt>i+1</tt> in the range <tt>[first1, last1)</tt>
|
|
969
|
+
* belong to the same segment if <tt>binary_pred(*i, *(i+1))</tt> is true, and belong to
|
|
970
|
+
* different segments otherwise.
|
|
971
|
+
*
|
|
972
|
+
* This version of \p inclusive_scan_by_key uses the associative operator
|
|
973
|
+
* \c binary_op to perform the prefix sum. When the input and output sequences
|
|
974
|
+
* are the same, the scan is performed in-place.
|
|
975
|
+
*
|
|
976
|
+
* Results are not deterministic for pseudo-associative operators (e.g.,
|
|
977
|
+
* addition of floating-point types). Results for pseudo-associative
|
|
978
|
+
* operators may vary from run to run.
|
|
979
|
+
*
|
|
980
|
+
* The algorithm's execution is parallelized as determined by \p exec.
|
|
981
|
+
*
|
|
982
|
+
* \param exec The execution policy to use for parallelization.
|
|
983
|
+
* \param first1 The beginning of the key sequence.
|
|
984
|
+
* \param last1 The end of the key sequence.
|
|
985
|
+
* \param first2 The beginning of the input value sequence.
|
|
986
|
+
* \param result The beginning of the output value sequence.
|
|
987
|
+
* \param binary_pred The binary predicate used to determine equality of keys.
|
|
988
|
+
* \param binary_op The associative operator used to 'sum' values.
|
|
989
|
+
* \return The end of the output sequence.
|
|
990
|
+
*
|
|
991
|
+
* \tparam DerivedPolicy The name of the derived execution policy.
|
|
992
|
+
* \tparam InputIterator1 is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
|
|
993
|
+
* Iterator</a> \tparam InputIterator2 is a model of <a
|
|
994
|
+
* href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input Iterator</a> and \c InputIterator2's \c
|
|
995
|
+
* value_type is convertible to \c OutputIterator's \c value_type. \tparam OutputIterator is a model of <a
|
|
996
|
+
* href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>, and if \c x and \c y are
|
|
997
|
+
* objects of \c OutputIterator's \c value_type, then <tt>binary_op(x,y)</tt> is defined. \tparam BinaryPredicate is a
|
|
998
|
+
* model of <a href="https://en.cppreference.com/w/cpp/named_req/BinaryPredicate">Binary Predicate</a>. \tparam
|
|
999
|
+
* AssociativeOperator The function's return type must be convertible to \c OutputIterator's \c value_type.
|
|
1000
|
+
*
|
|
1001
|
+
* \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
|
|
1002
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
1003
|
+
* \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
|
|
1004
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
1005
|
+
*
|
|
1006
|
+
* The following code snippet demonstrates how to use \p inclusive_scan_by_key using the \p thrust::host
|
|
1007
|
+
* execution policy for parallelization:
|
|
1008
|
+
*
|
|
1009
|
+
* \code
|
|
1010
|
+
* #include <thrust/scan.h>
|
|
1011
|
+
* #include <thrust/functional.h>
|
|
1012
|
+
* #include <thrust/execution_policy.h>
|
|
1013
|
+
* ...
|
|
1014
|
+
*
|
|
1015
|
+
* int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
|
1016
|
+
* int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
|
|
1017
|
+
*
|
|
1018
|
+
* ::cuda::std::equal_to<int> binary_pred;
|
|
1019
|
+
* ::cuda::std::plus<int> binary_op;
|
|
1020
|
+
*
|
|
1021
|
+
* thrust::inclusive_scan_by_key(thrust::host, keys, keys + 10, data, data, binary_pred, binary_op); // in-place scan
|
|
1022
|
+
*
|
|
1023
|
+
* // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4};
|
|
1024
|
+
* \endcode
|
|
1025
|
+
*
|
|
1026
|
+
* \see inclusive_scan
|
|
1027
|
+
* \see exclusive_scan_by_key
|
|
1028
|
+
*
|
|
1029
|
+
*/
|
|
1030
|
+
template <typename DerivedPolicy,
|
|
1031
|
+
typename InputIterator1,
|
|
1032
|
+
typename InputIterator2,
|
|
1033
|
+
typename OutputIterator,
|
|
1034
|
+
typename BinaryPredicate,
|
|
1035
|
+
typename AssociativeOperator>
|
|
1036
|
+
_CCCL_HOST_DEVICE OutputIterator inclusive_scan_by_key(
|
|
1037
|
+
const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
|
|
1038
|
+
InputIterator1 first1,
|
|
1039
|
+
InputIterator1 last1,
|
|
1040
|
+
InputIterator2 first2,
|
|
1041
|
+
OutputIterator result,
|
|
1042
|
+
BinaryPredicate binary_pred,
|
|
1043
|
+
AssociativeOperator binary_op);
|
|
1044
|
+
|
|
1045
|
+
/*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix
|
|
1046
|
+
* sum operation. The term 'inclusive' means that each result includes
|
|
1047
|
+
* the corresponding input operand in the partial sum. The term 'segmented'
|
|
1048
|
+
* means that the partial sums are broken into distinct segments. In other
|
|
1049
|
+
* words, within each segment a separate inclusive scan operation is computed.
|
|
1050
|
+
* Refer to the code sample below for example usage.
|
|
1051
|
+
*
|
|
1052
|
+
* This version of \p inclusive_scan_by_key uses the binary predicate
|
|
1053
|
+
* \c binary_pred to compare adjacent keys. Specifically, consecutive iterators
|
|
1054
|
+
* <tt>i</tt> and <tt>i+1</tt> in the range <tt>[first1, last1)</tt>
|
|
1055
|
+
* belong to the same segment if <tt>binary_pred(*i, *(i+1))</tt> is true, and belong to
|
|
1056
|
+
* different segments otherwise.
|
|
1057
|
+
*
|
|
1058
|
+
* Results are not deterministic for pseudo-associative operators (e.g.,
|
|
1059
|
+
* addition of floating-point types). Results for pseudo-associative
|
|
1060
|
+
* operators may vary from run to run.
|
|
1061
|
+
*
|
|
1062
|
+
* This version of \p inclusive_scan_by_key uses the associative operator
|
|
1063
|
+
* \c binary_op to perform the prefix sum. When the input and output sequences
|
|
1064
|
+
* are the same, the scan is performed in-place.
|
|
1065
|
+
*
|
|
1066
|
+
* \param first1 The beginning of the key sequence.
|
|
1067
|
+
* \param last1 The end of the key sequence.
|
|
1068
|
+
* \param first2 The beginning of the input value sequence.
|
|
1069
|
+
* \param result The beginning of the output value sequence.
|
|
1070
|
+
* \param binary_pred The binary predicate used to determine equality of keys.
|
|
1071
|
+
* \param binary_op The associative operator used to 'sum' values.
|
|
1072
|
+
* \return The end of the output sequence.
|
|
1073
|
+
*
|
|
1074
|
+
* \tparam InputIterator1 is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input
|
|
1075
|
+
* Iterator</a> \tparam InputIterator2 is a model of <a
|
|
1076
|
+
* href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input Iterator</a> and \c InputIterator2's \c
|
|
1077
|
+
* value_type is convertible to \c OutputIterator's \c value_type. \tparam OutputIterator is a model of <a
|
|
1078
|
+
* href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>, and if \c x and \c y are
|
|
1079
|
+
* objects of \c OutputIterator's \c value_type, then <tt>binary_op(x,y)</tt> is defined. \tparam BinaryPredicate is a
|
|
1080
|
+
* model of <a href="https://en.cppreference.com/w/cpp/named_req/BinaryPredicate">Binary Predicate</a>. \tparam
|
|
1081
|
+
* AssociativeOperator The function's return type must be convertible to \c OutputIterator's \c value_type.
|
|
1082
|
+
*
|
|
1083
|
+
* \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
|
|
1084
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
1085
|
+
* \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
|
|
1086
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
1087
|
+
*
|
|
1088
|
+
* The following code snippet demonstrates how to use \p inclusive_scan_by_key:
|
|
1089
|
+
*
|
|
1090
|
+
* \code
|
|
1091
|
+
* #include <thrust/scan.h>
|
|
1092
|
+
* #include <thrust/functional.h>
|
|
1093
|
+
*
|
|
1094
|
+
* int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
|
1095
|
+
* int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
|
|
1096
|
+
*
|
|
1097
|
+
* ::cuda::std::equal_to<int> binary_pred;
|
|
1098
|
+
* ::cuda::std::plus<int> binary_op;
|
|
1099
|
+
*
|
|
1100
|
+
* thrust::inclusive_scan_by_key(keys, keys + 10, data, data, binary_pred, binary_op); // in-place scan
|
|
1101
|
+
*
|
|
1102
|
+
* // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4};
|
|
1103
|
+
* \endcode
|
|
1104
|
+
*
|
|
1105
|
+
* \see inclusive_scan
|
|
1106
|
+
* \see exclusive_scan_by_key
|
|
1107
|
+
*
|
|
1108
|
+
*/
|
|
1109
|
+
template <typename InputIterator1,
|
|
1110
|
+
typename InputIterator2,
|
|
1111
|
+
typename OutputIterator,
|
|
1112
|
+
typename BinaryPredicate,
|
|
1113
|
+
typename AssociativeOperator>
|
|
1114
|
+
OutputIterator inclusive_scan_by_key(
|
|
1115
|
+
InputIterator1 first1,
|
|
1116
|
+
InputIterator1 last1,
|
|
1117
|
+
InputIterator2 first2,
|
|
1118
|
+
OutputIterator result,
|
|
1119
|
+
BinaryPredicate binary_pred,
|
|
1120
|
+
AssociativeOperator binary_op);
|
|
1121
|
+
|
|
1122
|
+
/*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix sum operation.
|
|
1123
|
+
*
|
|
1124
|
+
* This version of \p exclusive_scan_by_key uses the value \c 0 to
|
|
1125
|
+
* initialize the exclusive scan operation.
|
|
1126
|
+
*
|
|
1127
|
+
* This version of \p exclusive_scan_by_key assumes \c plus as the associative
|
|
1128
|
+
* operator used to perform the prefix sum. When the input and output sequences
|
|
1129
|
+
* are the same, the scan is performed in-place.
|
|
1130
|
+
*
|
|
1131
|
+
* This version of \p exclusive_scan_by_key assumes \c equal_to as the binary
|
|
1132
|
+
* predicate used to compare adjacent keys. Specifically, consecutive iterators
|
|
1133
|
+
* <tt>i</tt> and <tt>i+1</tt> in the range <tt>[first1, last1)</tt>
|
|
1134
|
+
* belong to the same segment if <tt>*i == *(i+1)</tt>, and belong to
|
|
1135
|
+
* different segments otherwise.
|
|
1136
|
+
*
|
|
1137
|
+
* Results are not deterministic for pseudo-associative operators (e.g.,
|
|
1138
|
+
* addition of floating-point types). Results for pseudo-associative
|
|
1139
|
+
* operators may vary from run to run.
|
|
1140
|
+
*
|
|
1141
|
+
* Refer to the most general form of \p exclusive_scan_by_key for additional details.
|
|
1142
|
+
*
|
|
1143
|
+
* The algorithm's execution is parallelized as determined by \p exec.
|
|
1144
|
+
*
|
|
1145
|
+
* \param exec The execution policy to use for parallelization.
|
|
1146
|
+
* \param first1 The beginning of the key sequence.
|
|
1147
|
+
* \param last1 The end of the key sequence.
|
|
1148
|
+
* \param first2 The beginning of the input value sequence.
|
|
1149
|
+
* \param result The beginning of the output value sequence.
|
|
1150
|
+
*
|
|
1151
|
+
* \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
|
|
1152
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
1153
|
+
* \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
|
|
1154
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
1155
|
+
*
|
|
1156
|
+
* The following code snippet demonstrates how to use \p exclusive_scan_by_key using the
|
|
1157
|
+
* \p thrust::host execution policy for parallelization:
|
|
1158
|
+
*
|
|
1159
|
+
* \code
|
|
1160
|
+
* #include <thrust/scan.h>
|
|
1161
|
+
* #include <thrust/execution_policy.h>
|
|
1162
|
+
* ...
|
|
1163
|
+
*
|
|
1164
|
+
* int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
|
|
1165
|
+
* int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
|
1166
|
+
*
|
|
1167
|
+
* thrust::exclusive_scan_by_key(thrust::host, keys, keys + 10, vals, vals); // in-place scan
|
|
1168
|
+
*
|
|
1169
|
+
* // vals is now {0, 1, 2, 0, 1, 0, 0, 1, 2, 3};
|
|
1170
|
+
* \endcode
|
|
1171
|
+
*
|
|
1172
|
+
* \see exclusive_scan
|
|
1173
|
+
*
|
|
1174
|
+
*/
|
|
1175
|
+
template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator>
|
|
1176
|
+
_CCCL_HOST_DEVICE OutputIterator exclusive_scan_by_key(
|
|
1177
|
+
const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
|
|
1178
|
+
InputIterator1 first1,
|
|
1179
|
+
InputIterator1 last1,
|
|
1180
|
+
InputIterator2 first2,
|
|
1181
|
+
OutputIterator result);
|
|
1182
|
+
|
|
1183
|
+
/*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix sum operation.
|
|
1184
|
+
*
|
|
1185
|
+
* This version of \p exclusive_scan_by_key uses the value \c 0 to
|
|
1186
|
+
* initialize the exclusive scan operation.
|
|
1187
|
+
*
|
|
1188
|
+
* This version of \p exclusive_scan_by_key assumes \c plus as the associative
|
|
1189
|
+
* operator used to perform the prefix sum. When the input and output sequences
|
|
1190
|
+
* are the same, the scan is performed in-place.
|
|
1191
|
+
*
|
|
1192
|
+
* This version of \p exclusive_scan_by_key assumes \c equal_to as the binary
|
|
1193
|
+
* predicate used to compare adjacent keys. Specifically, consecutive iterators
|
|
1194
|
+
* <tt>i</tt> and <tt>i+1</tt> in the range <tt>[first1, last1)</tt>
|
|
1195
|
+
* belong to the same segment if <tt>*i == *(i+1)</tt>, and belong to
|
|
1196
|
+
* different segments otherwise.
|
|
1197
|
+
*
|
|
1198
|
+
* Results are not deterministic for pseudo-associative operators (e.g.,
|
|
1199
|
+
* addition of floating-point types). Results for pseudo-associative
|
|
1200
|
+
* operators may vary from run to run.
|
|
1201
|
+
*
|
|
1202
|
+
* Refer to the most general form of \p exclusive_scan_by_key for additional details.
|
|
1203
|
+
*
|
|
1204
|
+
* \param first1 The beginning of the key sequence.
|
|
1205
|
+
* \param last1 The end of the key sequence.
|
|
1206
|
+
* \param first2 The beginning of the input value sequence.
|
|
1207
|
+
* \param result The beginning of the output value sequence.
|
|
1208
|
+
*
|
|
1209
|
+
* \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
|
|
1210
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
1211
|
+
* \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
|
|
1212
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
1213
|
+
*
|
|
1214
|
+
* The following code snippet demonstrates how to use \p exclusive_scan_by_key.
|
|
1215
|
+
*
|
|
1216
|
+
* \code
|
|
1217
|
+
* #include <thrust/scan.h>
|
|
1218
|
+
*
|
|
1219
|
+
* int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
|
|
1220
|
+
* int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
|
1221
|
+
*
|
|
1222
|
+
* thrust::exclusive_scan_by_key(keys, keys + 10, vals, vals); // in-place scan
|
|
1223
|
+
*
|
|
1224
|
+
* // vals is now {0, 1, 2, 0, 1, 0, 0, 1, 2, 3};
|
|
1225
|
+
* \endcode
|
|
1226
|
+
*
|
|
1227
|
+
* \see exclusive_scan
|
|
1228
|
+
*
|
|
1229
|
+
*/
|
|
1230
|
+
template <typename InputIterator1, typename InputIterator2, typename OutputIterator>
|
|
1231
|
+
OutputIterator
|
|
1232
|
+
exclusive_scan_by_key(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result);
|
|
1233
|
+
|
|
1234
|
+
/*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix
|
|
1235
|
+
* sum operation. The term 'exclusive' means that each result does not include
|
|
1236
|
+
* the corresponding input operand in the partial sum. The term 'segmented'
|
|
1237
|
+
* means that the partial sums are broken into distinct segments. In other
|
|
1238
|
+
* words, within each segment a separate exclusive scan operation is computed.
|
|
1239
|
+
* Refer to the code sample below for example usage.
|
|
1240
|
+
*
|
|
1241
|
+
* This version of \p exclusive_scan_by_key uses the value \c init to
|
|
1242
|
+
* initialize the exclusive scan operation.
|
|
1243
|
+
*
|
|
1244
|
+
* Results are not deterministic for pseudo-associative operators (e.g.,
|
|
1245
|
+
* addition of floating-point types). Results for pseudo-associative
|
|
1246
|
+
* operators may vary from run to run.
|
|
1247
|
+
*
|
|
1248
|
+
* The algorithm's execution is parallelized as determined by \p exec.
|
|
1249
|
+
*
|
|
1250
|
+
* \param exec The execution policy to use for parallelization.
|
|
1251
|
+
* \param first1 The beginning of the key sequence.
|
|
1252
|
+
* \param last1 The end of the key sequence.
|
|
1253
|
+
* \param first2 The beginning of the input value sequence.
|
|
1254
|
+
* \param result The beginning of the output value sequence.
|
|
1255
|
+
* \param init The initial value of the exclusive scan.
|
|
1256
|
+
* \return The end of the output sequence.
|
|
1257
|
+
*
|
|
1258
|
+
* \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
|
|
1259
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
1260
|
+
* \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
|
|
1261
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
1262
|
+
*
|
|
1263
|
+
* The following code snippet demonstrates how to use \p exclusive_scan_by_key using the \p
|
|
1264
|
+
* thrust::host execution policy for parallelization:
|
|
1265
|
+
*
|
|
1266
|
+
* \code
|
|
1267
|
+
* #include <thrust/scan.h>
|
|
1268
|
+
* #include <thrust/functional.h>
|
|
1269
|
+
* #include <thrust/execution_policy.h>
|
|
1270
|
+
* ...
|
|
1271
|
+
*
|
|
1272
|
+
* int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
|
|
1273
|
+
* int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
|
1274
|
+
*
|
|
1275
|
+
* int init = 5;
|
|
1276
|
+
*
|
|
1277
|
+
* thrust::exclusive_scan_by_key(thrust::host, keys, keys + 10, vals, vals, init); // in-place scan
|
|
1278
|
+
*
|
|
1279
|
+
* // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8};
|
|
1280
|
+
* \endcode
|
|
1281
|
+
*
|
|
1282
|
+
* \see exclusive_scan
|
|
1283
|
+
* \see inclusive_scan_by_key
|
|
1284
|
+
*
|
|
1285
|
+
*/
|
|
1286
|
+
template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator, typename T>
|
|
1287
|
+
_CCCL_HOST_DEVICE OutputIterator exclusive_scan_by_key(
|
|
1288
|
+
const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
|
|
1289
|
+
InputIterator1 first1,
|
|
1290
|
+
InputIterator1 last1,
|
|
1291
|
+
InputIterator2 first2,
|
|
1292
|
+
OutputIterator result,
|
|
1293
|
+
T init);
|
|
1294
|
+
|
|
1295
|
+
/*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix
|
|
1296
|
+
* sum operation. The term 'exclusive' means that each result does not include
|
|
1297
|
+
* the corresponding input operand in the partial sum. The term 'segmented'
|
|
1298
|
+
* means that the partial sums are broken into distinct segments. In other
|
|
1299
|
+
* words, within each segment a separate exclusive scan operation is computed.
|
|
1300
|
+
* Refer to the code sample below for example usage.
|
|
1301
|
+
*
|
|
1302
|
+
* This version of \p exclusive_scan_by_key uses the value \c init to
|
|
1303
|
+
* initialize the exclusive scan operation.
|
|
1304
|
+
*
|
|
1305
|
+
* Results are not deterministic for pseudo-associative operators (e.g.,
|
|
1306
|
+
* addition of floating-point types). Results for pseudo-associative
|
|
1307
|
+
* operators may vary from run to run.
|
|
1308
|
+
*
|
|
1309
|
+
* \param first1 The beginning of the key sequence.
|
|
1310
|
+
* \param last1 The end of the key sequence.
|
|
1311
|
+
* \param first2 The beginning of the input value sequence.
|
|
1312
|
+
* \param result The beginning of the output value sequence.
|
|
1313
|
+
* \param init The initial value of the exclusive scan.
|
|
1314
|
+
* \return The end of the output sequence.
|
|
1315
|
+
*
|
|
1316
|
+
* \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
|
|
1317
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
1318
|
+
* \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
|
|
1319
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
1320
|
+
*
|
|
1321
|
+
* The following code snippet demonstrates how to use \p exclusive_scan_by_key:
|
|
1322
|
+
*
|
|
1323
|
+
* \code
|
|
1324
|
+
* #include <thrust/scan.h>
|
|
1325
|
+
* #include <thrust/functional.h>
|
|
1326
|
+
*
|
|
1327
|
+
* int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
|
|
1328
|
+
* int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
|
1329
|
+
*
|
|
1330
|
+
* int init = 5;
|
|
1331
|
+
*
|
|
1332
|
+
* thrust::exclusive_scan_by_key(keys, keys + 10, vals, vals, init); // in-place scan
|
|
1333
|
+
*
|
|
1334
|
+
* // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8};
|
|
1335
|
+
* \endcode
|
|
1336
|
+
*
|
|
1337
|
+
* \see exclusive_scan
|
|
1338
|
+
* \see inclusive_scan_by_key
|
|
1339
|
+
*
|
|
1340
|
+
*/
|
|
1341
|
+
template <typename InputIterator1, typename InputIterator2, typename OutputIterator, typename T>
|
|
1342
|
+
OutputIterator exclusive_scan_by_key(
|
|
1343
|
+
InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result, T init);
|
|
1344
|
+
|
|
1345
|
+
/*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix
|
|
1346
|
+
* sum operation. The term 'exclusive' means that each result does not include
|
|
1347
|
+
* the corresponding input operand in the partial sum. The term 'segmented'
|
|
1348
|
+
* means that the partial sums are broken into distinct segments. In other
|
|
1349
|
+
* words, within each segment a separate exclusive scan operation is computed.
|
|
1350
|
+
* Refer to the code sample below for example usage.
|
|
1351
|
+
*
|
|
1352
|
+
* This version of \p exclusive_scan_by_key uses the value \c init to
|
|
1353
|
+
* initialize the exclusive scan operation.
|
|
1354
|
+
*
|
|
1355
|
+
* This version of \p exclusive_scan_by_key uses the binary predicate \c binary_pred
|
|
1356
|
+
* to compare adjacent keys. Specifically, consecutive iterators <tt>i</tt> and
|
|
1357
|
+
* <tt>i+1</tt> in the range <tt>[first1, last1)</tt> belong to the same segment if
|
|
1358
|
+
* <tt>binary_pred(*i, *(i+1))</tt> is true, and belong to different segments otherwise.
|
|
1359
|
+
*
|
|
1360
|
+
* Results are not deterministic for pseudo-associative operators (e.g.,
|
|
1361
|
+
* addition of floating-point types). Results for pseudo-associative
|
|
1362
|
+
* operators may vary from run to run.
|
|
1363
|
+
*
|
|
1364
|
+
* The algorithm's execution is parallelized as determined by \p exec.
|
|
1365
|
+
*
|
|
1366
|
+
* \param exec The execution policy to use for parallelization.
|
|
1367
|
+
* \param first1 The beginning of the key sequence.
|
|
1368
|
+
* \param last1 The end of the key sequence.
|
|
1369
|
+
* \param first2 The beginning of the input value sequence.
|
|
1370
|
+
* \param result The beginning of the output value sequence.
|
|
1371
|
+
* \param init The initial value of the exclusive scan.
|
|
1372
|
+
* \param binary_pred The binary predicate used to determine equality of keys.
|
|
1373
|
+
* \return The end of the output sequence.
|
|
1374
|
+
*
|
|
1375
|
+
* \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
|
|
1376
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
1377
|
+
* \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
|
|
1378
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
1379
|
+
*
|
|
1380
|
+
* The following code snippet demonstrates how to use \p exclusive_scan_by_key using the
|
|
1381
|
+
* \p thrust::host execution policy for parallelization:
|
|
1382
|
+
*
|
|
1383
|
+
* \code
|
|
1384
|
+
* #include <thrust/scan.h>
|
|
1385
|
+
* #include <thrust/functional.h>
|
|
1386
|
+
* #include <thrust/execution_policy.h>
|
|
1387
|
+
* ...
|
|
1388
|
+
*
|
|
1389
|
+
* int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
|
|
1390
|
+
* int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
|
1391
|
+
*
|
|
1392
|
+
* int init = 5;
|
|
1393
|
+
*
|
|
1394
|
+
* ::cuda::std::equal_to<int> binary_pred;
|
|
1395
|
+
*
|
|
1396
|
+
* thrust::exclusive_scan_by_key(thrust::host, keys, keys + 10, vals, vals, init, binary_pred); // in-place scan
|
|
1397
|
+
*
|
|
1398
|
+
* // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8};
|
|
1399
|
+
* \endcode
|
|
1400
|
+
*
|
|
1401
|
+
* \see exclusive_scan
|
|
1402
|
+
* \see inclusive_scan_by_key
|
|
1403
|
+
*
|
|
1404
|
+
*/
|
|
1405
|
+
template <typename DerivedPolicy,
|
|
1406
|
+
typename InputIterator1,
|
|
1407
|
+
typename InputIterator2,
|
|
1408
|
+
typename OutputIterator,
|
|
1409
|
+
typename T,
|
|
1410
|
+
typename BinaryPredicate>
|
|
1411
|
+
_CCCL_HOST_DEVICE OutputIterator exclusive_scan_by_key(
|
|
1412
|
+
const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
|
|
1413
|
+
InputIterator1 first1,
|
|
1414
|
+
InputIterator1 last1,
|
|
1415
|
+
InputIterator2 first2,
|
|
1416
|
+
OutputIterator result,
|
|
1417
|
+
T init,
|
|
1418
|
+
BinaryPredicate binary_pred);
|
|
1419
|
+
|
|
1420
|
+
/*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix
|
|
1421
|
+
* sum operation. The term 'exclusive' means that each result does not include
|
|
1422
|
+
* the corresponding input operand in the partial sum. The term 'segmented'
|
|
1423
|
+
* means that the partial sums are broken into distinct segments. In other
|
|
1424
|
+
* words, within each segment a separate exclusive scan operation is computed.
|
|
1425
|
+
* Refer to the code sample below for example usage.
|
|
1426
|
+
*
|
|
1427
|
+
* This version of \p exclusive_scan_by_key uses the value \c init to
|
|
1428
|
+
* initialize the exclusive scan operation.
|
|
1429
|
+
*
|
|
1430
|
+
* This version of \p exclusive_scan_by_key uses the binary predicate \c binary_pred
|
|
1431
|
+
* to compare adjacent keys. Specifically, consecutive iterators <tt>i</tt> and
|
|
1432
|
+
* <tt>i+1</tt> in the range <tt>[first1, last1)</tt> belong to the same segment if
|
|
1433
|
+
* <tt>binary_pred(*i, *(i+1))</tt> is true, and belong to different segments otherwise.
|
|
1434
|
+
*
|
|
1435
|
+
* Results are not deterministic for pseudo-associative operators (e.g.,
|
|
1436
|
+
* addition of floating-point types). Results for pseudo-associative
|
|
1437
|
+
* operators may vary from run to run.
|
|
1438
|
+
*
|
|
1439
|
+
* \param first1 The beginning of the key sequence.
|
|
1440
|
+
* \param last1 The end of the key sequence.
|
|
1441
|
+
* \param first2 The beginning of the input value sequence.
|
|
1442
|
+
* \param result The beginning of the output value sequence.
|
|
1443
|
+
* \param init The initial value of the exclusive scan.
|
|
1444
|
+
* \param binary_pred The binary predicate used to determine equality of keys.
|
|
1445
|
+
* \return The end of the output sequence.
|
|
1446
|
+
*
|
|
1447
|
+
* \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
|
|
1448
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
1449
|
+
* \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
|
|
1450
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
1451
|
+
*
|
|
1452
|
+
* The following code snippet demonstrates how to use \p exclusive_scan_by_key:
|
|
1453
|
+
*
|
|
1454
|
+
* \code
|
|
1455
|
+
* #include <thrust/scan.h>
|
|
1456
|
+
* #include <thrust/functional.h>
|
|
1457
|
+
*
|
|
1458
|
+
* int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
|
|
1459
|
+
* int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
|
1460
|
+
*
|
|
1461
|
+
* int init = 5;
|
|
1462
|
+
*
|
|
1463
|
+
* ::cuda::std::equal_to<int> binary_pred;
|
|
1464
|
+
*
|
|
1465
|
+
* thrust::exclusive_scan_by_key(keys, keys + 10, vals, vals, init, binary_pred); // in-place scan
|
|
1466
|
+
*
|
|
1467
|
+
* // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8};
|
|
1468
|
+
* \endcode
|
|
1469
|
+
*
|
|
1470
|
+
* \see exclusive_scan
|
|
1471
|
+
* \see inclusive_scan_by_key
|
|
1472
|
+
*
|
|
1473
|
+
*/
|
|
1474
|
+
template <typename InputIterator1, typename InputIterator2, typename OutputIterator, typename T, typename BinaryPredicate>
|
|
1475
|
+
OutputIterator exclusive_scan_by_key(
|
|
1476
|
+
InputIterator1 first1,
|
|
1477
|
+
InputIterator1 last1,
|
|
1478
|
+
InputIterator2 first2,
|
|
1479
|
+
OutputIterator result,
|
|
1480
|
+
T init,
|
|
1481
|
+
BinaryPredicate binary_pred);
|
|
1482
|
+
|
|
1483
|
+
/*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix
|
|
1484
|
+
* sum operation. The term 'exclusive' means that each result does not include
|
|
1485
|
+
* the corresponding input operand in the partial sum. The term 'segmented'
|
|
1486
|
+
* means that the partial sums are broken into distinct segments. In other
|
|
1487
|
+
* words, within each segment a separate exclusive scan operation is computed.
|
|
1488
|
+
* Refer to the code sample below for example usage.
|
|
1489
|
+
*
|
|
1490
|
+
* This version of \p exclusive_scan_by_key uses the value \c init to
|
|
1491
|
+
* initialize the exclusive scan operation.
|
|
1492
|
+
*
|
|
1493
|
+
* This version of \p exclusive_scan_by_key uses the binary predicate \c binary_pred
|
|
1494
|
+
* to compare adjacent keys. Specifically, consecutive iterators <tt>i</tt> and
|
|
1495
|
+
* <tt>i+1</tt> in the range <tt>[first1, last1)</tt> belong to the same segment if
|
|
1496
|
+
* <tt>binary_pred(*i, *(i+1))</tt> is true, and belong to different segments otherwise.
|
|
1497
|
+
*
|
|
1498
|
+
* This version of \p exclusive_scan_by_key uses the associative operator
|
|
1499
|
+
* \c binary_op to perform the prefix sum. When the input and output sequences
|
|
1500
|
+
* are the same, the scan is performed in-place.
|
|
1501
|
+
*
|
|
1502
|
+
* Results are not deterministic for pseudo-associative operators (e.g.,
|
|
1503
|
+
* addition of floating-point types). Results for pseudo-associative
|
|
1504
|
+
* operators may vary from run to run.
|
|
1505
|
+
*
|
|
1506
|
+
* The algorithm's execution is parallelized as determined by \p exec.
|
|
1507
|
+
*
|
|
1508
|
+
* \param exec The execution policy to use for parallelization.
|
|
1509
|
+
* \param first1 The beginning of the key sequence.
|
|
1510
|
+
* \param last1 The end of the key sequence.
|
|
1511
|
+
* \param first2 The beginning of the input value sequence.
|
|
1512
|
+
* \param result The beginning of the output value sequence.
|
|
1513
|
+
* \param init The initial value of the exclusive sum.
|
|
1514
|
+
* \param binary_pred The binary predicate used to determine equality of keys.
|
|
1515
|
+
* \param binary_op The associative operator used to 'sum' values.
|
|
1516
|
+
* \return The end of the output sequence.
|
|
1517
|
+
*
|
|
1518
|
+
* \tparam DerivedPolicy The name of the derived execution policy.
|
|
1519
|
+
* \tparam InputIterator1 is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input Iterator</a>
|
|
1520
|
+
* \tparam InputIterator2 is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input Iterator</a>
|
|
1521
|
+
* and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type.
|
|
1522
|
+
* \tparam OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>,
|
|
1523
|
+
* and if \c x and \c y are objects of \c OutputIterator's \c value_type, then <tt>binary_op(x,y)</tt> is defined.
|
|
1524
|
+
* \tparam T is convertible to \c OutputIterator's \c value_type.
|
|
1525
|
+
* \tparam BinaryPredicate is a model of
|
|
1526
|
+
* <a href="https://en.cppreference.com/w/cpp/named_req/BinaryPredicate">Binary Predicate</a>.
|
|
1527
|
+
* \tparam AssociativeOperator The function's return type must be convertible to \c OutputIterator's \c value_type.
|
|
1528
|
+
*
|
|
1529
|
+
* \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
|
|
1530
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
1531
|
+
* \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
|
|
1532
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
1533
|
+
*
|
|
1534
|
+
* The following code snippet demonstrates how to use \p exclusive_scan_by_key using the
|
|
1535
|
+
* \p thrust::host execution policy for parallelization:
|
|
1536
|
+
*
|
|
1537
|
+
* \code
|
|
1538
|
+
* #include <thrust/scan.h>
|
|
1539
|
+
* #include <thrust/functional.h>
|
|
1540
|
+
* #include <thrust/execution_policy.h>
|
|
1541
|
+
* ...
|
|
1542
|
+
*
|
|
1543
|
+
* int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
|
|
1544
|
+
* int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
|
1545
|
+
*
|
|
1546
|
+
* int init = 5;
|
|
1547
|
+
*
|
|
1548
|
+
* ::cuda::std::equal_to<int> binary_pred;
|
|
1549
|
+
* ::cuda::std::plus<int> binary_op;
|
|
1550
|
+
*
|
|
1551
|
+
* thrust::exclusive_scan_by_key(thrust::host, keys, keys + 10, vals, vals, init, binary_pred, binary_op);
|
|
1552
|
+
* // in-place scan
|
|
1553
|
+
*
|
|
1554
|
+
* // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8};
|
|
1555
|
+
* \endcode
|
|
1556
|
+
*
|
|
1557
|
+
* \see exclusive_scan
|
|
1558
|
+
* \see inclusive_scan_by_key
|
|
1559
|
+
*
|
|
1560
|
+
*/
|
|
1561
|
+
template <typename DerivedPolicy,
|
|
1562
|
+
typename InputIterator1,
|
|
1563
|
+
typename InputIterator2,
|
|
1564
|
+
typename OutputIterator,
|
|
1565
|
+
typename T,
|
|
1566
|
+
typename BinaryPredicate,
|
|
1567
|
+
typename AssociativeOperator>
|
|
1568
|
+
_CCCL_HOST_DEVICE OutputIterator exclusive_scan_by_key(
|
|
1569
|
+
const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
|
|
1570
|
+
InputIterator1 first1,
|
|
1571
|
+
InputIterator1 last1,
|
|
1572
|
+
InputIterator2 first2,
|
|
1573
|
+
OutputIterator result,
|
|
1574
|
+
T init,
|
|
1575
|
+
BinaryPredicate binary_pred,
|
|
1576
|
+
AssociativeOperator binary_op);
|
|
1577
|
+
|
|
1578
|
+
/*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix
|
|
1579
|
+
* sum operation. The term 'exclusive' means that each result does not include
|
|
1580
|
+
* the corresponding input operand in the partial sum. The term 'segmented'
|
|
1581
|
+
* means that the partial sums are broken into distinct segments. In other
|
|
1582
|
+
* words, within each segment a separate exclusive scan operation is computed.
|
|
1583
|
+
* Refer to the code sample below for example usage.
|
|
1584
|
+
*
|
|
1585
|
+
* This version of \p exclusive_scan_by_key uses the value \c init to
|
|
1586
|
+
* initialize the exclusive scan operation.
|
|
1587
|
+
*
|
|
1588
|
+
* This version of \p exclusive_scan_by_key uses the binary predicate \c binary_pred
|
|
1589
|
+
* to compare adjacent keys. Specifically, consecutive iterators <tt>i</tt> and
|
|
1590
|
+
* <tt>i+1</tt> in the range <tt>[first1, last1)</tt> belong to the same segment if
|
|
1591
|
+
* <tt>binary_pred(*i, *(i+1))</tt> is true, and belong to different segments otherwise.
|
|
1592
|
+
*
|
|
1593
|
+
* This version of \p exclusive_scan_by_key uses the associative operator
|
|
1594
|
+
* \c binary_op to perform the prefix sum. When the input and output sequences
|
|
1595
|
+
* are the same, the scan is performed in-place.
|
|
1596
|
+
*
|
|
1597
|
+
* Results are not deterministic for pseudo-associative operators (e.g.,
|
|
1598
|
+
* addition of floating-point types). Results for pseudo-associative
|
|
1599
|
+
* operators may vary from run to run.
|
|
1600
|
+
*
|
|
1601
|
+
* \param first1 The beginning of the key sequence.
|
|
1602
|
+
* \param last1 The end of the key sequence.
|
|
1603
|
+
* \param first2 The beginning of the input value sequence.
|
|
1604
|
+
* \param result The beginning of the output value sequence.
|
|
1605
|
+
* \param init The initial value of the exclusive sum.
|
|
1606
|
+
* \param binary_pred The binary predicate used to determine equality of keys.
|
|
1607
|
+
* \param binary_op The associative operator used to 'sum' values.
|
|
1608
|
+
* \return The end of the output sequence.
|
|
1609
|
+
*
|
|
1610
|
+
* \tparam InputIterator1 is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input Iterator</a>
|
|
1611
|
+
* \tparam InputIterator2 is a model of <a href="https://en.cppreference.com/w/cpp/iterator/input_iterator">Input Iterator</a>
|
|
1612
|
+
* and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type.
|
|
1613
|
+
* \tparam OutputIterator is a model of <a href="https://en.cppreference.com/w/cpp/iterator/output_iterator">Output Iterator</a>,
|
|
1614
|
+
* and if \c x and \c y are objects of \c OutputIterator's \c value_type, then <tt>binary_op(x,y)</tt> is defined.
|
|
1615
|
+
* \tparam T is convertible to \c OutputIterator's \c value_type.
|
|
1616
|
+
* \tparam BinaryPredicate is a model of
|
|
1617
|
+
* <a href="https://en.cppreference.com/w/cpp/named_req/BinaryPredicate">Binary Predicate</a>.
|
|
1618
|
+
* \tparam AssociativeOperator The function's return type must be convertible to \c OutputIterator's \c value_type.
|
|
1619
|
+
*
|
|
1620
|
+
* \pre \p first1 may equal \p result but the range <tt>[first1, last1)</tt> and the range
|
|
1621
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
1622
|
+
* \pre \p first2 may equal \p result but the range <tt>[first2, first2 + (last1 - first1))</tt> and the range
|
|
1623
|
+
* <tt>[result, result + (last1 - first1))</tt> shall not overlap otherwise.
|
|
1624
|
+
*
|
|
1625
|
+
* The following code snippet demonstrates how to use \p exclusive_scan_by_key:
|
|
1626
|
+
*
|
|
1627
|
+
* \code
|
|
1628
|
+
* #include <thrust/scan.h>
|
|
1629
|
+
* #include <thrust/functional.h>
|
|
1630
|
+
*
|
|
1631
|
+
* int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
|
|
1632
|
+
* int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
|
1633
|
+
*
|
|
1634
|
+
* int init = 5;
|
|
1635
|
+
*
|
|
1636
|
+
* ::cuda::std::equal_to<int> binary_pred;
|
|
1637
|
+
* ::cuda::std::plus<int> binary_op;
|
|
1638
|
+
*
|
|
1639
|
+
* thrust::exclusive_scan_by_key(keys, keys + 10, vals, vals, init, binary_pred, binary_op); // in-place scan
|
|
1640
|
+
*
|
|
1641
|
+
* // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8};
|
|
1642
|
+
* \endcode
|
|
1643
|
+
*
|
|
1644
|
+
* \see exclusive_scan
|
|
1645
|
+
* \see inclusive_scan_by_key
|
|
1646
|
+
*
|
|
1647
|
+
*/
|
|
1648
|
+
template <typename InputIterator1,
|
|
1649
|
+
typename InputIterator2,
|
|
1650
|
+
typename OutputIterator,
|
|
1651
|
+
typename T,
|
|
1652
|
+
typename BinaryPredicate,
|
|
1653
|
+
typename AssociativeOperator>
|
|
1654
|
+
OutputIterator exclusive_scan_by_key(
|
|
1655
|
+
InputIterator1 first1,
|
|
1656
|
+
InputIterator1 last1,
|
|
1657
|
+
InputIterator2 first2,
|
|
1658
|
+
OutputIterator result,
|
|
1659
|
+
T init,
|
|
1660
|
+
BinaryPredicate binary_pred,
|
|
1661
|
+
AssociativeOperator binary_op);
|
|
1662
|
+
|
|
1663
|
+
/*! \} // end segmentedprefixsums
|
|
1664
|
+
*/
|
|
1665
|
+
|
|
1666
|
+
/*! \} // end prefix sums
|
|
1667
|
+
*/
|
|
1668
|
+
|
|
1669
|
+
THRUST_NAMESPACE_END
|
|
1670
|
+
|
|
1671
|
+
#include <thrust/detail/scan.inl>
|