cuda-cccl 0.3.2__cp313-cp313-manylinux_2_24_aarch64.whl → 0.3.4__cp313-cp313-manylinux_2_24_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cuda-cccl might be problematic. Click here for more details.
- cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +12 -38
- cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +16 -40
- cuda/cccl/headers/include/cub/agent/agent_for.cuh +2 -28
- cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +24 -56
- cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +12 -38
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +31 -56
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +31 -35
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +47 -48
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +39 -42
- cuda/cccl/headers/include/cub/agent/agent_reduce.cuh +33 -60
- cuda/cccl/headers/include/cub/agent/agent_reduce_by_key.cuh +18 -44
- cuda/cccl/headers/include/cub/agent/agent_rle.cuh +26 -55
- cuda/cccl/headers/include/cub/agent/agent_scan.cuh +22 -49
- cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +15 -41
- cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +9 -35
- cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +20 -49
- cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +14 -40
- cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +18 -40
- cuda/cccl/headers/include/cub/agent/agent_topk.cuh +0 -2
- cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +20 -46
- cuda/cccl/headers/include/cub/agent/single_pass_scan_operators.cuh +3 -28
- cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +7 -31
- cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +10 -34
- cuda/cccl/headers/include/cub/block/block_exchange.cuh +120 -154
- cuda/cccl/headers/include/cub/block/block_histogram.cuh +28 -52
- cuda/cccl/headers/include/cub/block/block_load.cuh +124 -146
- cuda/cccl/headers/include/cub/block/block_load_to_shared.cuh +0 -16
- cuda/cccl/headers/include/cub/block/block_merge_sort.cuh +58 -87
- cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +81 -100
- cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +92 -156
- cuda/cccl/headers/include/cub/block/block_raking_layout.cuh +8 -32
- cuda/cccl/headers/include/cub/block/block_reduce.cuh +21 -46
- cuda/cccl/headers/include/cub/block/block_run_length_decode.cuh +51 -79
- cuda/cccl/headers/include/cub/block/block_scan.cuh +94 -401
- cuda/cccl/headers/include/cub/block/block_shuffle.cuh +10 -34
- cuda/cccl/headers/include/cub/block/block_store.cuh +73 -97
- cuda/cccl/headers/include/cub/block/radix_rank_sort_operations.cuh +2 -29
- cuda/cccl/headers/include/cub/block/specializations/block_histogram_atomic.cuh +5 -29
- cuda/cccl/headers/include/cub/block/specializations/block_histogram_sort.cuh +25 -49
- cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking.cuh +12 -34
- cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking_commutative_only.cuh +10 -34
- cuda/cccl/headers/include/cub/block/specializations/block_reduce_warp_reductions.cuh +3 -27
- cuda/cccl/headers/include/cub/block/specializations/block_scan_raking.cuh +12 -36
- cuda/cccl/headers/include/cub/block/specializations/block_scan_warp_scans.cuh +9 -33
- cuda/cccl/headers/include/cub/config.cuh +2 -26
- cuda/cccl/headers/include/cub/cub.cuh +3 -27
- cuda/cccl/headers/include/cub/detail/array_utils.cuh +2 -26
- cuda/cccl/headers/include/cub/detail/choose_offset.cuh +2 -28
- cuda/cccl/headers/include/cub/detail/detect_cuda_runtime.cuh +3 -27
- cuda/cccl/headers/include/cub/detail/device_double_buffer.cuh +0 -2
- cuda/cccl/headers/include/cub/detail/device_memory_resource.cuh +1 -3
- cuda/cccl/headers/include/cub/detail/fast_modulo_division.cuh +2 -28
- cuda/cccl/headers/include/cub/detail/integer_utils.cuh +0 -2
- cuda/cccl/headers/include/cub/detail/launcher/cuda_driver.cuh +0 -2
- cuda/cccl/headers/include/cub/detail/launcher/cuda_runtime.cuh +0 -2
- cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +0 -2
- cuda/cccl/headers/include/cub/detail/ptx-json/README.md +7 -12
- cuda/cccl/headers/include/cub/detail/ptx-json/array.h +6 -33
- cuda/cccl/headers/include/cub/detail/ptx-json/json.h +13 -36
- cuda/cccl/headers/include/cub/detail/ptx-json/object.h +9 -38
- cuda/cccl/headers/include/cub/detail/ptx-json/string.h +58 -32
- cuda/cccl/headers/include/cub/detail/ptx-json/value.h +51 -51
- cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +7 -31
- cuda/cccl/headers/include/cub/detail/rfa.cuh +2 -27
- cuda/cccl/headers/include/cub/detail/strong_load.cuh +3 -29
- cuda/cccl/headers/include/cub/detail/strong_store.cuh +3 -29
- cuda/cccl/headers/include/cub/detail/temporary_storage.cuh +2 -9
- cuda/cccl/headers/include/cub/detail/type_traits.cuh +0 -2
- cuda/cccl/headers/include/cub/detail/uninitialized_copy.cuh +6 -31
- cuda/cccl/headers/include/cub/detail/unsafe_bitcast.cuh +2 -25
- cuda/cccl/headers/include/cub/device/device_adjacent_difference.cuh +2 -26
- cuda/cccl/headers/include/cub/device/device_for.cuh +3 -5
- cuda/cccl/headers/include/cub/device/device_histogram.cuh +3 -27
- cuda/cccl/headers/include/cub/device/device_memcpy.cuh +2 -26
- cuda/cccl/headers/include/cub/device/device_merge_sort.cuh +2 -26
- cuda/cccl/headers/include/cub/device/device_partition.cuh +3 -27
- cuda/cccl/headers/include/cub/device/device_radix_sort.cuh +3 -27
- cuda/cccl/headers/include/cub/device/device_reduce.cuh +10 -31
- cuda/cccl/headers/include/cub/device/device_run_length_encode.cuh +3 -27
- cuda/cccl/headers/include/cub/device/device_scan.cuh +16 -34
- cuda/cccl/headers/include/cub/device/device_segmented_radix_sort.cuh +3 -27
- cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +3 -27
- cuda/cccl/headers/include/cub/device/device_segmented_sort.cuh +2 -26
- cuda/cccl/headers/include/cub/device/device_select.cuh +3 -27
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_adjacent_difference.cuh +2 -28
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_batch_memcpy.cuh +2 -27
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_copy_mdspan.cuh +0 -2
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_for.cuh +3 -29
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_histogram.cuh +14 -34
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge_sort.cuh +5 -30
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +4 -29
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +5 -32
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_by_key.cuh +3 -29
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +2 -29
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh +1 -2
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_rle.cuh +47 -59
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan.cuh +21 -30
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan_by_key.cuh +2 -27
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +3 -27
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_select_if.cuh +3 -27
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +0 -2
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +51 -36
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_three_way_partition.cuh +3 -28
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_topk.cuh +0 -1
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +27 -55
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_unique_by_key.cuh +4 -28
- cuda/cccl/headers/include/cub/device/dispatch/kernels/{for_each.cuh → kernel_for_each.cuh} +0 -2
- cuda/cccl/headers/include/cub/device/dispatch/kernels/{histogram.cuh → kernel_histogram.cuh} +149 -157
- cuda/cccl/headers/include/cub/device/dispatch/kernels/{merge_sort.cuh → kernel_merge_sort.cuh} +0 -2
- cuda/cccl/headers/include/cub/device/dispatch/kernels/{radix_sort.cuh → kernel_radix_sort.cuh} +0 -2
- cuda/cccl/headers/include/cub/device/dispatch/kernels/{reduce.cuh → kernel_reduce.cuh} +2 -28
- cuda/cccl/headers/include/cub/device/dispatch/kernels/{scan.cuh → kernel_scan.cuh} +2 -28
- cuda/cccl/headers/include/cub/device/dispatch/kernels/{segmented_reduce.cuh → kernel_segmented_reduce.cuh} +3 -29
- cuda/cccl/headers/include/cub/device/dispatch/kernels/{segmented_sort.cuh → kernel_segmented_sort.cuh} +0 -1
- cuda/cccl/headers/include/cub/device/dispatch/kernels/{three_way_partition.cuh → kernel_three_way_partition.cuh} +0 -1
- cuda/cccl/headers/include/cub/device/dispatch/kernels/{transform.cuh → kernel_transform.cuh} +11 -11
- cuda/cccl/headers/include/cub/device/dispatch/kernels/{unique_by_key.cuh → kernel_unique_by_key.cuh} +0 -1
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +2 -26
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +2 -26
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +2 -28
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +6 -26
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +2 -26
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +5 -31
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +31 -33
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce.cuh +15 -40
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +2 -26
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +2 -28
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +20 -44
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +2 -26
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +20 -45
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_select_if.cuh +2 -27
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +11 -36
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_topk.cuh +0 -1
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +2 -27
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +14 -40
- cuda/cccl/headers/include/cub/grid/grid_even_share.cuh +3 -27
- cuda/cccl/headers/include/cub/grid/grid_mapping.cuh +3 -27
- cuda/cccl/headers/include/cub/grid/grid_queue.cuh +3 -27
- cuda/cccl/headers/include/cub/iterator/arg_index_input_iterator.cuh +3 -27
- cuda/cccl/headers/include/cub/iterator/cache_modified_input_iterator.cuh +3 -27
- cuda/cccl/headers/include/cub/iterator/cache_modified_output_iterator.cuh +3 -27
- cuda/cccl/headers/include/cub/iterator/tex_obj_input_iterator.cuh +3 -27
- cuda/cccl/headers/include/cub/thread/thread_load.cuh +3 -28
- cuda/cccl/headers/include/cub/thread/thread_operators.cuh +3 -27
- cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +3 -26
- cuda/cccl/headers/include/cub/thread/thread_scan.cuh +3 -29
- cuda/cccl/headers/include/cub/thread/thread_search.cuh +3 -27
- cuda/cccl/headers/include/cub/thread/thread_simd.cuh +0 -2
- cuda/cccl/headers/include/cub/thread/thread_sort.cuh +2 -26
- cuda/cccl/headers/include/cub/thread/thread_store.cuh +3 -27
- cuda/cccl/headers/include/cub/util_allocator.cuh +3 -27
- cuda/cccl/headers/include/cub/util_arch.cuh +3 -29
- cuda/cccl/headers/include/cub/util_cpp_dialect.cuh +2 -26
- cuda/cccl/headers/include/cub/util_debug.cuh +3 -27
- cuda/cccl/headers/include/cub/util_device.cuh +18 -59
- cuda/cccl/headers/include/cub/util_macro.cuh +4 -28
- cuda/cccl/headers/include/cub/util_math.cuh +2 -28
- cuda/cccl/headers/include/cub/util_namespace.cuh +3 -28
- cuda/cccl/headers/include/cub/util_policy_wrapper_t.cuh +3 -27
- cuda/cccl/headers/include/cub/util_ptx.cuh +6 -30
- cuda/cccl/headers/include/cub/util_temporary_storage.cuh +3 -29
- cuda/cccl/headers/include/cub/util_type.cuh +5 -32
- cuda/cccl/headers/include/cub/util_vsmem.cuh +2 -28
- cuda/cccl/headers/include/cub/version.cuh +2 -26
- cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_shfl.cuh +10 -35
- cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_smem.cuh +5 -30
- cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +15 -39
- cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +5 -35
- cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +22 -46
- cuda/cccl/headers/include/cub/warp/specializations/warp_scan_smem.cuh +3 -27
- cuda/cccl/headers/include/cub/warp/warp_exchange.cuh +2 -26
- cuda/cccl/headers/include/cub/warp/warp_load.cuh +4 -27
- cuda/cccl/headers/include/cub/warp/warp_merge_sort.cuh +2 -26
- cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +3 -22
- cuda/cccl/headers/include/cub/warp/warp_scan.cuh +3 -27
- cuda/cccl/headers/include/cub/warp/warp_store.cuh +4 -27
- cuda/cccl/headers/include/cub/warp/warp_utils.cuh +0 -2
- cuda/cccl/headers/include/cuda/__barrier/barrier.h +1 -1
- cuda/cccl/headers/include/cuda/__barrier/barrier_arrive_tx.h +0 -1
- cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +277 -235
- cuda/cccl/headers/include/cuda/__barrier/barrier_expect_tx.h +0 -1
- cuda/cccl/headers/include/cuda/__driver/driver_api.h +13 -0
- cuda/cccl/headers/include/cuda/__execution/determinism.h +0 -2
- cuda/cccl/headers/include/cuda/__execution/output_ordering.h +0 -2
- cuda/cccl/headers/include/cuda/__functional/maximum.h +25 -7
- cuda/cccl/headers/include/cuda/__functional/minimum.h +25 -7
- cuda/cccl/headers/include/cuda/__functional/minimum_maximum_common.h +52 -0
- cuda/cccl/headers/include/cuda/__functional/proclaim_return_type.h +0 -2
- cuda/cccl/headers/include/cuda/__iterator/counting_iterator.h +13 -4
- cuda/cccl/headers/include/cuda/__iterator/zip_function.h +4 -2
- cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +0 -1
- cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_bulk_shared_global.h +28 -7
- cuda/cccl/headers/include/cuda/__memcpy_async/dispatch_memcpy_async.h +1 -1
- cuda/cccl/headers/include/cuda/__memcpy_async/elect_one.h +52 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_tx.h +2 -3
- cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_completion.h +1 -7
- cuda/cccl/headers/include/cuda/__memcpy_async/try_get_barrier_handle.h +0 -1
- cuda/cccl/headers/include/cuda/__memory/get_device_address.h +1 -1
- cuda/cccl/headers/include/cuda/__memory/ranges_overlap.h +126 -0
- cuda/cccl/headers/include/cuda/__memory_resource/any_resource.h +898 -0
- cuda/cccl/headers/include/cuda/__memory_resource/device_memory_pool.h +149 -0
- cuda/cccl/headers/include/cuda/__memory_resource/get_property.h +3 -3
- cuda/cccl/headers/include/cuda/__memory_resource/legacy_managed_memory_resource.h +148 -0
- cuda/cccl/headers/include/cuda/__memory_resource/legacy_pinned_memory_resource.h +139 -0
- cuda/cccl/headers/include/cuda/__memory_resource/managed_memory_pool.h +146 -0
- cuda/cccl/headers/include/cuda/__memory_resource/memory_resource_base.h +578 -0
- cuda/cccl/headers/include/cuda/__memory_resource/pinned_memory_pool.h +188 -0
- cuda/cccl/headers/include/cuda/__memory_resource/properties.h +3 -3
- cuda/cccl/headers/include/cuda/__memory_resource/resource.h +37 -3
- cuda/cccl/headers/include/cuda/__numeric/add_overflow.h +13 -3
- cuda/cccl/headers/include/cuda/__numeric/div_overflow.h +150 -0
- cuda/cccl/headers/include/cuda/__numeric/overflow_cast.h +2 -2
- cuda/cccl/headers/include/cuda/__numeric/sub_overflow.h +344 -0
- cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +0 -6
- cuda/cccl/headers/include/cuda/__ptx/instructions/shfl_sync.h +1 -1
- cuda/cccl/headers/include/cuda/__ptx/pragmas/enable_smem_spilling.h +47 -0
- cuda/cccl/headers/include/cuda/{std/__cuda → __runtime}/api_wrapper.h +3 -3
- cuda/cccl/headers/include/cuda/__stream/get_stream.h +0 -1
- cuda/cccl/headers/include/cuda/{__fwd/barrier_native_handle.h → __stream/internal_streams.h} +17 -15
- cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_ptr.h +2 -2
- cuda/cccl/headers/include/cuda/__utility/__basic_any/basic_any_value.h +1 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/semiregular.h +1 -0
- cuda/cccl/headers/include/cuda/__utility/__basic_any/virtcall.h +2 -1
- cuda/cccl/headers/include/cuda/barrier +42 -16
- cuda/cccl/headers/include/cuda/memory +1 -0
- cuda/cccl/headers/include/cuda/memory_resource +6 -1
- cuda/cccl/headers/include/cuda/numeric +2 -0
- cuda/cccl/headers/include/cuda/pipeline +3 -2
- cuda/cccl/headers/include/cuda/ptx +1 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/unique_copy.h +0 -2
- cuda/cccl/headers/include/cuda/std/__atomic/api/reference.h +1 -1
- cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_derived.h +115 -58
- cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated.h +844 -378
- cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated_helper.h +12 -5
- cuda/cccl/headers/include/cuda/std/__atomic/functions/host.h +31 -0
- cuda/cccl/headers/include/cuda/std/__atomic/types/small.h +10 -0
- cuda/cccl/headers/include/cuda/std/__atomic/types.h +2 -3
- cuda/cccl/headers/include/cuda/std/__bit/byteswap.h +37 -13
- cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +0 -28
- cuda/cccl/headers/include/cuda/std/__cccl/dialect.h +7 -0
- cuda/cccl/headers/include/cuda/std/__cccl/epilogue.h +10 -0
- cuda/cccl/headers/include/cuda/std/__cccl/exceptions.h +2 -45
- cuda/cccl/headers/include/cuda/std/__cccl/is_non_narrowing_convertible.h +0 -2
- cuda/cccl/headers/include/cuda/std/__cccl/prologue.h +8 -0
- cuda/cccl/headers/include/cuda/std/__chrono/calendar.h +0 -2
- cuda/cccl/headers/include/cuda/std/__chrono/day.h +0 -2
- cuda/cccl/headers/include/cuda/std/__chrono/duration.h +13 -17
- cuda/cccl/headers/include/cuda/std/__chrono/file_clock.h +0 -2
- cuda/cccl/headers/include/cuda/std/__chrono/high_resolution_clock.h +0 -2
- cuda/cccl/headers/include/cuda/std/__chrono/month.h +0 -2
- cuda/cccl/headers/include/cuda/std/__chrono/steady_clock.h +0 -2
- cuda/cccl/headers/include/cuda/std/__chrono/system_clock.h +0 -2
- cuda/cccl/headers/include/cuda/std/__chrono/time_point.h +5 -8
- cuda/cccl/headers/include/cuda/std/__chrono/year.h +0 -2
- cuda/cccl/headers/include/cuda/std/__cmath/error_functions.h +4 -0
- cuda/cccl/headers/include/cuda/std/__cmath/exponential_functions.h +2 -3
- cuda/cccl/headers/include/cuda/std/__cmath/fdim.h +4 -0
- cuda/cccl/headers/include/cuda/std/__cmath/fma.h +4 -0
- cuda/cccl/headers/include/cuda/std/__cmath/fpclassify.h +2 -3
- cuda/cccl/headers/include/cuda/std/__cmath/gamma.h +2 -3
- cuda/cccl/headers/include/cuda/std/__cmath/hyperbolic_functions.h +2 -3
- cuda/cccl/headers/include/cuda/std/__cmath/hypot.h +2 -3
- cuda/cccl/headers/include/cuda/std/__cmath/inverse_hyperbolic_functions.h +2 -3
- cuda/cccl/headers/include/cuda/std/__cmath/inverse_trigonometric_functions.h +2 -3
- cuda/cccl/headers/include/cuda/std/__cmath/isfinite.h +2 -3
- cuda/cccl/headers/include/cuda/std/__cmath/isinf.h +2 -3
- cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +2 -3
- cuda/cccl/headers/include/cuda/std/__cmath/logarithms.h +2 -3
- cuda/cccl/headers/include/cuda/std/__cmath/min_max.h +2 -2
- cuda/cccl/headers/include/cuda/std/__cmath/remainder.h +4 -0
- cuda/cccl/headers/include/cuda/std/__cmath/roots.h +2 -3
- cuda/cccl/headers/include/cuda/std/__cmath/rounding_functions.h +2 -3
- cuda/cccl/headers/include/cuda/std/__cmath/traits.h +4 -0
- cuda/cccl/headers/include/cuda/std/__cmath/trigonometric_functions.h +2 -3
- cuda/cccl/headers/include/cuda/std/__complex/complex.h +0 -6
- cuda/cccl/headers/include/cuda/std/__complex/exponential_functions.h +2 -2
- cuda/cccl/headers/include/cuda/std/__concepts/concept_macros.h +27 -1
- cuda/cccl/headers/include/cuda/std/__concepts/equality_comparable.h +2 -4
- cuda/cccl/headers/include/cuda/std/__exception/cuda_error.h +15 -36
- cuda/cccl/headers/include/cuda/std/__exception/exception_macros.h +93 -0
- cuda/cccl/headers/include/cuda/std/{detail/libcxx/include/stdexcept → __exception/throw_error.h} +3 -3
- cuda/cccl/headers/include/cuda/std/__expected/expected.h +28 -43
- cuda/cccl/headers/include/cuda/std/__expected/unexpected.h +2 -10
- cuda/cccl/headers/include/cuda/std/__format/format_arg_store.h +2 -2
- cuda/cccl/headers/include/cuda/std/__functional/bind.h +6 -6
- cuda/cccl/headers/include/cuda/std/__functional/function.h +2 -6
- cuda/cccl/headers/include/cuda/std/__functional/invoke.h +5 -5
- cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +5 -0
- cuda/cccl/headers/include/cuda/std/__fwd/array.h +2 -2
- cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +12 -0
- cuda/cccl/headers/include/cuda/std/__fwd/expected.h +46 -0
- cuda/cccl/headers/include/cuda/std/__fwd/get.h +21 -22
- cuda/cccl/headers/include/cuda/std/{detail/libcxx/include/iosfwd → __fwd/ios.h} +5 -10
- cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +19 -10
- cuda/cccl/headers/include/cuda/std/__fwd/optional.h +2 -2
- cuda/cccl/headers/include/cuda/std/__fwd/reference_wrapper.h +5 -0
- cuda/cccl/headers/include/cuda/std/__fwd/span.h +2 -2
- cuda/cccl/headers/include/cuda/std/__fwd/string.h +7 -0
- cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +18 -0
- cuda/cccl/headers/include/cuda/std/__fwd/tuple.h +3 -0
- cuda/cccl/headers/include/cuda/std/__fwd/unexpected.h +40 -0
- cuda/cccl/headers/include/cuda/std/{__type_traits/is_reference_wrapper.h → __fwd/variant.h} +16 -15
- cuda/cccl/headers/include/cuda/std/__internal/features.h +14 -0
- cuda/cccl/headers/include/cuda/std/__iterator/istream_iterator.h +1 -1
- cuda/cccl/headers/include/cuda/std/__iterator/istreambuf_iterator.h +1 -1
- cuda/cccl/headers/include/cuda/std/__iterator/iter_swap.h +58 -40
- cuda/cccl/headers/include/cuda/std/__iterator/ostream_iterator.h +1 -1
- cuda/cccl/headers/include/cuda/std/__iterator/ostreambuf_iterator.h +1 -1
- cuda/cccl/headers/include/cuda/std/__iterator/reverse_iterator.h +0 -5
- cuda/cccl/headers/include/cuda/std/__limits/numeric_limits.h +4 -18
- cuda/cccl/headers/include/cuda/std/__linalg/conj_if_needed.h +1 -2
- cuda/cccl/headers/include/cuda/std/__linalg/conjugate_transposed.h +0 -2
- cuda/cccl/headers/include/cuda/std/__linalg/conjugated.h +0 -2
- cuda/cccl/headers/include/cuda/std/__linalg/scaled.h +0 -4
- cuda/cccl/headers/include/cuda/std/__linalg/transposed.h +0 -5
- cuda/cccl/headers/include/cuda/std/__mdspan/concepts.h +3 -10
- cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +4 -15
- cuda/cccl/headers/include/cuda/std/__mdspan/layout_left.h +4 -4
- cuda/cccl/headers/include/cuda/std/__mdspan/layout_right.h +4 -4
- cuda/cccl/headers/include/cuda/std/__mdspan/layout_stride.h +2 -4
- cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +3 -3
- cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_helper.h +1 -1
- cuda/cccl/headers/include/cuda/std/__memory/allocator_arg_t.h +1 -0
- cuda/cccl/headers/include/cuda/std/__memory/allocator_traits.h +6 -12
- cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +1 -5
- cuda/cccl/headers/include/cuda/std/__memory/is_sufficiently_aligned.h +7 -2
- cuda/cccl/headers/include/cuda/std/__memory/uninitialized_algorithms.h +1 -0
- cuda/cccl/headers/include/cuda/std/__memory/uses_allocator.h +5 -0
- cuda/cccl/headers/include/cuda/std/__new/allocate.h +5 -0
- cuda/cccl/headers/include/cuda/{__barrier/barrier_native_handle.h → std/__new/device_new.h} +9 -24
- cuda/cccl/headers/include/cuda/std/__new_ +1 -0
- cuda/cccl/headers/include/cuda/std/__optional/optional.h +5 -4
- cuda/cccl/headers/include/cuda/std/__optional/optional_ref.h +4 -4
- cuda/cccl/headers/include/cuda/std/__random/linear_congruential_engine.h +1 -1
- cuda/cccl/headers/include/cuda/std/__random/philox_engine.h +562 -0
- cuda/cccl/headers/include/cuda/std/__random/seed_seq.h +204 -0
- cuda/cccl/headers/include/cuda/std/__random_ +2 -0
- cuda/cccl/headers/include/cuda/std/__ranges/concepts.h +7 -19
- cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +2 -4
- cuda/cccl/headers/include/cuda/std/__ranges/owning_view.h +5 -4
- cuda/cccl/headers/include/cuda/std/__ranges/repeat_view.h +1 -1
- cuda/cccl/headers/include/cuda/std/__string/string_view.h +5 -5
- cuda/cccl/headers/include/cuda/std/__tuple_dir/apply.h +82 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/get.h +122 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/sfinae_helpers.h +0 -160
- cuda/cccl/headers/include/cuda/std/__tuple_dir/structured_bindings.h +123 -129
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tie.h +55 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple.h +457 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_cat.h +158 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_constraints.h +286 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_element.h +7 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_leaf.h +452 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +1 -2
- cuda/cccl/headers/include/cuda/std/__type_traits/is_comparable.h +78 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_convertible.h +1 -1
- cuda/cccl/headers/include/cuda/std/__type_traits/is_fully_bounded_array.h +47 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_swappable.h +0 -2
- cuda/cccl/headers/include/cuda/std/__utility/in_place.h +4 -24
- cuda/cccl/headers/include/cuda/std/__utility/integer_sequence.h +0 -2
- cuda/cccl/headers/include/cuda/std/__utility/pair.h +20 -20
- cuda/cccl/headers/include/cuda/std/__utility/rel_ops.h +0 -2
- cuda/cccl/headers/include/cuda/std/__variant/bad_variant_access.h +74 -0
- cuda/cccl/headers/include/cuda/std/__variant/comparison.h +207 -0
- cuda/cccl/headers/include/cuda/std/__variant/get.h +192 -0
- cuda/cccl/headers/include/cuda/std/__variant/hash.h +82 -0
- cuda/cccl/headers/include/cuda/std/__variant/sfinae_helpers.h +89 -0
- cuda/cccl/headers/include/cuda/std/__variant/variant.h +250 -0
- cuda/cccl/headers/include/cuda/std/__variant/variant_access.h +70 -0
- cuda/cccl/headers/include/cuda/std/__variant/variant_base.h +683 -0
- cuda/cccl/headers/include/cuda/std/__variant/variant_constraints.h +135 -0
- cuda/cccl/headers/include/cuda/std/__variant/variant_match.h +126 -0
- cuda/cccl/headers/include/cuda/std/__variant/variant_traits.h +184 -0
- cuda/cccl/headers/include/cuda/std/__variant/variant_visit.h +225 -0
- cuda/cccl/headers/include/cuda/std/__variant/visit.h +148 -0
- cuda/cccl/headers/include/cuda/std/array +1 -1
- cuda/cccl/headers/include/cuda/std/atomic +1 -1
- cuda/cccl/headers/include/cuda/std/bitset +2 -10
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +6 -6
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/algorithm +1 -4
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/chrono +3 -6
- cuda/cccl/headers/include/cuda/std/functional +1 -1
- cuda/cccl/headers/include/cuda/std/initializer_list +8 -0
- cuda/cccl/headers/include/cuda/std/inplace_vector +6 -5
- cuda/cccl/headers/include/cuda/std/iterator +1 -1
- cuda/cccl/headers/include/cuda/std/numbers +0 -2
- cuda/cccl/headers/include/cuda/std/ratio +2 -2
- cuda/cccl/headers/include/cuda/std/span +2 -2
- cuda/cccl/headers/include/cuda/std/string_view +24 -42
- cuda/cccl/headers/include/cuda/std/tuple +18 -1
- cuda/cccl/headers/include/cuda/std/type_traits +0 -1
- cuda/cccl/headers/include/cuda/std/variant +8 -1
- cuda/cccl/headers/include/nv/target +2 -6
- cuda/cccl/headers/include/thrust/detail/adjacent_difference.inl +15 -2
- cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +0 -2
- cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +0 -1
- cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +0 -1
- cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +0 -2
- cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +0 -2
- cuda/cccl/headers/include/thrust/detail/allocator/no_throw_allocator.h +0 -2
- cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +0 -2
- cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +0 -2
- cuda/cccl/headers/include/thrust/detail/allocator_aware_execution_policy.h +0 -4
- cuda/cccl/headers/include/thrust/detail/binary_search.inl +14 -2
- cuda/cccl/headers/include/thrust/detail/complex/arithmetic.h +2 -7
- cuda/cccl/headers/include/thrust/detail/complex/c99math.h +2 -8
- cuda/cccl/headers/include/thrust/detail/complex/catrig.h +2 -8
- cuda/cccl/headers/include/thrust/detail/complex/catrigf.h +2 -8
- cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +2 -8
- cuda/cccl/headers/include/thrust/detail/complex/ccoshf.h +2 -8
- cuda/cccl/headers/include/thrust/detail/complex/cexp.h +2 -7
- cuda/cccl/headers/include/thrust/detail/complex/cexpf.h +2 -8
- cuda/cccl/headers/include/thrust/detail/complex/clog.h +2 -8
- cuda/cccl/headers/include/thrust/detail/complex/clogf.h +2 -8
- cuda/cccl/headers/include/thrust/detail/complex/cproj.h +2 -7
- cuda/cccl/headers/include/thrust/detail/complex/csinh.h +2 -8
- cuda/cccl/headers/include/thrust/detail/complex/csinhf.h +2 -8
- cuda/cccl/headers/include/thrust/detail/complex/csqrt.h +2 -8
- cuda/cccl/headers/include/thrust/detail/complex/csqrtf.h +2 -8
- cuda/cccl/headers/include/thrust/detail/complex/ctanh.h +2 -8
- cuda/cccl/headers/include/thrust/detail/complex/ctanhf.h +2 -8
- cuda/cccl/headers/include/thrust/detail/complex/math_private.h +2 -8
- cuda/cccl/headers/include/thrust/detail/config/device_system.h +2 -0
- cuda/cccl/headers/include/thrust/detail/config/host_system.h +2 -0
- cuda/cccl/headers/include/thrust/detail/config/namespace.h +0 -1
- cuda/cccl/headers/include/thrust/detail/contiguous_storage.h +0 -2
- cuda/cccl/headers/include/thrust/detail/contiguous_storage.inl +0 -2
- cuda/cccl/headers/include/thrust/detail/copy.h +0 -2
- cuda/cccl/headers/include/thrust/detail/copy.inl +14 -4
- cuda/cccl/headers/include/thrust/detail/copy_if.inl +14 -2
- cuda/cccl/headers/include/thrust/detail/count.inl +14 -2
- cuda/cccl/headers/include/thrust/detail/equal.inl +14 -2
- cuda/cccl/headers/include/thrust/detail/execute_with_allocator.h +4 -5
- cuda/cccl/headers/include/thrust/detail/extrema.inl +14 -2
- cuda/cccl/headers/include/thrust/detail/fill.inl +14 -2
- cuda/cccl/headers/include/thrust/detail/find.inl +14 -2
- cuda/cccl/headers/include/thrust/detail/for_each.inl +14 -2
- cuda/cccl/headers/include/thrust/detail/functional/actor.h +2 -5
- cuda/cccl/headers/include/thrust/detail/functional/operators.h +2 -5
- cuda/cccl/headers/include/thrust/detail/gather.inl +14 -2
- cuda/cccl/headers/include/thrust/detail/generate.inl +14 -2
- cuda/cccl/headers/include/thrust/detail/get_iterator_value.h +0 -2
- cuda/cccl/headers/include/thrust/detail/inner_product.inl +14 -2
- cuda/cccl/headers/include/thrust/detail/internal_functional.h +1 -0
- cuda/cccl/headers/include/thrust/detail/logical.inl +14 -2
- cuda/cccl/headers/include/thrust/detail/malloc_and_free.h +13 -1
- cuda/cccl/headers/include/thrust/detail/merge.inl +14 -2
- cuda/cccl/headers/include/thrust/detail/mismatch.inl +14 -2
- cuda/cccl/headers/include/thrust/detail/overlapped_copy.h +0 -4
- cuda/cccl/headers/include/thrust/detail/partition.inl +14 -2
- cuda/cccl/headers/include/thrust/detail/random_bijection.h +0 -2
- cuda/cccl/headers/include/thrust/detail/range/head_flags.h +0 -2
- cuda/cccl/headers/include/thrust/detail/range/tail_flags.h +0 -2
- cuda/cccl/headers/include/thrust/detail/raw_reference_cast.h +0 -6
- cuda/cccl/headers/include/thrust/detail/reduce.inl +21 -3
- cuda/cccl/headers/include/thrust/detail/reference.h +27 -3
- cuda/cccl/headers/include/thrust/detail/remove.inl +14 -2
- cuda/cccl/headers/include/thrust/detail/replace.inl +14 -2
- cuda/cccl/headers/include/thrust/detail/reverse.inl +14 -2
- cuda/cccl/headers/include/thrust/detail/scan.inl +21 -3
- cuda/cccl/headers/include/thrust/detail/scatter.inl +14 -2
- cuda/cccl/headers/include/thrust/detail/sequence.inl +13 -1
- cuda/cccl/headers/include/thrust/detail/set_operations.inl +13 -1
- cuda/cccl/headers/include/thrust/detail/sort.inl +13 -1
- cuda/cccl/headers/include/thrust/detail/static_assert.h +0 -2
- cuda/cccl/headers/include/thrust/detail/static_map.h +0 -3
- cuda/cccl/headers/include/thrust/detail/swap_ranges.inl +13 -1
- cuda/cccl/headers/include/thrust/detail/tabulate.inl +14 -2
- cuda/cccl/headers/include/thrust/detail/temporary_array.h +0 -4
- cuda/cccl/headers/include/thrust/detail/temporary_array.inl +0 -1
- cuda/cccl/headers/include/thrust/detail/temporary_buffer.h +14 -3
- cuda/cccl/headers/include/thrust/detail/transform_reduce.inl +13 -1
- cuda/cccl/headers/include/thrust/detail/transform_scan.inl +13 -1
- cuda/cccl/headers/include/thrust/detail/trivial_sequence.h +0 -2
- cuda/cccl/headers/include/thrust/detail/tuple_meta_transform.h +0 -2
- cuda/cccl/headers/include/thrust/detail/type_traits/is_call_possible.h +2 -7
- cuda/cccl/headers/include/thrust/detail/type_traits/is_commutative.h +0 -2
- cuda/cccl/headers/include/thrust/detail/type_traits/is_thrust_pointer.h +0 -4
- cuda/cccl/headers/include/thrust/detail/type_traits/pointer_traits.h +0 -4
- cuda/cccl/headers/include/thrust/detail/uninitialized_copy.inl +14 -2
- cuda/cccl/headers/include/thrust/detail/uninitialized_fill.inl +14 -2
- cuda/cccl/headers/include/thrust/detail/unique.inl +21 -3
- cuda/cccl/headers/include/thrust/detail/vector_base.h +0 -2
- cuda/cccl/headers/include/thrust/detail/vector_base.inl +0 -2
- cuda/cccl/headers/include/thrust/execution_policy.h +10 -9
- cuda/cccl/headers/include/thrust/functional.h +0 -2
- cuda/cccl/headers/include/thrust/iterator/detail/device_system_tag.h +9 -4
- cuda/cccl/headers/include/thrust/iterator/detail/host_system_tag.h +8 -4
- cuda/cccl/headers/include/thrust/iterator/detail/iterator_adaptor_base.h +0 -1
- cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h +0 -1
- cuda/cccl/headers/include/thrust/iterator/detail/iterator_facade_category.h +0 -1
- cuda/cccl/headers/include/thrust/iterator/detail/minimum_system.h +0 -1
- cuda/cccl/headers/include/thrust/iterator/detail/tagged_iterator.h +0 -1
- cuda/cccl/headers/include/thrust/iterator/detail/tuple_of_iterator_references.h +2 -6
- cuda/cccl/headers/include/thrust/iterator/transform_input_output_iterator.h +0 -1
- cuda/cccl/headers/include/thrust/iterator/transform_iterator.h +0 -2
- cuda/cccl/headers/include/thrust/mr/allocator.h +0 -2
- cuda/cccl/headers/include/thrust/mr/device_memory_resource.h +9 -4
- cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +10 -10
- cuda/cccl/headers/include/thrust/mr/disjoint_sync_pool.h +0 -2
- cuda/cccl/headers/include/thrust/mr/disjoint_tls_pool.h +0 -2
- cuda/cccl/headers/include/thrust/mr/fancy_pointer_resource.h +0 -2
- cuda/cccl/headers/include/thrust/mr/host_memory_resource.h +8 -4
- cuda/cccl/headers/include/thrust/mr/memory_resource.h +0 -2
- cuda/cccl/headers/include/thrust/mr/new.h +0 -2
- cuda/cccl/headers/include/thrust/mr/polymorphic_adaptor.h +0 -2
- cuda/cccl/headers/include/thrust/mr/pool.h +10 -10
- cuda/cccl/headers/include/thrust/mr/pool_options.h +4 -6
- cuda/cccl/headers/include/thrust/mr/sync_pool.h +0 -2
- cuda/cccl/headers/include/thrust/mr/tls_pool.h +0 -2
- cuda/cccl/headers/include/thrust/mr/validator.h +0 -2
- cuda/cccl/headers/include/thrust/per_device_resource.h +13 -1
- cuda/cccl/headers/include/thrust/random/detail/discard_block_engine.inl +0 -2
- cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine.inl +0 -2
- cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine_discard.h +2 -9
- cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine.inl +0 -2
- cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h +2 -9
- cuda/cccl/headers/include/thrust/random/detail/mod.h +2 -9
- cuda/cccl/headers/include/thrust/random/detail/normal_distribution.inl +0 -2
- cuda/cccl/headers/include/thrust/random/detail/normal_distribution_base.h +2 -7
- cuda/cccl/headers/include/thrust/random/detail/random_core_access.h +2 -9
- cuda/cccl/headers/include/thrust/random/detail/subtract_with_carry_engine.inl +0 -2
- cuda/cccl/headers/include/thrust/random/detail/uniform_int_distribution.inl +0 -2
- cuda/cccl/headers/include/thrust/random/detail/uniform_real_distribution.inl +0 -2
- cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine.inl +0 -2
- cuda/cccl/headers/include/thrust/random/discard_block_engine.h +0 -2
- cuda/cccl/headers/include/thrust/random/linear_congruential_engine.h +0 -2
- cuda/cccl/headers/include/thrust/random/linear_feedback_shift_engine.h +0 -2
- cuda/cccl/headers/include/thrust/random/normal_distribution.h +0 -2
- cuda/cccl/headers/include/thrust/random/subtract_with_carry_engine.h +0 -2
- cuda/cccl/headers/include/thrust/random/uniform_int_distribution.h +0 -2
- cuda/cccl/headers/include/thrust/random/uniform_real_distribution.h +0 -2
- cuda/cccl/headers/include/thrust/random/xor_combine_engine.h +0 -2
- cuda/cccl/headers/include/thrust/random.h +0 -2
- cuda/cccl/headers/include/thrust/system/cpp/detail/execution_policy.h +15 -11
- cuda/cccl/headers/include/thrust/system/cpp/detail/memory.inl +2 -7
- cuda/cccl/headers/include/thrust/system/cpp/memory.h +0 -1
- cuda/cccl/headers/include/thrust/system/cpp/memory_resource.h +0 -2
- cuda/cccl/headers/include/thrust/system/cpp/pointer.h +0 -2
- cuda/cccl/headers/include/thrust/system/cpp/vector.h +0 -1
- cuda/cccl/headers/include/thrust/system/cuda/detail/adjacent_difference.h +0 -4
- cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +0 -1
- cuda/cccl/headers/include/thrust/system/cuda/detail/copy_if.h +0 -4
- cuda/cccl/headers/include/thrust/system/cuda/detail/core/agent_launcher.h +2 -9
- cuda/cccl/headers/include/thrust/system/cuda/detail/core/triple_chevron_launch.h +4 -32
- cuda/cccl/headers/include/thrust/system/cuda/detail/core/util.h +2 -9
- cuda/cccl/headers/include/thrust/system/cuda/detail/count.h +0 -2
- cuda/cccl/headers/include/thrust/system/cuda/detail/cross_system.h +0 -2
- cuda/cccl/headers/include/thrust/system/cuda/detail/dispatch.h +23 -2
- cuda/cccl/headers/include/thrust/system/cuda/detail/equal.h +0 -2
- cuda/cccl/headers/include/thrust/system/cuda/detail/error.inl +2 -11
- cuda/cccl/headers/include/thrust/system/cuda/detail/execution_policy.h +2 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/extrema.h +0 -4
- cuda/cccl/headers/include/thrust/system/cuda/detail/fill.h +0 -1
- cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +0 -5
- cuda/cccl/headers/include/thrust/system/cuda/detail/for_each.h +0 -1
- cuda/cccl/headers/include/thrust/system/cuda/detail/gather.h +0 -2
- cuda/cccl/headers/include/thrust/system/cuda/detail/generate.h +0 -2
- cuda/cccl/headers/include/thrust/system/cuda/detail/iter_swap.h +0 -1
- cuda/cccl/headers/include/thrust/system/cuda/detail/make_unsigned_special.h +2 -8
- cuda/cccl/headers/include/thrust/system/cuda/detail/malloc_and_free.h +0 -2
- cuda/cccl/headers/include/thrust/system/cuda/detail/memory.inl +0 -2
- cuda/cccl/headers/include/thrust/system/cuda/detail/merge.h +2 -26
- cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +7 -142
- cuda/cccl/headers/include/thrust/system/cuda/detail/parallel_for.h +0 -2
- cuda/cccl/headers/include/thrust/system/cuda/detail/partition.h +0 -4
- cuda/cccl/headers/include/thrust/system/cuda/detail/per_device_resource.h +0 -2
- cuda/cccl/headers/include/thrust/system/cuda/detail/reduce.h +0 -5
- cuda/cccl/headers/include/thrust/system/cuda/detail/reduce_by_key.h +0 -4
- cuda/cccl/headers/include/thrust/system/cuda/detail/remove.h +0 -2
- cuda/cccl/headers/include/thrust/system/cuda/detail/replace.h +0 -1
- cuda/cccl/headers/include/thrust/system/cuda/detail/reverse.h +0 -4
- cuda/cccl/headers/include/thrust/system/cuda/detail/scan.h +0 -4
- cuda/cccl/headers/include/thrust/system/cuda/detail/scan_by_key.h +0 -3
- cuda/cccl/headers/include/thrust/system/cuda/detail/scatter.h +0 -2
- cuda/cccl/headers/include/thrust/system/cuda/detail/set_operations.h +3 -5
- cuda/cccl/headers/include/thrust/system/cuda/detail/sort.h +8 -10
- cuda/cccl/headers/include/thrust/system/cuda/detail/temporary_buffer.h +0 -2
- cuda/cccl/headers/include/thrust/system/cuda/detail/transform.h +0 -1
- cuda/cccl/headers/include/thrust/system/cuda/detail/transform_reduce.h +0 -4
- cuda/cccl/headers/include/thrust/system/cuda/detail/transform_scan.h +0 -2
- cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_copy.h +1 -7
- cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_fill.h +2 -7
- cuda/cccl/headers/include/thrust/system/cuda/detail/unique.h +0 -3
- cuda/cccl/headers/include/thrust/system/cuda/detail/unique_by_key.h +0 -4
- cuda/cccl/headers/include/thrust/system/cuda/error.h +2 -11
- cuda/cccl/headers/include/thrust/system/cuda/memory.h +2 -6
- cuda/cccl/headers/include/thrust/system/cuda/memory_resource.h +2 -9
- cuda/cccl/headers/include/thrust/system/cuda/pointer.h +2 -7
- cuda/cccl/headers/include/thrust/system/cuda/vector.h +2 -6
- cuda/cccl/headers/include/thrust/system/detail/bad_alloc.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/errno.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/error_category.inl +0 -4
- cuda/cccl/headers/include/thrust/system/detail/error_code.inl +0 -2
- cuda/cccl/headers/include/thrust/system/detail/error_condition.inl +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.inl +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.inl +0 -3
- cuda/cccl/headers/include/thrust/system/detail/generic/copy.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/copy.inl +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.inl +0 -3
- cuda/cccl/headers/include/thrust/system/detail/generic/count.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/count.inl +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/equal.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/equal.inl +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/extrema.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/extrema.inl +0 -3
- cuda/cccl/headers/include/thrust/system/detail/generic/fill.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/find.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/find.inl +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/for_each.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/gather.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/gather.inl +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/generate.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.inl +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/logical.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/memory.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/memory.inl +0 -3
- cuda/cccl/headers/include/thrust/system/detail/generic/merge.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/merge.inl +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.inl +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/partition.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/partition.inl +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/per_device_resource.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/reduce.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/reduce.inl +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.inl +0 -3
- cuda/cccl/headers/include/thrust/system/detail/generic/remove.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/remove.inl +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/replace.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/replace.inl +0 -3
- cuda/cccl/headers/include/thrust/system/detail/generic/reverse.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/reverse.inl +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.inl +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/scan.h +26 -12
- cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.inl +0 -3
- cuda/cccl/headers/include/thrust/system/detail/generic/scatter.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/scatter.inl +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/select_system.h +0 -1
- cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.inl +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.inl +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/sort.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/sort.inl +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.inl +0 -3
- cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.inl +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/tag.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.inl +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/transform.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.inl +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.inl +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.inl +2 -4
- cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.inl +0 -3
- cuda/cccl/headers/include/thrust/system/detail/generic/unique.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/unique.inl +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.inl +0 -2
- cuda/cccl/headers/include/thrust/system/detail/internal/decompose.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/sequential/adjacent_difference.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/sequential/assign_value.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/sequential/binary_search.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/sequential/copy.h +76 -5
- cuda/cccl/headers/include/thrust/system/detail/sequential/copy_backward.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/sequential/copy_if.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/sequential/extrema.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/sequential/find.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/sequential/for_each.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/sequential/general_copy.h +0 -3
- cuda/cccl/headers/include/thrust/system/detail/sequential/get_value.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/sequential/insertion_sort.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/sequential/iter_swap.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/sequential/malloc_and_free.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/sequential/merge.h +78 -6
- cuda/cccl/headers/include/thrust/system/detail/sequential/partition.h +0 -4
- cuda/cccl/headers/include/thrust/system/detail/sequential/reduce.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/sequential/reduce_by_key.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/sequential/remove.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/sequential/scan.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/sequential/scan_by_key.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/sequential/set_operations.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/sequential/sort.h +67 -6
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.h +310 -11
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.h +78 -5
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.h +543 -7
- cuda/cccl/headers/include/thrust/system/detail/sequential/trivial_copy.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/sequential/unique.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/sequential/unique_by_key.h +0 -2
- cuda/cccl/headers/include/thrust/system/detail/system_error.inl +0 -2
- cuda/cccl/headers/include/thrust/system/error_code.h +0 -4
- cuda/cccl/headers/include/thrust/system/omp/detail/adjacent_difference.h +5 -25
- cuda/cccl/headers/include/thrust/system/omp/detail/assign_value.h +2 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/binary_search.h +5 -25
- cuda/cccl/headers/include/thrust/system/omp/detail/copy.h +40 -29
- cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.h +11 -28
- cuda/cccl/headers/include/thrust/system/omp/detail/count.h +2 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.h +26 -28
- cuda/cccl/headers/include/thrust/system/omp/detail/equal.h +2 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/execution_policy.h +18 -13
- cuda/cccl/headers/include/thrust/system/omp/detail/extrema.h +5 -25
- cuda/cccl/headers/include/thrust/system/omp/detail/fill.h +2 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/find.h +5 -25
- cuda/cccl/headers/include/thrust/system/omp/detail/for_each.h +47 -30
- cuda/cccl/headers/include/thrust/system/omp/detail/gather.h +2 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/generate.h +2 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/get_value.h +2 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/inner_product.h +2 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/iter_swap.h +2 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/logical.h +2 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/malloc_and_free.h +2 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/merge.h +2 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/mismatch.h +2 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/partition.h +26 -31
- cuda/cccl/headers/include/thrust/system/omp/detail/per_device_resource.h +2 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/pragma_omp.h +2 -26
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce.h +35 -27
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.h +13 -28
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.h +56 -28
- cuda/cccl/headers/include/thrust/system/omp/detail/remove.h +26 -31
- cuda/cccl/headers/include/thrust/system/omp/detail/replace.h +2 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/reverse.h +2 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/scan.h +176 -17
- cuda/cccl/headers/include/thrust/system/omp/detail/scan_by_key.h +8 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/scatter.h +2 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/sequence.h +2 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/set_operations.h +2 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/sort.h +213 -28
- cuda/cccl/headers/include/thrust/system/omp/detail/swap_ranges.h +2 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/tabulate.h +2 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/temporary_buffer.h +2 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/transform.h +2 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/transform_reduce.h +2 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/transform_scan.h +2 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_copy.h +2 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_fill.h +2 -15
- cuda/cccl/headers/include/thrust/system/omp/detail/unique.h +21 -30
- cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.h +17 -29
- cuda/cccl/headers/include/thrust/system/omp/memory.h +51 -9
- cuda/cccl/headers/include/thrust/system/omp/memory_resource.h +3 -7
- cuda/cccl/headers/include/thrust/system/omp/pointer.h +3 -7
- cuda/cccl/headers/include/thrust/system/omp/vector.h +3 -6
- cuda/cccl/headers/include/thrust/system/system_error.h +0 -2
- cuda/cccl/headers/include/thrust/system/tbb/detail/adjacent_difference.h +4 -25
- cuda/cccl/headers/include/thrust/system/tbb/detail/assign_value.h +2 -15
- cuda/cccl/headers/include/thrust/system/tbb/detail/binary_search.h +2 -15
- cuda/cccl/headers/include/thrust/system/tbb/detail/copy.h +38 -29
- cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.h +91 -24
- cuda/cccl/headers/include/thrust/system/tbb/detail/count.h +2 -15
- cuda/cccl/headers/include/thrust/system/tbb/detail/equal.h +2 -15
- cuda/cccl/headers/include/thrust/system/tbb/detail/execution_policy.h +17 -13
- cuda/cccl/headers/include/thrust/system/tbb/detail/extrema.h +4 -25
- cuda/cccl/headers/include/thrust/system/tbb/detail/fill.h +2 -15
- cuda/cccl/headers/include/thrust/system/tbb/detail/find.h +4 -25
- cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.h +47 -28
- cuda/cccl/headers/include/thrust/system/tbb/detail/gather.h +2 -15
- cuda/cccl/headers/include/thrust/system/tbb/detail/generate.h +2 -15
- cuda/cccl/headers/include/thrust/system/tbb/detail/get_value.h +2 -15
- cuda/cccl/headers/include/thrust/system/tbb/detail/inner_product.h +2 -15
- cuda/cccl/headers/include/thrust/system/tbb/detail/iter_swap.h +2 -15
- cuda/cccl/headers/include/thrust/system/tbb/detail/logical.h +2 -15
- cuda/cccl/headers/include/thrust/system/tbb/detail/malloc_and_free.h +2 -15
- cuda/cccl/headers/include/thrust/system/tbb/detail/merge.h +254 -29
- cuda/cccl/headers/include/thrust/system/tbb/detail/mismatch.h +2 -15
- cuda/cccl/headers/include/thrust/system/tbb/detail/partition.h +25 -31
- cuda/cccl/headers/include/thrust/system/tbb/detail/per_device_resource.h +2 -15
- cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.h +95 -29
- cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.h +345 -28
- cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_intervals.h +4 -26
- cuda/cccl/headers/include/thrust/system/tbb/detail/remove.h +32 -42
- cuda/cccl/headers/include/thrust/system/tbb/detail/replace.h +2 -15
- cuda/cccl/headers/include/thrust/system/tbb/detail/reverse.h +2 -15
- cuda/cccl/headers/include/thrust/system/tbb/detail/scan.h +265 -30
- cuda/cccl/headers/include/thrust/system/tbb/detail/scan_by_key.h +7 -17
- cuda/cccl/headers/include/thrust/system/tbb/detail/scatter.h +2 -15
- cuda/cccl/headers/include/thrust/system/tbb/detail/sequence.h +2 -15
- cuda/cccl/headers/include/thrust/system/tbb/detail/set_operations.h +2 -15
- cuda/cccl/headers/include/thrust/system/tbb/detail/sort.h +244 -32
- cuda/cccl/headers/include/thrust/system/tbb/detail/swap_ranges.h +2 -15
- cuda/cccl/headers/include/thrust/system/tbb/detail/tabulate.h +2 -15
- cuda/cccl/headers/include/thrust/system/tbb/detail/temporary_buffer.h +2 -15
- cuda/cccl/headers/include/thrust/system/tbb/detail/transform.h +2 -15
- cuda/cccl/headers/include/thrust/system/tbb/detail/transform_reduce.h +2 -15
- cuda/cccl/headers/include/thrust/system/tbb/detail/transform_scan.h +2 -15
- cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_copy.h +2 -15
- cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_fill.h +2 -15
- cuda/cccl/headers/include/thrust/system/tbb/detail/unique.h +23 -33
- cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.h +16 -29
- cuda/cccl/headers/include/thrust/system/tbb/memory.h +52 -24
- cuda/cccl/headers/include/thrust/system/tbb/memory_resource.h +4 -22
- cuda/cccl/headers/include/thrust/system/tbb/pointer.h +4 -22
- cuda/cccl/headers/include/thrust/system/tbb/vector.h +4 -21
- cuda/cccl/headers/include/thrust/transform.h +14 -3
- cuda/cccl/headers/include/thrust/type_traits/integer_sequence.h +0 -4
- cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +0 -1
- cuda/cccl/headers/include/thrust/type_traits/is_operator_less_or_greater_function_object.h +0 -4
- cuda/cccl/headers/include/thrust/type_traits/is_operator_plus_function_object.h +0 -4
- cuda/cccl/headers/include/thrust/type_traits/is_trivially_relocatable.h +0 -4
- cuda/cccl/headers/include/thrust/universal_allocator.h +8 -0
- cuda/cccl/headers/include/thrust/universal_vector.h +9 -0
- cuda/cccl/headers/include/thrust/zip_function.h +2 -28
- cuda/compute/__init__.py +4 -0
- cuda/compute/_bindings.pyi +26 -3
- cuda/compute/_bindings_impl.pyx +143 -1
- cuda/compute/algorithms/__init__.py +9 -5
- cuda/compute/algorithms/_sort/__init__.py +23 -0
- cuda/compute/algorithms/{_merge_sort.py → _sort/_merge_sort.py} +10 -10
- cuda/compute/algorithms/{_radix_sort.py → _sort/_radix_sort.py} +9 -58
- cuda/compute/algorithms/_sort/_segmented_sort.py +288 -0
- cuda/compute/algorithms/_sort/_sort_common.py +52 -0
- cuda/compute/cu12/_bindings_impl.cpython-313-aarch64-linux-gnu.so +0 -0
- cuda/compute/cu12/cccl/libcccl.c.parallel.so +0 -0
- cuda/compute/cu13/_bindings_impl.cpython-313-aarch64-linux-gnu.so +0 -0
- cuda/compute/cu13/cccl/libcccl.c.parallel.so +0 -0
- cuda_cccl-0.3.4.dist-info/METADATA +78 -0
- {cuda_cccl-0.3.2.dist-info → cuda_cccl-0.3.4.dist-info}/RECORD +830 -867
- cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +0 -652
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/tuple +0 -1365
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +0 -2144
- cuda/cccl/headers/include/thrust/detail/integer_math.h +0 -113
- cuda/cccl/headers/include/thrust/system/detail/adl/adjacent_difference.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/assign_value.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/binary_search.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/copy.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/copy_if.h +0 -52
- cuda/cccl/headers/include/thrust/system/detail/adl/count.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/equal.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/extrema.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/fill.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/find.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/for_each.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/gather.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/generate.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/get_value.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/inner_product.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/iter_swap.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/logical.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/malloc_and_free.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/merge.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/mismatch.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/partition.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/per_device_resource.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/reduce.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/reduce_by_key.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/remove.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/replace.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/reverse.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/scan.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/scan_by_key.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/scatter.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/sequence.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/set_operations.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/sort.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/swap_ranges.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/tabulate.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/temporary_buffer.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/transform.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/transform_reduce.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/transform_scan.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_copy.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_fill.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/unique.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/adl/unique_by_key.h +0 -51
- cuda/cccl/headers/include/thrust/system/detail/generic/scan.inl +0 -85
- cuda/cccl/headers/include/thrust/system/detail/sequential/copy.inl +0 -119
- cuda/cccl/headers/include/thrust/system/detail/sequential/merge.inl +0 -145
- cuda/cccl/headers/include/thrust/system/detail/sequential/sort.inl +0 -116
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.inl +0 -356
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.inl +0 -124
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.inl +0 -586
- cuda/cccl/headers/include/thrust/system/omp/detail/copy.inl +0 -74
- cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.inl +0 -59
- cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.inl +0 -65
- cuda/cccl/headers/include/thrust/system/omp/detail/for_each.inl +0 -87
- cuda/cccl/headers/include/thrust/system/omp/detail/memory.inl +0 -93
- cuda/cccl/headers/include/thrust/system/omp/detail/partition.inl +0 -102
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce.inl +0 -78
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.inl +0 -65
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.inl +0 -103
- cuda/cccl/headers/include/thrust/system/omp/detail/remove.inl +0 -87
- cuda/cccl/headers/include/thrust/system/omp/detail/sort.inl +0 -265
- cuda/cccl/headers/include/thrust/system/omp/detail/unique.inl +0 -71
- cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.inl +0 -75
- cuda/cccl/headers/include/thrust/system/tbb/detail/copy.inl +0 -73
- cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.inl +0 -136
- cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.inl +0 -91
- cuda/cccl/headers/include/thrust/system/tbb/detail/memory.inl +0 -94
- cuda/cccl/headers/include/thrust/system/tbb/detail/merge.inl +0 -327
- cuda/cccl/headers/include/thrust/system/tbb/detail/partition.inl +0 -98
- cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.inl +0 -137
- cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.inl +0 -400
- cuda/cccl/headers/include/thrust/system/tbb/detail/remove.inl +0 -87
- cuda/cccl/headers/include/thrust/system/tbb/detail/scan.inl +0 -312
- cuda/cccl/headers/include/thrust/system/tbb/detail/sort.inl +0 -295
- cuda/cccl/headers/include/thrust/system/tbb/detail/unique.inl +0 -71
- cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.inl +0 -75
- cuda_cccl-0.3.2.dist-info/METADATA +0 -42
- {cuda_cccl-0.3.2.dist-info → cuda_cccl-0.3.4.dist-info}/WHEEL +0 -0
- {cuda_cccl-0.3.2.dist-info → cuda_cccl-0.3.4.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,124 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Copyright 2008-2013 NVIDIA Corporation
|
|
3
|
-
*
|
|
4
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
* you may not use this file except in compliance with the License.
|
|
6
|
-
* You may obtain a copy of the License at
|
|
7
|
-
*
|
|
8
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
*
|
|
10
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
11
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
* See the License for the specific language governing permissions and
|
|
14
|
-
* limitations under the License.
|
|
15
|
-
*/
|
|
16
|
-
|
|
17
|
-
#pragma once
|
|
18
|
-
|
|
19
|
-
#include <thrust/detail/config.h>
|
|
20
|
-
|
|
21
|
-
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
|
|
22
|
-
# pragma GCC system_header
|
|
23
|
-
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
|
|
24
|
-
# pragma clang system_header
|
|
25
|
-
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
|
|
26
|
-
# pragma system_header
|
|
27
|
-
#endif // no system header
|
|
28
|
-
#include <thrust/detail/type_traits.h>
|
|
29
|
-
#include <thrust/functional.h>
|
|
30
|
-
#include <thrust/iterator/zip_iterator.h>
|
|
31
|
-
#include <thrust/system/detail/sequential/partition.h>
|
|
32
|
-
#include <thrust/system/detail/sequential/stable_primitive_sort.h>
|
|
33
|
-
#include <thrust/system/detail/sequential/stable_radix_sort.h>
|
|
34
|
-
|
|
35
|
-
THRUST_NAMESPACE_BEGIN
|
|
36
|
-
namespace system::detail::sequential
|
|
37
|
-
{
|
|
38
|
-
namespace stable_primitive_sort_detail
|
|
39
|
-
{
|
|
40
|
-
|
|
41
|
-
template <typename Iterator>
|
|
42
|
-
struct enable_if_bool_sort
|
|
43
|
-
: ::cuda::std::enable_if<::cuda::std::is_same<bool, thrust::detail::it_value_t<Iterator>>::value>
|
|
44
|
-
{};
|
|
45
|
-
|
|
46
|
-
template <typename Iterator>
|
|
47
|
-
struct disable_if_bool_sort
|
|
48
|
-
: thrust::detail::disable_if<::cuda::std::is_same<bool, thrust::detail::it_value_t<Iterator>>::value>
|
|
49
|
-
{};
|
|
50
|
-
|
|
51
|
-
template <typename DerivedPolicy, typename RandomAccessIterator>
|
|
52
|
-
typename enable_if_bool_sort<RandomAccessIterator>::type _CCCL_HOST_DEVICE stable_primitive_sort(
|
|
53
|
-
sequential::execution_policy<DerivedPolicy>& exec, RandomAccessIterator first, RandomAccessIterator last)
|
|
54
|
-
{
|
|
55
|
-
// use stable_partition if we're sorting bool
|
|
56
|
-
// stable_partition puts true values first, so we need to logical_not
|
|
57
|
-
sequential::stable_partition(exec, first, last, ::cuda::std::logical_not<bool>());
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
template <typename DerivedPolicy, typename RandomAccessIterator>
|
|
61
|
-
typename disable_if_bool_sort<RandomAccessIterator>::type _CCCL_HOST_DEVICE stable_primitive_sort(
|
|
62
|
-
sequential::execution_policy<DerivedPolicy>& exec, RandomAccessIterator first, RandomAccessIterator last)
|
|
63
|
-
{
|
|
64
|
-
// call stable_radix_sort
|
|
65
|
-
sequential::stable_radix_sort(exec, first, last);
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
struct logical_not_first
|
|
69
|
-
{
|
|
70
|
-
template <typename Tuple>
|
|
71
|
-
_CCCL_HOST_DEVICE bool operator()(Tuple t)
|
|
72
|
-
{
|
|
73
|
-
return !thrust::get<0>(t);
|
|
74
|
-
}
|
|
75
|
-
};
|
|
76
|
-
|
|
77
|
-
template <typename DerivedPolicy, typename RandomAccessIterator1, typename RandomAccessIterator2>
|
|
78
|
-
typename enable_if_bool_sort<RandomAccessIterator1>::type _CCCL_HOST_DEVICE stable_primitive_sort_by_key(
|
|
79
|
-
sequential::execution_policy<DerivedPolicy>& exec,
|
|
80
|
-
RandomAccessIterator1 keys_first,
|
|
81
|
-
RandomAccessIterator1 keys_last,
|
|
82
|
-
RandomAccessIterator2 values_first)
|
|
83
|
-
{
|
|
84
|
-
// use stable_partition if we're sorting bool
|
|
85
|
-
// stable_partition puts true values first, so we need to logical_not
|
|
86
|
-
sequential::stable_partition(
|
|
87
|
-
exec,
|
|
88
|
-
thrust::make_zip_iterator(keys_first, values_first),
|
|
89
|
-
thrust::make_zip_iterator(keys_last, values_first),
|
|
90
|
-
logical_not_first());
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
template <typename DerivedPolicy, typename RandomAccessIterator1, typename RandomAccessIterator2>
|
|
94
|
-
typename disable_if_bool_sort<RandomAccessIterator1>::type _CCCL_HOST_DEVICE stable_primitive_sort_by_key(
|
|
95
|
-
sequential::execution_policy<DerivedPolicy>& exec,
|
|
96
|
-
RandomAccessIterator1 keys_first,
|
|
97
|
-
RandomAccessIterator1 keys_last,
|
|
98
|
-
RandomAccessIterator2 values_first)
|
|
99
|
-
{
|
|
100
|
-
// call stable_radix_sort_by_key
|
|
101
|
-
sequential::stable_radix_sort_by_key(exec, keys_first, keys_last, values_first);
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
} // end namespace stable_primitive_sort_detail
|
|
105
|
-
|
|
106
|
-
template <typename DerivedPolicy, typename RandomAccessIterator>
|
|
107
|
-
_CCCL_HOST_DEVICE void stable_primitive_sort(
|
|
108
|
-
sequential::execution_policy<DerivedPolicy>& exec, RandomAccessIterator first, RandomAccessIterator last)
|
|
109
|
-
{
|
|
110
|
-
stable_primitive_sort_detail::stable_primitive_sort(exec, first, last);
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
template <typename DerivedPolicy, typename RandomAccessIterator1, typename RandomAccessIterator2>
|
|
114
|
-
_CCCL_HOST_DEVICE void stable_primitive_sort_by_key(
|
|
115
|
-
sequential::execution_policy<DerivedPolicy>& exec,
|
|
116
|
-
RandomAccessIterator1 keys_first,
|
|
117
|
-
RandomAccessIterator1 keys_last,
|
|
118
|
-
RandomAccessIterator2 values_first)
|
|
119
|
-
{
|
|
120
|
-
stable_primitive_sort_detail::stable_primitive_sort_by_key(exec, keys_first, keys_last, values_first);
|
|
121
|
-
}
|
|
122
|
-
|
|
123
|
-
} // namespace system::detail::sequential
|
|
124
|
-
THRUST_NAMESPACE_END
|
|
@@ -1,586 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Copyright 2008-2021 NVIDIA Corporation
|
|
3
|
-
*
|
|
4
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
* you may not use this file except in compliance with the License.
|
|
6
|
-
* You may obtain a copy of the License at
|
|
7
|
-
*
|
|
8
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
*
|
|
10
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
11
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
* See the License for the specific language governing permissions and
|
|
14
|
-
* limitations under the License.
|
|
15
|
-
*/
|
|
16
|
-
|
|
17
|
-
#pragma once
|
|
18
|
-
|
|
19
|
-
#include <thrust/detail/config.h>
|
|
20
|
-
|
|
21
|
-
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
|
|
22
|
-
# pragma GCC system_header
|
|
23
|
-
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
|
|
24
|
-
# pragma clang system_header
|
|
25
|
-
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
|
|
26
|
-
# pragma system_header
|
|
27
|
-
#endif // no system header
|
|
28
|
-
|
|
29
|
-
#include <thrust/copy.h>
|
|
30
|
-
#include <thrust/detail/temporary_array.h>
|
|
31
|
-
#include <thrust/functional.h>
|
|
32
|
-
#include <thrust/iterator/iterator_traits.h>
|
|
33
|
-
#include <thrust/iterator/transform_iterator.h>
|
|
34
|
-
#include <thrust/iterator/zip_iterator.h>
|
|
35
|
-
#include <thrust/scatter.h>
|
|
36
|
-
|
|
37
|
-
#include <cuda/std/cstdint>
|
|
38
|
-
#include <cuda/std/limits>
|
|
39
|
-
#include <cuda/std/utility>
|
|
40
|
-
|
|
41
|
-
THRUST_NAMESPACE_BEGIN
|
|
42
|
-
namespace system::detail::sequential
|
|
43
|
-
{
|
|
44
|
-
namespace radix_sort_detail
|
|
45
|
-
{
|
|
46
|
-
|
|
47
|
-
template <typename T>
|
|
48
|
-
struct RadixEncoder
|
|
49
|
-
{
|
|
50
|
-
_CCCL_HOST_DEVICE T operator()(T x) const
|
|
51
|
-
{
|
|
52
|
-
return x;
|
|
53
|
-
}
|
|
54
|
-
};
|
|
55
|
-
|
|
56
|
-
template <>
|
|
57
|
-
struct RadixEncoder<char>
|
|
58
|
-
{
|
|
59
|
-
_CCCL_HOST_DEVICE unsigned char operator()(char x) const
|
|
60
|
-
{
|
|
61
|
-
if (::cuda::std::numeric_limits<char>::is_signed)
|
|
62
|
-
{
|
|
63
|
-
return static_cast<unsigned char>(x) ^ static_cast<unsigned char>(1) << (8 * sizeof(unsigned char) - 1);
|
|
64
|
-
}
|
|
65
|
-
else
|
|
66
|
-
{
|
|
67
|
-
return x;
|
|
68
|
-
}
|
|
69
|
-
}
|
|
70
|
-
};
|
|
71
|
-
|
|
72
|
-
template <>
|
|
73
|
-
struct RadixEncoder<signed char>
|
|
74
|
-
{
|
|
75
|
-
_CCCL_HOST_DEVICE unsigned char operator()(signed char x) const
|
|
76
|
-
{
|
|
77
|
-
return static_cast<unsigned char>(x) ^ static_cast<unsigned char>(1) << (8 * sizeof(unsigned char) - 1);
|
|
78
|
-
}
|
|
79
|
-
};
|
|
80
|
-
|
|
81
|
-
template <>
|
|
82
|
-
struct RadixEncoder<short>
|
|
83
|
-
{
|
|
84
|
-
_CCCL_HOST_DEVICE unsigned short operator()(short x) const
|
|
85
|
-
{
|
|
86
|
-
return static_cast<unsigned short>(x) ^ static_cast<unsigned short>(1) << (8 * sizeof(unsigned short) - 1);
|
|
87
|
-
}
|
|
88
|
-
};
|
|
89
|
-
|
|
90
|
-
template <>
|
|
91
|
-
struct RadixEncoder<int>
|
|
92
|
-
{
|
|
93
|
-
_CCCL_HOST_DEVICE unsigned int operator()(int x) const
|
|
94
|
-
{
|
|
95
|
-
return x ^ static_cast<unsigned int>(1) << (8 * sizeof(unsigned int) - 1);
|
|
96
|
-
}
|
|
97
|
-
};
|
|
98
|
-
|
|
99
|
-
template <>
|
|
100
|
-
struct RadixEncoder<long>
|
|
101
|
-
{
|
|
102
|
-
_CCCL_HOST_DEVICE unsigned long operator()(long x) const
|
|
103
|
-
{
|
|
104
|
-
return x ^ static_cast<unsigned long>(1) << (8 * sizeof(unsigned long) - 1);
|
|
105
|
-
}
|
|
106
|
-
};
|
|
107
|
-
|
|
108
|
-
template <>
|
|
109
|
-
struct RadixEncoder<long long>
|
|
110
|
-
{
|
|
111
|
-
_CCCL_HOST_DEVICE unsigned long long operator()(long long x) const
|
|
112
|
-
{
|
|
113
|
-
return x ^ static_cast<unsigned long long>(1) << (8 * sizeof(unsigned long long) - 1);
|
|
114
|
-
}
|
|
115
|
-
};
|
|
116
|
-
|
|
117
|
-
// ideally we'd use uint32 here and uint64 below
|
|
118
|
-
template <>
|
|
119
|
-
struct RadixEncoder<float>
|
|
120
|
-
{
|
|
121
|
-
_CCCL_HOST_DEVICE std::uint32_t operator()(float x) const
|
|
122
|
-
{
|
|
123
|
-
union
|
|
124
|
-
{
|
|
125
|
-
float f;
|
|
126
|
-
std::uint32_t i;
|
|
127
|
-
} u;
|
|
128
|
-
u.f = x;
|
|
129
|
-
std::uint32_t mask = -static_cast<std::int32_t>(u.i >> 31) | (static_cast<std::uint32_t>(1) << 31);
|
|
130
|
-
return u.i ^ mask;
|
|
131
|
-
}
|
|
132
|
-
};
|
|
133
|
-
|
|
134
|
-
template <>
|
|
135
|
-
struct RadixEncoder<double>
|
|
136
|
-
{
|
|
137
|
-
_CCCL_HOST_DEVICE std::uint64_t operator()(double x) const
|
|
138
|
-
{
|
|
139
|
-
union
|
|
140
|
-
{
|
|
141
|
-
double f;
|
|
142
|
-
std::uint64_t i;
|
|
143
|
-
} u;
|
|
144
|
-
u.f = x;
|
|
145
|
-
std::uint64_t mask = -static_cast<std::int64_t>(u.i >> 63) | (static_cast<std::uint64_t>(1) << 63);
|
|
146
|
-
return u.i ^ mask;
|
|
147
|
-
}
|
|
148
|
-
};
|
|
149
|
-
|
|
150
|
-
// this functor returns a key's to its histogram bucket count and post-increments the bucket
|
|
151
|
-
template <unsigned int RadixBits, typename KeyType>
|
|
152
|
-
struct bucket_functor
|
|
153
|
-
{
|
|
154
|
-
using Encoder = RadixEncoder<KeyType>;
|
|
155
|
-
using EncodedType = decltype(::cuda::std::declval<Encoder>()(::cuda::std::declval<KeyType>()));
|
|
156
|
-
using result_type = size_t;
|
|
157
|
-
static const EncodedType BitMask = static_cast<EncodedType>((1 << RadixBits) - 1);
|
|
158
|
-
|
|
159
|
-
Encoder encode;
|
|
160
|
-
EncodedType bit_shift;
|
|
161
|
-
size_t* histogram;
|
|
162
|
-
|
|
163
|
-
_CCCL_HOST_DEVICE bucket_functor(EncodedType bit_shift, size_t* histogram)
|
|
164
|
-
: encode()
|
|
165
|
-
, bit_shift(bit_shift)
|
|
166
|
-
, histogram(histogram)
|
|
167
|
-
{}
|
|
168
|
-
|
|
169
|
-
inline _CCCL_HOST_DEVICE size_t operator()(KeyType key)
|
|
170
|
-
{
|
|
171
|
-
const EncodedType x = encode(key);
|
|
172
|
-
|
|
173
|
-
// note that we mutate the histogram here
|
|
174
|
-
return histogram[(x >> bit_shift) & BitMask]++;
|
|
175
|
-
}
|
|
176
|
-
};
|
|
177
|
-
|
|
178
|
-
template <unsigned int RadixBits,
|
|
179
|
-
typename DerivedPolicy,
|
|
180
|
-
typename RandomAccessIterator1,
|
|
181
|
-
typename RandomAccessIterator2,
|
|
182
|
-
typename Integer>
|
|
183
|
-
inline _CCCL_HOST_DEVICE void radix_shuffle_n(
|
|
184
|
-
sequential::execution_policy<DerivedPolicy>& exec,
|
|
185
|
-
RandomAccessIterator1 first,
|
|
186
|
-
const size_t n,
|
|
187
|
-
RandomAccessIterator2 result,
|
|
188
|
-
Integer bit_shift,
|
|
189
|
-
size_t* histogram)
|
|
190
|
-
{
|
|
191
|
-
using KeyType = thrust::detail::it_value_t<RandomAccessIterator1>;
|
|
192
|
-
|
|
193
|
-
// note that we are going to mutate the histogram during this sequential scatter
|
|
194
|
-
thrust::scatter(
|
|
195
|
-
exec,
|
|
196
|
-
first,
|
|
197
|
-
first + n,
|
|
198
|
-
thrust::make_transform_iterator(first, bucket_functor<RadixBits, KeyType>(bit_shift, histogram)),
|
|
199
|
-
result);
|
|
200
|
-
}
|
|
201
|
-
|
|
202
|
-
template <unsigned int RadixBits,
|
|
203
|
-
typename DerivedPolicy,
|
|
204
|
-
typename RandomAccessIterator1,
|
|
205
|
-
typename RandomAccessIterator2,
|
|
206
|
-
typename RandomAccessIterator3,
|
|
207
|
-
typename RandomAccessIterator4,
|
|
208
|
-
typename Integer>
|
|
209
|
-
_CCCL_HOST_DEVICE void radix_shuffle_n(
|
|
210
|
-
sequential::execution_policy<DerivedPolicy>& exec,
|
|
211
|
-
RandomAccessIterator1 keys_first,
|
|
212
|
-
RandomAccessIterator2 values_first,
|
|
213
|
-
const size_t n,
|
|
214
|
-
RandomAccessIterator3 keys_result,
|
|
215
|
-
RandomAccessIterator4 values_result,
|
|
216
|
-
Integer bit_shift,
|
|
217
|
-
size_t* histogram)
|
|
218
|
-
{
|
|
219
|
-
using KeyType = thrust::detail::it_value_t<RandomAccessIterator1>;
|
|
220
|
-
|
|
221
|
-
// note that we are going to mutate the histogram during this sequential scatter
|
|
222
|
-
thrust::scatter(
|
|
223
|
-
exec,
|
|
224
|
-
thrust::make_zip_iterator(keys_first, values_first),
|
|
225
|
-
thrust::make_zip_iterator(keys_first + n, values_first + n),
|
|
226
|
-
thrust::make_transform_iterator(keys_first, bucket_functor<RadixBits, KeyType>(bit_shift, histogram)),
|
|
227
|
-
thrust::make_zip_iterator(keys_result, values_result));
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
template <unsigned int RadixBits,
|
|
231
|
-
bool HasValues,
|
|
232
|
-
typename DerivedPolicy,
|
|
233
|
-
typename RandomAccessIterator1,
|
|
234
|
-
typename RandomAccessIterator2,
|
|
235
|
-
typename RandomAccessIterator3,
|
|
236
|
-
typename RandomAccessIterator4>
|
|
237
|
-
_CCCL_HOST_DEVICE void radix_sort(
|
|
238
|
-
sequential::execution_policy<DerivedPolicy>& exec,
|
|
239
|
-
RandomAccessIterator1 keys1,
|
|
240
|
-
RandomAccessIterator2 keys2,
|
|
241
|
-
RandomAccessIterator3 vals1,
|
|
242
|
-
RandomAccessIterator4 vals2,
|
|
243
|
-
const size_t N)
|
|
244
|
-
{
|
|
245
|
-
using KeyType = thrust::detail::it_value_t<RandomAccessIterator1>;
|
|
246
|
-
|
|
247
|
-
using Encoder = RadixEncoder<KeyType>;
|
|
248
|
-
using EncodedType = decltype(::cuda::std::declval<Encoder>()(::cuda::std::declval<KeyType>()));
|
|
249
|
-
|
|
250
|
-
const unsigned int NumHistograms = (8 * sizeof(EncodedType) + (RadixBits - 1)) / RadixBits;
|
|
251
|
-
const unsigned int HistogramSize = 1 << RadixBits;
|
|
252
|
-
|
|
253
|
-
const EncodedType BitMask = static_cast<EncodedType>((1 << RadixBits) - 1);
|
|
254
|
-
|
|
255
|
-
Encoder encode;
|
|
256
|
-
|
|
257
|
-
// storage for histograms
|
|
258
|
-
size_t histograms[NumHistograms][HistogramSize] = {{0}};
|
|
259
|
-
|
|
260
|
-
// see which passes can be eliminated
|
|
261
|
-
bool skip_shuffle[NumHistograms] = {false};
|
|
262
|
-
|
|
263
|
-
// false if most recent data is stored in (keys1,vals1)
|
|
264
|
-
bool flip = false;
|
|
265
|
-
|
|
266
|
-
// compute histograms
|
|
267
|
-
for (size_t i = 0; i < N; i++)
|
|
268
|
-
{
|
|
269
|
-
const EncodedType x = encode(keys1[i]);
|
|
270
|
-
|
|
271
|
-
for (unsigned int j = 0; j < NumHistograms; j++)
|
|
272
|
-
{
|
|
273
|
-
const auto BitShift = static_cast<EncodedType>(RadixBits * j);
|
|
274
|
-
histograms[j][(x >> BitShift) & BitMask]++;
|
|
275
|
-
}
|
|
276
|
-
}
|
|
277
|
-
|
|
278
|
-
// scan histograms
|
|
279
|
-
for (unsigned int i = 0; i < NumHistograms; i++)
|
|
280
|
-
{
|
|
281
|
-
size_t sum = 0;
|
|
282
|
-
|
|
283
|
-
for (unsigned int j = 0; j < HistogramSize; j++)
|
|
284
|
-
{
|
|
285
|
-
size_t bin = histograms[i][j];
|
|
286
|
-
|
|
287
|
-
if (bin == N)
|
|
288
|
-
{
|
|
289
|
-
skip_shuffle[i] = true;
|
|
290
|
-
}
|
|
291
|
-
|
|
292
|
-
histograms[i][j] = sum;
|
|
293
|
-
|
|
294
|
-
sum = sum + bin;
|
|
295
|
-
}
|
|
296
|
-
}
|
|
297
|
-
|
|
298
|
-
// shuffle keys and (optionally) values
|
|
299
|
-
for (unsigned int i = 0; i < NumHistograms; i++)
|
|
300
|
-
{
|
|
301
|
-
const EncodedType BitShift = static_cast<EncodedType>(RadixBits * i);
|
|
302
|
-
|
|
303
|
-
if (!skip_shuffle[i])
|
|
304
|
-
{
|
|
305
|
-
if (flip)
|
|
306
|
-
{
|
|
307
|
-
if (HasValues)
|
|
308
|
-
{
|
|
309
|
-
radix_shuffle_n<RadixBits>(exec, keys2, vals2, N, keys1, vals1, BitShift, histograms[i]);
|
|
310
|
-
}
|
|
311
|
-
else
|
|
312
|
-
{
|
|
313
|
-
radix_shuffle_n<RadixBits>(exec, keys2, N, keys1, BitShift, histograms[i]);
|
|
314
|
-
}
|
|
315
|
-
}
|
|
316
|
-
else
|
|
317
|
-
{
|
|
318
|
-
if (HasValues)
|
|
319
|
-
{
|
|
320
|
-
radix_shuffle_n<RadixBits>(exec, keys1, vals1, N, keys2, vals2, BitShift, histograms[i]);
|
|
321
|
-
}
|
|
322
|
-
else
|
|
323
|
-
{
|
|
324
|
-
radix_shuffle_n<RadixBits>(exec, keys1, N, keys2, BitShift, histograms[i]);
|
|
325
|
-
}
|
|
326
|
-
}
|
|
327
|
-
|
|
328
|
-
flip = (flip) ? false : true;
|
|
329
|
-
}
|
|
330
|
-
}
|
|
331
|
-
|
|
332
|
-
// ensure final values are in (keys1,vals1)
|
|
333
|
-
if (flip)
|
|
334
|
-
{
|
|
335
|
-
thrust::copy(exec, keys2, keys2 + N, keys1);
|
|
336
|
-
|
|
337
|
-
if (HasValues)
|
|
338
|
-
{
|
|
339
|
-
thrust::copy(exec, vals2, vals2 + N, vals1);
|
|
340
|
-
}
|
|
341
|
-
}
|
|
342
|
-
}
|
|
343
|
-
|
|
344
|
-
// Select best radix sort parameters based on sizeof(T) and input size
|
|
345
|
-
// These particular values were determined through empirical testing on a Core i7 950 CPU
|
|
346
|
-
template <size_t KeySize>
|
|
347
|
-
struct radix_sort_dispatcher
|
|
348
|
-
{};
|
|
349
|
-
|
|
350
|
-
template <>
|
|
351
|
-
struct radix_sort_dispatcher<1>
|
|
352
|
-
{
|
|
353
|
-
template <typename DerivedPolicy, typename RandomAccessIterator1, typename RandomAccessIterator2>
|
|
354
|
-
_CCCL_HOST_DEVICE void operator()(
|
|
355
|
-
sequential::execution_policy<DerivedPolicy>& exec,
|
|
356
|
-
RandomAccessIterator1 keys1,
|
|
357
|
-
RandomAccessIterator2 keys2,
|
|
358
|
-
const size_t N)
|
|
359
|
-
{
|
|
360
|
-
radix_sort_detail::radix_sort<8, false>(exec, keys1, keys2, static_cast<int*>(0), static_cast<int*>(0), N);
|
|
361
|
-
}
|
|
362
|
-
|
|
363
|
-
template <typename DerivedPolicy,
|
|
364
|
-
typename RandomAccessIterator1,
|
|
365
|
-
typename RandomAccessIterator2,
|
|
366
|
-
typename RandomAccessIterator3,
|
|
367
|
-
typename RandomAccessIterator4>
|
|
368
|
-
_CCCL_HOST_DEVICE void operator()(
|
|
369
|
-
sequential::execution_policy<DerivedPolicy>& exec,
|
|
370
|
-
RandomAccessIterator1 keys1,
|
|
371
|
-
RandomAccessIterator2 keys2,
|
|
372
|
-
RandomAccessIterator3 vals1,
|
|
373
|
-
RandomAccessIterator4 vals2,
|
|
374
|
-
const size_t N)
|
|
375
|
-
{
|
|
376
|
-
radix_sort_detail::radix_sort<8, true>(exec, keys1, keys2, vals1, vals2, N);
|
|
377
|
-
}
|
|
378
|
-
};
|
|
379
|
-
|
|
380
|
-
template <>
|
|
381
|
-
struct radix_sort_dispatcher<2>
|
|
382
|
-
{
|
|
383
|
-
template <typename DerivedPolicy, typename RandomAccessIterator1, typename RandomAccessIterator2>
|
|
384
|
-
_CCCL_HOST_DEVICE void operator()(
|
|
385
|
-
sequential::execution_policy<DerivedPolicy>& exec,
|
|
386
|
-
RandomAccessIterator1 keys1,
|
|
387
|
-
RandomAccessIterator2 keys2,
|
|
388
|
-
const size_t N)
|
|
389
|
-
{
|
|
390
|
-
#ifdef __QNX__
|
|
391
|
-
// XXX war for nvbug 200193674
|
|
392
|
-
const bool condition = true;
|
|
393
|
-
#else
|
|
394
|
-
const bool condition = N < (1 << 16);
|
|
395
|
-
#endif
|
|
396
|
-
if (condition)
|
|
397
|
-
{
|
|
398
|
-
radix_sort_detail::radix_sort<8, false>(exec, keys1, keys2, static_cast<int*>(0), static_cast<int*>(0), N);
|
|
399
|
-
}
|
|
400
|
-
else
|
|
401
|
-
{
|
|
402
|
-
radix_sort_detail::radix_sort<16, false>(exec, keys1, keys2, static_cast<int*>(0), static_cast<int*>(0), N);
|
|
403
|
-
}
|
|
404
|
-
}
|
|
405
|
-
|
|
406
|
-
template <typename DerivedPolicy,
|
|
407
|
-
typename RandomAccessIterator1,
|
|
408
|
-
typename RandomAccessIterator2,
|
|
409
|
-
typename RandomAccessIterator3,
|
|
410
|
-
typename RandomAccessIterator4>
|
|
411
|
-
_CCCL_HOST_DEVICE void operator()(
|
|
412
|
-
sequential::execution_policy<DerivedPolicy>& exec,
|
|
413
|
-
RandomAccessIterator1 keys1,
|
|
414
|
-
RandomAccessIterator2 keys2,
|
|
415
|
-
RandomAccessIterator3 vals1,
|
|
416
|
-
RandomAccessIterator4 vals2,
|
|
417
|
-
const size_t N)
|
|
418
|
-
{
|
|
419
|
-
#ifdef __QNX__
|
|
420
|
-
// XXX war for nvbug 200193674
|
|
421
|
-
const bool condition = true;
|
|
422
|
-
#else
|
|
423
|
-
const bool condition = N < (1 << 15);
|
|
424
|
-
#endif
|
|
425
|
-
if (condition)
|
|
426
|
-
{
|
|
427
|
-
radix_sort_detail::radix_sort<8, true>(exec, keys1, keys2, vals1, vals2, N);
|
|
428
|
-
}
|
|
429
|
-
else
|
|
430
|
-
{
|
|
431
|
-
radix_sort_detail::radix_sort<16, true>(exec, keys1, keys2, vals1, vals2, N);
|
|
432
|
-
}
|
|
433
|
-
}
|
|
434
|
-
};
|
|
435
|
-
|
|
436
|
-
template <>
|
|
437
|
-
struct radix_sort_dispatcher<4>
|
|
438
|
-
{
|
|
439
|
-
template <typename DerivedPolicy, typename RandomAccessIterator1, typename RandomAccessIterator2>
|
|
440
|
-
_CCCL_HOST_DEVICE void operator()(
|
|
441
|
-
sequential::execution_policy<DerivedPolicy>& exec,
|
|
442
|
-
RandomAccessIterator1 keys1,
|
|
443
|
-
RandomAccessIterator2 keys2,
|
|
444
|
-
const size_t N)
|
|
445
|
-
{
|
|
446
|
-
if (N < (1 << 22))
|
|
447
|
-
{
|
|
448
|
-
radix_sort_detail::radix_sort<8, false>(exec, keys1, keys2, static_cast<int*>(0), static_cast<int*>(0), N);
|
|
449
|
-
}
|
|
450
|
-
else
|
|
451
|
-
{
|
|
452
|
-
radix_sort_detail::radix_sort<4, false>(exec, keys1, keys2, static_cast<int*>(0), static_cast<int*>(0), N);
|
|
453
|
-
}
|
|
454
|
-
}
|
|
455
|
-
|
|
456
|
-
template <typename DerivedPolicy,
|
|
457
|
-
typename RandomAccessIterator1,
|
|
458
|
-
typename RandomAccessIterator2,
|
|
459
|
-
typename RandomAccessIterator3,
|
|
460
|
-
typename RandomAccessIterator4>
|
|
461
|
-
_CCCL_HOST_DEVICE void operator()(
|
|
462
|
-
sequential::execution_policy<DerivedPolicy>& exec,
|
|
463
|
-
RandomAccessIterator1 keys1,
|
|
464
|
-
RandomAccessIterator2 keys2,
|
|
465
|
-
RandomAccessIterator3 vals1,
|
|
466
|
-
RandomAccessIterator4 vals2,
|
|
467
|
-
const size_t N)
|
|
468
|
-
{
|
|
469
|
-
if (N < (1 << 22))
|
|
470
|
-
{
|
|
471
|
-
radix_sort_detail::radix_sort<8, true>(exec, keys1, keys2, vals1, vals2, N);
|
|
472
|
-
}
|
|
473
|
-
else
|
|
474
|
-
{
|
|
475
|
-
radix_sort_detail::radix_sort<3, true>(exec, keys1, keys2, vals1, vals2, N);
|
|
476
|
-
}
|
|
477
|
-
}
|
|
478
|
-
};
|
|
479
|
-
|
|
480
|
-
template <>
|
|
481
|
-
struct radix_sort_dispatcher<8>
|
|
482
|
-
{
|
|
483
|
-
template <typename DerivedPolicy, typename RandomAccessIterator1, typename RandomAccessIterator2>
|
|
484
|
-
_CCCL_HOST_DEVICE void operator()(
|
|
485
|
-
sequential::execution_policy<DerivedPolicy>& exec,
|
|
486
|
-
RandomAccessIterator1 keys1,
|
|
487
|
-
RandomAccessIterator2 keys2,
|
|
488
|
-
const size_t N)
|
|
489
|
-
{
|
|
490
|
-
if (N < (1 << 21))
|
|
491
|
-
{
|
|
492
|
-
radix_sort_detail::radix_sort<8, false>(exec, keys1, keys2, static_cast<int*>(0), static_cast<int*>(0), N);
|
|
493
|
-
}
|
|
494
|
-
else
|
|
495
|
-
{
|
|
496
|
-
radix_sort_detail::radix_sort<4, false>(exec, keys1, keys2, static_cast<int*>(0), static_cast<int*>(0), N);
|
|
497
|
-
}
|
|
498
|
-
}
|
|
499
|
-
|
|
500
|
-
template <typename DerivedPolicy,
|
|
501
|
-
typename RandomAccessIterator1,
|
|
502
|
-
typename RandomAccessIterator2,
|
|
503
|
-
typename RandomAccessIterator3,
|
|
504
|
-
typename RandomAccessIterator4>
|
|
505
|
-
_CCCL_HOST_DEVICE void operator()(
|
|
506
|
-
sequential::execution_policy<DerivedPolicy>& exec,
|
|
507
|
-
RandomAccessIterator1 keys1,
|
|
508
|
-
RandomAccessIterator2 keys2,
|
|
509
|
-
RandomAccessIterator3 vals1,
|
|
510
|
-
RandomAccessIterator4 vals2,
|
|
511
|
-
const size_t N)
|
|
512
|
-
{
|
|
513
|
-
if (N < (1 << 21))
|
|
514
|
-
{
|
|
515
|
-
radix_sort_detail::radix_sort<8, true>(exec, keys1, keys2, vals1, vals2, N);
|
|
516
|
-
}
|
|
517
|
-
else
|
|
518
|
-
{
|
|
519
|
-
radix_sort_detail::radix_sort<3, true>(exec, keys1, keys2, vals1, vals2, N);
|
|
520
|
-
}
|
|
521
|
-
}
|
|
522
|
-
};
|
|
523
|
-
|
|
524
|
-
template <typename DerivedPolicy, typename RandomAccessIterator1, typename RandomAccessIterator2>
|
|
525
|
-
_CCCL_HOST_DEVICE void radix_sort(
|
|
526
|
-
sequential::execution_policy<DerivedPolicy>& exec,
|
|
527
|
-
RandomAccessIterator1 keys1,
|
|
528
|
-
RandomAccessIterator2 keys2,
|
|
529
|
-
const size_t N)
|
|
530
|
-
{
|
|
531
|
-
using KeyType = thrust::detail::it_value_t<RandomAccessIterator1>;
|
|
532
|
-
radix_sort_dispatcher<sizeof(KeyType)>()(exec, keys1, keys2, N);
|
|
533
|
-
}
|
|
534
|
-
|
|
535
|
-
template <typename DerivedPolicy,
|
|
536
|
-
typename RandomAccessIterator1,
|
|
537
|
-
typename RandomAccessIterator2,
|
|
538
|
-
typename RandomAccessIterator3,
|
|
539
|
-
typename RandomAccessIterator4>
|
|
540
|
-
_CCCL_HOST_DEVICE void radix_sort(
|
|
541
|
-
sequential::execution_policy<DerivedPolicy>& exec,
|
|
542
|
-
RandomAccessIterator1 keys1,
|
|
543
|
-
RandomAccessIterator2 keys2,
|
|
544
|
-
RandomAccessIterator3 vals1,
|
|
545
|
-
RandomAccessIterator4 vals2,
|
|
546
|
-
const size_t N)
|
|
547
|
-
{
|
|
548
|
-
using KeyType = thrust::detail::it_value_t<RandomAccessIterator1>;
|
|
549
|
-
radix_sort_dispatcher<sizeof(KeyType)>()(exec, keys1, keys2, vals1, vals2, N);
|
|
550
|
-
}
|
|
551
|
-
|
|
552
|
-
} // end namespace radix_sort_detail
|
|
553
|
-
|
|
554
|
-
template <typename DerivedPolicy, typename RandomAccessIterator>
|
|
555
|
-
_CCCL_HOST_DEVICE void stable_radix_sort(
|
|
556
|
-
sequential::execution_policy<DerivedPolicy>& exec, RandomAccessIterator first, RandomAccessIterator last)
|
|
557
|
-
{
|
|
558
|
-
using KeyType = thrust::detail::it_value_t<RandomAccessIterator>;
|
|
559
|
-
|
|
560
|
-
size_t N = last - first;
|
|
561
|
-
|
|
562
|
-
thrust::detail::temporary_array<KeyType, DerivedPolicy> temp(exec, N);
|
|
563
|
-
|
|
564
|
-
radix_sort_detail::radix_sort(exec, first, temp.begin(), N);
|
|
565
|
-
}
|
|
566
|
-
|
|
567
|
-
template <typename DerivedPolicy, typename RandomAccessIterator1, typename RandomAccessIterator2>
|
|
568
|
-
_CCCL_HOST_DEVICE void stable_radix_sort_by_key(
|
|
569
|
-
sequential::execution_policy<DerivedPolicy>& exec,
|
|
570
|
-
RandomAccessIterator1 first1,
|
|
571
|
-
RandomAccessIterator1 last1,
|
|
572
|
-
RandomAccessIterator2 first2)
|
|
573
|
-
{
|
|
574
|
-
using KeyType = thrust::detail::it_value_t<RandomAccessIterator1>;
|
|
575
|
-
using ValueType = thrust::detail::it_value_t<RandomAccessIterator2>;
|
|
576
|
-
|
|
577
|
-
size_t N = last1 - first1;
|
|
578
|
-
|
|
579
|
-
thrust::detail::temporary_array<KeyType, DerivedPolicy> temp1(exec, N);
|
|
580
|
-
thrust::detail::temporary_array<ValueType, DerivedPolicy> temp2(exec, N);
|
|
581
|
-
|
|
582
|
-
radix_sort_detail::radix_sort(exec, first1, temp1.begin(), first2, temp2.begin(), N);
|
|
583
|
-
}
|
|
584
|
-
|
|
585
|
-
} // namespace system::detail::sequential
|
|
586
|
-
THRUST_NAMESPACE_END
|