cuda-cccl 0.3.0__cp312-cp312-manylinux_2_24_aarch64.whl → 0.3.2__cp312-cp312-manylinux_2_24_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cuda-cccl might be problematic. Click here for more details.
- cuda/cccl/cooperative/__init__.py +7 -1
- cuda/cccl/cooperative/experimental/__init__.py +21 -5
- cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_for.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +354 -572
- cuda/cccl/headers/include/cub/agent/agent_merge.cuh +23 -21
- cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +21 -3
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_rle.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_scan.cuh +5 -1
- cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +22 -5
- cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +6 -8
- cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +24 -14
- cuda/cccl/headers/include/cub/block/block_exchange.cuh +5 -0
- cuda/cccl/headers/include/cub/block/block_histogram.cuh +4 -0
- cuda/cccl/headers/include/cub/block/block_load.cuh +4 -0
- cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +4 -2
- cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +4 -2
- cuda/cccl/headers/include/cub/block/block_reduce.cuh +1 -0
- cuda/cccl/headers/include/cub/block/block_scan.cuh +12 -2
- cuda/cccl/headers/include/cub/block/block_store.cuh +3 -2
- cuda/cccl/headers/include/cub/detail/device_memory_resource.cuh +1 -0
- cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +34 -30
- cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +1 -1
- cuda/cccl/headers/include/cub/device/device_for.cuh +118 -40
- cuda/cccl/headers/include/cub/device/device_reduce.cuh +6 -7
- cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +170 -260
- cuda/cccl/headers/include/cub/device/device_transform.cuh +122 -91
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +6 -7
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +2 -11
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +12 -29
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +2 -7
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh +0 -1
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +2 -3
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +4 -5
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +0 -1
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_topk.cuh +3 -5
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +13 -5
- cuda/cccl/headers/include/cub/device/dispatch/kernels/for_each.cuh +72 -37
- cuda/cccl/headers/include/cub/device/dispatch/kernels/reduce.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/kernels/scan.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_reduce.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +22 -27
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +8 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +61 -70
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +10 -0
- cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +24 -17
- cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +3 -2
- cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +3 -2
- cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +2 -2
- cuda/cccl/headers/include/cub/warp/warp_load.cuh +6 -6
- cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +7 -2
- cuda/cccl/headers/include/cub/warp/warp_scan.cuh +7 -3
- cuda/cccl/headers/include/cub/warp/warp_store.cuh +1 -0
- cuda/cccl/headers/include/cuda/__algorithm/common.h +1 -1
- cuda/cccl/headers/include/cuda/__algorithm/copy.h +1 -1
- cuda/cccl/headers/include/cuda/__algorithm/fill.h +1 -1
- cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +19 -0
- cuda/cccl/headers/include/cuda/__cccl_config +1 -0
- cuda/cccl/headers/include/cuda/__cmath/fast_modulo_division.h +3 -74
- cuda/cccl/headers/include/cuda/__cmath/mul_hi.h +146 -0
- cuda/cccl/headers/include/cuda/__complex/get_real_imag.h +0 -4
- cuda/cccl/headers/include/cuda/__device/all_devices.h +46 -143
- cuda/cccl/headers/include/cuda/__device/arch_id.h +176 -0
- cuda/cccl/headers/include/cuda/__device/arch_traits.h +247 -323
- cuda/cccl/headers/include/cuda/__device/attributes.h +174 -123
- cuda/cccl/headers/include/cuda/__device/compute_capability.h +171 -0
- cuda/cccl/headers/include/cuda/__device/device_ref.h +27 -49
- cuda/cccl/headers/include/cuda/__device/physical_device.h +100 -96
- cuda/cccl/headers/include/cuda/__driver/driver_api.h +105 -3
- cuda/cccl/headers/include/cuda/__event/event.h +27 -26
- cuda/cccl/headers/include/cuda/__event/event_ref.h +5 -5
- cuda/cccl/headers/include/cuda/__event/timed_event.h +10 -7
- cuda/cccl/headers/include/cuda/__fwd/devices.h +44 -0
- cuda/cccl/headers/include/cuda/__fwd/zip_iterator.h +9 -0
- cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +46 -31
- cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +79 -47
- cuda/cccl/headers/include/cuda/__iterator/tabulate_output_iterator.h +59 -36
- cuda/cccl/headers/include/cuda/__iterator/transform_input_output_iterator.h +79 -49
- cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +74 -48
- cuda/cccl/headers/include/cuda/__iterator/transform_output_iterator.h +80 -55
- cuda/cccl/headers/include/cuda/__iterator/zip_common.h +148 -0
- cuda/cccl/headers/include/cuda/__iterator/zip_iterator.h +21 -137
- cuda/cccl/headers/include/cuda/__iterator/zip_transform_iterator.h +592 -0
- cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +127 -60
- cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +178 -3
- cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +38 -8
- cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +67 -1
- cuda/cccl/headers/include/cuda/__memory/ptr_in_range.h +93 -0
- cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +4 -4
- cuda/cccl/headers/include/cuda/__memory_resource/properties.h +44 -0
- cuda/cccl/headers/include/cuda/__memory_resource/resource.h +1 -1
- cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +4 -6
- cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2 -1
- cuda/cccl/headers/include/cuda/__runtime/ensure_current_context.h +9 -7
- cuda/cccl/headers/include/cuda/__stream/stream.h +8 -8
- cuda/cccl/headers/include/cuda/__stream/stream_ref.h +18 -16
- cuda/cccl/headers/include/cuda/__utility/basic_any.h +1 -1
- cuda/cccl/headers/include/cuda/__utility/in_range.h +65 -0
- cuda/cccl/headers/include/cuda/algorithm +1 -1
- cuda/cccl/headers/include/cuda/cmath +1 -0
- cuda/cccl/headers/include/cuda/devices +13 -0
- cuda/cccl/headers/include/cuda/iterator +1 -0
- cuda/cccl/headers/include/cuda/memory +1 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +2 -2
- cuda/cccl/headers/include/cuda/std/__algorithm/find.h +1 -1
- cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +2 -4
- cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +1 -1
- cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +7 -15
- cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +1 -1
- cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +1 -2
- cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +2 -2
- cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +1 -1
- cuda/cccl/headers/include/cuda/std/__bit/countl.h +8 -1
- cuda/cccl/headers/include/cuda/std/__bit/countr.h +2 -2
- cuda/cccl/headers/include/cuda/std/__bit/reference.h +11 -11
- cuda/cccl/headers/include/cuda/std/__cccl/algorithm_wrapper.h +36 -0
- cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +46 -49
- cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +6 -0
- cuda/cccl/headers/include/cuda/std/__cccl/host_std_lib.h +52 -0
- cuda/cccl/headers/include/cuda/std/__cccl/memory_wrapper.h +36 -0
- cuda/cccl/headers/include/cuda/std/__cccl/numeric_wrapper.h +36 -0
- cuda/cccl/headers/include/cuda/std/__chrono/duration.h +16 -16
- cuda/cccl/headers/include/cuda/std/__chrono/steady_clock.h +5 -5
- cuda/cccl/headers/include/cuda/std/__chrono/system_clock.h +5 -5
- cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +3 -2
- cuda/cccl/headers/include/cuda/std/__complex/complex.h +3 -2
- cuda/cccl/headers/include/cuda/std/__complex/literals.h +14 -34
- cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +2 -1
- cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +4 -3
- cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +2 -2
- cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +3 -2
- cuda/cccl/headers/include/cuda/std/__floating_point/fp.h +1 -1
- cuda/cccl/headers/include/cuda/std/__functional/bind.h +10 -13
- cuda/cccl/headers/include/cuda/std/__functional/function.h +5 -8
- cuda/cccl/headers/include/cuda/std/__functional/invoke.h +71 -335
- cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +1 -2
- cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +3 -3
- cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +0 -6
- cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +13 -0
- cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +13 -0
- cuda/cccl/headers/include/cuda/std/__fwd/complex.h +13 -4
- cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +23 -0
- cuda/cccl/headers/include/cuda/std/__fwd/pair.h +13 -0
- cuda/cccl/headers/include/cuda/std/__fwd/string.h +22 -0
- cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +14 -0
- cuda/cccl/headers/include/cuda/std/__internal/features.h +0 -5
- cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +21 -0
- cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +5 -5
- cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +7 -1
- cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +53 -39
- cuda/cccl/headers/include/cuda/std/__memory/allocator.h +3 -3
- cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +1 -3
- cuda/cccl/headers/include/cuda/std/__optional/optional_base.h +1 -0
- cuda/cccl/headers/include/cuda/std/__ranges/compressed_movable_box.h +892 -0
- cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +2 -2
- cuda/cccl/headers/include/cuda/std/__tuple_dir/make_tuple_types.h +23 -1
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +4 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like_ext.h +4 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +7 -5
- cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +1 -1
- cuda/cccl/headers/include/cuda/std/__utility/pair.h +0 -5
- cuda/cccl/headers/include/cuda/std/bitset +1 -1
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +15 -12
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +11 -9
- cuda/cccl/headers/include/cuda/std/inplace_vector +4 -4
- cuda/cccl/headers/include/cuda/std/numbers +5 -0
- cuda/cccl/headers/include/cuda/std/string_view +155 -13
- cuda/cccl/headers/include/cuda/std/version +1 -4
- cuda/cccl/headers/include/cuda/stream_ref +5 -0
- cuda/cccl/headers/include/cuda/utility +1 -0
- cuda/cccl/headers/include/nv/target +7 -2
- cuda/cccl/headers/include/thrust/allocate_unique.h +1 -1
- cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +309 -33
- cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +151 -4
- cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +60 -3
- cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +45 -3
- cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +31 -6
- cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +29 -16
- cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +41 -4
- cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +42 -4
- cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +3 -3
- cuda/cccl/headers/include/thrust/detail/integer_math.h +3 -20
- cuda/cccl/headers/include/thrust/detail/internal_functional.h +1 -1
- cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +1 -1
- cuda/cccl/headers/include/thrust/detail/temporary_array.h +1 -1
- cuda/cccl/headers/include/thrust/detail/type_traits.h +1 -1
- cuda/cccl/headers/include/thrust/device_delete.h +18 -3
- cuda/cccl/headers/include/thrust/device_free.h +16 -3
- cuda/cccl/headers/include/thrust/device_new.h +29 -8
- cuda/cccl/headers/include/thrust/host_vector.h +1 -1
- cuda/cccl/headers/include/thrust/iterator/iterator_traits.h +11 -0
- cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +5 -2
- cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +1 -1
- cuda/cccl/headers/include/thrust/mr/pool.h +1 -1
- cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +33 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +13 -115
- cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +8 -2
- cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +7 -7
- cuda/cccl/parallel/experimental/__init__.py +21 -74
- cuda/compute/__init__.py +79 -0
- cuda/{cccl/parallel/experimental → compute}/_bindings.pyi +43 -1
- cuda/{cccl/parallel/experimental → compute}/_bindings_impl.pyx +157 -8
- cuda/{cccl/parallel/experimental → compute}/algorithms/_histogram.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/algorithms/_merge_sort.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/algorithms/_radix_sort.py +3 -3
- cuda/{cccl/parallel/experimental → compute}/algorithms/_reduce.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/algorithms/_scan.py +112 -40
- cuda/{cccl/parallel/experimental → compute}/algorithms/_segmented_reduce.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/algorithms/_three_way_partition.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/algorithms/_transform.py +36 -15
- cuda/{cccl/parallel/experimental → compute}/algorithms/_unique_by_key.py +2 -2
- cuda/compute/cu12/_bindings_impl.cpython-312-aarch64-linux-gnu.so +0 -0
- cuda/{cccl/parallel/experimental → compute}/cu12/cccl/libcccl.c.parallel.so +0 -0
- cuda/compute/cu13/_bindings_impl.cpython-312-aarch64-linux-gnu.so +0 -0
- cuda/{cccl/parallel/experimental → compute}/cu13/cccl/libcccl.c.parallel.so +0 -0
- cuda/{cccl/parallel/experimental → compute}/iterators/__init__.py +2 -0
- cuda/{cccl/parallel/experimental → compute}/iterators/_factories.py +36 -8
- cuda/{cccl/parallel/experimental → compute}/iterators/_iterators.py +206 -1
- cuda/{cccl/parallel/experimental → compute}/numba_utils.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/struct.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/typing.py +2 -0
- cuda/coop/__init__.py +8 -0
- cuda/{cccl/cooperative/experimental → coop}/_nvrtc.py +3 -2
- cuda/{cccl/cooperative/experimental → coop}/_scan_op.py +3 -3
- cuda/{cccl/cooperative/experimental → coop}/_types.py +2 -2
- cuda/{cccl/cooperative/experimental → coop}/_typing.py +1 -1
- cuda/{cccl/cooperative/experimental → coop}/block/__init__.py +6 -6
- cuda/{cccl/cooperative/experimental → coop}/block/_block_exchange.py +4 -4
- cuda/{cccl/cooperative/experimental → coop}/block/_block_load_store.py +6 -6
- cuda/{cccl/cooperative/experimental → coop}/block/_block_merge_sort.py +4 -4
- cuda/{cccl/cooperative/experimental → coop}/block/_block_radix_sort.py +6 -6
- cuda/{cccl/cooperative/experimental → coop}/block/_block_reduce.py +6 -6
- cuda/{cccl/cooperative/experimental → coop}/block/_block_scan.py +7 -7
- cuda/coop/warp/__init__.py +9 -0
- cuda/{cccl/cooperative/experimental → coop}/warp/_warp_merge_sort.py +3 -3
- cuda/{cccl/cooperative/experimental → coop}/warp/_warp_reduce.py +6 -6
- cuda/{cccl/cooperative/experimental → coop}/warp/_warp_scan.py +4 -4
- {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/METADATA +1 -1
- {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/RECORD +275 -276
- cuda/cccl/cooperative/experimental/warp/__init__.py +0 -9
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_advance_iterators.cuh +0 -111
- cuda/cccl/headers/include/thrust/detail/algorithm_wrapper.h +0 -37
- cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.inl +0 -371
- cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.inl +0 -242
- cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.inl +0 -137
- cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.inl +0 -99
- cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.inl +0 -68
- cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.inl +0 -86
- cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.inl +0 -79
- cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.inl +0 -98
- cuda/cccl/headers/include/thrust/detail/device_delete.inl +0 -52
- cuda/cccl/headers/include/thrust/detail/device_free.inl +0 -47
- cuda/cccl/headers/include/thrust/detail/device_new.inl +0 -61
- cuda/cccl/headers/include/thrust/detail/memory_wrapper.h +0 -40
- cuda/cccl/headers/include/thrust/detail/numeric_wrapper.h +0 -37
- cuda/cccl/parallel/experimental/.gitignore +0 -4
- cuda/cccl/parallel/experimental/cu12/_bindings_impl.cpython-312-aarch64-linux-gnu.so +0 -0
- cuda/cccl/parallel/experimental/cu13/_bindings_impl.cpython-312-aarch64-linux-gnu.so +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_bindings.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_caching.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_cccl_interop.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_utils/__init__.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_utils/protocols.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_utils/temp_storage_buffer.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/algorithms/__init__.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/cccl/.gitkeep +0 -0
- /cuda/{cccl/parallel/experimental → compute}/iterators/_zip_iterator.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/op.py +0 -0
- /cuda/{cccl/cooperative/experimental → coop}/_caching.py +0 -0
- /cuda/{cccl/cooperative/experimental → coop}/_common.py +0 -0
- {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/WHEEL +0 -0
- {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -26,14 +26,56 @@
|
|
|
26
26
|
# pragma system_header
|
|
27
27
|
#endif // no system header
|
|
28
28
|
|
|
29
|
+
#include <thrust/detail/allocator/allocator_traits.h>
|
|
30
|
+
#include <thrust/detail/type_traits.h>
|
|
31
|
+
#include <thrust/detail/type_traits/pointer_traits.h>
|
|
32
|
+
#include <thrust/for_each.h>
|
|
33
|
+
#include <thrust/uninitialized_fill.h>
|
|
34
|
+
|
|
35
|
+
#include <cuda/std/__cccl/memory_wrapper.h>
|
|
36
|
+
|
|
29
37
|
THRUST_NAMESPACE_BEGIN
|
|
30
38
|
namespace detail
|
|
31
39
|
{
|
|
32
40
|
|
|
41
|
+
// fill_construct_range has 2 cases:
|
|
42
|
+
// if Allocator has an effectful member function construct:
|
|
43
|
+
// 1. construct via the allocator
|
|
44
|
+
// else
|
|
45
|
+
// 2. construct via uninitialized_fill
|
|
46
|
+
|
|
47
|
+
template <typename Allocator, typename T, typename Arg1>
|
|
48
|
+
inline constexpr bool has_effectful_member_construct2 =
|
|
49
|
+
allocator_traits_detail::has_member_construct2<Allocator, T, Arg1>::value;
|
|
50
|
+
|
|
51
|
+
// std::allocator::construct's only effect is to invoke placement new
|
|
52
|
+
template <typename U, typename T, typename Arg1>
|
|
53
|
+
inline constexpr bool has_effectful_member_construct2<std::allocator<U>, T, Arg1> = false;
|
|
54
|
+
|
|
55
|
+
template <typename Allocator, typename Arg1>
|
|
56
|
+
struct construct2_via_allocator
|
|
57
|
+
{
|
|
58
|
+
Allocator& a;
|
|
59
|
+
Arg1 arg;
|
|
60
|
+
|
|
61
|
+
template <typename T>
|
|
62
|
+
inline _CCCL_HOST_DEVICE void operator()(T& x)
|
|
63
|
+
{
|
|
64
|
+
allocator_traits<Allocator>::construct(a, &x, arg);
|
|
65
|
+
}
|
|
66
|
+
};
|
|
33
67
|
template <typename Allocator, typename Pointer, typename Size, typename T>
|
|
34
|
-
_CCCL_HOST_DEVICE
|
|
68
|
+
_CCCL_HOST_DEVICE void fill_construct_range(Allocator& a, Pointer p, Size n, const T& value)
|
|
69
|
+
{
|
|
70
|
+
if constexpr (has_effectful_member_construct2<Allocator, typename pointer_element<Pointer>::type, T>)
|
|
71
|
+
{
|
|
72
|
+
thrust::for_each_n(allocator_system<Allocator>::get(a), p, n, construct2_via_allocator<Allocator, T>{a, value});
|
|
73
|
+
}
|
|
74
|
+
else
|
|
75
|
+
{
|
|
76
|
+
thrust::uninitialized_fill_n(allocator_system<Allocator>::get(a), p, n, value);
|
|
77
|
+
}
|
|
78
|
+
}
|
|
35
79
|
|
|
36
80
|
} // namespace detail
|
|
37
81
|
THRUST_NAMESPACE_END
|
|
38
|
-
|
|
39
|
-
#include <thrust/detail/allocator/fill_construct_range.inl>
|
|
@@ -25,29 +25,54 @@
|
|
|
25
25
|
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
|
|
26
26
|
# pragma system_header
|
|
27
27
|
#endif // no system header
|
|
28
|
+
|
|
29
|
+
#include <thrust/detail/allocator/malloc_allocator.h>
|
|
28
30
|
#include <thrust/detail/allocator/tagged_allocator.h>
|
|
31
|
+
#include <thrust/detail/malloc_and_free.h>
|
|
32
|
+
#include <thrust/detail/raw_pointer_cast.h>
|
|
29
33
|
#include <thrust/detail/type_traits/pointer_traits.h>
|
|
34
|
+
#include <thrust/system/detail/bad_alloc.h>
|
|
35
|
+
#include <thrust/system/detail/generic/select_system.h>
|
|
30
36
|
|
|
31
37
|
THRUST_NAMESPACE_BEGIN
|
|
32
38
|
namespace detail
|
|
33
39
|
{
|
|
34
40
|
|
|
35
41
|
template <typename T, typename System, typename Pointer>
|
|
36
|
-
class malloc_allocator : public
|
|
42
|
+
class malloc_allocator : public tagged_allocator<T, System, Pointer>
|
|
37
43
|
{
|
|
38
44
|
private:
|
|
39
|
-
using super_t =
|
|
45
|
+
using super_t = tagged_allocator<T, System, Pointer>;
|
|
40
46
|
|
|
41
47
|
public:
|
|
42
48
|
using pointer = typename super_t::pointer;
|
|
43
49
|
using size_type = typename super_t::size_type;
|
|
44
50
|
|
|
45
|
-
pointer allocate(size_type cnt)
|
|
51
|
+
pointer allocate(size_type cnt)
|
|
52
|
+
{
|
|
53
|
+
using thrust::system::detail::generic::select_system;
|
|
54
|
+
|
|
55
|
+
// XXX should use a hypothetical thrust::static_pointer_cast here
|
|
56
|
+
System system;
|
|
57
|
+
|
|
58
|
+
pointer result = thrust::malloc<T>(select_system(system), cnt);
|
|
59
|
+
|
|
60
|
+
if (result.get() == 0)
|
|
61
|
+
{
|
|
62
|
+
throw thrust::system::detail::bad_alloc("malloc_allocator::allocate: malloc failed");
|
|
63
|
+
} // end if
|
|
46
64
|
|
|
47
|
-
|
|
65
|
+
return result;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
void deallocate(pointer p, size_type n) noexcept
|
|
69
|
+
{
|
|
70
|
+
using thrust::system::detail::generic::select_system;
|
|
71
|
+
|
|
72
|
+
System system;
|
|
73
|
+
thrust::free(select_system(system), p);
|
|
74
|
+
}
|
|
48
75
|
};
|
|
49
76
|
|
|
50
77
|
} // namespace detail
|
|
51
78
|
THRUST_NAMESPACE_END
|
|
52
|
-
|
|
53
|
-
#include <thrust/detail/allocator/malloc_allocator.inl>
|
|
@@ -25,9 +25,12 @@
|
|
|
25
25
|
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
|
|
26
26
|
# pragma system_header
|
|
27
27
|
#endif // no system header
|
|
28
|
+
#include <thrust/detail/allocator/tagged_allocator.h>
|
|
28
29
|
#include <thrust/detail/type_traits/pointer_traits.h>
|
|
29
30
|
#include <thrust/iterator/iterator_traits.h>
|
|
30
31
|
|
|
32
|
+
#include <cuda/std/limits>
|
|
33
|
+
|
|
31
34
|
THRUST_NAMESPACE_BEGIN
|
|
32
35
|
namespace detail
|
|
33
36
|
{
|
|
@@ -72,31 +75,41 @@ public:
|
|
|
72
75
|
using other = tagged_allocator<U, Tag, Pointer>;
|
|
73
76
|
}; // end rebind
|
|
74
77
|
|
|
75
|
-
|
|
78
|
+
tagged_allocator() = default;
|
|
76
79
|
|
|
77
|
-
|
|
80
|
+
tagged_allocator(const tagged_allocator&) = default;
|
|
78
81
|
|
|
79
82
|
template <typename U, typename OtherPointer>
|
|
80
|
-
_CCCL_HOST_DEVICE
|
|
83
|
+
_CCCL_HOST_DEVICE tagged_allocator(const tagged_allocator<U, Tag, OtherPointer>&)
|
|
84
|
+
{}
|
|
81
85
|
|
|
82
|
-
|
|
86
|
+
~tagged_allocator() = default;
|
|
83
87
|
|
|
84
|
-
_CCCL_HOST_DEVICE pointer address(reference x) const
|
|
88
|
+
_CCCL_HOST_DEVICE pointer address(reference x) const
|
|
89
|
+
{
|
|
90
|
+
return &x;
|
|
91
|
+
}
|
|
85
92
|
|
|
86
|
-
_CCCL_HOST_DEVICE const_pointer address(const_reference x) const
|
|
93
|
+
_CCCL_HOST_DEVICE const_pointer address(const_reference x) const
|
|
94
|
+
{
|
|
95
|
+
return &x;
|
|
96
|
+
}
|
|
87
97
|
|
|
88
|
-
size_type max_size() const
|
|
89
|
-
|
|
98
|
+
size_type max_size() const
|
|
99
|
+
{
|
|
100
|
+
return (::cuda::std::numeric_limits<size_type>::max)() / sizeof(T);
|
|
101
|
+
}
|
|
90
102
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
103
|
+
_CCCL_HOST_DEVICE friend bool operator==(const tagged_allocator&, const tagged_allocator&)
|
|
104
|
+
{
|
|
105
|
+
return true;
|
|
106
|
+
}
|
|
94
107
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
108
|
+
_CCCL_HOST_DEVICE friend bool operator!=(const tagged_allocator&, const tagged_allocator&)
|
|
109
|
+
{
|
|
110
|
+
return false;
|
|
111
|
+
}
|
|
112
|
+
};
|
|
98
113
|
|
|
99
114
|
} // namespace detail
|
|
100
115
|
THRUST_NAMESPACE_END
|
|
101
|
-
|
|
102
|
-
#include <thrust/detail/allocator/tagged_allocator.inl>
|
|
@@ -25,11 +25,23 @@
|
|
|
25
25
|
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
|
|
26
26
|
# pragma system_header
|
|
27
27
|
#endif // no system header
|
|
28
|
+
|
|
28
29
|
#include <thrust/detail/allocator/allocator_traits.h>
|
|
29
30
|
#include <thrust/detail/allocator/tagged_allocator.h>
|
|
31
|
+
#include <thrust/detail/allocator/temporary_allocator.h>
|
|
30
32
|
#include <thrust/detail/execution_policy.h>
|
|
33
|
+
#include <thrust/detail/temporary_buffer.h>
|
|
31
34
|
#include <thrust/memory.h>
|
|
32
35
|
#include <thrust/pair.h>
|
|
36
|
+
#include <thrust/system/detail/bad_alloc.h>
|
|
37
|
+
|
|
38
|
+
#include <cuda/std/cassert>
|
|
39
|
+
|
|
40
|
+
#include <nv/target>
|
|
41
|
+
|
|
42
|
+
#if _CCCL_CUDA_COMPILATION() && _CCCL_DEVICE_COMPILATION()
|
|
43
|
+
# include <thrust/system/cuda/detail/terminate.h>
|
|
44
|
+
#endif // _CCCL_CUDA_COMPILATION() && _CCCL_DEVICE_COMPILATION()
|
|
33
45
|
|
|
34
46
|
THRUST_NAMESPACE_BEGIN
|
|
35
47
|
namespace detail
|
|
@@ -60,9 +72,36 @@ public:
|
|
|
60
72
|
, m_system(thrust::detail::derived_cast(system))
|
|
61
73
|
{}
|
|
62
74
|
|
|
63
|
-
_CCCL_HOST_DEVICE pointer allocate(size_type cnt)
|
|
75
|
+
_CCCL_HOST_DEVICE pointer allocate(size_type cnt)
|
|
76
|
+
{
|
|
77
|
+
pointer_and_size result = thrust::get_temporary_buffer<T>(system(), cnt);
|
|
78
|
+
|
|
79
|
+
// handle failure
|
|
80
|
+
if (result.second < cnt)
|
|
81
|
+
{
|
|
82
|
+
// deallocate and throw
|
|
83
|
+
// note that we pass cnt to deallocate, not a value derived from result.second
|
|
84
|
+
deallocate(result.first, cnt);
|
|
85
|
+
|
|
86
|
+
#if _CCCL_CUDA_COMPILATION()
|
|
87
|
+
NV_IF_TARGET(
|
|
88
|
+
NV_IS_HOST,
|
|
89
|
+
(throw thrust::system::detail::bad_alloc("temporary_buffer::allocate: get_temporary_buffer failed");),
|
|
90
|
+
( // NV_IS_DEVICE
|
|
91
|
+
thrust::system::cuda::detail::terminate_with_message("temporary_buffer::allocate: "
|
|
92
|
+
"get_temporary_buffer failed");));
|
|
93
|
+
#else
|
|
94
|
+
throw thrust::system::detail::bad_alloc("temporary_buffer::allocate: get_temporary_buffer failed");
|
|
95
|
+
#endif
|
|
96
|
+
} // end if
|
|
64
97
|
|
|
65
|
-
|
|
98
|
+
return result.first;
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
_CCCL_HOST_DEVICE void deallocate(pointer p, size_type n) noexcept
|
|
102
|
+
{
|
|
103
|
+
return thrust::return_temporary_buffer(system(), p, n);
|
|
104
|
+
}
|
|
66
105
|
|
|
67
106
|
_CCCL_HOST_DEVICE inline System& system()
|
|
68
107
|
{
|
|
@@ -75,5 +114,3 @@ private:
|
|
|
75
114
|
|
|
76
115
|
} // namespace detail
|
|
77
116
|
THRUST_NAMESPACE_END
|
|
78
|
-
|
|
79
|
-
#include <thrust/detail/allocator/temporary_allocator.inl>
|
|
@@ -26,14 +26,52 @@
|
|
|
26
26
|
# pragma system_header
|
|
27
27
|
#endif // no system header
|
|
28
28
|
|
|
29
|
+
#include <thrust/detail/allocator/allocator_traits.h>
|
|
30
|
+
#include <thrust/detail/type_traits.h>
|
|
31
|
+
#include <thrust/detail/type_traits/pointer_traits.h>
|
|
32
|
+
#include <thrust/for_each.h>
|
|
33
|
+
#include <thrust/uninitialized_fill.h>
|
|
34
|
+
|
|
29
35
|
THRUST_NAMESPACE_BEGIN
|
|
30
36
|
namespace detail
|
|
31
37
|
{
|
|
38
|
+
template <typename Allocator>
|
|
39
|
+
struct construct1_via_allocator
|
|
40
|
+
{
|
|
41
|
+
Allocator& a;
|
|
32
42
|
|
|
33
|
-
template <typename
|
|
34
|
-
_CCCL_HOST_DEVICE
|
|
43
|
+
template <typename T>
|
|
44
|
+
inline _CCCL_HOST_DEVICE void operator()(T& x)
|
|
45
|
+
{
|
|
46
|
+
allocator_traits<Allocator>::construct(a, &x);
|
|
47
|
+
}
|
|
48
|
+
};
|
|
49
|
+
|
|
50
|
+
// we need to construct T via the allocator if the Allocator does something interesting or if T's default constructor
|
|
51
|
+
// does something interesting
|
|
52
|
+
template <typename Allocator, typename T>
|
|
53
|
+
inline constexpr bool needs_default_construct_via_allocator =
|
|
54
|
+
allocator_traits_detail::has_member_construct1<Allocator, T>::value
|
|
55
|
+
|| !::cuda::std::is_trivially_default_constructible_v<T>;
|
|
35
56
|
|
|
57
|
+
// we know that std::allocator::construct's only effect is to call T's
|
|
58
|
+
// default constructor, so we needn't use it for default construction
|
|
59
|
+
// unless T's constructor does something interesting
|
|
60
|
+
template <typename U, typename T>
|
|
61
|
+
inline constexpr bool needs_default_construct_via_allocator<std::allocator<U>, T> =
|
|
62
|
+
!::cuda::std::is_trivially_default_constructible_v<T>;
|
|
63
|
+
|
|
64
|
+
template <typename Allocator, typename Pointer, typename Size>
|
|
65
|
+
_CCCL_HOST_DEVICE void value_initialize_range(Allocator& a, Pointer p, Size n)
|
|
66
|
+
{
|
|
67
|
+
if constexpr (needs_default_construct_via_allocator<Allocator, typename pointer_element<Pointer>::type>)
|
|
68
|
+
{
|
|
69
|
+
thrust::for_each_n(allocator_system<Allocator>::get(a), p, n, construct1_via_allocator<Allocator>{a});
|
|
70
|
+
}
|
|
71
|
+
else
|
|
72
|
+
{
|
|
73
|
+
thrust::uninitialized_fill_n(allocator_system<Allocator>::get(a), p, n, typename pointer_element<Pointer>::type());
|
|
74
|
+
}
|
|
75
|
+
}
|
|
36
76
|
} // namespace detail
|
|
37
77
|
THRUST_NAMESPACE_END
|
|
38
|
-
|
|
39
|
-
#include <thrust/detail/allocator/value_initialize_range.inl>
|
|
@@ -106,9 +106,9 @@ _CCCL_HOST_DEVICE inline thrust::complex<double> ccosh(const thrust::complex<dou
|
|
|
106
106
|
else if (ix < 0x4096bbaa)
|
|
107
107
|
{
|
|
108
108
|
/* x < 1455: scale to avoid overflow */
|
|
109
|
-
thrust::complex<double> z_;
|
|
110
|
-
z_
|
|
111
|
-
return
|
|
109
|
+
thrust::complex<double> z_ = ldexp_cexp(thrust::complex<double>(fabs(x), y), -1);
|
|
110
|
+
z_.imag(copysign(z_.imag(), x));
|
|
111
|
+
return z_;
|
|
112
112
|
}
|
|
113
113
|
else
|
|
114
114
|
{
|
|
@@ -27,6 +27,8 @@
|
|
|
27
27
|
#endif // no system header
|
|
28
28
|
#include <thrust/detail/type_deduction.h>
|
|
29
29
|
|
|
30
|
+
#include <cuda/std/__bit/countl.h>
|
|
31
|
+
#include <cuda/std/__type_traits/make_unsigned.h>
|
|
30
32
|
#include <cuda/std/limits>
|
|
31
33
|
#include <cuda/std/type_traits>
|
|
32
34
|
|
|
@@ -36,25 +38,6 @@ THRUST_NAMESPACE_BEGIN
|
|
|
36
38
|
namespace detail
|
|
37
39
|
{
|
|
38
40
|
|
|
39
|
-
template <typename Integer>
|
|
40
|
-
_CCCL_HOST_DEVICE _CCCL_FORCEINLINE Integer clz(Integer x)
|
|
41
|
-
{
|
|
42
|
-
Integer result;
|
|
43
|
-
|
|
44
|
-
NV_IF_TARGET(NV_IS_DEVICE,
|
|
45
|
-
(result = ::__clz(x);),
|
|
46
|
-
(int num_bits = 8 * sizeof(Integer); int num_bits_minus_one = num_bits - 1; result = num_bits;
|
|
47
|
-
for (int i = num_bits_minus_one; i >= 0; --i) {
|
|
48
|
-
if ((Integer(1) << i) & x)
|
|
49
|
-
{
|
|
50
|
-
result = num_bits_minus_one - i;
|
|
51
|
-
break;
|
|
52
|
-
}
|
|
53
|
-
}));
|
|
54
|
-
|
|
55
|
-
return result;
|
|
56
|
-
}
|
|
57
|
-
|
|
58
41
|
template <typename Integer>
|
|
59
42
|
_CCCL_HOST_DEVICE _CCCL_FORCEINLINE bool is_power_of_2(Integer x)
|
|
60
43
|
{
|
|
@@ -85,7 +68,7 @@ _CCCL_HOST_DEVICE _CCCL_FORCEINLINE Integer log2(Integer x)
|
|
|
85
68
|
Integer num_bits = 8 * sizeof(Integer);
|
|
86
69
|
Integer num_bits_minus_one = num_bits - 1;
|
|
87
70
|
|
|
88
|
-
return num_bits_minus_one -
|
|
71
|
+
return num_bits_minus_one - ::cuda::std::countl_zero(::cuda::std::__to_unsigned_like(x));
|
|
89
72
|
}
|
|
90
73
|
|
|
91
74
|
template <typename Integer>
|
|
@@ -30,7 +30,6 @@
|
|
|
30
30
|
# pragma system_header
|
|
31
31
|
#endif // no system header
|
|
32
32
|
|
|
33
|
-
#include <thrust/detail/memory_wrapper.h> // for ::new
|
|
34
33
|
#include <thrust/detail/raw_reference_cast.h>
|
|
35
34
|
#include <thrust/detail/static_assert.h>
|
|
36
35
|
#include <thrust/detail/type_traits.h>
|
|
@@ -43,6 +42,7 @@
|
|
|
43
42
|
#include <cuda/__iterator/tabulate_output_iterator.h>
|
|
44
43
|
#include <cuda/__iterator/transform_input_output_iterator.h>
|
|
45
44
|
#include <cuda/__iterator/transform_output_iterator.h>
|
|
45
|
+
#include <cuda/std/__cccl/memory_wrapper.h> // for ::new
|
|
46
46
|
#include <cuda/std/type_traits>
|
|
47
47
|
|
|
48
48
|
THRUST_NAMESPACE_BEGIN
|
|
@@ -18,10 +18,10 @@
|
|
|
18
18
|
# pragma system_header
|
|
19
19
|
#endif // no system header
|
|
20
20
|
#include <thrust/detail/allocator/allocator_traits.h>
|
|
21
|
-
#include <thrust/detail/memory_wrapper.h>
|
|
22
21
|
#include <thrust/detail/type_traits.h>
|
|
23
22
|
#include <thrust/iterator/iterator_traits.h>
|
|
24
23
|
|
|
24
|
+
#include <cuda/std/__cccl/memory_wrapper.h>
|
|
25
25
|
#include <cuda/std/__memory/addressof.h>
|
|
26
26
|
#include <cuda/std/utility>
|
|
27
27
|
|
|
@@ -45,10 +45,10 @@ THRUST_NAMESPACE_END
|
|
|
45
45
|
#include <thrust/detail/allocator/no_throw_allocator.h>
|
|
46
46
|
#include <thrust/detail/allocator/temporary_allocator.h>
|
|
47
47
|
#include <thrust/detail/contiguous_storage.h>
|
|
48
|
-
#include <thrust/detail/memory_wrapper.h>
|
|
49
48
|
#include <thrust/iterator/detail/tagged_iterator.h>
|
|
50
49
|
#include <thrust/iterator/iterator_traits.h>
|
|
51
50
|
|
|
51
|
+
#include <cuda/std/__cccl/memory_wrapper.h>
|
|
52
52
|
#include <cuda/std/type_traits>
|
|
53
53
|
|
|
54
54
|
THRUST_NAMESPACE_BEGIN
|
|
@@ -123,7 +123,7 @@ struct larger_type
|
|
|
123
123
|
{};
|
|
124
124
|
|
|
125
125
|
template <class F, class... Us>
|
|
126
|
-
using invoke_result = ::cuda::std::
|
|
126
|
+
using invoke_result = ::cuda::std::invoke_result<F, Us...>;
|
|
127
127
|
|
|
128
128
|
template <class F, class... Us>
|
|
129
129
|
using invoke_result_t = typename invoke_result<F, Us...>::type;
|
|
@@ -29,10 +29,22 @@
|
|
|
29
29
|
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
|
|
30
30
|
# pragma system_header
|
|
31
31
|
#endif // no system header
|
|
32
|
+
|
|
33
|
+
#include <thrust/detail/allocator/destroy_range.h>
|
|
34
|
+
#include <thrust/device_delete.h>
|
|
35
|
+
#include <thrust/device_free.h>
|
|
32
36
|
#include <thrust/device_ptr.h>
|
|
37
|
+
#include <thrust/execution_policy.h>
|
|
33
38
|
|
|
34
39
|
THRUST_NAMESPACE_BEGIN
|
|
35
40
|
|
|
41
|
+
namespace detail
|
|
42
|
+
{
|
|
43
|
+
// define an empty allocator class to use below
|
|
44
|
+
struct device_delete_allocator
|
|
45
|
+
{};
|
|
46
|
+
} // namespace detail
|
|
47
|
+
|
|
36
48
|
/*! \addtogroup memory_management Memory Management
|
|
37
49
|
* \{
|
|
38
50
|
*/
|
|
@@ -49,11 +61,14 @@ THRUST_NAMESPACE_BEGIN
|
|
|
49
61
|
* \see device_new
|
|
50
62
|
*/
|
|
51
63
|
template <typename T>
|
|
52
|
-
inline void device_delete(thrust::device_ptr<T> ptr, const size_t n = 1)
|
|
64
|
+
inline void device_delete(thrust::device_ptr<T> ptr, const size_t n = 1)
|
|
65
|
+
{
|
|
66
|
+
// we don't have an allocator, so there is no need to go through thrust::detail::destroy_range
|
|
67
|
+
thrust::for_each_n(device, ptr, n, detail::gozer{});
|
|
68
|
+
thrust::device_free(ptr);
|
|
69
|
+
}
|
|
53
70
|
|
|
54
71
|
/*! \} // memory_management
|
|
55
72
|
*/
|
|
56
73
|
|
|
57
74
|
THRUST_NAMESPACE_END
|
|
58
|
-
|
|
59
|
-
#include <thrust/detail/device_delete.inl>
|
|
@@ -29,7 +29,12 @@
|
|
|
29
29
|
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
|
|
30
30
|
# pragma system_header
|
|
31
31
|
#endif // no system header
|
|
32
|
+
|
|
33
|
+
#include <thrust/detail/malloc_and_free.h>
|
|
34
|
+
#include <thrust/device_free.h>
|
|
32
35
|
#include <thrust/device_ptr.h>
|
|
36
|
+
#include <thrust/iterator/iterator_traits.h>
|
|
37
|
+
#include <thrust/system/detail/generic/select_system.h>
|
|
33
38
|
|
|
34
39
|
THRUST_NAMESPACE_BEGIN
|
|
35
40
|
|
|
@@ -62,11 +67,19 @@ THRUST_NAMESPACE_BEGIN
|
|
|
62
67
|
* \see device_ptr
|
|
63
68
|
* \see device_malloc
|
|
64
69
|
*/
|
|
65
|
-
inline void device_free(thrust::device_ptr<void> ptr)
|
|
70
|
+
inline void device_free(thrust::device_ptr<void> ptr)
|
|
71
|
+
{
|
|
72
|
+
using thrust::system::detail::generic::select_system;
|
|
73
|
+
|
|
74
|
+
using system = thrust::iterator_system<thrust::device_ptr<void>>::type;
|
|
75
|
+
|
|
76
|
+
// XXX lower to select_system(system) here
|
|
77
|
+
system s;
|
|
78
|
+
|
|
79
|
+
thrust::free(s, ptr);
|
|
80
|
+
}
|
|
66
81
|
|
|
67
82
|
/*! \} // memory_management
|
|
68
83
|
*/
|
|
69
84
|
|
|
70
85
|
THRUST_NAMESPACE_END
|
|
71
|
-
|
|
72
|
-
#include <thrust/detail/device_free.inl>
|
|
@@ -30,10 +30,13 @@
|
|
|
30
30
|
# pragma system_header
|
|
31
31
|
#endif // no system header
|
|
32
32
|
|
|
33
|
-
|
|
33
|
+
#include <thrust/detail/allocator/value_initialize_range.h>
|
|
34
|
+
#include <thrust/device_allocator.h>
|
|
35
|
+
#include <thrust/device_malloc.h>
|
|
36
|
+
#include <thrust/device_new.h>
|
|
34
37
|
#include <thrust/device_ptr.h>
|
|
35
|
-
|
|
36
|
-
#include <
|
|
38
|
+
#include <thrust/execution_policy.h>
|
|
39
|
+
#include <thrust/uninitialized_fill.h>
|
|
37
40
|
|
|
38
41
|
THRUST_NAMESPACE_BEGIN
|
|
39
42
|
|
|
@@ -55,7 +58,15 @@ THRUST_NAMESPACE_BEGIN
|
|
|
55
58
|
* \see device_ptr
|
|
56
59
|
*/
|
|
57
60
|
template <typename T>
|
|
58
|
-
device_ptr<T> device_new(device_ptr<void> p, const size_t n = 1)
|
|
61
|
+
device_ptr<T> device_new(device_ptr<void> p, const size_t n = 1)
|
|
62
|
+
{
|
|
63
|
+
auto* dev_ptr = static_cast<T*>(p.get());
|
|
64
|
+
// TODO(bgruber): ideally, we would have a thrust::uninitialized_default_construct. Until then, use vector's
|
|
65
|
+
// infrastructure
|
|
66
|
+
device_allocator<T> alloc; // not needed for allocation, just for construct() called in value_initialize_range()
|
|
67
|
+
detail::value_initialize_range(alloc, dev_ptr, n);
|
|
68
|
+
return device_ptr<T>{dev_ptr};
|
|
69
|
+
}
|
|
59
70
|
|
|
60
71
|
/*! \p device_new implements the placement new operator for types
|
|
61
72
|
* resident in device memory. \p device_new calls <tt>T</tt>'s copy
|
|
@@ -72,7 +83,15 @@ device_ptr<T> device_new(device_ptr<void> p, const size_t n = 1);
|
|
|
72
83
|
* \see fill
|
|
73
84
|
*/
|
|
74
85
|
template <typename T>
|
|
75
|
-
device_ptr<T> device_new(device_ptr<void> p, const T& exemplar, const size_t n = 1)
|
|
86
|
+
device_ptr<T> device_new(device_ptr<void> p, const T& exemplar, const size_t n = 1)
|
|
87
|
+
{
|
|
88
|
+
device_ptr<T> result(static_cast<T*>(p.get()));
|
|
89
|
+
|
|
90
|
+
// run copy constructors at p here
|
|
91
|
+
thrust::uninitialized_fill(device, result, result + n, exemplar);
|
|
92
|
+
|
|
93
|
+
return result;
|
|
94
|
+
}
|
|
76
95
|
|
|
77
96
|
/*! \p device_new implements the new operator for types resident in device memory.
|
|
78
97
|
* It allocates device memory large enough to hold \p n new objects of type \c T.
|
|
@@ -81,11 +100,13 @@ device_ptr<T> device_new(device_ptr<void> p, const T& exemplar, const size_t n =
|
|
|
81
100
|
* \return A \p device_ptr to the newly allocated region of device memory.
|
|
82
101
|
*/
|
|
83
102
|
template <typename T>
|
|
84
|
-
device_ptr<T> device_new(const size_t n = 1)
|
|
103
|
+
device_ptr<T> device_new(const size_t n = 1)
|
|
104
|
+
{
|
|
105
|
+
// call placement new version of device_new
|
|
106
|
+
return device_new<T>(thrust::device_malloc<T>(n));
|
|
107
|
+
}
|
|
85
108
|
|
|
86
109
|
/*! \} // memory_management
|
|
87
110
|
*/
|
|
88
111
|
|
|
89
112
|
THRUST_NAMESPACE_END
|
|
90
|
-
|
|
91
|
-
#include <thrust/detail/device_new.inl>
|
|
@@ -30,9 +30,9 @@
|
|
|
30
30
|
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
|
|
31
31
|
# pragma system_header
|
|
32
32
|
#endif // no system header
|
|
33
|
-
#include <thrust/detail/memory_wrapper.h>
|
|
34
33
|
#include <thrust/detail/vector_base.h>
|
|
35
34
|
|
|
35
|
+
#include <cuda/std/__cccl/memory_wrapper.h>
|
|
36
36
|
#include <cuda/std/initializer_list>
|
|
37
37
|
#include <cuda/std/utility>
|
|
38
38
|
|
|
@@ -316,6 +316,17 @@ struct iterator_traversal<::cuda::zip_iterator<Iterators...>>
|
|
|
316
316
|
using type = detail::minimum_type<iterator_traversal_t<Iterators>...>;
|
|
317
317
|
};
|
|
318
318
|
|
|
319
|
+
template <class Fn, class... Iterators>
|
|
320
|
+
struct iterator_system<::cuda::zip_transform_iterator<Fn, Iterators...>>
|
|
321
|
+
{
|
|
322
|
+
using type = detail::minimum_system_t<iterator_system_t<Iterators>...>;
|
|
323
|
+
};
|
|
324
|
+
template <class Fn, class... Iterators>
|
|
325
|
+
struct iterator_traversal<::cuda::zip_transform_iterator<Fn, Iterators...>>
|
|
326
|
+
{
|
|
327
|
+
using type = detail::minimum_type<iterator_traversal_t<Iterators>...>;
|
|
328
|
+
};
|
|
329
|
+
|
|
319
330
|
//! \} // end iterator_traits
|
|
320
331
|
|
|
321
332
|
THRUST_NAMESPACE_END
|
|
@@ -104,7 +104,8 @@ inline constexpr bool is_proxy_reference_v<tabulate_output_iterator_proxy<Binary
|
|
|
104
104
|
//!
|
|
105
105
|
//! \see make_tabulate_output_iterator
|
|
106
106
|
template <typename BinaryFunction, typename System = use_default, typename DifferenceT = ptrdiff_t>
|
|
107
|
-
class
|
|
107
|
+
class CCCL_DEPRECATED_BECAUSE("Use cuda::tabulate_output_iterator instead") tabulate_output_iterator
|
|
108
|
+
: public detail::make_tabulate_output_iterator_base<BinaryFunction, System, DifferenceT>
|
|
108
109
|
{
|
|
109
110
|
public:
|
|
110
111
|
//! \cond
|
|
@@ -138,7 +139,9 @@ private:
|
|
|
138
139
|
//! \param fun The \c BinaryFunction invoked whenever assigning to a dereferenced \p tabulate_output_iterator
|
|
139
140
|
//! \see tabulate_output_iterator
|
|
140
141
|
template <typename BinaryFunction>
|
|
141
|
-
|
|
142
|
+
CCCL_DEPRECATED_BECAUSE("Use cuda::make_tabulate_output_iterator instead")
|
|
143
|
+
_CCCL_HOST_DEVICE tabulate_output_iterator<BinaryFunction>
|
|
144
|
+
make_tabulate_output_iterator(BinaryFunction fun)
|
|
142
145
|
{
|
|
143
146
|
return tabulate_output_iterator<BinaryFunction>(fun);
|
|
144
147
|
}
|
|
@@ -34,7 +34,6 @@
|
|
|
34
34
|
#include <thrust/detail/config.h>
|
|
35
35
|
|
|
36
36
|
#include <thrust/binary_search.h>
|
|
37
|
-
#include <thrust/detail/algorithm_wrapper.h>
|
|
38
37
|
#include <thrust/detail/seq.h>
|
|
39
38
|
#include <thrust/find.h>
|
|
40
39
|
#include <thrust/host_vector.h>
|
|
@@ -44,6 +43,7 @@
|
|
|
44
43
|
|
|
45
44
|
#include <cuda/std/__algorithm/max.h>
|
|
46
45
|
#include <cuda/std/__algorithm/min.h>
|
|
46
|
+
#include <cuda/std/__cccl/algorithm_wrapper.h>
|
|
47
47
|
#include <cuda/std/cassert>
|
|
48
48
|
#include <cuda/std/cstdint>
|
|
49
49
|
|
|
@@ -32,12 +32,12 @@
|
|
|
32
32
|
# pragma system_header
|
|
33
33
|
#endif // no system header
|
|
34
34
|
|
|
35
|
-
#include <thrust/detail/algorithm_wrapper.h>
|
|
36
35
|
#include <thrust/host_vector.h>
|
|
37
36
|
#include <thrust/mr/allocator.h>
|
|
38
37
|
#include <thrust/mr/memory_resource.h>
|
|
39
38
|
#include <thrust/mr/pool_options.h>
|
|
40
39
|
|
|
40
|
+
#include <cuda/std/__cccl/algorithm_wrapper.h>
|
|
41
41
|
#include <cuda/std/cassert>
|
|
42
42
|
#include <cuda/std/cstdint>
|
|
43
43
|
|