cuda-cccl 0.3.0__cp310-cp310-manylinux_2_24_aarch64.whl → 0.3.2__cp310-cp310-manylinux_2_24_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cuda-cccl might be problematic. Click here for more details.
- cuda/cccl/cooperative/__init__.py +7 -1
- cuda/cccl/cooperative/experimental/__init__.py +21 -5
- cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_for.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +354 -572
- cuda/cccl/headers/include/cub/agent/agent_merge.cuh +23 -21
- cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +21 -3
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_rle.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_scan.cuh +5 -1
- cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +22 -5
- cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +6 -8
- cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +24 -14
- cuda/cccl/headers/include/cub/block/block_exchange.cuh +5 -0
- cuda/cccl/headers/include/cub/block/block_histogram.cuh +4 -0
- cuda/cccl/headers/include/cub/block/block_load.cuh +4 -0
- cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +4 -2
- cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +4 -2
- cuda/cccl/headers/include/cub/block/block_reduce.cuh +1 -0
- cuda/cccl/headers/include/cub/block/block_scan.cuh +12 -2
- cuda/cccl/headers/include/cub/block/block_store.cuh +3 -2
- cuda/cccl/headers/include/cub/detail/device_memory_resource.cuh +1 -0
- cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +34 -30
- cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +1 -1
- cuda/cccl/headers/include/cub/device/device_for.cuh +118 -40
- cuda/cccl/headers/include/cub/device/device_reduce.cuh +6 -7
- cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +170 -260
- cuda/cccl/headers/include/cub/device/device_transform.cuh +122 -91
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +6 -7
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +2 -11
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +12 -29
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +2 -7
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh +0 -1
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +2 -3
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +4 -5
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +0 -1
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_topk.cuh +3 -5
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +13 -5
- cuda/cccl/headers/include/cub/device/dispatch/kernels/for_each.cuh +72 -37
- cuda/cccl/headers/include/cub/device/dispatch/kernels/reduce.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/kernels/scan.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_reduce.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +22 -27
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +8 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +61 -70
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +10 -0
- cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +24 -17
- cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +3 -2
- cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +3 -2
- cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +2 -2
- cuda/cccl/headers/include/cub/warp/warp_load.cuh +6 -6
- cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +7 -2
- cuda/cccl/headers/include/cub/warp/warp_scan.cuh +7 -3
- cuda/cccl/headers/include/cub/warp/warp_store.cuh +1 -0
- cuda/cccl/headers/include/cuda/__algorithm/common.h +1 -1
- cuda/cccl/headers/include/cuda/__algorithm/copy.h +1 -1
- cuda/cccl/headers/include/cuda/__algorithm/fill.h +1 -1
- cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +19 -0
- cuda/cccl/headers/include/cuda/__cccl_config +1 -0
- cuda/cccl/headers/include/cuda/__cmath/fast_modulo_division.h +3 -74
- cuda/cccl/headers/include/cuda/__cmath/mul_hi.h +146 -0
- cuda/cccl/headers/include/cuda/__complex/get_real_imag.h +0 -4
- cuda/cccl/headers/include/cuda/__device/all_devices.h +46 -143
- cuda/cccl/headers/include/cuda/__device/arch_id.h +176 -0
- cuda/cccl/headers/include/cuda/__device/arch_traits.h +247 -323
- cuda/cccl/headers/include/cuda/__device/attributes.h +174 -123
- cuda/cccl/headers/include/cuda/__device/compute_capability.h +171 -0
- cuda/cccl/headers/include/cuda/__device/device_ref.h +27 -49
- cuda/cccl/headers/include/cuda/__device/physical_device.h +100 -96
- cuda/cccl/headers/include/cuda/__driver/driver_api.h +105 -3
- cuda/cccl/headers/include/cuda/__event/event.h +27 -26
- cuda/cccl/headers/include/cuda/__event/event_ref.h +5 -5
- cuda/cccl/headers/include/cuda/__event/timed_event.h +10 -7
- cuda/cccl/headers/include/cuda/__fwd/devices.h +44 -0
- cuda/cccl/headers/include/cuda/__fwd/zip_iterator.h +9 -0
- cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +46 -31
- cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +79 -47
- cuda/cccl/headers/include/cuda/__iterator/tabulate_output_iterator.h +59 -36
- cuda/cccl/headers/include/cuda/__iterator/transform_input_output_iterator.h +79 -49
- cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +74 -48
- cuda/cccl/headers/include/cuda/__iterator/transform_output_iterator.h +80 -55
- cuda/cccl/headers/include/cuda/__iterator/zip_common.h +148 -0
- cuda/cccl/headers/include/cuda/__iterator/zip_iterator.h +21 -137
- cuda/cccl/headers/include/cuda/__iterator/zip_transform_iterator.h +592 -0
- cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +127 -60
- cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +178 -3
- cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +38 -8
- cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +67 -1
- cuda/cccl/headers/include/cuda/__memory/ptr_in_range.h +93 -0
- cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +4 -4
- cuda/cccl/headers/include/cuda/__memory_resource/properties.h +44 -0
- cuda/cccl/headers/include/cuda/__memory_resource/resource.h +1 -1
- cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +4 -6
- cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2 -1
- cuda/cccl/headers/include/cuda/__runtime/ensure_current_context.h +9 -7
- cuda/cccl/headers/include/cuda/__stream/stream.h +8 -8
- cuda/cccl/headers/include/cuda/__stream/stream_ref.h +18 -16
- cuda/cccl/headers/include/cuda/__utility/basic_any.h +1 -1
- cuda/cccl/headers/include/cuda/__utility/in_range.h +65 -0
- cuda/cccl/headers/include/cuda/algorithm +1 -1
- cuda/cccl/headers/include/cuda/cmath +1 -0
- cuda/cccl/headers/include/cuda/devices +13 -0
- cuda/cccl/headers/include/cuda/iterator +1 -0
- cuda/cccl/headers/include/cuda/memory +1 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +2 -2
- cuda/cccl/headers/include/cuda/std/__algorithm/find.h +1 -1
- cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +2 -4
- cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +1 -1
- cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +7 -15
- cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +1 -1
- cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +1 -2
- cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +2 -2
- cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +1 -1
- cuda/cccl/headers/include/cuda/std/__bit/countl.h +8 -1
- cuda/cccl/headers/include/cuda/std/__bit/countr.h +2 -2
- cuda/cccl/headers/include/cuda/std/__bit/reference.h +11 -11
- cuda/cccl/headers/include/cuda/std/__cccl/algorithm_wrapper.h +36 -0
- cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +46 -49
- cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +6 -0
- cuda/cccl/headers/include/cuda/std/__cccl/host_std_lib.h +52 -0
- cuda/cccl/headers/include/cuda/std/__cccl/memory_wrapper.h +36 -0
- cuda/cccl/headers/include/cuda/std/__cccl/numeric_wrapper.h +36 -0
- cuda/cccl/headers/include/cuda/std/__chrono/duration.h +16 -16
- cuda/cccl/headers/include/cuda/std/__chrono/steady_clock.h +5 -5
- cuda/cccl/headers/include/cuda/std/__chrono/system_clock.h +5 -5
- cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +3 -2
- cuda/cccl/headers/include/cuda/std/__complex/complex.h +3 -2
- cuda/cccl/headers/include/cuda/std/__complex/literals.h +14 -34
- cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +2 -1
- cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +4 -3
- cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +2 -2
- cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +3 -2
- cuda/cccl/headers/include/cuda/std/__floating_point/fp.h +1 -1
- cuda/cccl/headers/include/cuda/std/__functional/bind.h +10 -13
- cuda/cccl/headers/include/cuda/std/__functional/function.h +5 -8
- cuda/cccl/headers/include/cuda/std/__functional/invoke.h +71 -335
- cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +1 -2
- cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +3 -3
- cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +0 -6
- cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +13 -0
- cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +13 -0
- cuda/cccl/headers/include/cuda/std/__fwd/complex.h +13 -4
- cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +23 -0
- cuda/cccl/headers/include/cuda/std/__fwd/pair.h +13 -0
- cuda/cccl/headers/include/cuda/std/__fwd/string.h +22 -0
- cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +14 -0
- cuda/cccl/headers/include/cuda/std/__internal/features.h +0 -5
- cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +21 -0
- cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +5 -5
- cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +7 -1
- cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +53 -39
- cuda/cccl/headers/include/cuda/std/__memory/allocator.h +3 -3
- cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +1 -3
- cuda/cccl/headers/include/cuda/std/__optional/optional_base.h +1 -0
- cuda/cccl/headers/include/cuda/std/__ranges/compressed_movable_box.h +892 -0
- cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +2 -2
- cuda/cccl/headers/include/cuda/std/__tuple_dir/make_tuple_types.h +23 -1
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +4 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like_ext.h +4 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +7 -5
- cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +1 -1
- cuda/cccl/headers/include/cuda/std/__utility/pair.h +0 -5
- cuda/cccl/headers/include/cuda/std/bitset +1 -1
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +15 -12
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +11 -9
- cuda/cccl/headers/include/cuda/std/inplace_vector +4 -4
- cuda/cccl/headers/include/cuda/std/numbers +5 -0
- cuda/cccl/headers/include/cuda/std/string_view +155 -13
- cuda/cccl/headers/include/cuda/std/version +1 -4
- cuda/cccl/headers/include/cuda/stream_ref +5 -0
- cuda/cccl/headers/include/cuda/utility +1 -0
- cuda/cccl/headers/include/nv/target +7 -2
- cuda/cccl/headers/include/thrust/allocate_unique.h +1 -1
- cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +309 -33
- cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +151 -4
- cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +60 -3
- cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +45 -3
- cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +31 -6
- cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +29 -16
- cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +41 -4
- cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +42 -4
- cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +3 -3
- cuda/cccl/headers/include/thrust/detail/integer_math.h +3 -20
- cuda/cccl/headers/include/thrust/detail/internal_functional.h +1 -1
- cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +1 -1
- cuda/cccl/headers/include/thrust/detail/temporary_array.h +1 -1
- cuda/cccl/headers/include/thrust/detail/type_traits.h +1 -1
- cuda/cccl/headers/include/thrust/device_delete.h +18 -3
- cuda/cccl/headers/include/thrust/device_free.h +16 -3
- cuda/cccl/headers/include/thrust/device_new.h +29 -8
- cuda/cccl/headers/include/thrust/host_vector.h +1 -1
- cuda/cccl/headers/include/thrust/iterator/iterator_traits.h +11 -0
- cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +5 -2
- cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +1 -1
- cuda/cccl/headers/include/thrust/mr/pool.h +1 -1
- cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +33 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +13 -115
- cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +8 -2
- cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +7 -7
- cuda/cccl/parallel/experimental/__init__.py +21 -74
- cuda/compute/__init__.py +79 -0
- cuda/{cccl/parallel/experimental → compute}/_bindings.pyi +43 -1
- cuda/{cccl/parallel/experimental → compute}/_bindings_impl.pyx +157 -8
- cuda/{cccl/parallel/experimental → compute}/algorithms/_histogram.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/algorithms/_merge_sort.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/algorithms/_radix_sort.py +3 -3
- cuda/{cccl/parallel/experimental → compute}/algorithms/_reduce.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/algorithms/_scan.py +112 -40
- cuda/{cccl/parallel/experimental → compute}/algorithms/_segmented_reduce.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/algorithms/_three_way_partition.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/algorithms/_transform.py +36 -15
- cuda/{cccl/parallel/experimental → compute}/algorithms/_unique_by_key.py +2 -2
- cuda/compute/cu12/_bindings_impl.cpython-310-aarch64-linux-gnu.so +0 -0
- cuda/{cccl/parallel/experimental → compute}/cu12/cccl/libcccl.c.parallel.so +0 -0
- cuda/compute/cu13/_bindings_impl.cpython-310-aarch64-linux-gnu.so +0 -0
- cuda/{cccl/parallel/experimental → compute}/cu13/cccl/libcccl.c.parallel.so +0 -0
- cuda/{cccl/parallel/experimental → compute}/iterators/__init__.py +2 -0
- cuda/{cccl/parallel/experimental → compute}/iterators/_factories.py +36 -8
- cuda/{cccl/parallel/experimental → compute}/iterators/_iterators.py +206 -1
- cuda/{cccl/parallel/experimental → compute}/numba_utils.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/struct.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/typing.py +2 -0
- cuda/coop/__init__.py +8 -0
- cuda/{cccl/cooperative/experimental → coop}/_nvrtc.py +3 -2
- cuda/{cccl/cooperative/experimental → coop}/_scan_op.py +3 -3
- cuda/{cccl/cooperative/experimental → coop}/_types.py +2 -2
- cuda/{cccl/cooperative/experimental → coop}/_typing.py +1 -1
- cuda/{cccl/cooperative/experimental → coop}/block/__init__.py +6 -6
- cuda/{cccl/cooperative/experimental → coop}/block/_block_exchange.py +4 -4
- cuda/{cccl/cooperative/experimental → coop}/block/_block_load_store.py +6 -6
- cuda/{cccl/cooperative/experimental → coop}/block/_block_merge_sort.py +4 -4
- cuda/{cccl/cooperative/experimental → coop}/block/_block_radix_sort.py +6 -6
- cuda/{cccl/cooperative/experimental → coop}/block/_block_reduce.py +6 -6
- cuda/{cccl/cooperative/experimental → coop}/block/_block_scan.py +7 -7
- cuda/coop/warp/__init__.py +9 -0
- cuda/{cccl/cooperative/experimental → coop}/warp/_warp_merge_sort.py +3 -3
- cuda/{cccl/cooperative/experimental → coop}/warp/_warp_reduce.py +6 -6
- cuda/{cccl/cooperative/experimental → coop}/warp/_warp_scan.py +4 -4
- {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/METADATA +1 -1
- {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/RECORD +275 -276
- cuda/cccl/cooperative/experimental/warp/__init__.py +0 -9
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_advance_iterators.cuh +0 -111
- cuda/cccl/headers/include/thrust/detail/algorithm_wrapper.h +0 -37
- cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.inl +0 -371
- cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.inl +0 -242
- cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.inl +0 -137
- cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.inl +0 -99
- cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.inl +0 -68
- cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.inl +0 -86
- cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.inl +0 -79
- cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.inl +0 -98
- cuda/cccl/headers/include/thrust/detail/device_delete.inl +0 -52
- cuda/cccl/headers/include/thrust/detail/device_free.inl +0 -47
- cuda/cccl/headers/include/thrust/detail/device_new.inl +0 -61
- cuda/cccl/headers/include/thrust/detail/memory_wrapper.h +0 -40
- cuda/cccl/headers/include/thrust/detail/numeric_wrapper.h +0 -37
- cuda/cccl/parallel/experimental/.gitignore +0 -4
- cuda/cccl/parallel/experimental/cu12/_bindings_impl.cpython-310-aarch64-linux-gnu.so +0 -0
- cuda/cccl/parallel/experimental/cu13/_bindings_impl.cpython-310-aarch64-linux-gnu.so +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_bindings.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_caching.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_cccl_interop.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_utils/__init__.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_utils/protocols.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_utils/temp_storage_buffer.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/algorithms/__init__.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/cccl/.gitkeep +0 -0
- /cuda/{cccl/parallel/experimental → compute}/iterators/_zip_iterator.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/op.py +0 -0
- /cuda/{cccl/cooperative/experimental → coop}/_caching.py +0 -0
- /cuda/{cccl/cooperative/experimental → coop}/_common.py +0 -0
- {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/WHEEL +0 -0
- {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -25,6 +25,7 @@
|
|
|
25
25
|
#if _LIBCUDACXX_HAS_SPACESHIP_OPERATOR()
|
|
26
26
|
# include <cuda/std/__compare/three_way_comparable.h>
|
|
27
27
|
#endif // _LIBCUDACXX_HAS_SPACESHIP_OPERATOR()
|
|
28
|
+
#include <cuda/__iterator/zip_common.h>
|
|
28
29
|
#include <cuda/std/__concepts/convertible_to.h>
|
|
29
30
|
#include <cuda/std/__concepts/equality_comparable.h>
|
|
30
31
|
#include <cuda/std/__functional/invoke.h>
|
|
@@ -41,7 +42,6 @@
|
|
|
41
42
|
#include <cuda/std/__utility/forward.h>
|
|
42
43
|
#include <cuda/std/__utility/integer_sequence.h>
|
|
43
44
|
#include <cuda/std/__utility/move.h>
|
|
44
|
-
#include <cuda/std/__utility/pair.h>
|
|
45
45
|
#include <cuda/std/tuple>
|
|
46
46
|
|
|
47
47
|
#include <cuda/std/__cccl/prologue.h>
|
|
@@ -51,46 +51,6 @@ _CCCL_BEGIN_NAMESPACE_CUDA
|
|
|
51
51
|
//! @addtogroup iterators
|
|
52
52
|
//! @{
|
|
53
53
|
|
|
54
|
-
template <class... _Iterators>
|
|
55
|
-
struct __tuple_or_pair_impl
|
|
56
|
-
{
|
|
57
|
-
using type = ::cuda::std::tuple<_Iterators...>;
|
|
58
|
-
};
|
|
59
|
-
|
|
60
|
-
template <class _Iterator1, class _Iterator2>
|
|
61
|
-
struct __tuple_or_pair_impl<_Iterator1, _Iterator2>
|
|
62
|
-
{
|
|
63
|
-
using type = ::cuda::std::pair<_Iterator1, _Iterator2>;
|
|
64
|
-
};
|
|
65
|
-
|
|
66
|
-
template <class... _Iterators>
|
|
67
|
-
using __tuple_or_pair = typename __tuple_or_pair_impl<_Iterators...>::type;
|
|
68
|
-
|
|
69
|
-
template <class... _Iterators>
|
|
70
|
-
struct __zip_iter_constraints
|
|
71
|
-
{
|
|
72
|
-
static constexpr bool __all_forward = (::cuda::std::__has_forward_traversal<_Iterators> && ...);
|
|
73
|
-
static constexpr bool __all_bidirectional = (::cuda::std::__has_bidirectional_traversal<_Iterators> && ...);
|
|
74
|
-
static constexpr bool __all_random_access = (::cuda::std::__has_random_access_traversal<_Iterators> && ...);
|
|
75
|
-
|
|
76
|
-
static constexpr bool __all_equality_comparable = (::cuda::std::equality_comparable<_Iterators> && ...);
|
|
77
|
-
|
|
78
|
-
#if _LIBCUDACXX_HAS_SPACESHIP_OPERATOR()
|
|
79
|
-
static constexpr bool __all_three_way_comparable = (::cuda::std::three_way_comparable<_Iterators> && ...);
|
|
80
|
-
#endif // _LIBCUDACXX_HAS_SPACESHIP_OPERATOR()
|
|
81
|
-
|
|
82
|
-
// Our C++17 iterators sometimes do not satisfy `sized_sentinel_for` but they should all be random_access
|
|
83
|
-
static constexpr bool __all_sized_sentinel =
|
|
84
|
-
(::cuda::std::sized_sentinel_for<_Iterators, _Iterators> && ...) || __all_random_access;
|
|
85
|
-
static constexpr bool __all_nothrow_iter_movable =
|
|
86
|
-
(noexcept(::cuda::std::ranges::iter_move(::cuda::std::declval<const _Iterators&>())) && ...)
|
|
87
|
-
&& (::cuda::std::is_nothrow_move_constructible_v<::cuda::std::iter_rvalue_reference_t<_Iterators>> && ...);
|
|
88
|
-
|
|
89
|
-
static constexpr bool __all_indirectly_swappable = (::cuda::std::indirectly_swappable<_Iterators> && ...);
|
|
90
|
-
|
|
91
|
-
static constexpr bool __all_noexcept_swappable = (::cuda::std::__noexcept_swappable<_Iterators> && ...);
|
|
92
|
-
};
|
|
93
|
-
|
|
94
54
|
struct __zv_iter_category_base_none
|
|
95
55
|
{};
|
|
96
56
|
|
|
@@ -105,77 +65,8 @@ using __zv_iter_category_base =
|
|
|
105
65
|
__zv_iter_category_base_tag,
|
|
106
66
|
__zv_iter_category_base_none>;
|
|
107
67
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
{
|
|
111
|
-
using _Constraints = __zip_iter_constraints<_Iterators...>;
|
|
112
|
-
if constexpr (_Constraints::__all_random_access)
|
|
113
|
-
{
|
|
114
|
-
return ::cuda::std::random_access_iterator_tag();
|
|
115
|
-
}
|
|
116
|
-
else if constexpr (_Constraints::__all_bidirectional)
|
|
117
|
-
{
|
|
118
|
-
return ::cuda::std::bidirectional_iterator_tag();
|
|
119
|
-
}
|
|
120
|
-
else if constexpr (_Constraints::__all_forward)
|
|
121
|
-
{
|
|
122
|
-
return ::cuda::std::forward_iterator_tag();
|
|
123
|
-
}
|
|
124
|
-
else
|
|
125
|
-
{
|
|
126
|
-
return ::cuda::std::input_iterator_tag();
|
|
127
|
-
}
|
|
128
|
-
_CCCL_UNREACHABLE();
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
//! @note Not static functions because nvc++ sometimes has issues with class static functions in device code
|
|
132
|
-
struct __zip_op_star
|
|
133
|
-
{
|
|
134
|
-
template <class... _Iterators>
|
|
135
|
-
using reference = __tuple_or_pair<::cuda::std::iter_reference_t<_Iterators>...>;
|
|
136
|
-
|
|
137
|
-
_CCCL_EXEC_CHECK_DISABLE
|
|
138
|
-
template <class... _Iterators>
|
|
139
|
-
[[nodiscard]] _CCCL_API constexpr reference<_Iterators...> operator()(const _Iterators&... __iters) const
|
|
140
|
-
noexcept(noexcept(reference<_Iterators...>{*__iters...}))
|
|
141
|
-
{
|
|
142
|
-
return reference<_Iterators...>{*__iters...};
|
|
143
|
-
}
|
|
144
|
-
};
|
|
145
|
-
|
|
146
|
-
struct __zip_op_increment
|
|
147
|
-
{
|
|
148
|
-
_CCCL_EXEC_CHECK_DISABLE
|
|
149
|
-
template <class... _Iterators>
|
|
150
|
-
_CCCL_API constexpr void operator()(_Iterators&... __iters) const noexcept(noexcept(((void) ++__iters, ...)))
|
|
151
|
-
{
|
|
152
|
-
((void) ++__iters, ...);
|
|
153
|
-
}
|
|
154
|
-
};
|
|
155
|
-
|
|
156
|
-
struct __zip_op_decrement
|
|
157
|
-
{
|
|
158
|
-
_CCCL_EXEC_CHECK_DISABLE
|
|
159
|
-
template <class... _Iterators>
|
|
160
|
-
_CCCL_API constexpr void operator()(_Iterators&... __iters) const noexcept(noexcept(((void) --__iters, ...)))
|
|
161
|
-
{
|
|
162
|
-
((void) --__iters, ...);
|
|
163
|
-
}
|
|
164
|
-
};
|
|
165
|
-
|
|
166
|
-
struct __zip_iter_move
|
|
167
|
-
{
|
|
168
|
-
template <class... _Iterators>
|
|
169
|
-
using __iter_move_ret = __tuple_or_pair<::cuda::std::iter_rvalue_reference_t<_Iterators>...>;
|
|
170
|
-
|
|
171
|
-
_CCCL_EXEC_CHECK_DISABLE
|
|
172
|
-
template <class... _Iterators>
|
|
173
|
-
[[nodiscard]] _CCCL_API constexpr __iter_move_ret<_Iterators...> operator()(const _Iterators&... __iters) const
|
|
174
|
-
noexcept(noexcept(__iter_move_ret<_Iterators...>{::cuda::std::ranges::iter_move(__iters)...}))
|
|
175
|
-
{
|
|
176
|
-
return __iter_move_ret<_Iterators...>{::cuda::std::ranges::iter_move(__iters)...};
|
|
177
|
-
}
|
|
178
|
-
};
|
|
68
|
+
//! @addtogroup iterators
|
|
69
|
+
//! @{
|
|
179
70
|
|
|
180
71
|
//! @brief @c zip_iterator is an iterator which represents a @c tuple of iterators. This iterator is useful for creating
|
|
181
72
|
//! a virtual array of structures while achieving the same performance and bandwidth as the structure of arrays idiom.
|
|
@@ -231,7 +122,7 @@ struct __zip_iter_move
|
|
|
231
122
|
template <class... _Iterators>
|
|
232
123
|
class zip_iterator : public __zv_iter_category_base<_Iterators...>
|
|
233
124
|
{
|
|
234
|
-
|
|
125
|
+
::cuda::std::tuple<_Iterators...> __current_;
|
|
235
126
|
|
|
236
127
|
template <class...>
|
|
237
128
|
friend class zip_iterator;
|
|
@@ -239,8 +130,8 @@ class zip_iterator : public __zv_iter_category_base<_Iterators...>
|
|
|
239
130
|
template <class _Fn>
|
|
240
131
|
_CCCL_API static constexpr auto
|
|
241
132
|
__zip_apply(const _Fn& __fun,
|
|
242
|
-
const
|
|
243
|
-
const
|
|
133
|
+
const ::cuda::std::tuple<_Iterators...>& __tuple1,
|
|
134
|
+
const ::cuda::std::tuple<_Iterators...>& __tuple2) //
|
|
244
135
|
noexcept(noexcept(__fun(__tuple1, __tuple2, ::cuda::std::make_index_sequence<sizeof...(_Iterators)>())))
|
|
245
136
|
{
|
|
246
137
|
return __fun(__tuple1, __tuple2, ::cuda::std::make_index_sequence<sizeof...(_Iterators)>());
|
|
@@ -251,8 +142,8 @@ public:
|
|
|
251
142
|
_CCCL_HIDE_FROM_ABI zip_iterator() = default;
|
|
252
143
|
|
|
253
144
|
//! @brief Constructs a @c zip_iterator from a tuple of iterators
|
|
254
|
-
//! @param __iters A tuple
|
|
255
|
-
_CCCL_API constexpr explicit zip_iterator(
|
|
145
|
+
//! @param __iters A tuple of iterators
|
|
146
|
+
_CCCL_API constexpr explicit zip_iterator(::cuda::std::tuple<_Iterators...> __iters)
|
|
256
147
|
: __current_(::cuda::std::move(__iters))
|
|
257
148
|
{}
|
|
258
149
|
|
|
@@ -270,9 +161,9 @@ public:
|
|
|
270
161
|
: __current_(::cuda::std::move(__iters)...)
|
|
271
162
|
{}
|
|
272
163
|
|
|
273
|
-
using iterator_concept = decltype(
|
|
274
|
-
using value_type =
|
|
275
|
-
using reference =
|
|
164
|
+
using iterator_concept = decltype(__get_zip_iterator_concept<_Iterators...>());
|
|
165
|
+
using value_type = ::cuda::std::tuple<::cuda::std::iter_value_t<_Iterators>...>;
|
|
166
|
+
using reference = ::cuda::std::tuple<::cuda::std::iter_reference_t<_Iterators>...>;
|
|
276
167
|
using difference_type = ::cuda::std::common_type_t<::cuda::std::iter_difference_t<_Iterators>...>;
|
|
277
168
|
|
|
278
169
|
// Those are technically not to spec, but pre-ranges iterator_traits do not work properly with iterators that do not
|
|
@@ -468,8 +359,8 @@ public:
|
|
|
468
359
|
_CCCL_EXEC_CHECK_DISABLE
|
|
469
360
|
template <size_t _Zero, size_t... _Indices>
|
|
470
361
|
[[nodiscard]] _CCCL_API constexpr difference_type
|
|
471
|
-
operator()(const
|
|
472
|
-
const
|
|
362
|
+
operator()(const ::cuda::std::tuple<_Iterators...>& __iters1,
|
|
363
|
+
const ::cuda::std::tuple<_Iterators...>& __iters2,
|
|
473
364
|
::cuda::std::index_sequence<_Zero, _Indices...>) const //
|
|
474
365
|
noexcept(noexcept(((::cuda::std::get<_Indices>(__iters1) - ::cuda::std::get<_Indices>(__iters2)) && ...)))
|
|
475
366
|
{
|
|
@@ -499,8 +390,8 @@ public:
|
|
|
499
390
|
{
|
|
500
391
|
_CCCL_EXEC_CHECK_DISABLE
|
|
501
392
|
template <size_t... _Indices>
|
|
502
|
-
_CCCL_API constexpr bool operator()(const
|
|
503
|
-
const
|
|
393
|
+
_CCCL_API constexpr bool operator()(const ::cuda::std::tuple<_Iterators...>& __iters1,
|
|
394
|
+
const ::cuda::std::tuple<_Iterators...>& __iters2,
|
|
504
395
|
::cuda::std::index_sequence<_Indices...>) const
|
|
505
396
|
noexcept(noexcept(((::cuda::std::get<_Indices>(__iters1) == ::cuda::std::get<_Indices>(__iters2)) || ...)))
|
|
506
397
|
{
|
|
@@ -595,17 +486,13 @@ public:
|
|
|
595
486
|
return ::cuda::std::apply(__zip_iter_move{}, __iter.__current_);
|
|
596
487
|
}
|
|
597
488
|
|
|
598
|
-
template <class... _OtherIterators>
|
|
599
|
-
static constexpr bool __all_nothrow_swappable =
|
|
600
|
-
(::cuda::std::__noexcept_swappable<_OtherIterators, _OtherIterators> && ...);
|
|
601
|
-
|
|
602
489
|
struct __zip_op_iter_swap
|
|
603
490
|
{
|
|
604
491
|
template <size_t... _Indices>
|
|
605
|
-
_CCCL_API constexpr void
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
492
|
+
_CCCL_API constexpr void operator()(const ::cuda::std::tuple<_Iterators...>& __iters1,
|
|
493
|
+
const ::cuda::std::tuple<_Iterators...>& __iters2,
|
|
494
|
+
::cuda::std::index_sequence<_Indices...>) const
|
|
495
|
+
noexcept(__zip_iter_constraints<_Iterators...>::__all_noexcept_swappable)
|
|
609
496
|
{
|
|
610
497
|
(::cuda::std::ranges::iter_swap(::cuda::std::get<_Indices>(__iters1), ::cuda::std::get<_Indices>(__iters2)), ...);
|
|
611
498
|
}
|
|
@@ -620,12 +507,12 @@ public:
|
|
|
620
507
|
return __zip_apply(__zip_op_iter_swap{}, __lhs.__current_, __rhs.__current_);
|
|
621
508
|
}
|
|
622
509
|
|
|
623
|
-
[[nodiscard]] _CCCL_API constexpr
|
|
510
|
+
[[nodiscard]] _CCCL_API constexpr ::cuda::std::tuple<_Iterators...>& __iterators() noexcept
|
|
624
511
|
{
|
|
625
512
|
return __current_;
|
|
626
513
|
}
|
|
627
514
|
|
|
628
|
-
[[nodiscard]] _CCCL_API constexpr const
|
|
515
|
+
[[nodiscard]] _CCCL_API constexpr const ::cuda::std::tuple<_Iterators...>& __iterators() const noexcept
|
|
629
516
|
{
|
|
630
517
|
return __current_;
|
|
631
518
|
}
|
|
@@ -634,9 +521,6 @@ public:
|
|
|
634
521
|
template <class... _Iterators>
|
|
635
522
|
_CCCL_HOST_DEVICE zip_iterator(::cuda::std::tuple<_Iterators...>) -> zip_iterator<_Iterators...>;
|
|
636
523
|
|
|
637
|
-
template <class _Iterator1, class _Iterator2>
|
|
638
|
-
_CCCL_HOST_DEVICE zip_iterator(::cuda::std::pair<_Iterator1, _Iterator2>) -> zip_iterator<_Iterator1, _Iterator2>;
|
|
639
|
-
|
|
640
524
|
template <class... _Iterators>
|
|
641
525
|
_CCCL_HOST_DEVICE zip_iterator(_Iterators...) -> zip_iterator<_Iterators...>;
|
|
642
526
|
|