cuda-cccl 0.3.0__cp313-cp313-manylinux_2_24_aarch64.whl → 0.3.2__cp313-cp313-manylinux_2_24_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cuda-cccl might be problematic. Click here for more details.
- cuda/cccl/cooperative/__init__.py +7 -1
- cuda/cccl/cooperative/experimental/__init__.py +21 -5
- cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_for.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +354 -572
- cuda/cccl/headers/include/cub/agent/agent_merge.cuh +23 -21
- cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +21 -3
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_rle.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_scan.cuh +5 -1
- cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +22 -5
- cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +6 -8
- cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +24 -14
- cuda/cccl/headers/include/cub/block/block_exchange.cuh +5 -0
- cuda/cccl/headers/include/cub/block/block_histogram.cuh +4 -0
- cuda/cccl/headers/include/cub/block/block_load.cuh +4 -0
- cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +4 -2
- cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +4 -2
- cuda/cccl/headers/include/cub/block/block_reduce.cuh +1 -0
- cuda/cccl/headers/include/cub/block/block_scan.cuh +12 -2
- cuda/cccl/headers/include/cub/block/block_store.cuh +3 -2
- cuda/cccl/headers/include/cub/detail/device_memory_resource.cuh +1 -0
- cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +34 -30
- cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +1 -1
- cuda/cccl/headers/include/cub/device/device_for.cuh +118 -40
- cuda/cccl/headers/include/cub/device/device_reduce.cuh +6 -7
- cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +170 -260
- cuda/cccl/headers/include/cub/device/device_transform.cuh +122 -91
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +6 -7
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +2 -11
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +12 -29
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +2 -7
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh +0 -1
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +2 -3
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +4 -5
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +0 -1
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_topk.cuh +3 -5
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +13 -5
- cuda/cccl/headers/include/cub/device/dispatch/kernels/for_each.cuh +72 -37
- cuda/cccl/headers/include/cub/device/dispatch/kernels/reduce.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/kernels/scan.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_reduce.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +22 -27
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +8 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +61 -70
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +10 -0
- cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +24 -17
- cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +3 -2
- cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +3 -2
- cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +2 -2
- cuda/cccl/headers/include/cub/warp/warp_load.cuh +6 -6
- cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +7 -2
- cuda/cccl/headers/include/cub/warp/warp_scan.cuh +7 -3
- cuda/cccl/headers/include/cub/warp/warp_store.cuh +1 -0
- cuda/cccl/headers/include/cuda/__algorithm/common.h +1 -1
- cuda/cccl/headers/include/cuda/__algorithm/copy.h +1 -1
- cuda/cccl/headers/include/cuda/__algorithm/fill.h +1 -1
- cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +19 -0
- cuda/cccl/headers/include/cuda/__cccl_config +1 -0
- cuda/cccl/headers/include/cuda/__cmath/fast_modulo_division.h +3 -74
- cuda/cccl/headers/include/cuda/__cmath/mul_hi.h +146 -0
- cuda/cccl/headers/include/cuda/__complex/get_real_imag.h +0 -4
- cuda/cccl/headers/include/cuda/__device/all_devices.h +46 -143
- cuda/cccl/headers/include/cuda/__device/arch_id.h +176 -0
- cuda/cccl/headers/include/cuda/__device/arch_traits.h +247 -323
- cuda/cccl/headers/include/cuda/__device/attributes.h +174 -123
- cuda/cccl/headers/include/cuda/__device/compute_capability.h +171 -0
- cuda/cccl/headers/include/cuda/__device/device_ref.h +27 -49
- cuda/cccl/headers/include/cuda/__device/physical_device.h +100 -96
- cuda/cccl/headers/include/cuda/__driver/driver_api.h +105 -3
- cuda/cccl/headers/include/cuda/__event/event.h +27 -26
- cuda/cccl/headers/include/cuda/__event/event_ref.h +5 -5
- cuda/cccl/headers/include/cuda/__event/timed_event.h +10 -7
- cuda/cccl/headers/include/cuda/__fwd/devices.h +44 -0
- cuda/cccl/headers/include/cuda/__fwd/zip_iterator.h +9 -0
- cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +46 -31
- cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +79 -47
- cuda/cccl/headers/include/cuda/__iterator/tabulate_output_iterator.h +59 -36
- cuda/cccl/headers/include/cuda/__iterator/transform_input_output_iterator.h +79 -49
- cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +74 -48
- cuda/cccl/headers/include/cuda/__iterator/transform_output_iterator.h +80 -55
- cuda/cccl/headers/include/cuda/__iterator/zip_common.h +148 -0
- cuda/cccl/headers/include/cuda/__iterator/zip_iterator.h +21 -137
- cuda/cccl/headers/include/cuda/__iterator/zip_transform_iterator.h +592 -0
- cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +127 -60
- cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +178 -3
- cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +38 -8
- cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +67 -1
- cuda/cccl/headers/include/cuda/__memory/ptr_in_range.h +93 -0
- cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +4 -4
- cuda/cccl/headers/include/cuda/__memory_resource/properties.h +44 -0
- cuda/cccl/headers/include/cuda/__memory_resource/resource.h +1 -1
- cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +4 -6
- cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2 -1
- cuda/cccl/headers/include/cuda/__runtime/ensure_current_context.h +9 -7
- cuda/cccl/headers/include/cuda/__stream/stream.h +8 -8
- cuda/cccl/headers/include/cuda/__stream/stream_ref.h +18 -16
- cuda/cccl/headers/include/cuda/__utility/basic_any.h +1 -1
- cuda/cccl/headers/include/cuda/__utility/in_range.h +65 -0
- cuda/cccl/headers/include/cuda/algorithm +1 -1
- cuda/cccl/headers/include/cuda/cmath +1 -0
- cuda/cccl/headers/include/cuda/devices +13 -0
- cuda/cccl/headers/include/cuda/iterator +1 -0
- cuda/cccl/headers/include/cuda/memory +1 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +2 -2
- cuda/cccl/headers/include/cuda/std/__algorithm/find.h +1 -1
- cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +2 -4
- cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +1 -1
- cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +7 -15
- cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +1 -1
- cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +1 -2
- cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +2 -2
- cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +1 -1
- cuda/cccl/headers/include/cuda/std/__bit/countl.h +8 -1
- cuda/cccl/headers/include/cuda/std/__bit/countr.h +2 -2
- cuda/cccl/headers/include/cuda/std/__bit/reference.h +11 -11
- cuda/cccl/headers/include/cuda/std/__cccl/algorithm_wrapper.h +36 -0
- cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +46 -49
- cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +6 -0
- cuda/cccl/headers/include/cuda/std/__cccl/host_std_lib.h +52 -0
- cuda/cccl/headers/include/cuda/std/__cccl/memory_wrapper.h +36 -0
- cuda/cccl/headers/include/cuda/std/__cccl/numeric_wrapper.h +36 -0
- cuda/cccl/headers/include/cuda/std/__chrono/duration.h +16 -16
- cuda/cccl/headers/include/cuda/std/__chrono/steady_clock.h +5 -5
- cuda/cccl/headers/include/cuda/std/__chrono/system_clock.h +5 -5
- cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +3 -2
- cuda/cccl/headers/include/cuda/std/__complex/complex.h +3 -2
- cuda/cccl/headers/include/cuda/std/__complex/literals.h +14 -34
- cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +2 -1
- cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +4 -3
- cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +2 -2
- cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +3 -2
- cuda/cccl/headers/include/cuda/std/__floating_point/fp.h +1 -1
- cuda/cccl/headers/include/cuda/std/__functional/bind.h +10 -13
- cuda/cccl/headers/include/cuda/std/__functional/function.h +5 -8
- cuda/cccl/headers/include/cuda/std/__functional/invoke.h +71 -335
- cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +1 -2
- cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +3 -3
- cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +0 -6
- cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +13 -0
- cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +13 -0
- cuda/cccl/headers/include/cuda/std/__fwd/complex.h +13 -4
- cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +23 -0
- cuda/cccl/headers/include/cuda/std/__fwd/pair.h +13 -0
- cuda/cccl/headers/include/cuda/std/__fwd/string.h +22 -0
- cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +14 -0
- cuda/cccl/headers/include/cuda/std/__internal/features.h +0 -5
- cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +21 -0
- cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +5 -5
- cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +7 -1
- cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +53 -39
- cuda/cccl/headers/include/cuda/std/__memory/allocator.h +3 -3
- cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +1 -3
- cuda/cccl/headers/include/cuda/std/__optional/optional_base.h +1 -0
- cuda/cccl/headers/include/cuda/std/__ranges/compressed_movable_box.h +892 -0
- cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +2 -2
- cuda/cccl/headers/include/cuda/std/__tuple_dir/make_tuple_types.h +23 -1
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +4 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like_ext.h +4 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +7 -5
- cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +1 -1
- cuda/cccl/headers/include/cuda/std/__utility/pair.h +0 -5
- cuda/cccl/headers/include/cuda/std/bitset +1 -1
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +15 -12
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +11 -9
- cuda/cccl/headers/include/cuda/std/inplace_vector +4 -4
- cuda/cccl/headers/include/cuda/std/numbers +5 -0
- cuda/cccl/headers/include/cuda/std/string_view +155 -13
- cuda/cccl/headers/include/cuda/std/version +1 -4
- cuda/cccl/headers/include/cuda/stream_ref +5 -0
- cuda/cccl/headers/include/cuda/utility +1 -0
- cuda/cccl/headers/include/nv/target +7 -2
- cuda/cccl/headers/include/thrust/allocate_unique.h +1 -1
- cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +309 -33
- cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +151 -4
- cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +60 -3
- cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +45 -3
- cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +31 -6
- cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +29 -16
- cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +41 -4
- cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +42 -4
- cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +3 -3
- cuda/cccl/headers/include/thrust/detail/integer_math.h +3 -20
- cuda/cccl/headers/include/thrust/detail/internal_functional.h +1 -1
- cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +1 -1
- cuda/cccl/headers/include/thrust/detail/temporary_array.h +1 -1
- cuda/cccl/headers/include/thrust/detail/type_traits.h +1 -1
- cuda/cccl/headers/include/thrust/device_delete.h +18 -3
- cuda/cccl/headers/include/thrust/device_free.h +16 -3
- cuda/cccl/headers/include/thrust/device_new.h +29 -8
- cuda/cccl/headers/include/thrust/host_vector.h +1 -1
- cuda/cccl/headers/include/thrust/iterator/iterator_traits.h +11 -0
- cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +5 -2
- cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +1 -1
- cuda/cccl/headers/include/thrust/mr/pool.h +1 -1
- cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +33 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +13 -115
- cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +8 -2
- cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +7 -7
- cuda/cccl/parallel/experimental/__init__.py +21 -74
- cuda/compute/__init__.py +79 -0
- cuda/{cccl/parallel/experimental → compute}/_bindings.pyi +43 -1
- cuda/{cccl/parallel/experimental → compute}/_bindings_impl.pyx +157 -8
- cuda/{cccl/parallel/experimental → compute}/algorithms/_histogram.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/algorithms/_merge_sort.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/algorithms/_radix_sort.py +3 -3
- cuda/{cccl/parallel/experimental → compute}/algorithms/_reduce.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/algorithms/_scan.py +112 -40
- cuda/{cccl/parallel/experimental → compute}/algorithms/_segmented_reduce.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/algorithms/_three_way_partition.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/algorithms/_transform.py +36 -15
- cuda/{cccl/parallel/experimental → compute}/algorithms/_unique_by_key.py +2 -2
- cuda/compute/cu12/_bindings_impl.cpython-313-aarch64-linux-gnu.so +0 -0
- cuda/{cccl/parallel/experimental → compute}/cu12/cccl/libcccl.c.parallel.so +0 -0
- cuda/compute/cu13/_bindings_impl.cpython-313-aarch64-linux-gnu.so +0 -0
- cuda/{cccl/parallel/experimental → compute}/cu13/cccl/libcccl.c.parallel.so +0 -0
- cuda/{cccl/parallel/experimental → compute}/iterators/__init__.py +2 -0
- cuda/{cccl/parallel/experimental → compute}/iterators/_factories.py +36 -8
- cuda/{cccl/parallel/experimental → compute}/iterators/_iterators.py +206 -1
- cuda/{cccl/parallel/experimental → compute}/numba_utils.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/struct.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/typing.py +2 -0
- cuda/coop/__init__.py +8 -0
- cuda/{cccl/cooperative/experimental → coop}/_nvrtc.py +3 -2
- cuda/{cccl/cooperative/experimental → coop}/_scan_op.py +3 -3
- cuda/{cccl/cooperative/experimental → coop}/_types.py +2 -2
- cuda/{cccl/cooperative/experimental → coop}/_typing.py +1 -1
- cuda/{cccl/cooperative/experimental → coop}/block/__init__.py +6 -6
- cuda/{cccl/cooperative/experimental → coop}/block/_block_exchange.py +4 -4
- cuda/{cccl/cooperative/experimental → coop}/block/_block_load_store.py +6 -6
- cuda/{cccl/cooperative/experimental → coop}/block/_block_merge_sort.py +4 -4
- cuda/{cccl/cooperative/experimental → coop}/block/_block_radix_sort.py +6 -6
- cuda/{cccl/cooperative/experimental → coop}/block/_block_reduce.py +6 -6
- cuda/{cccl/cooperative/experimental → coop}/block/_block_scan.py +7 -7
- cuda/coop/warp/__init__.py +9 -0
- cuda/{cccl/cooperative/experimental → coop}/warp/_warp_merge_sort.py +3 -3
- cuda/{cccl/cooperative/experimental → coop}/warp/_warp_reduce.py +6 -6
- cuda/{cccl/cooperative/experimental → coop}/warp/_warp_scan.py +4 -4
- {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/METADATA +1 -1
- {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/RECORD +275 -276
- cuda/cccl/cooperative/experimental/warp/__init__.py +0 -9
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_advance_iterators.cuh +0 -111
- cuda/cccl/headers/include/thrust/detail/algorithm_wrapper.h +0 -37
- cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.inl +0 -371
- cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.inl +0 -242
- cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.inl +0 -137
- cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.inl +0 -99
- cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.inl +0 -68
- cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.inl +0 -86
- cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.inl +0 -79
- cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.inl +0 -98
- cuda/cccl/headers/include/thrust/detail/device_delete.inl +0 -52
- cuda/cccl/headers/include/thrust/detail/device_free.inl +0 -47
- cuda/cccl/headers/include/thrust/detail/device_new.inl +0 -61
- cuda/cccl/headers/include/thrust/detail/memory_wrapper.h +0 -40
- cuda/cccl/headers/include/thrust/detail/numeric_wrapper.h +0 -37
- cuda/cccl/parallel/experimental/.gitignore +0 -4
- cuda/cccl/parallel/experimental/cu12/_bindings_impl.cpython-313-aarch64-linux-gnu.so +0 -0
- cuda/cccl/parallel/experimental/cu13/_bindings_impl.cpython-313-aarch64-linux-gnu.so +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_bindings.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_caching.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_cccl_interop.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_utils/__init__.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_utils/protocols.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_utils/temp_storage_buffer.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/algorithms/__init__.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/cccl/.gitkeep +0 -0
- /cuda/{cccl/parallel/experimental → compute}/iterators/_zip_iterator.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/op.py +0 -0
- /cuda/{cccl/cooperative/experimental → coop}/_caching.py +0 -0
- /cuda/{cccl/cooperative/experimental → coop}/_common.py +0 -0
- {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/WHEEL +0 -0
- {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -39,11 +39,11 @@ namespace chrono
|
|
|
39
39
|
class _CCCL_TYPE_VISIBILITY_DEFAULT system_clock
|
|
40
40
|
{
|
|
41
41
|
public:
|
|
42
|
-
using duration
|
|
43
|
-
using rep
|
|
44
|
-
using period
|
|
45
|
-
using time_point
|
|
46
|
-
static constexpr
|
|
42
|
+
using duration = ::cuda::std::chrono::nanoseconds;
|
|
43
|
+
using rep = duration::rep;
|
|
44
|
+
using period = duration::period;
|
|
45
|
+
using time_point = ::cuda::std::chrono::time_point<system_clock>;
|
|
46
|
+
static constexpr bool is_steady = false;
|
|
47
47
|
|
|
48
48
|
[[nodiscard]] _CCCL_API inline static time_point now() noexcept
|
|
49
49
|
{
|
|
@@ -27,9 +27,10 @@
|
|
|
27
27
|
#include <cuda/std/__type_traits/is_floating_point.h>
|
|
28
28
|
#include <cuda/std/__type_traits/is_integral.h>
|
|
29
29
|
|
|
30
|
-
|
|
30
|
+
// MSVC and clang cuda need the host side functions included
|
|
31
|
+
#if _CCCL_HOST_COMPILATION() || _CCCL_CUDA_COMPILER(CLANG)
|
|
31
32
|
# include <math.h>
|
|
32
|
-
#endif //
|
|
33
|
+
#endif // _CCCL_HOST_COMPILATION() || _CCCL_CUDA_COMPILER(CLANG)
|
|
33
34
|
|
|
34
35
|
#include <cuda/std/__cccl/prologue.h>
|
|
35
36
|
|
|
@@ -23,6 +23,7 @@
|
|
|
23
23
|
|
|
24
24
|
#include <cuda/std/__complex/vector_support.h>
|
|
25
25
|
#include <cuda/std/__concepts/concept_macros.h>
|
|
26
|
+
#include <cuda/std/__fwd/complex.h>
|
|
26
27
|
#include <cuda/std/__fwd/get.h>
|
|
27
28
|
#include <cuda/std/__tuple_dir/tuple_element.h>
|
|
28
29
|
#include <cuda/std/__tuple_dir/tuple_size.h>
|
|
@@ -36,9 +37,9 @@
|
|
|
36
37
|
#include <cuda/std/limits>
|
|
37
38
|
|
|
38
39
|
// Compatibility helpers for thrust to convert between `std::complex` and `cuda::std::complex`
|
|
40
|
+
// todo: find a way to get rid of this include
|
|
39
41
|
#if !_CCCL_COMPILER(NVRTC)
|
|
40
|
-
# include <complex>
|
|
41
|
-
# include <sstream> // for std::basic_ostringstream
|
|
42
|
+
# include <complex> // for std::complex stream operators
|
|
42
43
|
|
|
43
44
|
# define _LIBCUDACXX_ACCESS_STD_COMPLEX_REAL(__c) reinterpret_cast<const _Up(&)[2]>(__c)[0]
|
|
44
45
|
# define _LIBCUDACXX_ACCESS_STD_COMPLEX_IMAG(__c) reinterpret_cast<const _Up(&)[2]>(__c)[1]
|
|
@@ -21,28 +21,28 @@
|
|
|
21
21
|
# pragma system_header
|
|
22
22
|
#endif // no system header
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
// gcc < 8 warns about its extended literals being shadowed by the implementation, so let's just disable the complex
|
|
25
|
+
// literals
|
|
26
|
+
#if !_CCCL_COMPILER(GCC, <, 8)
|
|
25
27
|
|
|
26
|
-
#include <cuda/std/
|
|
28
|
+
# include <cuda/std/__complex/complex.h>
|
|
27
29
|
|
|
28
|
-
|
|
30
|
+
# include <cuda/std/__cccl/prologue.h>
|
|
29
31
|
|
|
30
|
-
|
|
31
|
-
// Literal suffix for complex number literals [complex.literals]
|
|
32
|
+
_CCCL_BEGIN_NAMESPACE_CUDA_STD
|
|
32
33
|
|
|
33
34
|
_CCCL_DIAG_PUSH
|
|
34
35
|
_CCCL_DIAG_SUPPRESS_GCC("-Wliteral-suffix")
|
|
35
36
|
_CCCL_DIAG_SUPPRESS_CLANG("-Wuser-defined-literals")
|
|
36
|
-
|
|
37
|
+
_CCCL_DIAG_SUPPRESS_NVHPC(lit_suffix_no_underscore)
|
|
38
|
+
_CCCL_DIAG_SUPPRESS_MSVC(4455) // literal suffix identifiers that do not start with an underscore are reserved
|
|
39
|
+
_CCCL_BEGIN_NV_DIAG_SUPPRESS(2506, 20208) // a user-provided literal suffix must begin with "_",
|
|
40
|
+
// long double treated as double
|
|
37
41
|
|
|
38
42
|
inline namespace literals
|
|
39
43
|
{
|
|
40
44
|
inline namespace complex_literals
|
|
41
45
|
{
|
|
42
|
-
# if !_CCCL_CUDA_COMPILER(NVCC) && !_CCCL_COMPILER(NVRTC)
|
|
43
|
-
// NOTE: if you get a warning from GCC <7 here that "literal operator suffixes not preceded by ‘_’ are reserved for
|
|
44
|
-
// future standardization" then we are sorry. The warning was implemented before GCC 7, but can only be disabled since
|
|
45
|
-
// GCC 7. See also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69523
|
|
46
46
|
_CCCL_API constexpr complex<long double> operator""il(long double __im)
|
|
47
47
|
{
|
|
48
48
|
return {0.0l, __im};
|
|
@@ -71,36 +71,16 @@ _CCCL_API constexpr complex<float> operator""if(unsigned long long __im)
|
|
|
71
71
|
{
|
|
72
72
|
return {0.0f, static_cast<float>(__im)};
|
|
73
73
|
}
|
|
74
|
-
# else // ^^^ !_CCCL_CUDA_COMPILER(NVCC) && !_CCCL_COMPILER(NVRTC) ^^^ / vvv other compilers vvv
|
|
75
|
-
_CCCL_API constexpr complex<double> operator""i(double __im)
|
|
76
|
-
{
|
|
77
|
-
return {0.0, static_cast<double>(__im)};
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
_CCCL_API constexpr complex<double> operator""i(unsigned long long __im)
|
|
81
|
-
{
|
|
82
|
-
return {0.0, static_cast<double>(__im)};
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
_CCCL_API constexpr complex<float> operator""if(double __im)
|
|
86
|
-
{
|
|
87
|
-
return {0.0f, static_cast<float>(__im)};
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
_CCCL_API constexpr complex<float> operator""if(unsigned long long __im)
|
|
91
|
-
{
|
|
92
|
-
return {0.0f, static_cast<float>(__im)};
|
|
93
|
-
}
|
|
94
|
-
# endif // other compilers
|
|
95
74
|
} // namespace complex_literals
|
|
96
75
|
} // namespace literals
|
|
97
76
|
|
|
77
|
+
_CCCL_END_NV_DIAG_SUPPRESS()
|
|
98
78
|
_CCCL_DIAG_POP
|
|
99
79
|
|
|
100
|
-
#endif // _LIBCUDACXX_HAS_STL_LITERALS
|
|
101
|
-
|
|
102
80
|
_CCCL_END_NAMESPACE_CUDA_STD
|
|
103
81
|
|
|
104
|
-
#include <cuda/std/__cccl/epilogue.h>
|
|
82
|
+
# include <cuda/std/__cccl/epilogue.h>
|
|
83
|
+
|
|
84
|
+
#endif // !_CCCL_COMPILER(GCC, <, 8)
|
|
105
85
|
|
|
106
86
|
#endif // _CUDA_STD___COMPLEX_LITERALS_H
|
|
@@ -31,8 +31,9 @@
|
|
|
31
31
|
# include <cuda/std/__type_traits/enable_if.h>
|
|
32
32
|
# include <cuda/std/__type_traits/is_constructible.h>
|
|
33
33
|
|
|
34
|
+
// todo: find a way to get rid of this include
|
|
34
35
|
# if !_CCCL_COMPILER(NVRTC)
|
|
35
|
-
# include <
|
|
36
|
+
# include <complex> // for std::complex stream operators
|
|
36
37
|
# endif // !_CCCL_COMPILER(NVRTC)
|
|
37
38
|
|
|
38
39
|
# include <cuda/std/__cccl/prologue.h>
|
|
@@ -31,8 +31,9 @@
|
|
|
31
31
|
# include <cuda/std/__type_traits/enable_if.h>
|
|
32
32
|
# include <cuda/std/__type_traits/is_constructible.h>
|
|
33
33
|
|
|
34
|
+
// todo: find a way to get rid of this include
|
|
34
35
|
# if !_CCCL_COMPILER(NVRTC)
|
|
35
|
-
# include <
|
|
36
|
+
# include <complex> // for std::complex stream operators
|
|
36
37
|
# endif // !_CCCL_COMPILER(NVRTC)
|
|
37
38
|
|
|
38
39
|
# include <cuda/std/__cccl/prologue.h>
|
|
@@ -294,7 +295,7 @@ struct __get_complex_impl<__half>
|
|
|
294
295
|
}
|
|
295
296
|
};
|
|
296
297
|
|
|
297
|
-
# if !
|
|
298
|
+
# if !_CCCL_COMPILER(NVRTC)
|
|
298
299
|
template <class _CharT, class _Traits>
|
|
299
300
|
::std::basic_istream<_CharT, _Traits>& operator>>(::std::basic_istream<_CharT, _Traits>& __is, complex<__half>& __x)
|
|
300
301
|
{
|
|
@@ -310,7 +311,7 @@ operator<<(::std::basic_ostream<_CharT, _Traits>& __os, const complex<__half>& _
|
|
|
310
311
|
{
|
|
311
312
|
return __os << complex<float>{__x};
|
|
312
313
|
}
|
|
313
|
-
# endif // !
|
|
314
|
+
# endif // !_CCCL_COMPILER(NVRTC)
|
|
314
315
|
|
|
315
316
|
_CCCL_END_NAMESPACE_CUDA_STD
|
|
316
317
|
|
|
@@ -35,8 +35,8 @@ _CCCL_BEGIN_NAMESPACE_CUDA_STD
|
|
|
35
35
|
|
|
36
36
|
template <class _Fn, class... _Args>
|
|
37
37
|
concept invocable = requires(_Fn&& __fn, _Args&&... __args) {
|
|
38
|
-
::cuda::std::
|
|
39
|
-
|
|
38
|
+
::cuda::std::invoke(::cuda::std::forward<_Fn>(__fn), ::cuda::std::forward<_Args>(__args)...); // not required to be
|
|
39
|
+
// equality preserving
|
|
40
40
|
};
|
|
41
41
|
|
|
42
42
|
// [concept.regular.invocable]
|
|
@@ -21,8 +21,9 @@
|
|
|
21
21
|
# pragma system_header
|
|
22
22
|
#endif // no system header
|
|
23
23
|
|
|
24
|
+
#include <cuda/__cmath/mul_hi.h>
|
|
24
25
|
#include <cuda/std/__cstddef/types.h>
|
|
25
|
-
#include <cuda/std/
|
|
26
|
+
#include <cuda/std/__cstring/memset.h>
|
|
26
27
|
|
|
27
28
|
#if !_CCCL_COMPILER(NVRTC)
|
|
28
29
|
# include <cstdlib>
|
|
@@ -44,7 +45,7 @@ using ::malloc;
|
|
|
44
45
|
|
|
45
46
|
const size_t __nbytes = __n * __size;
|
|
46
47
|
|
|
47
|
-
if (::
|
|
48
|
+
if (::cuda::mul_hi(__n, __size) == 0)
|
|
48
49
|
{
|
|
49
50
|
__ptr = ::cuda::std::malloc(__nbytes);
|
|
50
51
|
if (__ptr != nullptr)
|
|
@@ -97,14 +97,13 @@ _CCCL_API inline _Tp& __mu(reference_wrapper<_Tp> __t, _Uj&)
|
|
|
97
97
|
}
|
|
98
98
|
|
|
99
99
|
template <class _Ti, class... _Uj, size_t... _Indx>
|
|
100
|
-
_CCCL_API inline
|
|
101
|
-
__mu_expand(_Ti& __ti, tuple<_Uj...>& __uj, __tuple_indices<_Indx...>)
|
|
100
|
+
_CCCL_API inline invoke_result_t<_Ti&, _Uj...> __mu_expand(_Ti& __ti, tuple<_Uj...>& __uj, __tuple_indices<_Indx...>)
|
|
102
101
|
{
|
|
103
102
|
return __ti(::cuda::std::forward<_Uj>(::cuda::std::get<_Indx>(__uj))...);
|
|
104
103
|
}
|
|
105
104
|
|
|
106
105
|
template <class _Ti, class... _Uj>
|
|
107
|
-
_CCCL_API inline enable_if_t<is_bind_expression<_Ti>::value,
|
|
106
|
+
_CCCL_API inline enable_if_t<is_bind_expression<_Ti>::value, invoke_result<_Ti&, _Uj...>>
|
|
108
107
|
__mu(_Ti& __ti, tuple<_Uj...>& __uj)
|
|
109
108
|
{
|
|
110
109
|
using __indices = __make_tuple_indices_t<sizeof...(_Uj)>;
|
|
@@ -151,12 +150,12 @@ struct __mu_return_invocable // false
|
|
|
151
150
|
template <class _Ti, class... _Uj>
|
|
152
151
|
struct __mu_return_invocable<true, _Ti, _Uj...>
|
|
153
152
|
{
|
|
154
|
-
using type =
|
|
153
|
+
using type = invoke_result_t<_Ti&, _Uj...>;
|
|
155
154
|
};
|
|
156
155
|
|
|
157
156
|
template <class _Ti, class... _Uj>
|
|
158
157
|
struct __mu_return_impl<_Ti, false, true, false, tuple<_Uj...>>
|
|
159
|
-
: public __mu_return_invocable<
|
|
158
|
+
: public __mu_return_invocable<is_invocable_v<_Ti&, _Uj...>, _Ti, _Uj...>
|
|
160
159
|
{};
|
|
161
160
|
|
|
162
161
|
template <class _Ti, class _TupleUj>
|
|
@@ -196,13 +195,13 @@ struct __is_valid_bind_return
|
|
|
196
195
|
template <class _Fp, class... _BoundArgs, class _TupleUj>
|
|
197
196
|
struct __is_valid_bind_return<_Fp, tuple<_BoundArgs...>, _TupleUj>
|
|
198
197
|
{
|
|
199
|
-
static const bool value =
|
|
198
|
+
static const bool value = is_invocable_v<_Fp, typename __mu_return<_BoundArgs, _TupleUj>::type...>;
|
|
200
199
|
};
|
|
201
200
|
|
|
202
201
|
template <class _Fp, class... _BoundArgs, class _TupleUj>
|
|
203
202
|
struct __is_valid_bind_return<_Fp, const tuple<_BoundArgs...>, _TupleUj>
|
|
204
203
|
{
|
|
205
|
-
static const bool value =
|
|
204
|
+
static const bool value = is_invocable_v<_Fp, typename __mu_return<const _BoundArgs, _TupleUj>::type...>;
|
|
206
205
|
};
|
|
207
206
|
|
|
208
207
|
template <class _Fp, class _BoundArgs, class _TupleUj, bool = __is_valid_bind_return<_Fp, _BoundArgs, _TupleUj>::value>
|
|
@@ -211,13 +210,13 @@ struct __bind_return;
|
|
|
211
210
|
template <class _Fp, class... _BoundArgs, class _TupleUj>
|
|
212
211
|
struct __bind_return<_Fp, tuple<_BoundArgs...>, _TupleUj, true>
|
|
213
212
|
{
|
|
214
|
-
using type =
|
|
213
|
+
using type = invoke_result_t<_Fp&, typename __mu_return<_BoundArgs, _TupleUj>::type...>;
|
|
215
214
|
};
|
|
216
215
|
|
|
217
216
|
template <class _Fp, class... _BoundArgs, class _TupleUj>
|
|
218
217
|
struct __bind_return<_Fp, const tuple<_BoundArgs...>, _TupleUj, true>
|
|
219
218
|
{
|
|
220
|
-
using type =
|
|
219
|
+
using type = invoke_result_t<_Fp&, typename __mu_return<const _BoundArgs, _TupleUj>::type...>;
|
|
221
220
|
};
|
|
222
221
|
|
|
223
222
|
template <class _Fp, class _BoundArgs, class _TupleUj>
|
|
@@ -295,8 +294,7 @@ public:
|
|
|
295
294
|
result_type>
|
|
296
295
|
operator()(_Args&&... __args)
|
|
297
296
|
{
|
|
298
|
-
|
|
299
|
-
return _Invoker::__call(static_cast<base&>(*this), ::cuda::std::forward<_Args>(__args)...);
|
|
297
|
+
return ::cuda::std::invoke_r<_Rp>(static_cast<base&>(*this), ::cuda::std::forward<_Args>(__args)...);
|
|
300
298
|
}
|
|
301
299
|
|
|
302
300
|
template <class... _Args>
|
|
@@ -305,8 +303,7 @@ public:
|
|
|
305
303
|
result_type>
|
|
306
304
|
operator()(_Args&&... __args) const
|
|
307
305
|
{
|
|
308
|
-
|
|
309
|
-
return _Invoker::__call(static_cast<base const&>(*this), ::cuda::std::forward<_Args>(__args)...);
|
|
306
|
+
return ::cuda::std::invoke_r<_Rp>(static_cast<base const&>(*this), ::cuda::std::forward<_Args>(__args)...);
|
|
310
307
|
}
|
|
311
308
|
};
|
|
312
309
|
|
|
@@ -174,8 +174,7 @@ public:
|
|
|
174
174
|
|
|
175
175
|
_CCCL_API inline _Rp operator()(_ArgTypes&&... __arg)
|
|
176
176
|
{
|
|
177
|
-
|
|
178
|
-
return _Invoker::__call(__f_.first(), ::cuda::std::forward<_ArgTypes>(__arg)...);
|
|
177
|
+
return ::cuda::std::invoke_r<_Rp>(__f_.first(), ::cuda::std::forward<_ArgTypes>(__arg)...);
|
|
179
178
|
}
|
|
180
179
|
|
|
181
180
|
_CCCL_API inline __alloc_func* __clone() const
|
|
@@ -227,8 +226,7 @@ public:
|
|
|
227
226
|
|
|
228
227
|
_CCCL_API inline _Rp operator()(_ArgTypes&&... __arg)
|
|
229
228
|
{
|
|
230
|
-
|
|
231
|
-
return _Invoker::__call(__f_, ::cuda::std::forward<_ArgTypes>(__arg)...);
|
|
229
|
+
return ::cuda::std::invoke_r<_Rp>(__f_, ::cuda::std::forward<_ArgTypes>(__arg)...);
|
|
232
230
|
}
|
|
233
231
|
|
|
234
232
|
_CCCL_API inline __default_alloc_func* __clone() const
|
|
@@ -963,7 +961,7 @@ public:
|
|
|
963
961
|
|
|
964
962
|
virtual _Rp operator()(_ArgTypes&&... __arg)
|
|
965
963
|
{
|
|
966
|
-
return ::cuda::std::
|
|
964
|
+
return ::cuda::std::invoke(__f_, ::cuda::std::forward<_ArgTypes>(__arg)...);
|
|
967
965
|
}
|
|
968
966
|
|
|
969
967
|
# ifndef _CCCL_NO_RTTI
|
|
@@ -996,13 +994,12 @@ class _CCCL_TYPE_VISIBILITY_DEFAULT function<_Rp(_ArgTypes...)>
|
|
|
996
994
|
|
|
997
995
|
__func __f_;
|
|
998
996
|
|
|
999
|
-
template <class _Fp, bool = !is_same_v<remove_cvref_t<_Fp>, function> &&
|
|
997
|
+
template <class _Fp, bool = !is_same_v<remove_cvref_t<_Fp>, function> && is_invocable_v<_Fp, _ArgTypes...>>
|
|
1000
998
|
struct __callable;
|
|
1001
999
|
template <class _Fp>
|
|
1002
1000
|
struct __callable<_Fp, true>
|
|
1003
1001
|
{
|
|
1004
|
-
static const bool value =
|
|
1005
|
-
is_void_v<_Rp> || __is_core_convertible<typename __invoke_of<_Fp, _ArgTypes...>::type, _Rp>::value;
|
|
1002
|
+
static const bool value = is_void_v<_Rp> || __is_core_convertible<invoke_result_t<_Fp, _ArgTypes...>, _Rp>::value;
|
|
1006
1003
|
};
|
|
1007
1004
|
template <class _Fp>
|
|
1008
1005
|
struct __callable<_Fp, false>
|