cuda-cccl 0.3.0__cp312-cp312-manylinux_2_24_aarch64.whl → 0.3.2__cp312-cp312-manylinux_2_24_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cuda-cccl might be problematic. Click here for more details.
- cuda/cccl/cooperative/__init__.py +7 -1
- cuda/cccl/cooperative/experimental/__init__.py +21 -5
- cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_for.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +354 -572
- cuda/cccl/headers/include/cub/agent/agent_merge.cuh +23 -21
- cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +21 -3
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_rle.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_scan.cuh +5 -1
- cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +22 -5
- cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +6 -8
- cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +24 -14
- cuda/cccl/headers/include/cub/block/block_exchange.cuh +5 -0
- cuda/cccl/headers/include/cub/block/block_histogram.cuh +4 -0
- cuda/cccl/headers/include/cub/block/block_load.cuh +4 -0
- cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +4 -2
- cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +4 -2
- cuda/cccl/headers/include/cub/block/block_reduce.cuh +1 -0
- cuda/cccl/headers/include/cub/block/block_scan.cuh +12 -2
- cuda/cccl/headers/include/cub/block/block_store.cuh +3 -2
- cuda/cccl/headers/include/cub/detail/device_memory_resource.cuh +1 -0
- cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +34 -30
- cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +1 -1
- cuda/cccl/headers/include/cub/device/device_for.cuh +118 -40
- cuda/cccl/headers/include/cub/device/device_reduce.cuh +6 -7
- cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +170 -260
- cuda/cccl/headers/include/cub/device/device_transform.cuh +122 -91
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +6 -7
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +2 -11
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +12 -29
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +2 -7
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh +0 -1
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +2 -3
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +4 -5
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +0 -1
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_topk.cuh +3 -5
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +13 -5
- cuda/cccl/headers/include/cub/device/dispatch/kernels/for_each.cuh +72 -37
- cuda/cccl/headers/include/cub/device/dispatch/kernels/reduce.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/kernels/scan.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_reduce.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +22 -27
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +8 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +61 -70
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +10 -0
- cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +24 -17
- cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +3 -2
- cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +3 -2
- cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +2 -2
- cuda/cccl/headers/include/cub/warp/warp_load.cuh +6 -6
- cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +7 -2
- cuda/cccl/headers/include/cub/warp/warp_scan.cuh +7 -3
- cuda/cccl/headers/include/cub/warp/warp_store.cuh +1 -0
- cuda/cccl/headers/include/cuda/__algorithm/common.h +1 -1
- cuda/cccl/headers/include/cuda/__algorithm/copy.h +1 -1
- cuda/cccl/headers/include/cuda/__algorithm/fill.h +1 -1
- cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +19 -0
- cuda/cccl/headers/include/cuda/__cccl_config +1 -0
- cuda/cccl/headers/include/cuda/__cmath/fast_modulo_division.h +3 -74
- cuda/cccl/headers/include/cuda/__cmath/mul_hi.h +146 -0
- cuda/cccl/headers/include/cuda/__complex/get_real_imag.h +0 -4
- cuda/cccl/headers/include/cuda/__device/all_devices.h +46 -143
- cuda/cccl/headers/include/cuda/__device/arch_id.h +176 -0
- cuda/cccl/headers/include/cuda/__device/arch_traits.h +247 -323
- cuda/cccl/headers/include/cuda/__device/attributes.h +174 -123
- cuda/cccl/headers/include/cuda/__device/compute_capability.h +171 -0
- cuda/cccl/headers/include/cuda/__device/device_ref.h +27 -49
- cuda/cccl/headers/include/cuda/__device/physical_device.h +100 -96
- cuda/cccl/headers/include/cuda/__driver/driver_api.h +105 -3
- cuda/cccl/headers/include/cuda/__event/event.h +27 -26
- cuda/cccl/headers/include/cuda/__event/event_ref.h +5 -5
- cuda/cccl/headers/include/cuda/__event/timed_event.h +10 -7
- cuda/cccl/headers/include/cuda/__fwd/devices.h +44 -0
- cuda/cccl/headers/include/cuda/__fwd/zip_iterator.h +9 -0
- cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +46 -31
- cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +79 -47
- cuda/cccl/headers/include/cuda/__iterator/tabulate_output_iterator.h +59 -36
- cuda/cccl/headers/include/cuda/__iterator/transform_input_output_iterator.h +79 -49
- cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +74 -48
- cuda/cccl/headers/include/cuda/__iterator/transform_output_iterator.h +80 -55
- cuda/cccl/headers/include/cuda/__iterator/zip_common.h +148 -0
- cuda/cccl/headers/include/cuda/__iterator/zip_iterator.h +21 -137
- cuda/cccl/headers/include/cuda/__iterator/zip_transform_iterator.h +592 -0
- cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +127 -60
- cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +178 -3
- cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +38 -8
- cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +67 -1
- cuda/cccl/headers/include/cuda/__memory/ptr_in_range.h +93 -0
- cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +4 -4
- cuda/cccl/headers/include/cuda/__memory_resource/properties.h +44 -0
- cuda/cccl/headers/include/cuda/__memory_resource/resource.h +1 -1
- cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +4 -6
- cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2 -1
- cuda/cccl/headers/include/cuda/__runtime/ensure_current_context.h +9 -7
- cuda/cccl/headers/include/cuda/__stream/stream.h +8 -8
- cuda/cccl/headers/include/cuda/__stream/stream_ref.h +18 -16
- cuda/cccl/headers/include/cuda/__utility/basic_any.h +1 -1
- cuda/cccl/headers/include/cuda/__utility/in_range.h +65 -0
- cuda/cccl/headers/include/cuda/algorithm +1 -1
- cuda/cccl/headers/include/cuda/cmath +1 -0
- cuda/cccl/headers/include/cuda/devices +13 -0
- cuda/cccl/headers/include/cuda/iterator +1 -0
- cuda/cccl/headers/include/cuda/memory +1 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +2 -2
- cuda/cccl/headers/include/cuda/std/__algorithm/find.h +1 -1
- cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +2 -4
- cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +1 -1
- cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +7 -15
- cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +1 -1
- cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +1 -2
- cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +2 -2
- cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +1 -1
- cuda/cccl/headers/include/cuda/std/__bit/countl.h +8 -1
- cuda/cccl/headers/include/cuda/std/__bit/countr.h +2 -2
- cuda/cccl/headers/include/cuda/std/__bit/reference.h +11 -11
- cuda/cccl/headers/include/cuda/std/__cccl/algorithm_wrapper.h +36 -0
- cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +46 -49
- cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +6 -0
- cuda/cccl/headers/include/cuda/std/__cccl/host_std_lib.h +52 -0
- cuda/cccl/headers/include/cuda/std/__cccl/memory_wrapper.h +36 -0
- cuda/cccl/headers/include/cuda/std/__cccl/numeric_wrapper.h +36 -0
- cuda/cccl/headers/include/cuda/std/__chrono/duration.h +16 -16
- cuda/cccl/headers/include/cuda/std/__chrono/steady_clock.h +5 -5
- cuda/cccl/headers/include/cuda/std/__chrono/system_clock.h +5 -5
- cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +3 -2
- cuda/cccl/headers/include/cuda/std/__complex/complex.h +3 -2
- cuda/cccl/headers/include/cuda/std/__complex/literals.h +14 -34
- cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +2 -1
- cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +4 -3
- cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +2 -2
- cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +3 -2
- cuda/cccl/headers/include/cuda/std/__floating_point/fp.h +1 -1
- cuda/cccl/headers/include/cuda/std/__functional/bind.h +10 -13
- cuda/cccl/headers/include/cuda/std/__functional/function.h +5 -8
- cuda/cccl/headers/include/cuda/std/__functional/invoke.h +71 -335
- cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +1 -2
- cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +3 -3
- cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +0 -6
- cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +13 -0
- cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +13 -0
- cuda/cccl/headers/include/cuda/std/__fwd/complex.h +13 -4
- cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +23 -0
- cuda/cccl/headers/include/cuda/std/__fwd/pair.h +13 -0
- cuda/cccl/headers/include/cuda/std/__fwd/string.h +22 -0
- cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +14 -0
- cuda/cccl/headers/include/cuda/std/__internal/features.h +0 -5
- cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +21 -0
- cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +5 -5
- cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +7 -1
- cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +53 -39
- cuda/cccl/headers/include/cuda/std/__memory/allocator.h +3 -3
- cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +1 -3
- cuda/cccl/headers/include/cuda/std/__optional/optional_base.h +1 -0
- cuda/cccl/headers/include/cuda/std/__ranges/compressed_movable_box.h +892 -0
- cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +2 -2
- cuda/cccl/headers/include/cuda/std/__tuple_dir/make_tuple_types.h +23 -1
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +4 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like_ext.h +4 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +7 -5
- cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +1 -1
- cuda/cccl/headers/include/cuda/std/__utility/pair.h +0 -5
- cuda/cccl/headers/include/cuda/std/bitset +1 -1
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +15 -12
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +11 -9
- cuda/cccl/headers/include/cuda/std/inplace_vector +4 -4
- cuda/cccl/headers/include/cuda/std/numbers +5 -0
- cuda/cccl/headers/include/cuda/std/string_view +155 -13
- cuda/cccl/headers/include/cuda/std/version +1 -4
- cuda/cccl/headers/include/cuda/stream_ref +5 -0
- cuda/cccl/headers/include/cuda/utility +1 -0
- cuda/cccl/headers/include/nv/target +7 -2
- cuda/cccl/headers/include/thrust/allocate_unique.h +1 -1
- cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +309 -33
- cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +151 -4
- cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +60 -3
- cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +45 -3
- cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +31 -6
- cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +29 -16
- cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +41 -4
- cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +42 -4
- cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +3 -3
- cuda/cccl/headers/include/thrust/detail/integer_math.h +3 -20
- cuda/cccl/headers/include/thrust/detail/internal_functional.h +1 -1
- cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +1 -1
- cuda/cccl/headers/include/thrust/detail/temporary_array.h +1 -1
- cuda/cccl/headers/include/thrust/detail/type_traits.h +1 -1
- cuda/cccl/headers/include/thrust/device_delete.h +18 -3
- cuda/cccl/headers/include/thrust/device_free.h +16 -3
- cuda/cccl/headers/include/thrust/device_new.h +29 -8
- cuda/cccl/headers/include/thrust/host_vector.h +1 -1
- cuda/cccl/headers/include/thrust/iterator/iterator_traits.h +11 -0
- cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +5 -2
- cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +1 -1
- cuda/cccl/headers/include/thrust/mr/pool.h +1 -1
- cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +33 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +13 -115
- cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +8 -2
- cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +7 -7
- cuda/cccl/parallel/experimental/__init__.py +21 -74
- cuda/compute/__init__.py +79 -0
- cuda/{cccl/parallel/experimental → compute}/_bindings.pyi +43 -1
- cuda/{cccl/parallel/experimental → compute}/_bindings_impl.pyx +157 -8
- cuda/{cccl/parallel/experimental → compute}/algorithms/_histogram.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/algorithms/_merge_sort.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/algorithms/_radix_sort.py +3 -3
- cuda/{cccl/parallel/experimental → compute}/algorithms/_reduce.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/algorithms/_scan.py +112 -40
- cuda/{cccl/parallel/experimental → compute}/algorithms/_segmented_reduce.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/algorithms/_three_way_partition.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/algorithms/_transform.py +36 -15
- cuda/{cccl/parallel/experimental → compute}/algorithms/_unique_by_key.py +2 -2
- cuda/compute/cu12/_bindings_impl.cpython-312-aarch64-linux-gnu.so +0 -0
- cuda/{cccl/parallel/experimental → compute}/cu12/cccl/libcccl.c.parallel.so +0 -0
- cuda/compute/cu13/_bindings_impl.cpython-312-aarch64-linux-gnu.so +0 -0
- cuda/{cccl/parallel/experimental → compute}/cu13/cccl/libcccl.c.parallel.so +0 -0
- cuda/{cccl/parallel/experimental → compute}/iterators/__init__.py +2 -0
- cuda/{cccl/parallel/experimental → compute}/iterators/_factories.py +36 -8
- cuda/{cccl/parallel/experimental → compute}/iterators/_iterators.py +206 -1
- cuda/{cccl/parallel/experimental → compute}/numba_utils.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/struct.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/typing.py +2 -0
- cuda/coop/__init__.py +8 -0
- cuda/{cccl/cooperative/experimental → coop}/_nvrtc.py +3 -2
- cuda/{cccl/cooperative/experimental → coop}/_scan_op.py +3 -3
- cuda/{cccl/cooperative/experimental → coop}/_types.py +2 -2
- cuda/{cccl/cooperative/experimental → coop}/_typing.py +1 -1
- cuda/{cccl/cooperative/experimental → coop}/block/__init__.py +6 -6
- cuda/{cccl/cooperative/experimental → coop}/block/_block_exchange.py +4 -4
- cuda/{cccl/cooperative/experimental → coop}/block/_block_load_store.py +6 -6
- cuda/{cccl/cooperative/experimental → coop}/block/_block_merge_sort.py +4 -4
- cuda/{cccl/cooperative/experimental → coop}/block/_block_radix_sort.py +6 -6
- cuda/{cccl/cooperative/experimental → coop}/block/_block_reduce.py +6 -6
- cuda/{cccl/cooperative/experimental → coop}/block/_block_scan.py +7 -7
- cuda/coop/warp/__init__.py +9 -0
- cuda/{cccl/cooperative/experimental → coop}/warp/_warp_merge_sort.py +3 -3
- cuda/{cccl/cooperative/experimental → coop}/warp/_warp_reduce.py +6 -6
- cuda/{cccl/cooperative/experimental → coop}/warp/_warp_scan.py +4 -4
- {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/METADATA +1 -1
- {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/RECORD +275 -276
- cuda/cccl/cooperative/experimental/warp/__init__.py +0 -9
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_advance_iterators.cuh +0 -111
- cuda/cccl/headers/include/thrust/detail/algorithm_wrapper.h +0 -37
- cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.inl +0 -371
- cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.inl +0 -242
- cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.inl +0 -137
- cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.inl +0 -99
- cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.inl +0 -68
- cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.inl +0 -86
- cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.inl +0 -79
- cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.inl +0 -98
- cuda/cccl/headers/include/thrust/detail/device_delete.inl +0 -52
- cuda/cccl/headers/include/thrust/detail/device_free.inl +0 -47
- cuda/cccl/headers/include/thrust/detail/device_new.inl +0 -61
- cuda/cccl/headers/include/thrust/detail/memory_wrapper.h +0 -40
- cuda/cccl/headers/include/thrust/detail/numeric_wrapper.h +0 -37
- cuda/cccl/parallel/experimental/.gitignore +0 -4
- cuda/cccl/parallel/experimental/cu12/_bindings_impl.cpython-312-aarch64-linux-gnu.so +0 -0
- cuda/cccl/parallel/experimental/cu13/_bindings_impl.cpython-312-aarch64-linux-gnu.so +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_bindings.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_caching.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_cccl_interop.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_utils/__init__.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_utils/protocols.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_utils/temp_storage_buffer.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/algorithms/__init__.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/cccl/.gitkeep +0 -0
- /cuda/{cccl/parallel/experimental → compute}/iterators/_zip_iterator.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/op.py +0 -0
- /cuda/{cccl/cooperative/experimental → coop}/_caching.py +0 -0
- /cuda/{cccl/cooperative/experimental → coop}/_common.py +0 -0
- {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/WHEEL +0 -0
- {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -11,10 +11,23 @@
|
|
|
11
11
|
#ifndef _CUDA_DEVICES
|
|
12
12
|
#define _CUDA_DEVICES
|
|
13
13
|
|
|
14
|
+
#include <cuda/std/detail/__config>
|
|
15
|
+
|
|
16
|
+
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
|
|
17
|
+
# pragma GCC system_header
|
|
18
|
+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
|
|
19
|
+
# pragma clang system_header
|
|
20
|
+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
|
|
21
|
+
# pragma system_header
|
|
22
|
+
#endif // no system header
|
|
23
|
+
|
|
14
24
|
#include <cuda/__device/all_devices.h>
|
|
25
|
+
#include <cuda/__device/arch_id.h>
|
|
15
26
|
#include <cuda/__device/arch_traits.h>
|
|
16
27
|
#include <cuda/__device/attributes.h>
|
|
28
|
+
#include <cuda/__device/compute_capability.h>
|
|
17
29
|
#include <cuda/__device/device_ref.h>
|
|
18
30
|
#include <cuda/__device/physical_device.h>
|
|
31
|
+
#include <cuda/version>
|
|
19
32
|
|
|
20
33
|
#endif // _CUDA_DEVICES
|
|
@@ -33,6 +33,7 @@
|
|
|
33
33
|
#include <cuda/__iterator/transform_output_iterator.h>
|
|
34
34
|
#include <cuda/__iterator/zip_function.h>
|
|
35
35
|
#include <cuda/__iterator/zip_iterator.h>
|
|
36
|
+
#include <cuda/__iterator/zip_transform_iterator.h>
|
|
36
37
|
#include <cuda/std/iterator>
|
|
37
38
|
|
|
38
39
|
#endif // _CUDA_ITERATOR
|
|
@@ -52,12 +52,12 @@ __equal_range(_Iter __first, _Sent __last, const _Tp& __value, _Compare&& __comp
|
|
|
52
52
|
{
|
|
53
53
|
auto __half_len = ::cuda::std::__half_positive(__len);
|
|
54
54
|
_Iter __mid = _IterOps<_AlgPolicy>::next(__first, __half_len);
|
|
55
|
-
if (::cuda::std::
|
|
55
|
+
if (::cuda::std::invoke(__comp, ::cuda::std::invoke(__proj, *__mid), __value))
|
|
56
56
|
{
|
|
57
57
|
__first = ++__mid;
|
|
58
58
|
__len -= __half_len + 1;
|
|
59
59
|
}
|
|
60
|
-
else if (::cuda::std::
|
|
60
|
+
else if (::cuda::std::invoke(__comp, __value, ::cuda::std::invoke(__proj, *__mid)))
|
|
61
61
|
{
|
|
62
62
|
__end = __mid;
|
|
63
63
|
__len = __half_len;
|
|
@@ -40,13 +40,11 @@ _CCCL_API constexpr bool __includes(
|
|
|
40
40
|
for (; __first2 != __last2; ++__first1)
|
|
41
41
|
{
|
|
42
42
|
if (__first1 == __last1
|
|
43
|
-
|| ::cuda::std::
|
|
44
|
-
__comp, ::cuda::std::__invoke(__proj2, *__first2), ::cuda::std::__invoke(__proj1, *__first1)))
|
|
43
|
+
|| ::cuda::std::invoke(__comp, ::cuda::std::invoke(__proj2, *__first2), ::cuda::std::invoke(__proj1, *__first1)))
|
|
45
44
|
{
|
|
46
45
|
return false;
|
|
47
46
|
}
|
|
48
|
-
if (!::cuda::std::
|
|
49
|
-
__comp, ::cuda::std::__invoke(__proj1, *__first1), ::cuda::std::__invoke(__proj2, *__first2)))
|
|
47
|
+
if (!::cuda::std::invoke(__comp, ::cuda::std::invoke(__proj1, *__first1), ::cuda::std::invoke(__proj2, *__first2)))
|
|
50
48
|
{
|
|
51
49
|
++__first2;
|
|
52
50
|
}
|
|
@@ -46,7 +46,7 @@ _CCCL_API constexpr _Iter __lower_bound(_Iter __first, _Sent __last, const _Type
|
|
|
46
46
|
auto __l2 = ::cuda::std::__half_positive(__len);
|
|
47
47
|
_Iter __m = __first;
|
|
48
48
|
_IterOps<_AlgPolicy>::advance(__m, __l2);
|
|
49
|
-
if (::cuda::std::
|
|
49
|
+
if (::cuda::std::invoke(__comp, ::cuda::std::invoke(__proj, *__m), __value))
|
|
50
50
|
{
|
|
51
51
|
__first = ++__m;
|
|
52
52
|
__len -= __l2 + 1;
|
|
@@ -47,26 +47,18 @@ struct _ProjectedPred
|
|
|
47
47
|
{}
|
|
48
48
|
|
|
49
49
|
template <class _Tp>
|
|
50
|
-
|
|
51
|
-
_Pred&,
|
|
52
|
-
decltype(::cuda::std::__invoke(::cuda::std::declval<_Proj&>(), ::cuda::std::declval<_Tp>()))>::type constexpr
|
|
53
|
-
_CCCL_API inline
|
|
54
|
-
operator()(_Tp&& __v) const
|
|
50
|
+
invoke_result_t<_Pred&, invoke_result_t<_Proj&, _Tp>> constexpr _CCCL_API inline operator()(_Tp&& __v) const
|
|
55
51
|
{
|
|
56
|
-
return ::cuda::std::
|
|
52
|
+
return ::cuda::std::invoke(__pred, ::cuda::std::invoke(__proj, ::cuda::std::forward<_Tp>(__v)));
|
|
57
53
|
}
|
|
58
54
|
|
|
59
55
|
template <class _T1, class _T2>
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
decltype(::cuda::std::__invoke(::cuda::std::declval<_Proj&>(), ::cuda::std::declval<_T1>())),
|
|
63
|
-
decltype(::cuda::std::__invoke(::cuda::std::declval<_Proj&>(), ::cuda::std::declval<_T2>()))>::type constexpr
|
|
64
|
-
_CCCL_API inline
|
|
65
|
-
operator()(_T1&& __lhs, _T2&& __rhs) const
|
|
56
|
+
invoke_result_t<_Pred&, invoke_result_t<_Proj&, _T1>, invoke_result_t<_Proj&, _T2>> _CCCL_API inline
|
|
57
|
+
operator()(_T1&& __lhs, _T2&& __rhs) const
|
|
66
58
|
{
|
|
67
|
-
return ::cuda::std::
|
|
68
|
-
|
|
69
|
-
|
|
59
|
+
return ::cuda::std::invoke(__pred,
|
|
60
|
+
::cuda::std::invoke(__proj, ::cuda::std::forward<_T1>(__lhs)),
|
|
61
|
+
::cuda::std::invoke(__proj, ::cuda::std::forward<_T2>(__rhs)));
|
|
70
62
|
}
|
|
71
63
|
};
|
|
72
64
|
|
|
@@ -44,7 +44,7 @@ _CCCL_API constexpr _Iter __min_element(_Iter __first, _Sent __last, _Comp __com
|
|
|
44
44
|
_Iter __i = __first;
|
|
45
45
|
while (++__i != __last)
|
|
46
46
|
{
|
|
47
|
-
if (::cuda::std::
|
|
47
|
+
if (::cuda::std::invoke(__comp, ::cuda::std::invoke(__proj, *__i), ::cuda::std::invoke(__proj, *__first)))
|
|
48
48
|
{
|
|
49
49
|
__first = __i;
|
|
50
50
|
}
|
|
@@ -46,8 +46,7 @@ public:
|
|
|
46
46
|
template <class _Iter>
|
|
47
47
|
_CCCL_API constexpr bool operator()(_Iter& __it1, _Iter& __it2)
|
|
48
48
|
{
|
|
49
|
-
return ::cuda::std::
|
|
50
|
-
__comp_, ::cuda::std::__invoke(__proj_, *__it1), ::cuda::std::__invoke(__proj_, *__it2));
|
|
49
|
+
return ::cuda::std::invoke(__comp_, ::cuda::std::invoke(__proj_, *__it1), ::cuda::std::invoke(__proj_, *__it2));
|
|
51
50
|
}
|
|
52
51
|
};
|
|
53
52
|
|
|
@@ -69,8 +69,8 @@ _CCCL_API constexpr pair<_InputIterator, _RandomAccessIterator> __partial_sort_c
|
|
|
69
69
|
typename iterator_traits<_RandomAccessIterator>::difference_type __len = __r - __result_first;
|
|
70
70
|
for (; __first != __last; ++__first)
|
|
71
71
|
{
|
|
72
|
-
if (::cuda::std::
|
|
73
|
-
__comp, ::cuda::std::
|
|
72
|
+
if (::cuda::std::invoke(
|
|
73
|
+
__comp, ::cuda::std::invoke(__proj1, *__first), ::cuda::std::invoke(__proj2, *__result_first)))
|
|
74
74
|
{
|
|
75
75
|
*__result_first = *__first;
|
|
76
76
|
::cuda::std::__sift_down<_AlgPolicy>(__result_first, __projected_comp, __len, __result_first);
|
|
@@ -45,7 +45,7 @@ __upper_bound(_Iter __first, _Sent __last, const _Tp& __value, _Compare&& __comp
|
|
|
45
45
|
{
|
|
46
46
|
auto __half_len = ::cuda::std::__half_positive(__len);
|
|
47
47
|
auto __mid = _IterOps<_AlgPolicy>::next(__first, __half_len);
|
|
48
|
-
if (::cuda::std::
|
|
48
|
+
if (::cuda::std::invoke(__comp, __value, ::cuda::std::invoke(__proj, *__mid)))
|
|
49
49
|
{
|
|
50
50
|
__len = __half_len;
|
|
51
51
|
}
|
|
@@ -100,7 +100,14 @@ template <typename _Tp>
|
|
|
100
100
|
template <typename _Tp>
|
|
101
101
|
[[nodiscard]] _CCCL_HIDE_FROM_ABI _CCCL_DEVICE int __cccl_countl_zero_impl_device(_Tp __v) noexcept
|
|
102
102
|
{
|
|
103
|
-
|
|
103
|
+
if constexpr (sizeof(_Tp) == sizeof(uint32_t))
|
|
104
|
+
{
|
|
105
|
+
return static_cast<int>(::__clz(static_cast<int>(__v)));
|
|
106
|
+
}
|
|
107
|
+
else
|
|
108
|
+
{
|
|
109
|
+
return static_cast<int>(::__clzll(static_cast<long long>(__v)));
|
|
110
|
+
}
|
|
104
111
|
}
|
|
105
112
|
#endif // _CCCL_CUDA_COMPILATION()
|
|
106
113
|
|
|
@@ -114,11 +114,11 @@ template <typename _Tp>
|
|
|
114
114
|
{
|
|
115
115
|
if constexpr (sizeof(_Tp) == sizeof(uint32_t))
|
|
116
116
|
{
|
|
117
|
-
return ::__clz(static_cast<int>(::__brev(__v)));
|
|
117
|
+
return static_cast<int>(::__clz(static_cast<int>(::__brev(__v))));
|
|
118
118
|
}
|
|
119
119
|
else
|
|
120
120
|
{
|
|
121
|
-
return ::__clzll(static_cast<long long>(::__brevll(__v)));
|
|
121
|
+
return static_cast<int>(::__clzll(static_cast<long long>(::__brevll(__v))));
|
|
122
122
|
}
|
|
123
123
|
}
|
|
124
124
|
#endif // _CCCL_CUDA_COMPILATION()
|
|
@@ -275,10 +275,10 @@ _CCCL_API constexpr __bit_iterator<_Cp, false> __copy_aligned(
|
|
|
275
275
|
// do first word
|
|
276
276
|
if (__first.__ctz_ != 0)
|
|
277
277
|
{
|
|
278
|
-
unsigned
|
|
279
|
-
difference_type __dn = ::cuda::std::min(static_cast<difference_type>(
|
|
278
|
+
unsigned __clz_f = __bits_per_word - __first.__ctz_;
|
|
279
|
+
difference_type __dn = ::cuda::std::min(static_cast<difference_type>(__clz_f), __n);
|
|
280
280
|
__n -= __dn;
|
|
281
|
-
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (
|
|
281
|
+
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
|
|
282
282
|
__storage_type __b = *__first.__seg_ & __m;
|
|
283
283
|
*__result.__seg_ &= ~__m;
|
|
284
284
|
*__result.__seg_ |= __b;
|
|
@@ -420,8 +420,8 @@ _CCCL_API constexpr __bit_iterator<_Cp, false> __copy_backward_aligned(
|
|
|
420
420
|
{
|
|
421
421
|
difference_type __dn = ::cuda::std::min(static_cast<difference_type>(__last.__ctz_), __n);
|
|
422
422
|
__n -= __dn;
|
|
423
|
-
unsigned
|
|
424
|
-
__storage_type __m = (~__storage_type(0) << (__last.__ctz_ - __dn)) & (~__storage_type(0) >>
|
|
423
|
+
unsigned __clz_f = __bits_per_word - __last.__ctz_;
|
|
424
|
+
__storage_type __m = (~__storage_type(0) << (__last.__ctz_ - __dn)) & (~__storage_type(0) >> __clz_f);
|
|
425
425
|
__storage_type __b = *__last.__seg_ & __m;
|
|
426
426
|
*__result.__seg_ &= ~__m;
|
|
427
427
|
*__result.__seg_ |= __b;
|
|
@@ -635,10 +635,10 @@ _CCCL_API inline __bit_iterator<_Cr, false> __swap_ranges_aligned(
|
|
|
635
635
|
// do first word
|
|
636
636
|
if (__first.__ctz_ != 0)
|
|
637
637
|
{
|
|
638
|
-
unsigned
|
|
639
|
-
difference_type __dn = ::cuda::std::min(static_cast<difference_type>(
|
|
638
|
+
unsigned __clz_f = __bits_per_word - __first.__ctz_;
|
|
639
|
+
difference_type __dn = ::cuda::std::min(static_cast<difference_type>(__clz_f), __n);
|
|
640
640
|
__n -= __dn;
|
|
641
|
-
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (
|
|
641
|
+
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
|
|
642
642
|
__storage_type __b1 = *__first.__seg_ & __m;
|
|
643
643
|
*__first.__seg_ &= ~__m;
|
|
644
644
|
__storage_type __b2 = *__result.__seg_ & __m;
|
|
@@ -988,10 +988,10 @@ _CCCL_API constexpr bool __equal_aligned(
|
|
|
988
988
|
// do first word
|
|
989
989
|
if (__first1.__ctz_ != 0)
|
|
990
990
|
{
|
|
991
|
-
unsigned
|
|
992
|
-
difference_type __dn = ::cuda::std::min(static_cast<difference_type>(
|
|
991
|
+
unsigned __clz_f = __bits_per_word - __first1.__ctz_;
|
|
992
|
+
difference_type __dn = ::cuda::std::min(static_cast<difference_type>(__clz_f), __n);
|
|
993
993
|
__n -= __dn;
|
|
994
|
-
__storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (
|
|
994
|
+
__storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
|
|
995
995
|
if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m))
|
|
996
996
|
{
|
|
997
997
|
return false;
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
|
2
|
+
//
|
|
3
|
+
// Part of libcu++, the C++ Standard Library for your entire system,
|
|
4
|
+
// under the Apache License v2.0 with LLVM Exceptions.
|
|
5
|
+
// See https://llvm.org/LICENSE.txt for license information.
|
|
6
|
+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
7
|
+
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
|
|
8
|
+
//
|
|
9
|
+
//===----------------------------------------------------------------------===//
|
|
10
|
+
|
|
11
|
+
#ifndef _CUDA_STD__CCCL_ALGORITHM_WRAPPER_H
|
|
12
|
+
#define _CUDA_STD__CCCL_ALGORITHM_WRAPPER_H
|
|
13
|
+
|
|
14
|
+
#include <cuda/std/detail/__config>
|
|
15
|
+
|
|
16
|
+
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
|
|
17
|
+
# pragma GCC system_header
|
|
18
|
+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
|
|
19
|
+
# pragma clang system_header
|
|
20
|
+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
|
|
21
|
+
# pragma system_header
|
|
22
|
+
#endif // no system header
|
|
23
|
+
|
|
24
|
+
// When nvc++ uses CCCL components as part of its implementation of
|
|
25
|
+
// Standard C++ algorithms, a cycle of included files may result when CCCL code
|
|
26
|
+
// tries to use a standard algorithm. The THRUST_INCLUDING_ALGORITHMS_HEADER macro
|
|
27
|
+
// is defined only when CCCL is including an algorithms-related header, giving
|
|
28
|
+
// the compiler a chance to detect and break the cycle of includes.
|
|
29
|
+
|
|
30
|
+
#if !_CCCL_COMPILER(NVRTC)
|
|
31
|
+
# define THRUST_INCLUDING_ALGORITHMS_HEADER
|
|
32
|
+
# include <algorithm>
|
|
33
|
+
# undef THRUST_INCLUDING_ALGORITHMS_HEADER
|
|
34
|
+
#endif // !_CCCL_COMPILER(NVRTC)
|
|
35
|
+
|
|
36
|
+
#endif // _CUDA_STD__CCCL_ALGORITHM_WRAPPER_H
|
|
@@ -25,6 +25,7 @@
|
|
|
25
25
|
|
|
26
26
|
#include <cuda/std/__cccl/attributes.h>
|
|
27
27
|
#include <cuda/std/__cccl/extended_data_types.h>
|
|
28
|
+
#include <cuda/std/__cccl/host_std_lib.h>
|
|
28
29
|
|
|
29
30
|
//! This file consolidates all compiler builtin detection for CCCL.
|
|
30
31
|
//!
|
|
@@ -607,55 +608,51 @@
|
|
|
607
608
|
# define _CCCL_BUILTIN_STRLEN(...) __builtin_strlen(__VA_ARGS__)
|
|
608
609
|
#endif
|
|
609
610
|
|
|
610
|
-
//
|
|
611
|
-
|
|
612
|
-
//
|
|
613
|
-
#
|
|
614
|
-
|
|
615
|
-
#
|
|
616
|
-
# include <
|
|
617
|
-
#
|
|
618
|
-
|
|
619
|
-
//
|
|
620
|
-
#
|
|
621
|
-
#
|
|
622
|
-
#
|
|
623
|
-
#
|
|
624
|
-
# include <__utility/
|
|
625
|
-
#
|
|
626
|
-
|
|
627
|
-
#
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
#
|
|
631
|
-
//
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
#
|
|
635
|
-
|
|
636
|
-
//
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
#
|
|
640
|
-
|
|
641
|
-
//
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
#
|
|
645
|
-
|
|
646
|
-
//
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
//
|
|
652
|
-
//
|
|
653
|
-
#
|
|
654
|
-
|
|
655
|
-
# define _CCCL_HAS_BUILTIN_STD_FORWARD_LIKE() 1
|
|
656
|
-
# endif
|
|
657
|
-
# endif // defined(_GLIBCXX_VERSION) || defined(_LIBCPP_VERSION) || defined(_MSVC_STL_VERSION)
|
|
658
|
-
#endif // defined(__cplusplus)
|
|
611
|
+
// todo: re-enable std builtins
|
|
612
|
+
|
|
613
|
+
// // Some compilers provide std::move/std::forward/etc as builtins
|
|
614
|
+
// #if defined(__cplusplus)
|
|
615
|
+
// // Bring in the bits of the STL we need
|
|
616
|
+
// # if _CCCL_HOST_STD_LIB(LIBSTDCXX)
|
|
617
|
+
// # include <bits/move.h> // for move, forward, forward_like, and addressof
|
|
618
|
+
// # elif _CCCL_HOST_STD_LIB(LIBCXX)
|
|
619
|
+
// # include <__memory/addressof.h>
|
|
620
|
+
// # include <__utility/as_const.h>
|
|
621
|
+
// # include <__utility/forward.h>
|
|
622
|
+
// # if __cpp_lib_forward_like >= 202217L
|
|
623
|
+
// # include <__utility/forward_like.h>
|
|
624
|
+
// # endif // __cpp_lib_forward_like >= 202217L
|
|
625
|
+
// # include <__utility/move.h>
|
|
626
|
+
// # endif
|
|
627
|
+
|
|
628
|
+
// # if _CCCL_HOST_STD_LIB(LIBSTDCXX) || _CCCL_HOST_STD_LIB(LIBCXX)
|
|
629
|
+
// // std::move builtin
|
|
630
|
+
// # if _CCCL_COMPILER(CLANG, >=, 15) || _CCCL_COMPILER(GCC, >=, 15)
|
|
631
|
+
// # define _CCCL_HAS_BUILTIN_STD_MOVE() 1
|
|
632
|
+
// # endif
|
|
633
|
+
|
|
634
|
+
// // std::forward builtin
|
|
635
|
+
// # if _CCCL_COMPILER(CLANG, >=, 15) || _CCCL_COMPILER(GCC, >=, 15)
|
|
636
|
+
// # define _CCCL_HAS_BUILTIN_STD_FORWARD() 1
|
|
637
|
+
// # endif
|
|
638
|
+
|
|
639
|
+
// // std::addressof builtin
|
|
640
|
+
// # if _CCCL_COMPILER(CLANG, >=, 15) || _CCCL_COMPILER(GCC, >=, 15)
|
|
641
|
+
// # define _CCCL_HAS_BUILTIN_STD_ADDRESSOF() 1
|
|
642
|
+
// # endif
|
|
643
|
+
|
|
644
|
+
// // std::as_const builtin
|
|
645
|
+
// # if _CCCL_COMPILER(CLANG, >=, 15)
|
|
646
|
+
// # define _CCCL_HAS_BUILTIN_STD_AS_CONST() 1
|
|
647
|
+
// # endif
|
|
648
|
+
|
|
649
|
+
// // std::forward_like builtin
|
|
650
|
+
// // Leaving out MSVC for now because it is hard to forward-declare std::forward_like.
|
|
651
|
+
// # if (_CCCL_COMPILER(CLANG, >=, 17) || _CCCL_COMPILER(GCC, >=, 15)) && __cpp_lib_forward_like >= 202217L
|
|
652
|
+
// # define _CCCL_HAS_BUILTIN_STD_FORWARD_LIKE() 1
|
|
653
|
+
// # endif
|
|
654
|
+
// # endif // _CCCL_HOST_STD_LIB(LIBSTDCXX) || _CCCL_HOST_STD_LIB(LIBCXX)
|
|
655
|
+
// #endif // defined(__cplusplus)
|
|
659
656
|
|
|
660
657
|
#ifndef _CCCL_HAS_BUILTIN_STD_MOVE
|
|
661
658
|
# define _CCCL_HAS_BUILTIN_STD_MOVE() 0
|
|
@@ -65,4 +65,10 @@
|
|
|
65
65
|
# endif // _CCCL_CUDA_COMPILER(NVCC)
|
|
66
66
|
#endif // !_CCCL_EXEC_CHECK_DISABLE
|
|
67
67
|
|
|
68
|
+
#if _CCCL_CUDA_COMPILER(NVHPC)
|
|
69
|
+
# define _CCCL_TARGET_CONSTEXPR
|
|
70
|
+
#else // ^^^ _CCCL_CUDA_COMPILER(NVHPC) ^^^ / vvv !_CCCL_CUDA_COMPILER(NVHPC) vvv
|
|
71
|
+
# define _CCCL_TARGET_CONSTEXPR constexpr
|
|
72
|
+
#endif // ^^^ !_CCCL_CUDA_COMPILER(NVHPC) ^^^
|
|
73
|
+
|
|
68
74
|
#endif // __CCCL_EXECUTION_SPACE_H
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
|
2
|
+
//
|
|
3
|
+
// Part of libcu++, the C++ Standard Library for your entire system,
|
|
4
|
+
// under the Apache License v2.0 with LLVM Exceptions.
|
|
5
|
+
// See https://llvm.org/LICENSE.txt for license information.
|
|
6
|
+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
7
|
+
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
|
|
8
|
+
//
|
|
9
|
+
//===----------------------------------------------------------------------===//
|
|
10
|
+
|
|
11
|
+
#ifndef __CCCL_HOST_STD_LIB_H
|
|
12
|
+
#define __CCCL_HOST_STD_LIB_H
|
|
13
|
+
|
|
14
|
+
#include <cuda/std/__cccl/compiler.h>
|
|
15
|
+
#include <cuda/std/__cccl/preprocessor.h>
|
|
16
|
+
#include <cuda/std/__cccl/system_header.h>
|
|
17
|
+
|
|
18
|
+
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
|
|
19
|
+
# pragma GCC system_header
|
|
20
|
+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
|
|
21
|
+
# pragma clang system_header
|
|
22
|
+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
|
|
23
|
+
# pragma system_header
|
|
24
|
+
#endif // no system header
|
|
25
|
+
|
|
26
|
+
#define _CCCL_HOST_STD_LIB_LIBSTDCXX() 0
|
|
27
|
+
#define _CCCL_HOST_STD_LIB_LIBCXX() 0
|
|
28
|
+
#define _CCCL_HOST_STD_LIB_STL() 0
|
|
29
|
+
|
|
30
|
+
// include a minimal header
|
|
31
|
+
#if _CCCL_HAS_INCLUDE(<version>)
|
|
32
|
+
# include <version>
|
|
33
|
+
#elif _CCCL_HAS_INCLUDE(<ciso646>)
|
|
34
|
+
# include <ciso646>
|
|
35
|
+
#endif // ^^^ _CCCL_HAS_INCLUDE(<ciso646>) ^^^
|
|
36
|
+
|
|
37
|
+
#if defined(_MSVC_STL_VERSION)
|
|
38
|
+
# undef _CCCL_HOST_STD_LIB_STL
|
|
39
|
+
# define _CCCL_HOST_STD_LIB_STL() 1
|
|
40
|
+
#elif defined(__GLIBCXX__)
|
|
41
|
+
# undef _CCCL_HOST_STD_LIB_LIBSTDCXX
|
|
42
|
+
# define _CCCL_HOST_STD_LIB_LIBSTDCXX() 1
|
|
43
|
+
#elif defined(_LIBCPP_VERSION)
|
|
44
|
+
# undef _CCCL_HOST_STD_LIB_LIBCXX
|
|
45
|
+
# define _CCCL_HOST_STD_LIB_LIBCXX() 1
|
|
46
|
+
#endif // ^^^ _LIBCPP_VERSION ^^^
|
|
47
|
+
|
|
48
|
+
#define _CCCL_HOST_STD_LIB(_X) _CCCL_HOST_STD_LIB_##_X()
|
|
49
|
+
#define _CCCL_HAS_HOST_STD_LIB() \
|
|
50
|
+
(_CCCL_HOST_STD_LIB_LIBSTDCXX() || _CCCL_HOST_STD_LIB_LIBCXX() || _CCCL_HOST_STD_LIB_STL())
|
|
51
|
+
|
|
52
|
+
#endif // __CCCL_HOST_STD_LIB_H
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
|
2
|
+
//
|
|
3
|
+
// Part of libcu++, the C++ Standard Library for your entire system,
|
|
4
|
+
// under the Apache License v2.0 with LLVM Exceptions.
|
|
5
|
+
// See https://llvm.org/LICENSE.txt for license information.
|
|
6
|
+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
7
|
+
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
|
|
8
|
+
//
|
|
9
|
+
//===----------------------------------------------------------------------===//
|
|
10
|
+
|
|
11
|
+
#ifndef _CUDA_STD__CCCL_MEMORY_WRAPPER_H
|
|
12
|
+
#define _CUDA_STD__CCCL_MEMORY_WRAPPER_H
|
|
13
|
+
|
|
14
|
+
#include <cuda/std/detail/__config>
|
|
15
|
+
|
|
16
|
+
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
|
|
17
|
+
# pragma GCC system_header
|
|
18
|
+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
|
|
19
|
+
# pragma clang system_header
|
|
20
|
+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
|
|
21
|
+
# pragma system_header
|
|
22
|
+
#endif // no system header
|
|
23
|
+
|
|
24
|
+
// When nvc++ uses CCCL components as part of its implementation of
|
|
25
|
+
// Standard C++ algorithms, a cycle of included files may result when CCCL code
|
|
26
|
+
// tries to use a standard algorithm. The THRUST_INCLUDING_ALGORITHMS_HEADER macro
|
|
27
|
+
// is defined only when CCCL is including an algorithms-related header, giving
|
|
28
|
+
// the compiler a chance to detect and break the cycle of includes.
|
|
29
|
+
|
|
30
|
+
#if !_CCCL_COMPILER(NVRTC)
|
|
31
|
+
# define THRUST_INCLUDING_ALGORITHMS_HEADER
|
|
32
|
+
# include <memory>
|
|
33
|
+
# undef THRUST_INCLUDING_ALGORITHMS_HEADER
|
|
34
|
+
#endif // !_CCCL_COMPILER(NVRTC)
|
|
35
|
+
|
|
36
|
+
#endif // _CUDA_STD__CCCL_MEMORY_WRAPPER_H
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
|
2
|
+
//
|
|
3
|
+
// Part of libcu++, the C++ Standard Library for your entire system,
|
|
4
|
+
// under the Apache License v2.0 with LLVM Exceptions.
|
|
5
|
+
// See https://llvm.org/LICENSE.txt for license information.
|
|
6
|
+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
7
|
+
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
|
|
8
|
+
//
|
|
9
|
+
//===----------------------------------------------------------------------===//
|
|
10
|
+
|
|
11
|
+
#ifndef _CUDA_STD__CCCL_NUMERIC_WRAPPER_H
|
|
12
|
+
#define _CUDA_STD__CCCL_NUMERIC_WRAPPER_H
|
|
13
|
+
|
|
14
|
+
#include <cuda/std/detail/__config>
|
|
15
|
+
|
|
16
|
+
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
|
|
17
|
+
# pragma GCC system_header
|
|
18
|
+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
|
|
19
|
+
# pragma clang system_header
|
|
20
|
+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
|
|
21
|
+
# pragma system_header
|
|
22
|
+
#endif // no system header
|
|
23
|
+
|
|
24
|
+
// When a compiler uses CCCL components as part of its implementation of
|
|
25
|
+
// Standard C++ algorithms, a cycle of included files may result when CCCL code
|
|
26
|
+
// tries to use a standard algorithm. The THRUST_INCLUDING_ALGORITHMS_HEADER macro
|
|
27
|
+
// is defined only when CCCL is including an algorithms-related header, giving
|
|
28
|
+
// the compiler a chance to detect and break the cycle of includes.
|
|
29
|
+
|
|
30
|
+
#if !_CCCL_COMPILER(NVRTC)
|
|
31
|
+
# define THRUST_INCLUDING_ALGORITHMS_HEADER
|
|
32
|
+
# include <numeric>
|
|
33
|
+
# undef THRUST_INCLUDING_ALGORITHMS_HEADER
|
|
34
|
+
#endif // !_CCCL_COMPILER(NVRTC)
|
|
35
|
+
|
|
36
|
+
#endif // _CUDA_STD__CCCL_NUMERIC_WRAPPER_H
|
|
@@ -43,19 +43,19 @@ template <class _Rep, class _Period = ratio<1>>
|
|
|
43
43
|
class _CCCL_TYPE_VISIBILITY_DEFAULT duration;
|
|
44
44
|
|
|
45
45
|
template <class _Tp>
|
|
46
|
-
inline
|
|
46
|
+
inline constexpr bool __is_duration_v = false;
|
|
47
47
|
|
|
48
48
|
template <class _Rep, class _Period>
|
|
49
|
-
inline
|
|
49
|
+
inline constexpr bool __is_duration_v<duration<_Rep, _Period>> = true;
|
|
50
50
|
|
|
51
51
|
template <class _Rep, class _Period>
|
|
52
|
-
inline
|
|
52
|
+
inline constexpr bool __is_duration_v<const duration<_Rep, _Period>> = true;
|
|
53
53
|
|
|
54
54
|
template <class _Rep, class _Period>
|
|
55
|
-
inline
|
|
55
|
+
inline constexpr bool __is_duration_v<volatile duration<_Rep, _Period>> = true;
|
|
56
56
|
|
|
57
57
|
template <class _Rep, class _Period>
|
|
58
|
-
inline
|
|
58
|
+
inline constexpr bool __is_duration_v<const volatile duration<_Rep, _Period>> = true;
|
|
59
59
|
|
|
60
60
|
} // namespace chrono
|
|
61
61
|
|
|
@@ -190,29 +190,29 @@ class _CCCL_TYPE_VISIBILITY_DEFAULT duration
|
|
|
190
190
|
struct __no_overflow
|
|
191
191
|
{
|
|
192
192
|
private:
|
|
193
|
-
static
|
|
194
|
-
static
|
|
195
|
-
static
|
|
196
|
-
static
|
|
197
|
-
static
|
|
198
|
-
static
|
|
199
|
-
static
|
|
193
|
+
static constexpr intmax_t __gcd_n1_n2 = __static_gcd<_R1::num, _R2::num>::value;
|
|
194
|
+
static constexpr intmax_t __gcd_d1_d2 = __static_gcd<_R1::den, _R2::den>::value;
|
|
195
|
+
static constexpr intmax_t __n1 = _R1::num / __gcd_n1_n2;
|
|
196
|
+
static constexpr intmax_t __d1 = _R1::den / __gcd_d1_d2;
|
|
197
|
+
static constexpr intmax_t __n2 = _R2::num / __gcd_n1_n2;
|
|
198
|
+
static constexpr intmax_t __d2 = _R2::den / __gcd_d1_d2;
|
|
199
|
+
static constexpr intmax_t max = -((intmax_t(1) << (sizeof(intmax_t) * CHAR_BIT - 1)) + 1);
|
|
200
200
|
|
|
201
201
|
template <intmax_t _Xp, intmax_t _Yp, bool __overflow>
|
|
202
202
|
struct __mul // __overflow == false
|
|
203
203
|
{
|
|
204
|
-
static
|
|
204
|
+
static constexpr intmax_t value = _Xp * _Yp;
|
|
205
205
|
};
|
|
206
206
|
|
|
207
207
|
template <intmax_t _Xp, intmax_t _Yp>
|
|
208
208
|
struct __mul<_Xp, _Yp, true>
|
|
209
209
|
{
|
|
210
|
-
static
|
|
210
|
+
static constexpr intmax_t value = 1;
|
|
211
211
|
};
|
|
212
212
|
|
|
213
213
|
public:
|
|
214
|
-
static
|
|
215
|
-
using type
|
|
214
|
+
static constexpr bool value = (__n1 <= max / __d2) && (__n2 <= max / __d1);
|
|
215
|
+
using type = ratio<__mul<__n1, __d2, !value>::value, __mul<__n2, __d1, !value>::value>;
|
|
216
216
|
};
|
|
217
217
|
|
|
218
218
|
public:
|
|
@@ -40,11 +40,11 @@ namespace chrono
|
|
|
40
40
|
class _CCCL_TYPE_VISIBILITY_DEFAULT steady_clock
|
|
41
41
|
{
|
|
42
42
|
public:
|
|
43
|
-
using duration
|
|
44
|
-
using rep
|
|
45
|
-
using period
|
|
46
|
-
using time_point
|
|
47
|
-
static constexpr
|
|
43
|
+
using duration = nanoseconds;
|
|
44
|
+
using rep = duration::rep;
|
|
45
|
+
using period = duration::period;
|
|
46
|
+
using time_point = ::cuda::std::chrono::time_point<steady_clock, duration>;
|
|
47
|
+
static constexpr bool is_steady = true;
|
|
48
48
|
|
|
49
49
|
[[nodiscard]] _CCCL_API static time_point now() noexcept;
|
|
50
50
|
};
|