cuda-cccl 0.3.1__cp311-cp311-manylinux_2_24_aarch64.whl → 0.3.2__cp311-cp311-manylinux_2_24_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cuda-cccl might be problematic. Click here for more details.
- cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +354 -572
- cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +6 -8
- cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +24 -14
- cuda/cccl/headers/include/cub/block/block_exchange.cuh +5 -0
- cuda/cccl/headers/include/cub/block/block_histogram.cuh +4 -0
- cuda/cccl/headers/include/cub/block/block_load.cuh +4 -0
- cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +1 -0
- cuda/cccl/headers/include/cub/block/block_reduce.cuh +1 -0
- cuda/cccl/headers/include/cub/block/block_scan.cuh +12 -2
- cuda/cccl/headers/include/cub/block/block_store.cuh +3 -2
- cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +34 -30
- cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +1 -1
- cuda/cccl/headers/include/cub/device/device_for.cuh +118 -40
- cuda/cccl/headers/include/cub/device/device_reduce.cuh +6 -7
- cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +12 -13
- cuda/cccl/headers/include/cub/device/device_transform.cuh +122 -91
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +2 -3
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +4 -3
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +1 -1
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +4 -5
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +0 -1
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_topk.cuh +3 -5
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +13 -5
- cuda/cccl/headers/include/cub/device/dispatch/kernels/for_each.cuh +72 -37
- cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +22 -27
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +61 -70
- cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +24 -17
- cuda/cccl/headers/include/cub/warp/warp_load.cuh +6 -6
- cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +7 -2
- cuda/cccl/headers/include/cub/warp/warp_scan.cuh +7 -3
- cuda/cccl/headers/include/cub/warp/warp_store.cuh +1 -0
- cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +19 -0
- cuda/cccl/headers/include/cuda/__cccl_config +1 -0
- cuda/cccl/headers/include/cuda/__cmath/fast_modulo_division.h +3 -74
- cuda/cccl/headers/include/cuda/__cmath/mul_hi.h +146 -0
- cuda/cccl/headers/include/cuda/__complex/get_real_imag.h +0 -4
- cuda/cccl/headers/include/cuda/__device/arch_id.h +176 -0
- cuda/cccl/headers/include/cuda/__device/arch_traits.h +239 -317
- cuda/cccl/headers/include/cuda/__device/attributes.h +4 -3
- cuda/cccl/headers/include/cuda/__device/compute_capability.h +171 -0
- cuda/cccl/headers/include/cuda/__device/device_ref.h +0 -10
- cuda/cccl/headers/include/cuda/__device/physical_device.h +1 -26
- cuda/cccl/headers/include/cuda/__event/event.h +26 -26
- cuda/cccl/headers/include/cuda/__event/event_ref.h +5 -5
- cuda/cccl/headers/include/cuda/__event/timed_event.h +9 -7
- cuda/cccl/headers/include/cuda/__fwd/devices.h +4 -4
- cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +46 -31
- cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +79 -47
- cuda/cccl/headers/include/cuda/__iterator/tabulate_output_iterator.h +59 -36
- cuda/cccl/headers/include/cuda/__iterator/transform_input_output_iterator.h +79 -49
- cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +74 -48
- cuda/cccl/headers/include/cuda/__iterator/transform_output_iterator.h +80 -55
- cuda/cccl/headers/include/cuda/__iterator/zip_common.h +2 -12
- cuda/cccl/headers/include/cuda/__iterator/zip_iterator.h +15 -19
- cuda/cccl/headers/include/cuda/__iterator/zip_transform_iterator.h +59 -60
- cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +127 -60
- cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +178 -3
- cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +38 -8
- cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +67 -1
- cuda/cccl/headers/include/cuda/__memory/ptr_in_range.h +93 -0
- cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +4 -4
- cuda/cccl/headers/include/cuda/__memory_resource/properties.h +44 -0
- cuda/cccl/headers/include/cuda/__memory_resource/resource.h +1 -1
- cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +4 -6
- cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2 -1
- cuda/cccl/headers/include/cuda/__runtime/ensure_current_context.h +5 -4
- cuda/cccl/headers/include/cuda/__stream/stream.h +8 -8
- cuda/cccl/headers/include/cuda/__stream/stream_ref.h +17 -16
- cuda/cccl/headers/include/cuda/__utility/in_range.h +65 -0
- cuda/cccl/headers/include/cuda/cmath +1 -0
- cuda/cccl/headers/include/cuda/devices +3 -0
- cuda/cccl/headers/include/cuda/memory +1 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +2 -2
- cuda/cccl/headers/include/cuda/std/__algorithm/find.h +1 -1
- cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +2 -4
- cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +1 -1
- cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +7 -15
- cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +1 -1
- cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +1 -2
- cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +2 -2
- cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +1 -1
- cuda/cccl/headers/include/cuda/std/__cccl/algorithm_wrapper.h +36 -0
- cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +46 -49
- cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +6 -0
- cuda/cccl/headers/include/cuda/std/__cccl/host_std_lib.h +52 -0
- cuda/cccl/headers/include/cuda/std/__cccl/memory_wrapper.h +36 -0
- cuda/cccl/headers/include/cuda/std/__cccl/numeric_wrapper.h +36 -0
- cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +3 -2
- cuda/cccl/headers/include/cuda/std/__complex/complex.h +3 -2
- cuda/cccl/headers/include/cuda/std/__complex/literals.h +14 -34
- cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +2 -1
- cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +4 -3
- cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +2 -2
- cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +3 -2
- cuda/cccl/headers/include/cuda/std/__functional/bind.h +10 -13
- cuda/cccl/headers/include/cuda/std/__functional/function.h +5 -8
- cuda/cccl/headers/include/cuda/std/__functional/invoke.h +71 -335
- cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +1 -2
- cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +3 -3
- cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +0 -6
- cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +13 -0
- cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +13 -0
- cuda/cccl/headers/include/cuda/std/__fwd/complex.h +13 -4
- cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +23 -0
- cuda/cccl/headers/include/cuda/std/__fwd/pair.h +13 -0
- cuda/cccl/headers/include/cuda/std/__fwd/string.h +22 -0
- cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +14 -0
- cuda/cccl/headers/include/cuda/std/__internal/features.h +0 -5
- cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +21 -0
- cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +5 -5
- cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +7 -1
- cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +53 -39
- cuda/cccl/headers/include/cuda/std/__memory/allocator.h +3 -3
- cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +1 -3
- cuda/cccl/headers/include/cuda/std/__optional/optional_base.h +1 -0
- cuda/cccl/headers/include/cuda/std/__ranges/compressed_movable_box.h +892 -0
- cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +2 -2
- cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +7 -5
- cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +1 -1
- cuda/cccl/headers/include/cuda/std/__utility/pair.h +0 -5
- cuda/cccl/headers/include/cuda/std/bitset +1 -1
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +15 -12
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +11 -9
- cuda/cccl/headers/include/cuda/std/inplace_vector +4 -4
- cuda/cccl/headers/include/cuda/std/numbers +5 -0
- cuda/cccl/headers/include/cuda/std/string_view +146 -11
- cuda/cccl/headers/include/cuda/stream_ref +5 -0
- cuda/cccl/headers/include/cuda/utility +1 -0
- cuda/cccl/headers/include/nv/target +7 -2
- cuda/cccl/headers/include/thrust/allocate_unique.h +1 -1
- cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +309 -33
- cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +151 -4
- cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +60 -3
- cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +45 -3
- cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +31 -6
- cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +29 -16
- cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +41 -4
- cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +42 -4
- cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +3 -3
- cuda/cccl/headers/include/thrust/detail/internal_functional.h +1 -1
- cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +1 -1
- cuda/cccl/headers/include/thrust/detail/temporary_array.h +1 -1
- cuda/cccl/headers/include/thrust/detail/type_traits.h +1 -1
- cuda/cccl/headers/include/thrust/device_delete.h +18 -3
- cuda/cccl/headers/include/thrust/device_free.h +16 -3
- cuda/cccl/headers/include/thrust/device_new.h +29 -8
- cuda/cccl/headers/include/thrust/host_vector.h +1 -1
- cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +5 -2
- cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +1 -1
- cuda/cccl/headers/include/thrust/mr/pool.h +1 -1
- cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +13 -115
- cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +8 -2
- cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +7 -7
- cuda/compute/__init__.py +2 -0
- cuda/compute/_bindings.pyi +43 -1
- cuda/compute/_bindings_impl.pyx +156 -7
- cuda/compute/algorithms/_scan.py +108 -36
- cuda/compute/algorithms/_transform.py +32 -11
- cuda/compute/cu12/_bindings_impl.cpython-311-aarch64-linux-gnu.so +0 -0
- cuda/compute/cu12/cccl/libcccl.c.parallel.so +0 -0
- cuda/compute/cu13/_bindings_impl.cpython-311-aarch64-linux-gnu.so +0 -0
- cuda/compute/cu13/cccl/libcccl.c.parallel.so +0 -0
- cuda/compute/iterators/__init__.py +2 -0
- cuda/compute/iterators/_factories.py +28 -0
- cuda/compute/iterators/_iterators.py +206 -1
- cuda/compute/numba_utils.py +2 -2
- cuda/compute/typing.py +2 -0
- {cuda_cccl-0.3.1.dist-info → cuda_cccl-0.3.2.dist-info}/METADATA +1 -1
- {cuda_cccl-0.3.1.dist-info → cuda_cccl-0.3.2.dist-info}/RECORD +171 -175
- cuda/cccl/headers/include/thrust/detail/algorithm_wrapper.h +0 -37
- cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.inl +0 -371
- cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.inl +0 -242
- cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.inl +0 -137
- cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.inl +0 -99
- cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.inl +0 -68
- cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.inl +0 -86
- cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.inl +0 -79
- cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.inl +0 -98
- cuda/cccl/headers/include/thrust/detail/device_delete.inl +0 -52
- cuda/cccl/headers/include/thrust/detail/device_free.inl +0 -47
- cuda/cccl/headers/include/thrust/detail/device_new.inl +0 -61
- cuda/cccl/headers/include/thrust/detail/memory_wrapper.h +0 -40
- cuda/cccl/headers/include/thrust/detail/numeric_wrapper.h +0 -37
- {cuda_cccl-0.3.1.dist-info → cuda_cccl-0.3.2.dist-info}/WHEEL +0 -0
- {cuda_cccl-0.3.1.dist-info → cuda_cccl-0.3.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -40,13 +40,11 @@ _CCCL_API constexpr bool __includes(
|
|
|
40
40
|
for (; __first2 != __last2; ++__first1)
|
|
41
41
|
{
|
|
42
42
|
if (__first1 == __last1
|
|
43
|
-
|| ::cuda::std::
|
|
44
|
-
__comp, ::cuda::std::__invoke(__proj2, *__first2), ::cuda::std::__invoke(__proj1, *__first1)))
|
|
43
|
+
|| ::cuda::std::invoke(__comp, ::cuda::std::invoke(__proj2, *__first2), ::cuda::std::invoke(__proj1, *__first1)))
|
|
45
44
|
{
|
|
46
45
|
return false;
|
|
47
46
|
}
|
|
48
|
-
if (!::cuda::std::
|
|
49
|
-
__comp, ::cuda::std::__invoke(__proj1, *__first1), ::cuda::std::__invoke(__proj2, *__first2)))
|
|
47
|
+
if (!::cuda::std::invoke(__comp, ::cuda::std::invoke(__proj1, *__first1), ::cuda::std::invoke(__proj2, *__first2)))
|
|
50
48
|
{
|
|
51
49
|
++__first2;
|
|
52
50
|
}
|
|
@@ -46,7 +46,7 @@ _CCCL_API constexpr _Iter __lower_bound(_Iter __first, _Sent __last, const _Type
|
|
|
46
46
|
auto __l2 = ::cuda::std::__half_positive(__len);
|
|
47
47
|
_Iter __m = __first;
|
|
48
48
|
_IterOps<_AlgPolicy>::advance(__m, __l2);
|
|
49
|
-
if (::cuda::std::
|
|
49
|
+
if (::cuda::std::invoke(__comp, ::cuda::std::invoke(__proj, *__m), __value))
|
|
50
50
|
{
|
|
51
51
|
__first = ++__m;
|
|
52
52
|
__len -= __l2 + 1;
|
|
@@ -47,26 +47,18 @@ struct _ProjectedPred
|
|
|
47
47
|
{}
|
|
48
48
|
|
|
49
49
|
template <class _Tp>
|
|
50
|
-
|
|
51
|
-
_Pred&,
|
|
52
|
-
decltype(::cuda::std::__invoke(::cuda::std::declval<_Proj&>(), ::cuda::std::declval<_Tp>()))>::type constexpr
|
|
53
|
-
_CCCL_API inline
|
|
54
|
-
operator()(_Tp&& __v) const
|
|
50
|
+
invoke_result_t<_Pred&, invoke_result_t<_Proj&, _Tp>> constexpr _CCCL_API inline operator()(_Tp&& __v) const
|
|
55
51
|
{
|
|
56
|
-
return ::cuda::std::
|
|
52
|
+
return ::cuda::std::invoke(__pred, ::cuda::std::invoke(__proj, ::cuda::std::forward<_Tp>(__v)));
|
|
57
53
|
}
|
|
58
54
|
|
|
59
55
|
template <class _T1, class _T2>
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
decltype(::cuda::std::__invoke(::cuda::std::declval<_Proj&>(), ::cuda::std::declval<_T1>())),
|
|
63
|
-
decltype(::cuda::std::__invoke(::cuda::std::declval<_Proj&>(), ::cuda::std::declval<_T2>()))>::type constexpr
|
|
64
|
-
_CCCL_API inline
|
|
65
|
-
operator()(_T1&& __lhs, _T2&& __rhs) const
|
|
56
|
+
invoke_result_t<_Pred&, invoke_result_t<_Proj&, _T1>, invoke_result_t<_Proj&, _T2>> _CCCL_API inline
|
|
57
|
+
operator()(_T1&& __lhs, _T2&& __rhs) const
|
|
66
58
|
{
|
|
67
|
-
return ::cuda::std::
|
|
68
|
-
|
|
69
|
-
|
|
59
|
+
return ::cuda::std::invoke(__pred,
|
|
60
|
+
::cuda::std::invoke(__proj, ::cuda::std::forward<_T1>(__lhs)),
|
|
61
|
+
::cuda::std::invoke(__proj, ::cuda::std::forward<_T2>(__rhs)));
|
|
70
62
|
}
|
|
71
63
|
};
|
|
72
64
|
|
|
@@ -44,7 +44,7 @@ _CCCL_API constexpr _Iter __min_element(_Iter __first, _Sent __last, _Comp __com
|
|
|
44
44
|
_Iter __i = __first;
|
|
45
45
|
while (++__i != __last)
|
|
46
46
|
{
|
|
47
|
-
if (::cuda::std::
|
|
47
|
+
if (::cuda::std::invoke(__comp, ::cuda::std::invoke(__proj, *__i), ::cuda::std::invoke(__proj, *__first)))
|
|
48
48
|
{
|
|
49
49
|
__first = __i;
|
|
50
50
|
}
|
|
@@ -46,8 +46,7 @@ public:
|
|
|
46
46
|
template <class _Iter>
|
|
47
47
|
_CCCL_API constexpr bool operator()(_Iter& __it1, _Iter& __it2)
|
|
48
48
|
{
|
|
49
|
-
return ::cuda::std::
|
|
50
|
-
__comp_, ::cuda::std::__invoke(__proj_, *__it1), ::cuda::std::__invoke(__proj_, *__it2));
|
|
49
|
+
return ::cuda::std::invoke(__comp_, ::cuda::std::invoke(__proj_, *__it1), ::cuda::std::invoke(__proj_, *__it2));
|
|
51
50
|
}
|
|
52
51
|
};
|
|
53
52
|
|
|
@@ -69,8 +69,8 @@ _CCCL_API constexpr pair<_InputIterator, _RandomAccessIterator> __partial_sort_c
|
|
|
69
69
|
typename iterator_traits<_RandomAccessIterator>::difference_type __len = __r - __result_first;
|
|
70
70
|
for (; __first != __last; ++__first)
|
|
71
71
|
{
|
|
72
|
-
if (::cuda::std::
|
|
73
|
-
__comp, ::cuda::std::
|
|
72
|
+
if (::cuda::std::invoke(
|
|
73
|
+
__comp, ::cuda::std::invoke(__proj1, *__first), ::cuda::std::invoke(__proj2, *__result_first)))
|
|
74
74
|
{
|
|
75
75
|
*__result_first = *__first;
|
|
76
76
|
::cuda::std::__sift_down<_AlgPolicy>(__result_first, __projected_comp, __len, __result_first);
|
|
@@ -45,7 +45,7 @@ __upper_bound(_Iter __first, _Sent __last, const _Tp& __value, _Compare&& __comp
|
|
|
45
45
|
{
|
|
46
46
|
auto __half_len = ::cuda::std::__half_positive(__len);
|
|
47
47
|
auto __mid = _IterOps<_AlgPolicy>::next(__first, __half_len);
|
|
48
|
-
if (::cuda::std::
|
|
48
|
+
if (::cuda::std::invoke(__comp, __value, ::cuda::std::invoke(__proj, *__mid)))
|
|
49
49
|
{
|
|
50
50
|
__len = __half_len;
|
|
51
51
|
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
|
2
|
+
//
|
|
3
|
+
// Part of libcu++, the C++ Standard Library for your entire system,
|
|
4
|
+
// under the Apache License v2.0 with LLVM Exceptions.
|
|
5
|
+
// See https://llvm.org/LICENSE.txt for license information.
|
|
6
|
+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
7
|
+
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
|
|
8
|
+
//
|
|
9
|
+
//===----------------------------------------------------------------------===//
|
|
10
|
+
|
|
11
|
+
#ifndef _CUDA_STD__CCCL_ALGORITHM_WRAPPER_H
|
|
12
|
+
#define _CUDA_STD__CCCL_ALGORITHM_WRAPPER_H
|
|
13
|
+
|
|
14
|
+
#include <cuda/std/detail/__config>
|
|
15
|
+
|
|
16
|
+
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
|
|
17
|
+
# pragma GCC system_header
|
|
18
|
+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
|
|
19
|
+
# pragma clang system_header
|
|
20
|
+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
|
|
21
|
+
# pragma system_header
|
|
22
|
+
#endif // no system header
|
|
23
|
+
|
|
24
|
+
// When nvc++ uses CCCL components as part of its implementation of
|
|
25
|
+
// Standard C++ algorithms, a cycle of included files may result when CCCL code
|
|
26
|
+
// tries to use a standard algorithm. The THRUST_INCLUDING_ALGORITHMS_HEADER macro
|
|
27
|
+
// is defined only when CCCL is including an algorithms-related header, giving
|
|
28
|
+
// the compiler a chance to detect and break the cycle of includes.
|
|
29
|
+
|
|
30
|
+
#if !_CCCL_COMPILER(NVRTC)
|
|
31
|
+
# define THRUST_INCLUDING_ALGORITHMS_HEADER
|
|
32
|
+
# include <algorithm>
|
|
33
|
+
# undef THRUST_INCLUDING_ALGORITHMS_HEADER
|
|
34
|
+
#endif // !_CCCL_COMPILER(NVRTC)
|
|
35
|
+
|
|
36
|
+
#endif // _CUDA_STD__CCCL_ALGORITHM_WRAPPER_H
|
|
@@ -25,6 +25,7 @@
|
|
|
25
25
|
|
|
26
26
|
#include <cuda/std/__cccl/attributes.h>
|
|
27
27
|
#include <cuda/std/__cccl/extended_data_types.h>
|
|
28
|
+
#include <cuda/std/__cccl/host_std_lib.h>
|
|
28
29
|
|
|
29
30
|
//! This file consolidates all compiler builtin detection for CCCL.
|
|
30
31
|
//!
|
|
@@ -607,55 +608,51 @@
|
|
|
607
608
|
# define _CCCL_BUILTIN_STRLEN(...) __builtin_strlen(__VA_ARGS__)
|
|
608
609
|
#endif
|
|
609
610
|
|
|
610
|
-
//
|
|
611
|
-
|
|
612
|
-
//
|
|
613
|
-
#
|
|
614
|
-
|
|
615
|
-
#
|
|
616
|
-
# include <
|
|
617
|
-
#
|
|
618
|
-
|
|
619
|
-
//
|
|
620
|
-
#
|
|
621
|
-
#
|
|
622
|
-
#
|
|
623
|
-
#
|
|
624
|
-
# include <__utility/
|
|
625
|
-
#
|
|
626
|
-
|
|
627
|
-
#
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
#
|
|
631
|
-
//
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
#
|
|
635
|
-
|
|
636
|
-
//
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
#
|
|
640
|
-
|
|
641
|
-
//
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
#
|
|
645
|
-
|
|
646
|
-
//
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
//
|
|
652
|
-
//
|
|
653
|
-
#
|
|
654
|
-
|
|
655
|
-
# define _CCCL_HAS_BUILTIN_STD_FORWARD_LIKE() 1
|
|
656
|
-
# endif
|
|
657
|
-
# endif // defined(_GLIBCXX_VERSION) || defined(_LIBCPP_VERSION) || defined(_MSVC_STL_VERSION)
|
|
658
|
-
#endif // defined(__cplusplus)
|
|
611
|
+
// todo: re-enable std builtins
|
|
612
|
+
|
|
613
|
+
// // Some compilers provide std::move/std::forward/etc as builtins
|
|
614
|
+
// #if defined(__cplusplus)
|
|
615
|
+
// // Bring in the bits of the STL we need
|
|
616
|
+
// # if _CCCL_HOST_STD_LIB(LIBSTDCXX)
|
|
617
|
+
// # include <bits/move.h> // for move, forward, forward_like, and addressof
|
|
618
|
+
// # elif _CCCL_HOST_STD_LIB(LIBCXX)
|
|
619
|
+
// # include <__memory/addressof.h>
|
|
620
|
+
// # include <__utility/as_const.h>
|
|
621
|
+
// # include <__utility/forward.h>
|
|
622
|
+
// # if __cpp_lib_forward_like >= 202217L
|
|
623
|
+
// # include <__utility/forward_like.h>
|
|
624
|
+
// # endif // __cpp_lib_forward_like >= 202217L
|
|
625
|
+
// # include <__utility/move.h>
|
|
626
|
+
// # endif
|
|
627
|
+
|
|
628
|
+
// # if _CCCL_HOST_STD_LIB(LIBSTDCXX) || _CCCL_HOST_STD_LIB(LIBCXX)
|
|
629
|
+
// // std::move builtin
|
|
630
|
+
// # if _CCCL_COMPILER(CLANG, >=, 15) || _CCCL_COMPILER(GCC, >=, 15)
|
|
631
|
+
// # define _CCCL_HAS_BUILTIN_STD_MOVE() 1
|
|
632
|
+
// # endif
|
|
633
|
+
|
|
634
|
+
// // std::forward builtin
|
|
635
|
+
// # if _CCCL_COMPILER(CLANG, >=, 15) || _CCCL_COMPILER(GCC, >=, 15)
|
|
636
|
+
// # define _CCCL_HAS_BUILTIN_STD_FORWARD() 1
|
|
637
|
+
// # endif
|
|
638
|
+
|
|
639
|
+
// // std::addressof builtin
|
|
640
|
+
// # if _CCCL_COMPILER(CLANG, >=, 15) || _CCCL_COMPILER(GCC, >=, 15)
|
|
641
|
+
// # define _CCCL_HAS_BUILTIN_STD_ADDRESSOF() 1
|
|
642
|
+
// # endif
|
|
643
|
+
|
|
644
|
+
// // std::as_const builtin
|
|
645
|
+
// # if _CCCL_COMPILER(CLANG, >=, 15)
|
|
646
|
+
// # define _CCCL_HAS_BUILTIN_STD_AS_CONST() 1
|
|
647
|
+
// # endif
|
|
648
|
+
|
|
649
|
+
// // std::forward_like builtin
|
|
650
|
+
// // Leaving out MSVC for now because it is hard to forward-declare std::forward_like.
|
|
651
|
+
// # if (_CCCL_COMPILER(CLANG, >=, 17) || _CCCL_COMPILER(GCC, >=, 15)) && __cpp_lib_forward_like >= 202217L
|
|
652
|
+
// # define _CCCL_HAS_BUILTIN_STD_FORWARD_LIKE() 1
|
|
653
|
+
// # endif
|
|
654
|
+
// # endif // _CCCL_HOST_STD_LIB(LIBSTDCXX) || _CCCL_HOST_STD_LIB(LIBCXX)
|
|
655
|
+
// #endif // defined(__cplusplus)
|
|
659
656
|
|
|
660
657
|
#ifndef _CCCL_HAS_BUILTIN_STD_MOVE
|
|
661
658
|
# define _CCCL_HAS_BUILTIN_STD_MOVE() 0
|
|
@@ -65,4 +65,10 @@
|
|
|
65
65
|
# endif // _CCCL_CUDA_COMPILER(NVCC)
|
|
66
66
|
#endif // !_CCCL_EXEC_CHECK_DISABLE
|
|
67
67
|
|
|
68
|
+
#if _CCCL_CUDA_COMPILER(NVHPC)
|
|
69
|
+
# define _CCCL_TARGET_CONSTEXPR
|
|
70
|
+
#else // ^^^ _CCCL_CUDA_COMPILER(NVHPC) ^^^ / vvv !_CCCL_CUDA_COMPILER(NVHPC) vvv
|
|
71
|
+
# define _CCCL_TARGET_CONSTEXPR constexpr
|
|
72
|
+
#endif // ^^^ !_CCCL_CUDA_COMPILER(NVHPC) ^^^
|
|
73
|
+
|
|
68
74
|
#endif // __CCCL_EXECUTION_SPACE_H
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
|
2
|
+
//
|
|
3
|
+
// Part of libcu++, the C++ Standard Library for your entire system,
|
|
4
|
+
// under the Apache License v2.0 with LLVM Exceptions.
|
|
5
|
+
// See https://llvm.org/LICENSE.txt for license information.
|
|
6
|
+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
7
|
+
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
|
|
8
|
+
//
|
|
9
|
+
//===----------------------------------------------------------------------===//
|
|
10
|
+
|
|
11
|
+
#ifndef __CCCL_HOST_STD_LIB_H
|
|
12
|
+
#define __CCCL_HOST_STD_LIB_H
|
|
13
|
+
|
|
14
|
+
#include <cuda/std/__cccl/compiler.h>
|
|
15
|
+
#include <cuda/std/__cccl/preprocessor.h>
|
|
16
|
+
#include <cuda/std/__cccl/system_header.h>
|
|
17
|
+
|
|
18
|
+
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
|
|
19
|
+
# pragma GCC system_header
|
|
20
|
+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
|
|
21
|
+
# pragma clang system_header
|
|
22
|
+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
|
|
23
|
+
# pragma system_header
|
|
24
|
+
#endif // no system header
|
|
25
|
+
|
|
26
|
+
#define _CCCL_HOST_STD_LIB_LIBSTDCXX() 0
|
|
27
|
+
#define _CCCL_HOST_STD_LIB_LIBCXX() 0
|
|
28
|
+
#define _CCCL_HOST_STD_LIB_STL() 0
|
|
29
|
+
|
|
30
|
+
// include a minimal header
|
|
31
|
+
#if _CCCL_HAS_INCLUDE(<version>)
|
|
32
|
+
# include <version>
|
|
33
|
+
#elif _CCCL_HAS_INCLUDE(<ciso646>)
|
|
34
|
+
# include <ciso646>
|
|
35
|
+
#endif // ^^^ _CCCL_HAS_INCLUDE(<ciso646>) ^^^
|
|
36
|
+
|
|
37
|
+
#if defined(_MSVC_STL_VERSION)
|
|
38
|
+
# undef _CCCL_HOST_STD_LIB_STL
|
|
39
|
+
# define _CCCL_HOST_STD_LIB_STL() 1
|
|
40
|
+
#elif defined(__GLIBCXX__)
|
|
41
|
+
# undef _CCCL_HOST_STD_LIB_LIBSTDCXX
|
|
42
|
+
# define _CCCL_HOST_STD_LIB_LIBSTDCXX() 1
|
|
43
|
+
#elif defined(_LIBCPP_VERSION)
|
|
44
|
+
# undef _CCCL_HOST_STD_LIB_LIBCXX
|
|
45
|
+
# define _CCCL_HOST_STD_LIB_LIBCXX() 1
|
|
46
|
+
#endif // ^^^ _LIBCPP_VERSION ^^^
|
|
47
|
+
|
|
48
|
+
#define _CCCL_HOST_STD_LIB(_X) _CCCL_HOST_STD_LIB_##_X()
|
|
49
|
+
#define _CCCL_HAS_HOST_STD_LIB() \
|
|
50
|
+
(_CCCL_HOST_STD_LIB_LIBSTDCXX() || _CCCL_HOST_STD_LIB_LIBCXX() || _CCCL_HOST_STD_LIB_STL())
|
|
51
|
+
|
|
52
|
+
#endif // __CCCL_HOST_STD_LIB_H
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
|
2
|
+
//
|
|
3
|
+
// Part of libcu++, the C++ Standard Library for your entire system,
|
|
4
|
+
// under the Apache License v2.0 with LLVM Exceptions.
|
|
5
|
+
// See https://llvm.org/LICENSE.txt for license information.
|
|
6
|
+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
7
|
+
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
|
|
8
|
+
//
|
|
9
|
+
//===----------------------------------------------------------------------===//
|
|
10
|
+
|
|
11
|
+
#ifndef _CUDA_STD__CCCL_MEMORY_WRAPPER_H
|
|
12
|
+
#define _CUDA_STD__CCCL_MEMORY_WRAPPER_H
|
|
13
|
+
|
|
14
|
+
#include <cuda/std/detail/__config>
|
|
15
|
+
|
|
16
|
+
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
|
|
17
|
+
# pragma GCC system_header
|
|
18
|
+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
|
|
19
|
+
# pragma clang system_header
|
|
20
|
+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
|
|
21
|
+
# pragma system_header
|
|
22
|
+
#endif // no system header
|
|
23
|
+
|
|
24
|
+
// When nvc++ uses CCCL components as part of its implementation of
|
|
25
|
+
// Standard C++ algorithms, a cycle of included files may result when CCCL code
|
|
26
|
+
// tries to use a standard algorithm. The THRUST_INCLUDING_ALGORITHMS_HEADER macro
|
|
27
|
+
// is defined only when CCCL is including an algorithms-related header, giving
|
|
28
|
+
// the compiler a chance to detect and break the cycle of includes.
|
|
29
|
+
|
|
30
|
+
#if !_CCCL_COMPILER(NVRTC)
|
|
31
|
+
# define THRUST_INCLUDING_ALGORITHMS_HEADER
|
|
32
|
+
# include <memory>
|
|
33
|
+
# undef THRUST_INCLUDING_ALGORITHMS_HEADER
|
|
34
|
+
#endif // !_CCCL_COMPILER(NVRTC)
|
|
35
|
+
|
|
36
|
+
#endif // _CUDA_STD__CCCL_MEMORY_WRAPPER_H
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
|
2
|
+
//
|
|
3
|
+
// Part of libcu++, the C++ Standard Library for your entire system,
|
|
4
|
+
// under the Apache License v2.0 with LLVM Exceptions.
|
|
5
|
+
// See https://llvm.org/LICENSE.txt for license information.
|
|
6
|
+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
7
|
+
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
|
|
8
|
+
//
|
|
9
|
+
//===----------------------------------------------------------------------===//
|
|
10
|
+
|
|
11
|
+
#ifndef _CUDA_STD__CCCL_NUMERIC_WRAPPER_H
|
|
12
|
+
#define _CUDA_STD__CCCL_NUMERIC_WRAPPER_H
|
|
13
|
+
|
|
14
|
+
#include <cuda/std/detail/__config>
|
|
15
|
+
|
|
16
|
+
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
|
|
17
|
+
# pragma GCC system_header
|
|
18
|
+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
|
|
19
|
+
# pragma clang system_header
|
|
20
|
+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
|
|
21
|
+
# pragma system_header
|
|
22
|
+
#endif // no system header
|
|
23
|
+
|
|
24
|
+
// When a compiler uses CCCL components as part of its implementation of
|
|
25
|
+
// Standard C++ algorithms, a cycle of included files may result when CCCL code
|
|
26
|
+
// tries to use a standard algorithm. The THRUST_INCLUDING_ALGORITHMS_HEADER macro
|
|
27
|
+
// is defined only when CCCL is including an algorithms-related header, giving
|
|
28
|
+
// the compiler a chance to detect and break the cycle of includes.
|
|
29
|
+
|
|
30
|
+
#if !_CCCL_COMPILER(NVRTC)
|
|
31
|
+
# define THRUST_INCLUDING_ALGORITHMS_HEADER
|
|
32
|
+
# include <numeric>
|
|
33
|
+
# undef THRUST_INCLUDING_ALGORITHMS_HEADER
|
|
34
|
+
#endif // !_CCCL_COMPILER(NVRTC)
|
|
35
|
+
|
|
36
|
+
#endif // _CUDA_STD__CCCL_NUMERIC_WRAPPER_H
|
|
@@ -27,9 +27,10 @@
|
|
|
27
27
|
#include <cuda/std/__type_traits/is_floating_point.h>
|
|
28
28
|
#include <cuda/std/__type_traits/is_integral.h>
|
|
29
29
|
|
|
30
|
-
|
|
30
|
+
// MSVC and clang cuda need the host side functions included
|
|
31
|
+
#if _CCCL_HOST_COMPILATION() || _CCCL_CUDA_COMPILER(CLANG)
|
|
31
32
|
# include <math.h>
|
|
32
|
-
#endif //
|
|
33
|
+
#endif // _CCCL_HOST_COMPILATION() || _CCCL_CUDA_COMPILER(CLANG)
|
|
33
34
|
|
|
34
35
|
#include <cuda/std/__cccl/prologue.h>
|
|
35
36
|
|
|
@@ -23,6 +23,7 @@
|
|
|
23
23
|
|
|
24
24
|
#include <cuda/std/__complex/vector_support.h>
|
|
25
25
|
#include <cuda/std/__concepts/concept_macros.h>
|
|
26
|
+
#include <cuda/std/__fwd/complex.h>
|
|
26
27
|
#include <cuda/std/__fwd/get.h>
|
|
27
28
|
#include <cuda/std/__tuple_dir/tuple_element.h>
|
|
28
29
|
#include <cuda/std/__tuple_dir/tuple_size.h>
|
|
@@ -36,9 +37,9 @@
|
|
|
36
37
|
#include <cuda/std/limits>
|
|
37
38
|
|
|
38
39
|
// Compatibility helpers for thrust to convert between `std::complex` and `cuda::std::complex`
|
|
40
|
+
// todo: find a way to get rid of this include
|
|
39
41
|
#if !_CCCL_COMPILER(NVRTC)
|
|
40
|
-
# include <complex>
|
|
41
|
-
# include <sstream> // for std::basic_ostringstream
|
|
42
|
+
# include <complex> // for std::complex stream operators
|
|
42
43
|
|
|
43
44
|
# define _LIBCUDACXX_ACCESS_STD_COMPLEX_REAL(__c) reinterpret_cast<const _Up(&)[2]>(__c)[0]
|
|
44
45
|
# define _LIBCUDACXX_ACCESS_STD_COMPLEX_IMAG(__c) reinterpret_cast<const _Up(&)[2]>(__c)[1]
|
|
@@ -21,28 +21,28 @@
|
|
|
21
21
|
# pragma system_header
|
|
22
22
|
#endif // no system header
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
// gcc < 8 warns about its extended literals being shadowed by the implementation, so let's just disable the complex
|
|
25
|
+
// literals
|
|
26
|
+
#if !_CCCL_COMPILER(GCC, <, 8)
|
|
25
27
|
|
|
26
|
-
#include <cuda/std/
|
|
28
|
+
# include <cuda/std/__complex/complex.h>
|
|
27
29
|
|
|
28
|
-
|
|
30
|
+
# include <cuda/std/__cccl/prologue.h>
|
|
29
31
|
|
|
30
|
-
|
|
31
|
-
// Literal suffix for complex number literals [complex.literals]
|
|
32
|
+
_CCCL_BEGIN_NAMESPACE_CUDA_STD
|
|
32
33
|
|
|
33
34
|
_CCCL_DIAG_PUSH
|
|
34
35
|
_CCCL_DIAG_SUPPRESS_GCC("-Wliteral-suffix")
|
|
35
36
|
_CCCL_DIAG_SUPPRESS_CLANG("-Wuser-defined-literals")
|
|
36
|
-
|
|
37
|
+
_CCCL_DIAG_SUPPRESS_NVHPC(lit_suffix_no_underscore)
|
|
38
|
+
_CCCL_DIAG_SUPPRESS_MSVC(4455) // literal suffix identifiers that do not start with an underscore are reserved
|
|
39
|
+
_CCCL_BEGIN_NV_DIAG_SUPPRESS(2506, 20208) // a user-provided literal suffix must begin with "_",
|
|
40
|
+
// long double treated as double
|
|
37
41
|
|
|
38
42
|
inline namespace literals
|
|
39
43
|
{
|
|
40
44
|
inline namespace complex_literals
|
|
41
45
|
{
|
|
42
|
-
# if !_CCCL_CUDA_COMPILER(NVCC) && !_CCCL_COMPILER(NVRTC)
|
|
43
|
-
// NOTE: if you get a warning from GCC <7 here that "literal operator suffixes not preceded by ‘_’ are reserved for
|
|
44
|
-
// future standardization" then we are sorry. The warning was implemented before GCC 7, but can only be disabled since
|
|
45
|
-
// GCC 7. See also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69523
|
|
46
46
|
_CCCL_API constexpr complex<long double> operator""il(long double __im)
|
|
47
47
|
{
|
|
48
48
|
return {0.0l, __im};
|
|
@@ -71,36 +71,16 @@ _CCCL_API constexpr complex<float> operator""if(unsigned long long __im)
|
|
|
71
71
|
{
|
|
72
72
|
return {0.0f, static_cast<float>(__im)};
|
|
73
73
|
}
|
|
74
|
-
# else // ^^^ !_CCCL_CUDA_COMPILER(NVCC) && !_CCCL_COMPILER(NVRTC) ^^^ / vvv other compilers vvv
|
|
75
|
-
_CCCL_API constexpr complex<double> operator""i(double __im)
|
|
76
|
-
{
|
|
77
|
-
return {0.0, static_cast<double>(__im)};
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
_CCCL_API constexpr complex<double> operator""i(unsigned long long __im)
|
|
81
|
-
{
|
|
82
|
-
return {0.0, static_cast<double>(__im)};
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
_CCCL_API constexpr complex<float> operator""if(double __im)
|
|
86
|
-
{
|
|
87
|
-
return {0.0f, static_cast<float>(__im)};
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
_CCCL_API constexpr complex<float> operator""if(unsigned long long __im)
|
|
91
|
-
{
|
|
92
|
-
return {0.0f, static_cast<float>(__im)};
|
|
93
|
-
}
|
|
94
|
-
# endif // other compilers
|
|
95
74
|
} // namespace complex_literals
|
|
96
75
|
} // namespace literals
|
|
97
76
|
|
|
77
|
+
_CCCL_END_NV_DIAG_SUPPRESS()
|
|
98
78
|
_CCCL_DIAG_POP
|
|
99
79
|
|
|
100
|
-
#endif // _LIBCUDACXX_HAS_STL_LITERALS
|
|
101
|
-
|
|
102
80
|
_CCCL_END_NAMESPACE_CUDA_STD
|
|
103
81
|
|
|
104
|
-
#include <cuda/std/__cccl/epilogue.h>
|
|
82
|
+
# include <cuda/std/__cccl/epilogue.h>
|
|
83
|
+
|
|
84
|
+
#endif // !_CCCL_COMPILER(GCC, <, 8)
|
|
105
85
|
|
|
106
86
|
#endif // _CUDA_STD___COMPLEX_LITERALS_H
|
|
@@ -31,8 +31,9 @@
|
|
|
31
31
|
# include <cuda/std/__type_traits/enable_if.h>
|
|
32
32
|
# include <cuda/std/__type_traits/is_constructible.h>
|
|
33
33
|
|
|
34
|
+
// todo: find a way to get rid of this include
|
|
34
35
|
# if !_CCCL_COMPILER(NVRTC)
|
|
35
|
-
# include <
|
|
36
|
+
# include <complex> // for std::complex stream operators
|
|
36
37
|
# endif // !_CCCL_COMPILER(NVRTC)
|
|
37
38
|
|
|
38
39
|
# include <cuda/std/__cccl/prologue.h>
|
|
@@ -31,8 +31,9 @@
|
|
|
31
31
|
# include <cuda/std/__type_traits/enable_if.h>
|
|
32
32
|
# include <cuda/std/__type_traits/is_constructible.h>
|
|
33
33
|
|
|
34
|
+
// todo: find a way to get rid of this include
|
|
34
35
|
# if !_CCCL_COMPILER(NVRTC)
|
|
35
|
-
# include <
|
|
36
|
+
# include <complex> // for std::complex stream operators
|
|
36
37
|
# endif // !_CCCL_COMPILER(NVRTC)
|
|
37
38
|
|
|
38
39
|
# include <cuda/std/__cccl/prologue.h>
|
|
@@ -294,7 +295,7 @@ struct __get_complex_impl<__half>
|
|
|
294
295
|
}
|
|
295
296
|
};
|
|
296
297
|
|
|
297
|
-
# if !
|
|
298
|
+
# if !_CCCL_COMPILER(NVRTC)
|
|
298
299
|
template <class _CharT, class _Traits>
|
|
299
300
|
::std::basic_istream<_CharT, _Traits>& operator>>(::std::basic_istream<_CharT, _Traits>& __is, complex<__half>& __x)
|
|
300
301
|
{
|
|
@@ -310,7 +311,7 @@ operator<<(::std::basic_ostream<_CharT, _Traits>& __os, const complex<__half>& _
|
|
|
310
311
|
{
|
|
311
312
|
return __os << complex<float>{__x};
|
|
312
313
|
}
|
|
313
|
-
# endif // !
|
|
314
|
+
# endif // !_CCCL_COMPILER(NVRTC)
|
|
314
315
|
|
|
315
316
|
_CCCL_END_NAMESPACE_CUDA_STD
|
|
316
317
|
|
|
@@ -35,8 +35,8 @@ _CCCL_BEGIN_NAMESPACE_CUDA_STD
|
|
|
35
35
|
|
|
36
36
|
template <class _Fn, class... _Args>
|
|
37
37
|
concept invocable = requires(_Fn&& __fn, _Args&&... __args) {
|
|
38
|
-
::cuda::std::
|
|
39
|
-
|
|
38
|
+
::cuda::std::invoke(::cuda::std::forward<_Fn>(__fn), ::cuda::std::forward<_Args>(__args)...); // not required to be
|
|
39
|
+
// equality preserving
|
|
40
40
|
};
|
|
41
41
|
|
|
42
42
|
// [concept.regular.invocable]
|
|
@@ -21,8 +21,9 @@
|
|
|
21
21
|
# pragma system_header
|
|
22
22
|
#endif // no system header
|
|
23
23
|
|
|
24
|
+
#include <cuda/__cmath/mul_hi.h>
|
|
24
25
|
#include <cuda/std/__cstddef/types.h>
|
|
25
|
-
#include <cuda/std/
|
|
26
|
+
#include <cuda/std/__cstring/memset.h>
|
|
26
27
|
|
|
27
28
|
#if !_CCCL_COMPILER(NVRTC)
|
|
28
29
|
# include <cstdlib>
|
|
@@ -44,7 +45,7 @@ using ::malloc;
|
|
|
44
45
|
|
|
45
46
|
const size_t __nbytes = __n * __size;
|
|
46
47
|
|
|
47
|
-
if (::
|
|
48
|
+
if (::cuda::mul_hi(__n, __size) == 0)
|
|
48
49
|
{
|
|
49
50
|
__ptr = ::cuda::std::malloc(__nbytes);
|
|
50
51
|
if (__ptr != nullptr)
|