cuda-cccl 0.1.3.2.0.dev438__cp312-cp312-manylinux_2_24_aarch64.whl → 0.3.1__cp312-cp312-manylinux_2_24_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cuda-cccl might be problematic. Click here for more details.
- cuda/cccl/cooperative/__init__.py +7 -1
- cuda/cccl/cooperative/experimental/__init__.py +21 -5
- cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_for.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_merge.cuh +23 -21
- cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +21 -3
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +25 -5
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_rle.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_scan.cuh +5 -1
- cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +24 -19
- cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +2 -5
- cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +22 -5
- cuda/cccl/headers/include/cub/block/block_load_to_shared.cuh +432 -0
- cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +3 -2
- cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +4 -2
- cuda/cccl/headers/include/cub/detail/device_memory_resource.cuh +1 -0
- cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +18 -26
- cuda/cccl/headers/include/cub/device/device_copy.cuh +116 -27
- cuda/cccl/headers/include/cub/device/device_partition.cuh +5 -1
- cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +158 -247
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_copy_mdspan.cuh +79 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +4 -4
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +2 -11
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +8 -26
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +1 -6
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh +0 -1
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +320 -262
- cuda/cccl/headers/include/cub/device/dispatch/kernels/reduce.cuh +10 -5
- cuda/cccl/headers/include/cub/device/dispatch/kernels/scan.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_reduce.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_sort.cuh +57 -10
- cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +37 -13
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +8 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +204 -55
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +2 -5
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +55 -19
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +10 -0
- cuda/cccl/headers/include/cub/util_device.cuh +51 -35
- cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +3 -2
- cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +3 -2
- cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +2 -2
- cuda/cccl/headers/include/cuda/__algorithm/common.h +1 -1
- cuda/cccl/headers/include/cuda/__algorithm/copy.h +4 -4
- cuda/cccl/headers/include/cuda/__algorithm/fill.h +1 -1
- cuda/cccl/headers/include/cuda/__device/all_devices.h +47 -147
- cuda/cccl/headers/include/cuda/__device/arch_traits.h +51 -49
- cuda/cccl/headers/include/cuda/__device/attributes.h +177 -127
- cuda/cccl/headers/include/cuda/__device/device_ref.h +32 -51
- cuda/cccl/headers/include/cuda/__device/physical_device.h +120 -91
- cuda/cccl/headers/include/cuda/__driver/driver_api.h +330 -36
- cuda/cccl/headers/include/cuda/__event/event.h +8 -8
- cuda/cccl/headers/include/cuda/__event/event_ref.h +4 -5
- cuda/cccl/headers/include/cuda/__event/timed_event.h +4 -4
- cuda/cccl/headers/include/cuda/__fwd/devices.h +44 -0
- cuda/cccl/headers/include/cuda/__fwd/zip_iterator.h +9 -0
- cuda/cccl/headers/include/cuda/__iterator/transform_input_output_iterator.h +3 -3
- cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +3 -3
- cuda/cccl/headers/include/cuda/__iterator/transform_output_iterator.h +3 -3
- cuda/cccl/headers/include/cuda/__iterator/zip_common.h +158 -0
- cuda/cccl/headers/include/cuda/__iterator/zip_iterator.h +8 -120
- cuda/cccl/headers/include/cuda/__iterator/zip_transform_iterator.h +593 -0
- cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +14 -10
- cuda/cccl/headers/include/cuda/__runtime/ensure_current_context.h +4 -3
- cuda/cccl/headers/include/cuda/__runtime/types.h +1 -1
- cuda/cccl/headers/include/cuda/__stream/stream.h +2 -3
- cuda/cccl/headers/include/cuda/__stream/stream_ref.h +18 -12
- cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_tables.h +2 -2
- cuda/cccl/headers/include/cuda/__utility/basic_any.h +1 -1
- cuda/cccl/headers/include/cuda/algorithm +1 -1
- cuda/cccl/headers/include/cuda/devices +10 -0
- cuda/cccl/headers/include/cuda/iterator +1 -0
- cuda/cccl/headers/include/cuda/std/__bit/countl.h +8 -1
- cuda/cccl/headers/include/cuda/std/__bit/countr.h +2 -2
- cuda/cccl/headers/include/cuda/std/__bit/reference.h +11 -11
- cuda/cccl/headers/include/cuda/std/__cccl/cuda_capabilities.h +2 -2
- cuda/cccl/headers/include/cuda/std/__cccl/preprocessor.h +2 -0
- cuda/cccl/headers/include/cuda/std/__chrono/duration.h +16 -16
- cuda/cccl/headers/include/cuda/std/__chrono/steady_clock.h +5 -5
- cuda/cccl/headers/include/cuda/std/__chrono/system_clock.h +5 -5
- cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +10 -5
- cuda/cccl/headers/include/cuda/std/__cmath/min_max.h +44 -17
- cuda/cccl/headers/include/cuda/std/__concepts/constructible.h +1 -1
- cuda/cccl/headers/include/cuda/std/__cuda/api_wrapper.h +12 -12
- cuda/cccl/headers/include/cuda/std/__exception/cuda_error.h +1 -8
- cuda/cccl/headers/include/cuda/std/__floating_point/cast.h +15 -12
- cuda/cccl/headers/include/cuda/std/__floating_point/cuda_fp_types.h +3 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/fp.h +1 -1
- cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +2 -1
- cuda/cccl/headers/include/cuda/std/__tuple_dir/make_tuple_types.h +23 -1
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +4 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like_ext.h +4 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/promote.h +3 -2
- cuda/cccl/headers/include/cuda/std/string_view +12 -5
- cuda/cccl/headers/include/cuda/std/version +1 -4
- cuda/cccl/headers/include/thrust/detail/integer_math.h +3 -20
- cuda/cccl/headers/include/thrust/iterator/iterator_traits.h +11 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +33 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/tabulate.h +8 -22
- cuda/cccl/headers/include/thrust/type_traits/unwrap_contiguous_iterator.h +15 -48
- cuda/cccl/parallel/experimental/__init__.py +21 -70
- cuda/compute/__init__.py +77 -0
- cuda/{cccl/parallel/experimental → compute}/_bindings.pyi +28 -0
- cuda/{cccl/parallel/experimental → compute}/_bindings_impl.pyx +141 -1
- cuda/{cccl/parallel/experimental → compute}/algorithms/__init__.py +4 -0
- cuda/{cccl/parallel/experimental → compute}/algorithms/_histogram.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/algorithms/_merge_sort.py +2 -2
- cuda/{cccl/parallel/experimental → compute}/algorithms/_radix_sort.py +3 -3
- cuda/{cccl/parallel/experimental → compute}/algorithms/_reduce.py +2 -4
- cuda/{cccl/parallel/experimental → compute}/algorithms/_scan.py +4 -6
- cuda/{cccl/parallel/experimental → compute}/algorithms/_segmented_reduce.py +2 -2
- cuda/compute/algorithms/_three_way_partition.py +261 -0
- cuda/{cccl/parallel/experimental → compute}/algorithms/_transform.py +4 -4
- cuda/{cccl/parallel/experimental → compute}/algorithms/_unique_by_key.py +2 -2
- cuda/compute/cu12/_bindings_impl.cpython-312-aarch64-linux-gnu.so +0 -0
- cuda/{cccl/parallel/experimental → compute}/cu12/cccl/libcccl.c.parallel.so +0 -0
- cuda/compute/cu13/_bindings_impl.cpython-312-aarch64-linux-gnu.so +0 -0
- cuda/{cccl/parallel/experimental → compute}/cu13/cccl/libcccl.c.parallel.so +0 -0
- cuda/{cccl/parallel/experimental → compute}/iterators/_factories.py +8 -8
- cuda/{cccl/parallel/experimental → compute}/struct.py +2 -2
- cuda/coop/__init__.py +8 -0
- cuda/{cccl/cooperative/experimental → coop}/_nvrtc.py +3 -2
- cuda/{cccl/cooperative/experimental → coop}/_scan_op.py +3 -3
- cuda/{cccl/cooperative/experimental → coop}/_types.py +2 -2
- cuda/{cccl/cooperative/experimental → coop}/_typing.py +1 -1
- cuda/{cccl/cooperative/experimental → coop}/block/__init__.py +6 -6
- cuda/{cccl/cooperative/experimental → coop}/block/_block_exchange.py +4 -4
- cuda/{cccl/cooperative/experimental → coop}/block/_block_load_store.py +6 -6
- cuda/{cccl/cooperative/experimental → coop}/block/_block_merge_sort.py +4 -4
- cuda/{cccl/cooperative/experimental → coop}/block/_block_radix_sort.py +6 -6
- cuda/{cccl/cooperative/experimental → coop}/block/_block_reduce.py +6 -6
- cuda/{cccl/cooperative/experimental → coop}/block/_block_scan.py +7 -7
- cuda/coop/warp/__init__.py +9 -0
- cuda/{cccl/cooperative/experimental → coop}/warp/_warp_merge_sort.py +3 -3
- cuda/{cccl/cooperative/experimental → coop}/warp/_warp_reduce.py +6 -6
- cuda/{cccl/cooperative/experimental → coop}/warp/_warp_scan.py +4 -4
- {cuda_cccl-0.1.3.2.0.dev438.dist-info → cuda_cccl-0.3.1.dist-info}/METADATA +1 -1
- {cuda_cccl-0.1.3.2.0.dev438.dist-info → cuda_cccl-0.3.1.dist-info}/RECORD +171 -166
- cuda/cccl/cooperative/experimental/warp/__init__.py +0 -9
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_advance_iterators.cuh +0 -111
- cuda/cccl/headers/include/cuda/std/__cuda/ensure_current_device.h +0 -72
- cuda/cccl/parallel/experimental/.gitignore +0 -4
- cuda/cccl/parallel/experimental/cu12/_bindings_impl.cpython-312-aarch64-linux-gnu.so +0 -0
- cuda/cccl/parallel/experimental/cu13/_bindings_impl.cpython-312-aarch64-linux-gnu.so +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_bindings.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_caching.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_cccl_interop.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_utils/__init__.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_utils/protocols.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/_utils/temp_storage_buffer.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/cccl/.gitkeep +0 -0
- /cuda/{cccl/parallel/experimental → compute}/iterators/__init__.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/iterators/_iterators.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/iterators/_zip_iterator.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/numba_utils.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/op.py +0 -0
- /cuda/{cccl/parallel/experimental → compute}/typing.py +0 -0
- /cuda/{cccl/cooperative/experimental → coop}/_caching.py +0 -0
- /cuda/{cccl/cooperative/experimental → coop}/_common.py +0 -0
- {cuda_cccl-0.1.3.2.0.dev438.dist-info → cuda_cccl-0.3.1.dist-info}/WHEEL +0 -0
- {cuda_cccl-0.1.3.2.0.dev438.dist-info → cuda_cccl-0.3.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
|
|
2
|
-
#
|
|
3
|
-
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
4
|
-
|
|
5
|
-
from cuda.cccl.cooperative.experimental.warp._warp_merge_sort import merge_sort_keys
|
|
6
|
-
from cuda.cccl.cooperative.experimental.warp._warp_reduce import reduce, sum
|
|
7
|
-
from cuda.cccl.cooperative.experimental.warp._warp_scan import exclusive_sum
|
|
8
|
-
|
|
9
|
-
__all__ = ["exclusive_sum", "reduce", "sum", "merge_sort_keys"]
|
|
@@ -1,111 +0,0 @@
|
|
|
1
|
-
// SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
|
2
|
-
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
3
|
-
|
|
4
|
-
#pragma once
|
|
5
|
-
|
|
6
|
-
#include <cub/config.cuh>
|
|
7
|
-
|
|
8
|
-
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
|
|
9
|
-
# pragma GCC system_header
|
|
10
|
-
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
|
|
11
|
-
# pragma clang system_header
|
|
12
|
-
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
|
|
13
|
-
# pragma system_header
|
|
14
|
-
#endif // no system header
|
|
15
|
-
|
|
16
|
-
#include <cuda/std/__type_traits/integral_constant.h>
|
|
17
|
-
#include <cuda/std/__type_traits/void_t.h>
|
|
18
|
-
#include <cuda/std/__utility/declval.h>
|
|
19
|
-
|
|
20
|
-
CUB_NAMESPACE_BEGIN
|
|
21
|
-
|
|
22
|
-
namespace detail
|
|
23
|
-
{
|
|
24
|
-
template <typename T, typename U, typename = void>
|
|
25
|
-
struct has_plus_operator : ::cuda::std::false_type
|
|
26
|
-
{};
|
|
27
|
-
|
|
28
|
-
template <typename T, typename U>
|
|
29
|
-
struct has_plus_operator<T, U, ::cuda::std::void_t<decltype(::cuda::std::declval<T>() + ::cuda::std::declval<U>())>>
|
|
30
|
-
: ::cuda::std::true_type
|
|
31
|
-
{};
|
|
32
|
-
|
|
33
|
-
template <typename T, typename U>
|
|
34
|
-
constexpr bool has_plus_operator_v = has_plus_operator<T, U>::value;
|
|
35
|
-
|
|
36
|
-
// Helper function that advances a given iterator only if it supports being advanced by the given offset
|
|
37
|
-
template <typename IteratorT, typename OffsetT>
|
|
38
|
-
CUB_RUNTIME_FUNCTION _CCCL_VISIBILITY_HIDDEN _CCCL_FORCEINLINE IteratorT
|
|
39
|
-
advance_iterators_if_supported(IteratorT iter, [[maybe_unused]] OffsetT offset)
|
|
40
|
-
{
|
|
41
|
-
if constexpr (has_plus_operator_v<IteratorT, OffsetT>)
|
|
42
|
-
{
|
|
43
|
-
// If operator+ is valid, advance the iterator.
|
|
44
|
-
return iter + offset;
|
|
45
|
-
}
|
|
46
|
-
else
|
|
47
|
-
{
|
|
48
|
-
// Otherwise, return iter unmodified.
|
|
49
|
-
return iter;
|
|
50
|
-
}
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
template <typename T, typename U, typename = void>
|
|
54
|
-
struct has_add_assign_operator : ::cuda::std::false_type
|
|
55
|
-
{};
|
|
56
|
-
|
|
57
|
-
template <typename T, typename U>
|
|
58
|
-
struct has_add_assign_operator<T,
|
|
59
|
-
U,
|
|
60
|
-
::cuda::std::void_t<decltype(::cuda::std::declval<T&>() += ::cuda::std::declval<U>())>>
|
|
61
|
-
: ::cuda::std::true_type
|
|
62
|
-
{};
|
|
63
|
-
|
|
64
|
-
template <typename T, typename U>
|
|
65
|
-
constexpr bool has_add_assign_operator_v = has_add_assign_operator<T, U>::value;
|
|
66
|
-
|
|
67
|
-
// Helper function that advances a given iterator only if it supports being advanced by the given offset
|
|
68
|
-
template <typename IteratorT, typename OffsetT>
|
|
69
|
-
CUB_RUNTIME_FUNCTION _CCCL_VISIBILITY_HIDDEN _CCCL_FORCEINLINE void
|
|
70
|
-
advance_iterators_inplace_if_supported(IteratorT& iter, [[maybe_unused]] OffsetT offset)
|
|
71
|
-
{
|
|
72
|
-
if constexpr (has_add_assign_operator_v<IteratorT, OffsetT>)
|
|
73
|
-
{
|
|
74
|
-
// If operator+= is valid, advance the iterator in place.
|
|
75
|
-
iter += offset;
|
|
76
|
-
}
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
// Helper function that checks whether all of the given iterators support the + operator with the given offset
|
|
80
|
-
template <typename OffsetT, typename... Iterators>
|
|
81
|
-
CUB_RUNTIME_FUNCTION _CCCL_VISIBILITY_HIDDEN _CCCL_FORCEINLINE bool
|
|
82
|
-
all_iterators_support_plus_operator(OffsetT /*offset*/, Iterators... /*iters*/)
|
|
83
|
-
{
|
|
84
|
-
if constexpr ((has_plus_operator_v<Iterators, OffsetT> && ...))
|
|
85
|
-
{
|
|
86
|
-
return true;
|
|
87
|
-
}
|
|
88
|
-
else
|
|
89
|
-
{
|
|
90
|
-
return false;
|
|
91
|
-
}
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
// Helper function that checks whether all of the given iterators support the += operator with the given offset
|
|
95
|
-
template <typename OffsetT, typename... Iterators>
|
|
96
|
-
CUB_RUNTIME_FUNCTION _CCCL_VISIBILITY_HIDDEN _CCCL_FORCEINLINE bool
|
|
97
|
-
all_iterators_support_add_assign_operator(OffsetT /*offset*/, Iterators... /*iters*/)
|
|
98
|
-
{
|
|
99
|
-
if constexpr ((has_add_assign_operator_v<Iterators, OffsetT> && ...))
|
|
100
|
-
{
|
|
101
|
-
return true;
|
|
102
|
-
}
|
|
103
|
-
else
|
|
104
|
-
{
|
|
105
|
-
return false;
|
|
106
|
-
}
|
|
107
|
-
}
|
|
108
|
-
|
|
109
|
-
} // namespace detail
|
|
110
|
-
|
|
111
|
-
CUB_NAMESPACE_END
|
|
@@ -1,72 +0,0 @@
|
|
|
1
|
-
//===----------------------------------------------------------------------===//
|
|
2
|
-
//
|
|
3
|
-
// Part of libcu++, the C++ Standard Library for your entire system,
|
|
4
|
-
// under the Apache License v2.0 with LLVM Exceptions.
|
|
5
|
-
// See https://llvm.org/LICENSE.txt for license information.
|
|
6
|
-
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
7
|
-
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
|
|
8
|
-
//
|
|
9
|
-
//===----------------------------------------------------------------------===//
|
|
10
|
-
|
|
11
|
-
#ifndef _CUDA__STD__CUDA_ENSURE_CURRENT_DEVICE_H
|
|
12
|
-
#define _CUDA__STD__CUDA_ENSURE_CURRENT_DEVICE_H
|
|
13
|
-
|
|
14
|
-
#include <cuda/std/detail/__config>
|
|
15
|
-
|
|
16
|
-
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
|
|
17
|
-
# pragma GCC system_header
|
|
18
|
-
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
|
|
19
|
-
# pragma clang system_header
|
|
20
|
-
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
|
|
21
|
-
# pragma system_header
|
|
22
|
-
#endif // no system header
|
|
23
|
-
|
|
24
|
-
#if _CCCL_HAS_CTK()
|
|
25
|
-
|
|
26
|
-
# include <cuda/std/__cuda/api_wrapper.h>
|
|
27
|
-
|
|
28
|
-
# include <cuda/std/__cccl/prologue.h>
|
|
29
|
-
|
|
30
|
-
_CCCL_BEGIN_NAMESPACE_CUDA
|
|
31
|
-
|
|
32
|
-
# if !_CCCL_COMPILER(NVRTC)
|
|
33
|
-
|
|
34
|
-
//! @brief `__ensure_current_device` is a simple helper that ensures the current device is set to the right one.
|
|
35
|
-
//! Only changes the current device if the target device is not the current one
|
|
36
|
-
struct __ensure_current_device
|
|
37
|
-
{
|
|
38
|
-
int __target_device_ = 0;
|
|
39
|
-
int __original_device_ = 0;
|
|
40
|
-
|
|
41
|
-
//! @brief Queries the current device and if that is different than \p __target_device sets the current device to
|
|
42
|
-
//! \p __target_device
|
|
43
|
-
__ensure_current_device(const int __target_device)
|
|
44
|
-
: __target_device_(__target_device)
|
|
45
|
-
{
|
|
46
|
-
_CCCL_TRY_CUDA_API(::cudaGetDevice, "Failed to query current device", &__original_device_);
|
|
47
|
-
if (__original_device_ != __target_device_)
|
|
48
|
-
{
|
|
49
|
-
_CCCL_TRY_CUDA_API(::cudaSetDevice, "Failed to set device", __target_device_);
|
|
50
|
-
}
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
//! @brief If the \p __original_device was not equal to \p __target_device sets the current device back to
|
|
54
|
-
//! \p __original_device
|
|
55
|
-
~__ensure_current_device()
|
|
56
|
-
{
|
|
57
|
-
if (__original_device_ != __target_device_)
|
|
58
|
-
{
|
|
59
|
-
_CCCL_LOG_CUDA_API(::cudaSetDevice, "Failed to set device", __original_device_);
|
|
60
|
-
}
|
|
61
|
-
}
|
|
62
|
-
};
|
|
63
|
-
|
|
64
|
-
# endif // !_CCCL_COMPILER(NVRTC)
|
|
65
|
-
|
|
66
|
-
_CCCL_END_NAMESPACE_CUDA
|
|
67
|
-
|
|
68
|
-
# include <cuda/std/__cccl/epilogue.h>
|
|
69
|
-
|
|
70
|
-
#endif // _CCCL_HAS_CTK()
|
|
71
|
-
|
|
72
|
-
#endif //_CUDA__STD__CUDA_ENSURE_CURRENT_DEVICE_H
|
|
Binary file
|
|
Binary file
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|