cuda-cccl 0.1.3.2.0.dev438__cp310-cp310-manylinux_2_24_aarch64.whl → 0.3.1__cp310-cp310-manylinux_2_24_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cuda-cccl might be problematic. Click here for more details.

Files changed (177) hide show
  1. cuda/cccl/cooperative/__init__.py +7 -1
  2. cuda/cccl/cooperative/experimental/__init__.py +21 -5
  3. cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +2 -5
  4. cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +2 -5
  5. cuda/cccl/headers/include/cub/agent/agent_for.cuh +2 -5
  6. cuda/cccl/headers/include/cub/agent/agent_merge.cuh +23 -21
  7. cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +21 -3
  8. cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +25 -5
  9. cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +2 -5
  10. cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +2 -5
  11. cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +2 -5
  12. cuda/cccl/headers/include/cub/agent/agent_rle.cuh +2 -5
  13. cuda/cccl/headers/include/cub/agent/agent_scan.cuh +5 -1
  14. cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +2 -5
  15. cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +2 -5
  16. cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +2 -5
  17. cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +24 -19
  18. cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +2 -5
  19. cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +22 -5
  20. cuda/cccl/headers/include/cub/block/block_load_to_shared.cuh +432 -0
  21. cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +3 -2
  22. cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +4 -2
  23. cuda/cccl/headers/include/cub/detail/device_memory_resource.cuh +1 -0
  24. cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +18 -26
  25. cuda/cccl/headers/include/cub/device/device_copy.cuh +116 -27
  26. cuda/cccl/headers/include/cub/device/device_partition.cuh +5 -1
  27. cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +158 -247
  28. cuda/cccl/headers/include/cub/device/dispatch/dispatch_copy_mdspan.cuh +79 -0
  29. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +4 -4
  30. cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +2 -11
  31. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +8 -26
  32. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +1 -6
  33. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh +0 -1
  34. cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +320 -262
  35. cuda/cccl/headers/include/cub/device/dispatch/kernels/reduce.cuh +10 -5
  36. cuda/cccl/headers/include/cub/device/dispatch/kernels/scan.cuh +2 -5
  37. cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_reduce.cuh +2 -5
  38. cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_sort.cuh +57 -10
  39. cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +37 -13
  40. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +2 -5
  41. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +2 -5
  42. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +2 -5
  43. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +2 -5
  44. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +2 -5
  45. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +8 -0
  46. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +2 -5
  47. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +2 -5
  48. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +2 -5
  49. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +2 -5
  50. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +2 -5
  51. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +204 -55
  52. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +2 -5
  53. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +55 -19
  54. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +10 -0
  55. cuda/cccl/headers/include/cub/util_device.cuh +51 -35
  56. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +3 -2
  57. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +3 -2
  58. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +2 -2
  59. cuda/cccl/headers/include/cuda/__algorithm/common.h +1 -1
  60. cuda/cccl/headers/include/cuda/__algorithm/copy.h +4 -4
  61. cuda/cccl/headers/include/cuda/__algorithm/fill.h +1 -1
  62. cuda/cccl/headers/include/cuda/__device/all_devices.h +47 -147
  63. cuda/cccl/headers/include/cuda/__device/arch_traits.h +51 -49
  64. cuda/cccl/headers/include/cuda/__device/attributes.h +177 -127
  65. cuda/cccl/headers/include/cuda/__device/device_ref.h +32 -51
  66. cuda/cccl/headers/include/cuda/__device/physical_device.h +120 -91
  67. cuda/cccl/headers/include/cuda/__driver/driver_api.h +330 -36
  68. cuda/cccl/headers/include/cuda/__event/event.h +8 -8
  69. cuda/cccl/headers/include/cuda/__event/event_ref.h +4 -5
  70. cuda/cccl/headers/include/cuda/__event/timed_event.h +4 -4
  71. cuda/cccl/headers/include/cuda/__fwd/devices.h +44 -0
  72. cuda/cccl/headers/include/cuda/__fwd/zip_iterator.h +9 -0
  73. cuda/cccl/headers/include/cuda/__iterator/transform_input_output_iterator.h +3 -3
  74. cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +3 -3
  75. cuda/cccl/headers/include/cuda/__iterator/transform_output_iterator.h +3 -3
  76. cuda/cccl/headers/include/cuda/__iterator/zip_common.h +158 -0
  77. cuda/cccl/headers/include/cuda/__iterator/zip_iterator.h +8 -120
  78. cuda/cccl/headers/include/cuda/__iterator/zip_transform_iterator.h +593 -0
  79. cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +14 -10
  80. cuda/cccl/headers/include/cuda/__runtime/ensure_current_context.h +4 -3
  81. cuda/cccl/headers/include/cuda/__runtime/types.h +1 -1
  82. cuda/cccl/headers/include/cuda/__stream/stream.h +2 -3
  83. cuda/cccl/headers/include/cuda/__stream/stream_ref.h +18 -12
  84. cuda/cccl/headers/include/cuda/__utility/__basic_any/virtual_tables.h +2 -2
  85. cuda/cccl/headers/include/cuda/__utility/basic_any.h +1 -1
  86. cuda/cccl/headers/include/cuda/algorithm +1 -1
  87. cuda/cccl/headers/include/cuda/devices +10 -0
  88. cuda/cccl/headers/include/cuda/iterator +1 -0
  89. cuda/cccl/headers/include/cuda/std/__bit/countl.h +8 -1
  90. cuda/cccl/headers/include/cuda/std/__bit/countr.h +2 -2
  91. cuda/cccl/headers/include/cuda/std/__bit/reference.h +11 -11
  92. cuda/cccl/headers/include/cuda/std/__cccl/cuda_capabilities.h +2 -2
  93. cuda/cccl/headers/include/cuda/std/__cccl/preprocessor.h +2 -0
  94. cuda/cccl/headers/include/cuda/std/__chrono/duration.h +16 -16
  95. cuda/cccl/headers/include/cuda/std/__chrono/steady_clock.h +5 -5
  96. cuda/cccl/headers/include/cuda/std/__chrono/system_clock.h +5 -5
  97. cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +10 -5
  98. cuda/cccl/headers/include/cuda/std/__cmath/min_max.h +44 -17
  99. cuda/cccl/headers/include/cuda/std/__concepts/constructible.h +1 -1
  100. cuda/cccl/headers/include/cuda/std/__cuda/api_wrapper.h +12 -12
  101. cuda/cccl/headers/include/cuda/std/__exception/cuda_error.h +1 -8
  102. cuda/cccl/headers/include/cuda/std/__floating_point/cast.h +15 -12
  103. cuda/cccl/headers/include/cuda/std/__floating_point/cuda_fp_types.h +3 -0
  104. cuda/cccl/headers/include/cuda/std/__floating_point/fp.h +1 -1
  105. cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +2 -1
  106. cuda/cccl/headers/include/cuda/std/__tuple_dir/make_tuple_types.h +23 -1
  107. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +4 -0
  108. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like_ext.h +4 -0
  109. cuda/cccl/headers/include/cuda/std/__type_traits/promote.h +3 -2
  110. cuda/cccl/headers/include/cuda/std/string_view +12 -5
  111. cuda/cccl/headers/include/cuda/std/version +1 -4
  112. cuda/cccl/headers/include/thrust/detail/integer_math.h +3 -20
  113. cuda/cccl/headers/include/thrust/iterator/iterator_traits.h +11 -0
  114. cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +33 -0
  115. cuda/cccl/headers/include/thrust/system/cuda/detail/tabulate.h +8 -22
  116. cuda/cccl/headers/include/thrust/type_traits/unwrap_contiguous_iterator.h +15 -48
  117. cuda/cccl/parallel/experimental/__init__.py +21 -70
  118. cuda/compute/__init__.py +77 -0
  119. cuda/{cccl/parallel/experimental → compute}/_bindings.pyi +28 -0
  120. cuda/{cccl/parallel/experimental → compute}/_bindings_impl.pyx +141 -1
  121. cuda/{cccl/parallel/experimental → compute}/algorithms/__init__.py +4 -0
  122. cuda/{cccl/parallel/experimental → compute}/algorithms/_histogram.py +2 -2
  123. cuda/{cccl/parallel/experimental → compute}/algorithms/_merge_sort.py +2 -2
  124. cuda/{cccl/parallel/experimental → compute}/algorithms/_radix_sort.py +3 -3
  125. cuda/{cccl/parallel/experimental → compute}/algorithms/_reduce.py +2 -4
  126. cuda/{cccl/parallel/experimental → compute}/algorithms/_scan.py +4 -6
  127. cuda/{cccl/parallel/experimental → compute}/algorithms/_segmented_reduce.py +2 -2
  128. cuda/compute/algorithms/_three_way_partition.py +261 -0
  129. cuda/{cccl/parallel/experimental → compute}/algorithms/_transform.py +4 -4
  130. cuda/{cccl/parallel/experimental → compute}/algorithms/_unique_by_key.py +2 -2
  131. cuda/compute/cu12/_bindings_impl.cpython-310-aarch64-linux-gnu.so +0 -0
  132. cuda/{cccl/parallel/experimental → compute}/cu12/cccl/libcccl.c.parallel.so +0 -0
  133. cuda/compute/cu13/_bindings_impl.cpython-310-aarch64-linux-gnu.so +0 -0
  134. cuda/{cccl/parallel/experimental → compute}/cu13/cccl/libcccl.c.parallel.so +0 -0
  135. cuda/{cccl/parallel/experimental → compute}/iterators/_factories.py +8 -8
  136. cuda/{cccl/parallel/experimental → compute}/struct.py +2 -2
  137. cuda/coop/__init__.py +8 -0
  138. cuda/{cccl/cooperative/experimental → coop}/_nvrtc.py +3 -2
  139. cuda/{cccl/cooperative/experimental → coop}/_scan_op.py +3 -3
  140. cuda/{cccl/cooperative/experimental → coop}/_types.py +2 -2
  141. cuda/{cccl/cooperative/experimental → coop}/_typing.py +1 -1
  142. cuda/{cccl/cooperative/experimental → coop}/block/__init__.py +6 -6
  143. cuda/{cccl/cooperative/experimental → coop}/block/_block_exchange.py +4 -4
  144. cuda/{cccl/cooperative/experimental → coop}/block/_block_load_store.py +6 -6
  145. cuda/{cccl/cooperative/experimental → coop}/block/_block_merge_sort.py +4 -4
  146. cuda/{cccl/cooperative/experimental → coop}/block/_block_radix_sort.py +6 -6
  147. cuda/{cccl/cooperative/experimental → coop}/block/_block_reduce.py +6 -6
  148. cuda/{cccl/cooperative/experimental → coop}/block/_block_scan.py +7 -7
  149. cuda/coop/warp/__init__.py +9 -0
  150. cuda/{cccl/cooperative/experimental → coop}/warp/_warp_merge_sort.py +3 -3
  151. cuda/{cccl/cooperative/experimental → coop}/warp/_warp_reduce.py +6 -6
  152. cuda/{cccl/cooperative/experimental → coop}/warp/_warp_scan.py +4 -4
  153. {cuda_cccl-0.1.3.2.0.dev438.dist-info → cuda_cccl-0.3.1.dist-info}/METADATA +1 -1
  154. {cuda_cccl-0.1.3.2.0.dev438.dist-info → cuda_cccl-0.3.1.dist-info}/RECORD +171 -166
  155. cuda/cccl/cooperative/experimental/warp/__init__.py +0 -9
  156. cuda/cccl/headers/include/cub/device/dispatch/dispatch_advance_iterators.cuh +0 -111
  157. cuda/cccl/headers/include/cuda/std/__cuda/ensure_current_device.h +0 -72
  158. cuda/cccl/parallel/experimental/.gitignore +0 -4
  159. cuda/cccl/parallel/experimental/cu12/_bindings_impl.cpython-310-aarch64-linux-gnu.so +0 -0
  160. cuda/cccl/parallel/experimental/cu13/_bindings_impl.cpython-310-aarch64-linux-gnu.so +0 -0
  161. /cuda/{cccl/parallel/experimental → compute}/_bindings.py +0 -0
  162. /cuda/{cccl/parallel/experimental → compute}/_caching.py +0 -0
  163. /cuda/{cccl/parallel/experimental → compute}/_cccl_interop.py +0 -0
  164. /cuda/{cccl/parallel/experimental → compute}/_utils/__init__.py +0 -0
  165. /cuda/{cccl/parallel/experimental → compute}/_utils/protocols.py +0 -0
  166. /cuda/{cccl/parallel/experimental → compute}/_utils/temp_storage_buffer.py +0 -0
  167. /cuda/{cccl/parallel/experimental → compute}/cccl/.gitkeep +0 -0
  168. /cuda/{cccl/parallel/experimental → compute}/iterators/__init__.py +0 -0
  169. /cuda/{cccl/parallel/experimental → compute}/iterators/_iterators.py +0 -0
  170. /cuda/{cccl/parallel/experimental → compute}/iterators/_zip_iterator.py +0 -0
  171. /cuda/{cccl/parallel/experimental → compute}/numba_utils.py +0 -0
  172. /cuda/{cccl/parallel/experimental → compute}/op.py +0 -0
  173. /cuda/{cccl/parallel/experimental → compute}/typing.py +0 -0
  174. /cuda/{cccl/cooperative/experimental → coop}/_caching.py +0 -0
  175. /cuda/{cccl/cooperative/experimental → coop}/_common.py +0 -0
  176. {cuda_cccl-0.1.3.2.0.dev438.dist-info → cuda_cccl-0.3.1.dist-info}/WHEEL +0 -0
  177. {cuda_cccl-0.1.3.2.0.dev438.dist-info → cuda_cccl-0.3.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,9 +0,0 @@
1
- # Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
2
- #
3
- # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4
-
5
- from cuda.cccl.cooperative.experimental.warp._warp_merge_sort import merge_sort_keys
6
- from cuda.cccl.cooperative.experimental.warp._warp_reduce import reduce, sum
7
- from cuda.cccl.cooperative.experimental.warp._warp_scan import exclusive_sum
8
-
9
- __all__ = ["exclusive_sum", "reduce", "sum", "merge_sort_keys"]
@@ -1,111 +0,0 @@
1
- // SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
3
-
4
- #pragma once
5
-
6
- #include <cub/config.cuh>
7
-
8
- #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
9
- # pragma GCC system_header
10
- #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
11
- # pragma clang system_header
12
- #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
13
- # pragma system_header
14
- #endif // no system header
15
-
16
- #include <cuda/std/__type_traits/integral_constant.h>
17
- #include <cuda/std/__type_traits/void_t.h>
18
- #include <cuda/std/__utility/declval.h>
19
-
20
- CUB_NAMESPACE_BEGIN
21
-
22
- namespace detail
23
- {
24
- template <typename T, typename U, typename = void>
25
- struct has_plus_operator : ::cuda::std::false_type
26
- {};
27
-
28
- template <typename T, typename U>
29
- struct has_plus_operator<T, U, ::cuda::std::void_t<decltype(::cuda::std::declval<T>() + ::cuda::std::declval<U>())>>
30
- : ::cuda::std::true_type
31
- {};
32
-
33
- template <typename T, typename U>
34
- constexpr bool has_plus_operator_v = has_plus_operator<T, U>::value;
35
-
36
- // Helper function that advances a given iterator only if it supports being advanced by the given offset
37
- template <typename IteratorT, typename OffsetT>
38
- CUB_RUNTIME_FUNCTION _CCCL_VISIBILITY_HIDDEN _CCCL_FORCEINLINE IteratorT
39
- advance_iterators_if_supported(IteratorT iter, [[maybe_unused]] OffsetT offset)
40
- {
41
- if constexpr (has_plus_operator_v<IteratorT, OffsetT>)
42
- {
43
- // If operator+ is valid, advance the iterator.
44
- return iter + offset;
45
- }
46
- else
47
- {
48
- // Otherwise, return iter unmodified.
49
- return iter;
50
- }
51
- }
52
-
53
- template <typename T, typename U, typename = void>
54
- struct has_add_assign_operator : ::cuda::std::false_type
55
- {};
56
-
57
- template <typename T, typename U>
58
- struct has_add_assign_operator<T,
59
- U,
60
- ::cuda::std::void_t<decltype(::cuda::std::declval<T&>() += ::cuda::std::declval<U>())>>
61
- : ::cuda::std::true_type
62
- {};
63
-
64
- template <typename T, typename U>
65
- constexpr bool has_add_assign_operator_v = has_add_assign_operator<T, U>::value;
66
-
67
- // Helper function that advances a given iterator in place only if it supports `+=` with the given offset
68
- template <typename IteratorT, typename OffsetT>
69
- CUB_RUNTIME_FUNCTION _CCCL_VISIBILITY_HIDDEN _CCCL_FORCEINLINE void
70
- advance_iterators_inplace_if_supported(IteratorT& iter, [[maybe_unused]] OffsetT offset)
71
- {
72
- if constexpr (has_add_assign_operator_v<IteratorT, OffsetT>)
73
- {
74
- // If operator+= is valid, advance the iterator in place.
75
- iter += offset;
76
- }
77
- }
78
-
79
- // Helper function that checks whether all of the given iterators support the + operator with the given offset
80
- template <typename OffsetT, typename... Iterators>
81
- CUB_RUNTIME_FUNCTION _CCCL_VISIBILITY_HIDDEN _CCCL_FORCEINLINE bool
82
- all_iterators_support_plus_operator(OffsetT /*offset*/, Iterators... /*iters*/)
83
- {
84
- if constexpr ((has_plus_operator_v<Iterators, OffsetT> && ...))
85
- {
86
- return true;
87
- }
88
- else
89
- {
90
- return false;
91
- }
92
- }
93
-
94
- // Helper function that checks whether all of the given iterators support the += operator with the given offset
95
- template <typename OffsetT, typename... Iterators>
96
- CUB_RUNTIME_FUNCTION _CCCL_VISIBILITY_HIDDEN _CCCL_FORCEINLINE bool
97
- all_iterators_support_add_assign_operator(OffsetT /*offset*/, Iterators... /*iters*/)
98
- {
99
- if constexpr ((has_add_assign_operator_v<Iterators, OffsetT> && ...))
100
- {
101
- return true;
102
- }
103
- else
104
- {
105
- return false;
106
- }
107
- }
108
-
109
- } // namespace detail
110
-
111
- CUB_NAMESPACE_END
@@ -1,72 +0,0 @@
1
- //===----------------------------------------------------------------------===//
2
- //
3
- // Part of libcu++, the C++ Standard Library for your entire system,
4
- // under the Apache License v2.0 with LLVM Exceptions.
5
- // See https://llvm.org/LICENSE.txt for license information.
6
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7
- // SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
8
- //
9
- //===----------------------------------------------------------------------===//
10
-
11
- #ifndef _CUDA__STD__CUDA_ENSURE_CURRENT_DEVICE_H
12
- #define _CUDA__STD__CUDA_ENSURE_CURRENT_DEVICE_H
13
-
14
- #include <cuda/std/detail/__config>
15
-
16
- #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
17
- # pragma GCC system_header
18
- #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
19
- # pragma clang system_header
20
- #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
21
- # pragma system_header
22
- #endif // no system header
23
-
24
- #if _CCCL_HAS_CTK()
25
-
26
- # include <cuda/std/__cuda/api_wrapper.h>
27
-
28
- # include <cuda/std/__cccl/prologue.h>
29
-
30
- _CCCL_BEGIN_NAMESPACE_CUDA
31
-
32
- # if !_CCCL_COMPILER(NVRTC)
33
-
34
- //! @brief `__ensure_current_device` is a simple helper that ensures the current device is set to the right one.
35
- //! Only changes the current device if the target device is not the current one
36
- struct __ensure_current_device
37
- {
38
- int __target_device_ = 0;
39
- int __original_device_ = 0;
40
-
41
- //! @brief Queries the current device and if that is different than \p __target_device sets the current device to
42
- //! \p __target_device
43
- __ensure_current_device(const int __target_device)
44
- : __target_device_(__target_device)
45
- {
46
- _CCCL_TRY_CUDA_API(::cudaGetDevice, "Failed to query current device", &__original_device_);
47
- if (__original_device_ != __target_device_)
48
- {
49
- _CCCL_TRY_CUDA_API(::cudaSetDevice, "Failed to set device", __target_device_);
50
- }
51
- }
52
-
53
- //! @brief If the \p __original_device was not equal to \p __target_device sets the current device back to
54
- //! \p __original_device
55
- ~__ensure_current_device()
56
- {
57
- if (__original_device_ != __target_device_)
58
- {
59
- _CCCL_LOG_CUDA_API(::cudaSetDevice, "Failed to set device", __original_device_);
60
- }
61
- }
62
- };
63
-
64
- # endif // !_CCCL_COMPILER(NVRTC)
65
-
66
- _CCCL_END_NAMESPACE_CUDA
67
-
68
- # include <cuda/std/__cccl/epilogue.h>
69
-
70
- #endif // _CCCL_HAS_CTK()
71
-
72
- #endif //_CUDA__STD__CUDA_ENSURE_CURRENT_DEVICE_H
@@ -1,4 +0,0 @@
1
- # these are generated at build time depending on the CUDA
2
- # version we're building for:
3
- cu12/
4
- cu13/
File without changes
File without changes
File without changes