cuda-cccl 0.3.0__cp310-cp310-manylinux_2_24_aarch64.whl → 0.3.1__cp310-cp310-manylinux_2_24_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of cuda-cccl has been flagged as potentially problematic.

Files changed (144)
  1. cuda/cccl/cooperative/__init__.py +7 -1
  2. cuda/cccl/cooperative/experimental/__init__.py +21 -5
  3. cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +2 -5
  4. cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +2 -5
  5. cuda/cccl/headers/include/cub/agent/agent_for.cuh +2 -5
  6. cuda/cccl/headers/include/cub/agent/agent_merge.cuh +23 -21
  7. cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +21 -3
  8. cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +2 -5
  9. cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +2 -5
  10. cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +2 -5
  11. cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +2 -5
  12. cuda/cccl/headers/include/cub/agent/agent_rle.cuh +2 -5
  13. cuda/cccl/headers/include/cub/agent/agent_scan.cuh +5 -1
  14. cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +2 -5
  15. cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +2 -5
  16. cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +2 -5
  17. cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +2 -5
  18. cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +2 -5
  19. cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +22 -5
  20. cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +3 -2
  21. cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +4 -2
  22. cuda/cccl/headers/include/cub/detail/device_memory_resource.cuh +1 -0
  23. cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +158 -247
  24. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +4 -4
  25. cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +2 -11
  26. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +8 -26
  27. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +1 -6
  28. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh +0 -1
  29. cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +2 -3
  30. cuda/cccl/headers/include/cub/device/dispatch/kernels/reduce.cuh +2 -5
  31. cuda/cccl/headers/include/cub/device/dispatch/kernels/scan.cuh +2 -5
  32. cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_reduce.cuh +2 -5
  33. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +2 -5
  34. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +2 -5
  35. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +2 -5
  36. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +2 -5
  37. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +2 -5
  38. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +8 -0
  39. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +2 -5
  40. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +2 -5
  41. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +2 -5
  42. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +2 -5
  43. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +2 -5
  44. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +2 -5
  45. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +2 -5
  46. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +10 -0
  47. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +3 -2
  48. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +3 -2
  49. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +2 -2
  50. cuda/cccl/headers/include/cuda/__algorithm/common.h +1 -1
  51. cuda/cccl/headers/include/cuda/__algorithm/copy.h +1 -1
  52. cuda/cccl/headers/include/cuda/__algorithm/fill.h +1 -1
  53. cuda/cccl/headers/include/cuda/__device/all_devices.h +46 -143
  54. cuda/cccl/headers/include/cuda/__device/arch_traits.h +48 -46
  55. cuda/cccl/headers/include/cuda/__device/attributes.h +171 -121
  56. cuda/cccl/headers/include/cuda/__device/device_ref.h +30 -42
  57. cuda/cccl/headers/include/cuda/__device/physical_device.h +120 -91
  58. cuda/cccl/headers/include/cuda/__driver/driver_api.h +105 -3
  59. cuda/cccl/headers/include/cuda/__event/event.h +1 -0
  60. cuda/cccl/headers/include/cuda/__event/timed_event.h +1 -0
  61. cuda/cccl/headers/include/cuda/__fwd/devices.h +44 -0
  62. cuda/cccl/headers/include/cuda/__fwd/zip_iterator.h +9 -0
  63. cuda/cccl/headers/include/cuda/__iterator/zip_common.h +158 -0
  64. cuda/cccl/headers/include/cuda/__iterator/zip_iterator.h +8 -120
  65. cuda/cccl/headers/include/cuda/__iterator/zip_transform_iterator.h +593 -0
  66. cuda/cccl/headers/include/cuda/__runtime/ensure_current_context.h +4 -3
  67. cuda/cccl/headers/include/cuda/__stream/stream_ref.h +1 -0
  68. cuda/cccl/headers/include/cuda/__utility/basic_any.h +1 -1
  69. cuda/cccl/headers/include/cuda/algorithm +1 -1
  70. cuda/cccl/headers/include/cuda/devices +10 -0
  71. cuda/cccl/headers/include/cuda/iterator +1 -0
  72. cuda/cccl/headers/include/cuda/std/__bit/countl.h +8 -1
  73. cuda/cccl/headers/include/cuda/std/__bit/countr.h +2 -2
  74. cuda/cccl/headers/include/cuda/std/__bit/reference.h +11 -11
  75. cuda/cccl/headers/include/cuda/std/__chrono/duration.h +16 -16
  76. cuda/cccl/headers/include/cuda/std/__chrono/steady_clock.h +5 -5
  77. cuda/cccl/headers/include/cuda/std/__chrono/system_clock.h +5 -5
  78. cuda/cccl/headers/include/cuda/std/__floating_point/fp.h +1 -1
  79. cuda/cccl/headers/include/cuda/std/__tuple_dir/make_tuple_types.h +23 -1
  80. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +4 -0
  81. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like_ext.h +4 -0
  82. cuda/cccl/headers/include/cuda/std/string_view +12 -5
  83. cuda/cccl/headers/include/cuda/std/version +1 -4
  84. cuda/cccl/headers/include/thrust/detail/integer_math.h +3 -20
  85. cuda/cccl/headers/include/thrust/iterator/iterator_traits.h +11 -0
  86. cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +33 -0
  87. cuda/cccl/parallel/experimental/__init__.py +21 -74
  88. cuda/compute/__init__.py +77 -0
  89. cuda/{cccl/parallel/experimental → compute}/_bindings_impl.pyx +1 -1
  90. cuda/{cccl/parallel/experimental → compute}/algorithms/_histogram.py +2 -2
  91. cuda/{cccl/parallel/experimental → compute}/algorithms/_merge_sort.py +2 -2
  92. cuda/{cccl/parallel/experimental → compute}/algorithms/_radix_sort.py +3 -3
  93. cuda/{cccl/parallel/experimental → compute}/algorithms/_reduce.py +2 -2
  94. cuda/{cccl/parallel/experimental → compute}/algorithms/_scan.py +4 -4
  95. cuda/{cccl/parallel/experimental → compute}/algorithms/_segmented_reduce.py +2 -2
  96. cuda/{cccl/parallel/experimental → compute}/algorithms/_three_way_partition.py +2 -2
  97. cuda/{cccl/parallel/experimental → compute}/algorithms/_transform.py +4 -4
  98. cuda/{cccl/parallel/experimental → compute}/algorithms/_unique_by_key.py +2 -2
  99. cuda/{cccl/parallel/experimental → compute}/cu12/_bindings_impl.cpython-310-aarch64-linux-gnu.so +0 -0
  100. cuda/{cccl/parallel/experimental → compute}/cu12/cccl/libcccl.c.parallel.so +0 -0
  101. cuda/{cccl/parallel/experimental → compute}/cu13/_bindings_impl.cpython-310-aarch64-linux-gnu.so +0 -0
  102. cuda/{cccl/parallel/experimental → compute}/cu13/cccl/libcccl.c.parallel.so +0 -0
  103. cuda/{cccl/parallel/experimental → compute}/iterators/_factories.py +8 -8
  104. cuda/{cccl/parallel/experimental → compute}/struct.py +2 -2
  105. cuda/coop/__init__.py +8 -0
  106. cuda/{cccl/cooperative/experimental → coop}/_nvrtc.py +3 -2
  107. cuda/{cccl/cooperative/experimental → coop}/_scan_op.py +3 -3
  108. cuda/{cccl/cooperative/experimental → coop}/_types.py +2 -2
  109. cuda/{cccl/cooperative/experimental → coop}/_typing.py +1 -1
  110. cuda/{cccl/cooperative/experimental → coop}/block/__init__.py +6 -6
  111. cuda/{cccl/cooperative/experimental → coop}/block/_block_exchange.py +4 -4
  112. cuda/{cccl/cooperative/experimental → coop}/block/_block_load_store.py +6 -6
  113. cuda/{cccl/cooperative/experimental → coop}/block/_block_merge_sort.py +4 -4
  114. cuda/{cccl/cooperative/experimental → coop}/block/_block_radix_sort.py +6 -6
  115. cuda/{cccl/cooperative/experimental → coop}/block/_block_reduce.py +6 -6
  116. cuda/{cccl/cooperative/experimental → coop}/block/_block_scan.py +7 -7
  117. cuda/coop/warp/__init__.py +9 -0
  118. cuda/{cccl/cooperative/experimental → coop}/warp/_warp_merge_sort.py +3 -3
  119. cuda/{cccl/cooperative/experimental → coop}/warp/_warp_reduce.py +6 -6
  120. cuda/{cccl/cooperative/experimental → coop}/warp/_warp_scan.py +4 -4
  121. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.1.dist-info}/METADATA +1 -1
  122. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.1.dist-info}/RECORD +141 -138
  123. cuda/cccl/cooperative/experimental/warp/__init__.py +0 -9
  124. cuda/cccl/headers/include/cub/device/dispatch/dispatch_advance_iterators.cuh +0 -111
  125. cuda/cccl/parallel/experimental/.gitignore +0 -4
  126. /cuda/{cccl/parallel/experimental → compute}/_bindings.py +0 -0
  127. /cuda/{cccl/parallel/experimental → compute}/_bindings.pyi +0 -0
  128. /cuda/{cccl/parallel/experimental → compute}/_caching.py +0 -0
  129. /cuda/{cccl/parallel/experimental → compute}/_cccl_interop.py +0 -0
  130. /cuda/{cccl/parallel/experimental → compute}/_utils/__init__.py +0 -0
  131. /cuda/{cccl/parallel/experimental → compute}/_utils/protocols.py +0 -0
  132. /cuda/{cccl/parallel/experimental → compute}/_utils/temp_storage_buffer.py +0 -0
  133. /cuda/{cccl/parallel/experimental → compute}/algorithms/__init__.py +0 -0
  134. /cuda/{cccl/parallel/experimental → compute}/cccl/.gitkeep +0 -0
  135. /cuda/{cccl/parallel/experimental → compute}/iterators/__init__.py +0 -0
  136. /cuda/{cccl/parallel/experimental → compute}/iterators/_iterators.py +0 -0
  137. /cuda/{cccl/parallel/experimental → compute}/iterators/_zip_iterator.py +0 -0
  138. /cuda/{cccl/parallel/experimental → compute}/numba_utils.py +0 -0
  139. /cuda/{cccl/parallel/experimental → compute}/op.py +0 -0
  140. /cuda/{cccl/parallel/experimental → compute}/typing.py +0 -0
  141. /cuda/{cccl/cooperative/experimental → coop}/_caching.py +0 -0
  142. /cuda/{cccl/cooperative/experimental → coop}/_common.py +0 -0
  143. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.1.dist-info}/WHEEL +0 -0
  144. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,111 +0,0 @@
1
- // SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
3
-
4
- #pragma once
5
-
6
- #include <cub/config.cuh>
7
-
8
- #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
9
- # pragma GCC system_header
10
- #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
11
- # pragma clang system_header
12
- #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
13
- # pragma system_header
14
- #endif // no system header
15
-
16
- #include <cuda/std/__type_traits/integral_constant.h>
17
- #include <cuda/std/__type_traits/void_t.h>
18
- #include <cuda/std/__utility/declval.h>
19
-
20
- CUB_NAMESPACE_BEGIN
21
-
22
- namespace detail
23
- {
24
- template <typename T, typename U, typename = void>
25
- struct has_plus_operator : ::cuda::std::false_type
26
- {};
27
-
28
- template <typename T, typename U>
29
- struct has_plus_operator<T, U, ::cuda::std::void_t<decltype(::cuda::std::declval<T>() + ::cuda::std::declval<U>())>>
30
- : ::cuda::std::true_type
31
- {};
32
-
33
- template <typename T, typename U>
34
- constexpr bool has_plus_operator_v = has_plus_operator<T, U>::value;
35
-
36
- // Helper function that advances a given iterator only if it supports being advanced by the given offset
37
- template <typename IteratorT, typename OffsetT>
38
- CUB_RUNTIME_FUNCTION _CCCL_VISIBILITY_HIDDEN _CCCL_FORCEINLINE IteratorT
39
- advance_iterators_if_supported(IteratorT iter, [[maybe_unused]] OffsetT offset)
40
- {
41
- if constexpr (has_plus_operator_v<IteratorT, OffsetT>)
42
- {
43
- // If operator+ is valid, advance the iterator.
44
- return iter + offset;
45
- }
46
- else
47
- {
48
- // Otherwise, return iter unmodified.
49
- return iter;
50
- }
51
- }
52
-
53
- template <typename T, typename U, typename = void>
54
- struct has_add_assign_operator : ::cuda::std::false_type
55
- {};
56
-
57
- template <typename T, typename U>
58
- struct has_add_assign_operator<T,
59
- U,
60
- ::cuda::std::void_t<decltype(::cuda::std::declval<T&>() += ::cuda::std::declval<U>())>>
61
- : ::cuda::std::true_type
62
- {};
63
-
64
- template <typename T, typename U>
65
- constexpr bool has_add_assign_operator_v = has_add_assign_operator<T, U>::value;
66
-
67
- // Helper function that advances a given iterator only if it supports being advanced by the given offset
68
- template <typename IteratorT, typename OffsetT>
69
- CUB_RUNTIME_FUNCTION _CCCL_VISIBILITY_HIDDEN _CCCL_FORCEINLINE void
70
- advance_iterators_inplace_if_supported(IteratorT& iter, [[maybe_unused]] OffsetT offset)
71
- {
72
- if constexpr (has_add_assign_operator_v<IteratorT, OffsetT>)
73
- {
74
- // If operator+= is valid, advance the iterator in place.
75
- iter += offset;
76
- }
77
- }
78
-
79
- // Helper function that checks whether all of the given iterators support the + operator with the given offset
80
- template <typename OffsetT, typename... Iterators>
81
- CUB_RUNTIME_FUNCTION _CCCL_VISIBILITY_HIDDEN _CCCL_FORCEINLINE bool
82
- all_iterators_support_plus_operator(OffsetT /*offset*/, Iterators... /*iters*/)
83
- {
84
- if constexpr ((has_plus_operator_v<Iterators, OffsetT> && ...))
85
- {
86
- return true;
87
- }
88
- else
89
- {
90
- return false;
91
- }
92
- }
93
-
94
- // Helper function that checks whether all of the given iterators support the += operator with the given offset
95
- template <typename OffsetT, typename... Iterators>
96
- CUB_RUNTIME_FUNCTION _CCCL_VISIBILITY_HIDDEN _CCCL_FORCEINLINE bool
97
- all_iterators_support_add_assign_operator(OffsetT /*offset*/, Iterators... /*iters*/)
98
- {
99
- if constexpr ((has_add_assign_operator_v<Iterators, OffsetT> && ...))
100
- {
101
- return true;
102
- }
103
- else
104
- {
105
- return false;
106
- }
107
- }
108
-
109
- } // namespace detail
110
-
111
- CUB_NAMESPACE_END
@@ -1,4 +0,0 @@
1
- # these are generated at build time depending on the CUDA
2
- # version we're building for:
3
- cu12/
4
- cu13/
File without changes
File without changes
File without changes