PyPI - cuda-cccl - Versions diffs - 0.3.0__cp312-cp312-manylinux_2_24_aarch64.whl → 0.3.2__cp312-cp312-manylinux_2_24_aarch64.whl - Mend

cuda-cccl 0.3.0__cp312-cp312-manylinux_2_24_aarch64.whl → 0.3.2__cp312-cp312-manylinux_2_24_aarch64.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.

This version of cuda-cccl might be problematic. Click here for more details.

Files changed (294)

cuda/cccl/headers/include/cuda/cmath CHANGED Viewed

@@ -26,6 +26,7 @@
 #include <cuda/__cmath/ilog.h>
 #include <cuda/__cmath/ipow.h>
 #include <cuda/__cmath/isqrt.h>
+#include <cuda/__cmath/mul_hi.h>
 #include <cuda/__cmath/neg.h>
 #include <cuda/__cmath/pow2.h>
 #include <cuda/__cmath/round_down.h>

cuda/cccl/headers/include/cuda/devices CHANGED Viewed

@@ -11,10 +11,23 @@
 #ifndef _CUDA_DEVICES
 #define _CUDA_DEVICES
+#include <cuda/std/detail/__config>
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
 #include <cuda/__device/all_devices.h>
+#include <cuda/__device/arch_id.h>
 #include <cuda/__device/arch_traits.h>
 #include <cuda/__device/attributes.h>
+#include <cuda/__device/compute_capability.h>
 #include <cuda/__device/device_ref.h>
 #include <cuda/__device/physical_device.h>
+#include <cuda/version>
 #endif // _CUDA_DEVICES

cuda/cccl/headers/include/cuda/iterator CHANGED Viewed

@@ -33,6 +33,7 @@
 #include <cuda/__iterator/transform_output_iterator.h>
 #include <cuda/__iterator/zip_function.h>
 #include <cuda/__iterator/zip_iterator.h>
+#include <cuda/__iterator/zip_transform_iterator.h>
 #include <cuda/std/iterator>
 #endif // _CUDA_ITERATOR

cuda/cccl/headers/include/cuda/memory CHANGED Viewed

@@ -28,6 +28,7 @@
 #include <cuda/__memory/discard_memory.h>
 #include <cuda/__memory/get_device_address.h>
 #include <cuda/__memory/is_aligned.h>
+#include <cuda/__memory/ptr_in_range.h>
 #include <cuda/__memory/ptr_rebind.h>
 #include <cuda/std/memory>

cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h CHANGED Viewed

@@ -52,12 +52,12 @@ __equal_range(_Iter __first, _Sent __last, const _Tp& __value, _Compare&& __comp
   {
     auto __half_len = ::cuda::std::__half_positive(__len);
     _Iter __mid     = _IterOps<_AlgPolicy>::next(__first, __half_len);
-    if (::cuda::std::__invoke(__comp, ::cuda::std::__invoke(__proj, *__mid), __value))
+    if (::cuda::std::invoke(__comp, ::cuda::std::invoke(__proj, *__mid), __value))
     {
       __first = ++__mid;
       __len -= __half_len + 1;
     }
-    else if (::cuda::std::__invoke(__comp, __value, ::cuda::std::__invoke(__proj, *__mid)))
+    else if (::cuda::std::invoke(__comp, __value, ::cuda::std::invoke(__proj, *__mid)))
     {
       __end = __mid;
       __len = __half_len;

cuda/cccl/headers/include/cuda/std/__algorithm/find.h CHANGED Viewed

@@ -33,7 +33,7 @@ template <class _Iter, class _Sent, class _Tp, class _Proj>
 {
   for (; __first != __last; ++__first)
   {
-    if (::cuda::std::__invoke(__proj, *__first) == __value)
+    if (::cuda::std::invoke(__proj, *__first) == __value)
     {
       break;
     }

cuda/cccl/headers/include/cuda/std/__algorithm/includes.h CHANGED Viewed

@@ -40,13 +40,11 @@ _CCCL_API constexpr bool __includes(
   for (; __first2 != __last2; ++__first1)
   {
     if (__first1 == __last1
-        || ::cuda::std::__invoke(
-          __comp, ::cuda::std::__invoke(__proj2, *__first2), ::cuda::std::__invoke(__proj1, *__first1)))
+        || ::cuda::std::invoke(__comp, ::cuda::std::invoke(__proj2, *__first2), ::cuda::std::invoke(__proj1, *__first1)))
     {
       return false;
     }
-    if (!::cuda::std::__invoke(
-          __comp, ::cuda::std::__invoke(__proj1, *__first1), ::cuda::std::__invoke(__proj2, *__first2)))
+    if (!::cuda::std::invoke(__comp, ::cuda::std::invoke(__proj1, *__first1), ::cuda::std::invoke(__proj2, *__first2)))
     {
       ++__first2;
     }

cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h CHANGED Viewed

@@ -46,7 +46,7 @@ _CCCL_API constexpr _Iter __lower_bound(_Iter __first, _Sent __last, const _Type
     auto __l2 = ::cuda::std::__half_positive(__len);
     _Iter __m = __first;
     _IterOps<_AlgPolicy>::advance(__m, __l2);
-    if (::cuda::std::__invoke(__comp, ::cuda::std::__invoke(__proj, *__m), __value))
+    if (::cuda::std::invoke(__comp, ::cuda::std::invoke(__proj, *__m), __value))
     {
       __first = ++__m;
       __len -= __l2 + 1;

cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h CHANGED Viewed

@@ -47,26 +47,18 @@ struct _ProjectedPred
   {}
   template <class _Tp>
-  typename __invoke_of<
-    _Pred&,
-    decltype(::cuda::std::__invoke(::cuda::std::declval<_Proj&>(), ::cuda::std::declval<_Tp>()))>::type constexpr
-    _CCCL_API inline
-    operator()(_Tp&& __v) const
+  invoke_result_t<_Pred&, invoke_result_t<_Proj&, _Tp>> constexpr _CCCL_API inline operator()(_Tp&& __v) const
   {
-    return ::cuda::std::__invoke(__pred, ::cuda::std::__invoke(__proj, ::cuda::std::forward<_Tp>(__v)));
+    return ::cuda::std::invoke(__pred, ::cuda::std::invoke(__proj, ::cuda::std::forward<_Tp>(__v)));
   }
   template <class _T1, class _T2>
-  typename __invoke_of<
-    _Pred&,
-    decltype(::cuda::std::__invoke(::cuda::std::declval<_Proj&>(), ::cuda::std::declval<_T1>())),
-    decltype(::cuda::std::__invoke(::cuda::std::declval<_Proj&>(), ::cuda::std::declval<_T2>()))>::type constexpr
-    _CCCL_API inline
-    operator()(_T1&& __lhs, _T2&& __rhs) const
+  invoke_result_t<_Pred&, invoke_result_t<_Proj&, _T1>, invoke_result_t<_Proj&, _T2>> _CCCL_API inline
+  operator()(_T1&& __lhs, _T2&& __rhs) const
   {
-    return ::cuda::std::__invoke(__pred,
-                                 ::cuda::std::__invoke(__proj, ::cuda::std::forward<_T1>(__lhs)),
-                                 ::cuda::std::__invoke(__proj, ::cuda::std::forward<_T2>(__rhs)));
+    return ::cuda::std::invoke(__pred,
+                               ::cuda::std::invoke(__proj, ::cuda::std::forward<_T1>(__lhs)),
+                               ::cuda::std::invoke(__proj, ::cuda::std::forward<_T2>(__rhs)));
   }
 };

cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h CHANGED Viewed

@@ -44,7 +44,7 @@ _CCCL_API constexpr _Iter __min_element(_Iter __first, _Sent __last, _Comp __com
   _Iter __i = __first;
   while (++__i != __last)
   {
-    if (::cuda::std::__invoke(__comp, ::cuda::std::__invoke(__proj, *__i), ::cuda::std::__invoke(__proj, *__first)))
+    if (::cuda::std::invoke(__comp, ::cuda::std::invoke(__proj, *__i), ::cuda::std::invoke(__proj, *__first)))
     {
       __first = __i;
     }

cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h CHANGED Viewed

@@ -46,8 +46,7 @@ public:
   template <class _Iter>
   _CCCL_API constexpr bool operator()(_Iter& __it1, _Iter& __it2)
   {
-    return ::cuda::std::__invoke(
-      __comp_, ::cuda::std::__invoke(__proj_, *__it1), ::cuda::std::__invoke(__proj_, *__it2));
+    return ::cuda::std::invoke(__comp_, ::cuda::std::invoke(__proj_, *__it1), ::cuda::std::invoke(__proj_, *__it2));
   }
 };

cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h CHANGED Viewed

@@ -69,8 +69,8 @@ _CCCL_API constexpr pair<_InputIterator, _RandomAccessIterator> __partial_sort_c
     typename iterator_traits<_RandomAccessIterator>::difference_type __len = __r - __result_first;
     for (; __first != __last; ++__first)
     {
-      if (::cuda::std::__invoke(
-            __comp, ::cuda::std::__invoke(__proj1, *__first), ::cuda::std::__invoke(__proj2, *__result_first)))
+      if (::cuda::std::invoke(
+            __comp, ::cuda::std::invoke(__proj1, *__first), ::cuda::std::invoke(__proj2, *__result_first)))
       {
         *__result_first = *__first;
         ::cuda::std::__sift_down<_AlgPolicy>(__result_first, __projected_comp, __len, __result_first);

cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h CHANGED Viewed

@@ -45,7 +45,7 @@ __upper_bound(_Iter __first, _Sent __last, const _Tp& __value, _Compare&& __comp
   {
     auto __half_len = ::cuda::std::__half_positive(__len);
     auto __mid      = _IterOps<_AlgPolicy>::next(__first, __half_len);
-    if (::cuda::std::__invoke(__comp, __value, ::cuda::std::__invoke(__proj, *__mid)))
+    if (::cuda::std::invoke(__comp, __value, ::cuda::std::invoke(__proj, *__mid)))
     {
       __len = __half_len;
     }

cuda/cccl/headers/include/cuda/std/__bit/countl.h CHANGED Viewed

@@ -100,7 +100,14 @@ template <typename _Tp>
 template <typename _Tp>
 [[nodiscard]] _CCCL_HIDE_FROM_ABI _CCCL_DEVICE int __cccl_countl_zero_impl_device(_Tp __v) noexcept
 {
-  return (sizeof(_Tp) == sizeof(uint32_t)) ? ::__clz(static_cast<int>(__v)) : ::__clzll(static_cast<long long>(__v));
+  if constexpr (sizeof(_Tp) == sizeof(uint32_t))
+  {
+    return static_cast<int>(::__clz(static_cast<int>(__v)));
+  }
+  else
+  {
+    return static_cast<int>(::__clzll(static_cast<long long>(__v)));
+  }
 }
 #endif // _CCCL_CUDA_COMPILATION()

cuda/cccl/headers/include/cuda/std/__bit/countr.h CHANGED Viewed

@@ -114,11 +114,11 @@ template <typename _Tp>
 {
   if constexpr (sizeof(_Tp) == sizeof(uint32_t))
   {
-    return ::__clz(static_cast<int>(::__brev(__v)));
+    return static_cast<int>(::__clz(static_cast<int>(::__brev(__v))));
   }
   else
   {
-    return ::__clzll(static_cast<long long>(::__brevll(__v)));
+    return static_cast<int>(::__clzll(static_cast<long long>(::__brevll(__v))));
   }
 }
 #endif // _CCCL_CUDA_COMPILATION()

cuda/cccl/headers/include/cuda/std/__bit/reference.h CHANGED Viewed

@@ -275,10 +275,10 @@ _CCCL_API constexpr __bit_iterator<_Cp, false> __copy_aligned(
     // do first word
     if (__first.__ctz_ != 0)
     {
-      unsigned __clz       = __bits_per_word - __first.__ctz_;
-      difference_type __dn = ::cuda::std::min(static_cast<difference_type>(__clz), __n);
+      unsigned __clz_f     = __bits_per_word - __first.__ctz_;
+      difference_type __dn = ::cuda::std::min(static_cast<difference_type>(__clz_f), __n);
       __n -= __dn;
-      __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
+      __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
       __storage_type __b = *__first.__seg_ & __m;
       *__result.__seg_ &= ~__m;
       *__result.__seg_ |= __b;
@@ -420,8 +420,8 @@ _CCCL_API constexpr __bit_iterator<_Cp, false> __copy_backward_aligned(
     {
       difference_type __dn = ::cuda::std::min(static_cast<difference_type>(__last.__ctz_), __n);
       __n -= __dn;
-      unsigned __clz     = __bits_per_word - __last.__ctz_;
-      __storage_type __m = (~__storage_type(0) << (__last.__ctz_ - __dn)) & (~__storage_type(0) >> __clz);
+      unsigned __clz_f   = __bits_per_word - __last.__ctz_;
+      __storage_type __m = (~__storage_type(0) << (__last.__ctz_ - __dn)) & (~__storage_type(0) >> __clz_f);
       __storage_type __b = *__last.__seg_ & __m;
       *__result.__seg_ &= ~__m;
       *__result.__seg_ |= __b;
@@ -635,10 +635,10 @@ _CCCL_API inline __bit_iterator<_Cr, false> __swap_ranges_aligned(
     // do first word
     if (__first.__ctz_ != 0)
     {
-      unsigned __clz       = __bits_per_word - __first.__ctz_;
-      difference_type __dn = ::cuda::std::min(static_cast<difference_type>(__clz), __n);
+      unsigned __clz_f     = __bits_per_word - __first.__ctz_;
+      difference_type __dn = ::cuda::std::min(static_cast<difference_type>(__clz_f), __n);
       __n -= __dn;
-      __storage_type __m  = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
+      __storage_type __m  = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
       __storage_type __b1 = *__first.__seg_ & __m;
       *__first.__seg_ &= ~__m;
       __storage_type __b2 = *__result.__seg_ & __m;
@@ -988,10 +988,10 @@ _CCCL_API constexpr bool __equal_aligned(
     // do first word
     if (__first1.__ctz_ != 0)
     {
-      unsigned __clz       = __bits_per_word - __first1.__ctz_;
-      difference_type __dn = ::cuda::std::min(static_cast<difference_type>(__clz), __n);
+      unsigned __clz_f     = __bits_per_word - __first1.__ctz_;
+      difference_type __dn = ::cuda::std::min(static_cast<difference_type>(__clz_f), __n);
       __n -= __dn;
-      __storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
+      __storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
       if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m))
       {
         return false;

cuda/cccl/headers/include/cuda/std/__cccl/algorithm_wrapper.h ADDED Viewed

@@ -0,0 +1,36 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+#ifndef _CUDA_STD__CCCL_ALGORITHM_WRAPPER_H
+#define _CUDA_STD__CCCL_ALGORITHM_WRAPPER_H
+#include <cuda/std/detail/__config>
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+// When nvc++ uses CCCL components as part of its implementation of
+// Standard C++ algorithms, a cycle of included files may result when CCCL code
+// tries to use a standard algorithm. The THRUST_INCLUDING_ALGORITHMS_HEADER macro
+// is defined only when CCCL is including an algorithms-related header, giving
+// the compiler a chance to detect and break the cycle of includes.
+#if !_CCCL_COMPILER(NVRTC)
+#  define THRUST_INCLUDING_ALGORITHMS_HEADER
+#  include <algorithm>
+#  undef THRUST_INCLUDING_ALGORITHMS_HEADER
+#endif // !_CCCL_COMPILER(NVRTC)
+#endif // _CUDA_STD__CCCL_ALGORITHM_WRAPPER_H

cuda/cccl/headers/include/cuda/std/__cccl/builtin.h CHANGED Viewed

@@ -25,6 +25,7 @@
 #include <cuda/std/__cccl/attributes.h>
 #include <cuda/std/__cccl/extended_data_types.h>
+#include <cuda/std/__cccl/host_std_lib.h>
 //! This file consolidates all compiler builtin detection for CCCL.
 //!
@@ -607,55 +608,51 @@
 #  define _CCCL_BUILTIN_STRLEN(...) __builtin_strlen(__VA_ARGS__)
 #endif
-// Some compilers provide std::move/std::forward/etc as builtins
-#if defined(__cplusplus)
-// Bring in the feature test macros (needed for std::forward_like)
-#  if _CCCL_HAS_INCLUDE(<version>) // <version> should be the smallest include possible
-#    include <version>
-#  elif !_CCCL_COMPILER(NVRTC)
-#    include <ciso646> // otherwise go for the smallest possible header
-#  endif // !_CCCL_COMPILER(NVRTC)
-// Bring in the bits of the STL we need
-#  if defined(_GLIBCXX_VERSION)
-#    include <bits/move.h> // for move, forward, forward_like, and addressof
-#  elif defined(_LIBCPP_VERSION)
-#    include <__memory/addressof.h>
-#    include <__utility/as_const.h>
-#    include <__utility/forward.h>
-#    include <__utility/forward_like.h>
-#    include <__utility/move.h>
-#  endif
-#  if defined(_GLIBCXX_VERSION) || defined(_LIBCPP_VERSION)
-// std::move builtin
-#    if _CCCL_COMPILER(CLANG, >=, 15) || _CCCL_COMPILER(GCC, >=, 15)
-#      define _CCCL_HAS_BUILTIN_STD_MOVE() 1
-#    endif
-// std::forward builtin
-#    if _CCCL_COMPILER(CLANG, >=, 15) || _CCCL_COMPILER(GCC, >=, 15)
-#      define _CCCL_HAS_BUILTIN_STD_FORWARD() 1
-#    endif
-// std::addressof builtin
-#    if _CCCL_COMPILER(CLANG, >=, 15) || _CCCL_COMPILER(GCC, >=, 15)
-#      define _CCCL_HAS_BUILTIN_STD_ADDRESSOF() 1
-#    endif
-// std::as_const builtin
-#    if _CCCL_COMPILER(CLANG, >=, 15)
-#      define _CCCL_HAS_BUILTIN_STD_AS_CONST() 1
-#    endif
-// std::forward_like builtin
-// Leaving out MSVC for now because it is hard for forward-declare std::forward_like.
-#    if (_CCCL_COMPILER(CLANG, >=, 17) || _CCCL_COMPILER(GCC, >=, 15)) && defined(__cpp_lib_forward_like) \
-      && (__cpp_lib_forward_like >= 202217L)
-#      define _CCCL_HAS_BUILTIN_STD_FORWARD_LIKE() 1
-#    endif
-#  endif // defined(_GLIBCXX_VERSION) || defined(_LIBCPP_VERSION) || defined(_MSVC_STL_VERSION)
-#endif // defined(__cplusplus)
+// todo: re-enable std builtins
+// // Some compilers provide std::move/std::forward/etc as builtins
+// #if defined(__cplusplus)
+// // Bring in the bits of the STL we need
+// #  if _CCCL_HOST_STD_LIB(LIBSTDCXX)
+// #    include <bits/move.h> // for move, forward, forward_like, and addressof
+// #  elif _CCCL_HOST_STD_LIB(LIBCXX)
+// #    include <__memory/addressof.h>
+// #    include <__utility/as_const.h>
+// #    include <__utility/forward.h>
+// #    if __cpp_lib_forward_like >= 202217L
+// #      include <__utility/forward_like.h>
+// #    endif // __cpp_lib_forward_like >= 202217L
+// #    include <__utility/move.h>
+// #  endif
+// #  if _CCCL_HOST_STD_LIB(LIBSTDCXX) || _CCCL_HOST_STD_LIB(LIBCXX)
+// // std::move builtin
+// #    if _CCCL_COMPILER(CLANG, >=, 15) || _CCCL_COMPILER(GCC, >=, 15)
+// #      define _CCCL_HAS_BUILTIN_STD_MOVE() 1
+// #    endif
+// // std::forward builtin
+// #    if _CCCL_COMPILER(CLANG, >=, 15) || _CCCL_COMPILER(GCC, >=, 15)
+// #      define _CCCL_HAS_BUILTIN_STD_FORWARD() 1
+// #    endif
+// // std::addressof builtin
+// #    if _CCCL_COMPILER(CLANG, >=, 15) || _CCCL_COMPILER(GCC, >=, 15)
+// #      define _CCCL_HAS_BUILTIN_STD_ADDRESSOF() 1
+// #    endif
+// // std::as_const builtin
+// #    if _CCCL_COMPILER(CLANG, >=, 15)
+// #      define _CCCL_HAS_BUILTIN_STD_AS_CONST() 1
+// #    endif
+// // std::forward_like builtin
+// // Leaving out MSVC for now because it is hard for forward-declare std::forward_like.
+// #    if (_CCCL_COMPILER(CLANG, >=, 17) || _CCCL_COMPILER(GCC, >=, 15)) && __cpp_lib_forward_like >= 202217L
+// #      define _CCCL_HAS_BUILTIN_STD_FORWARD_LIKE() 1
+// #    endif
+// #  endif // _CCCL_HOST_STD_LIB(LIBSTDCXX) || _CCCL_HOST_STD_LIB(LIBCXX)
+// #endif // defined(__cplusplus)
 #ifndef _CCCL_HAS_BUILTIN_STD_MOVE
 #  define _CCCL_HAS_BUILTIN_STD_MOVE() 0

cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h CHANGED Viewed

@@ -65,4 +65,10 @@
 #  endif // _CCCL_CUDA_COMPILER(NVCC)
 #endif // !_CCCL_EXEC_CHECK_DISABLE
+#if _CCCL_CUDA_COMPILER(NVHPC)
+#  define _CCCL_TARGET_CONSTEXPR
+#else // ^^^ _CCCL_CUDA_COMPILER(NVHPC) ^^^ / vvv !_CCCL_CUDA_COMPILER(NVHPC) vvv
+#  define _CCCL_TARGET_CONSTEXPR constexpr
+#endif // ^^^ !_CCCL_CUDA_COMPILER(NVHPC) ^^^
 #endif // __CCCL_EXECUTION_SPACE_H

cuda/cccl/headers/include/cuda/std/__cccl/host_std_lib.h ADDED Viewed

@@ -0,0 +1,52 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+#ifndef __CCCL_HOST_STD_LIB_H
+#define __CCCL_HOST_STD_LIB_H
+#include <cuda/std/__cccl/compiler.h>
+#include <cuda/std/__cccl/preprocessor.h>
+#include <cuda/std/__cccl/system_header.h>
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+#define _CCCL_HOST_STD_LIB_LIBSTDCXX() 0
+#define _CCCL_HOST_STD_LIB_LIBCXX()    0
+#define _CCCL_HOST_STD_LIB_STL()       0
+// include a minimal header
+#if _CCCL_HAS_INCLUDE(<version>)
+#  include <version>
+#elif _CCCL_HAS_INCLUDE(<ciso646>)
+#  include <ciso646>
+#endif // ^^^ _CCCL_HAS_INCLUDE(<ciso646>) ^^^
+#if defined(_MSVC_STL_VERSION)
+#  undef _CCCL_HOST_STD_LIB_STL
+#  define _CCCL_HOST_STD_LIB_STL() 1
+#elif defined(__GLIBCXX__)
+#  undef _CCCL_HOST_STD_LIB_LIBSTDCXX
+#  define _CCCL_HOST_STD_LIB_LIBSTDCXX() 1
+#elif defined(_LIBCPP_VERSION)
+#  undef _CCCL_HOST_STD_LIB_LIBCXX
+#  define _CCCL_HOST_STD_LIB_LIBCXX() 1
+#endif // ^^^ _LIBCPP_VERSION ^^^
+#define _CCCL_HOST_STD_LIB(_X) _CCCL_HOST_STD_LIB_##_X()
+#define _CCCL_HAS_HOST_STD_LIB() \
+  (_CCCL_HOST_STD_LIB_LIBSTDCXX() || _CCCL_HOST_STD_LIB_LIBCXX() || _CCCL_HOST_STD_LIB_STL())
+#endif // __CCCL_HOST_STD_LIB_H

cuda/cccl/headers/include/cuda/std/__cccl/memory_wrapper.h ADDED Viewed

@@ -0,0 +1,36 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+#ifndef _CUDA_STD__CCCL_MEMORY_WRAPPER_H
+#define _CUDA_STD__CCCL_MEMORY_WRAPPER_H
+#include <cuda/std/detail/__config>
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+// When nvc++ uses CCCL components as part of its implementation of
+// Standard C++ algorithms, a cycle of included files may result when CCCL code
+// tries to use a standard algorithm. The THRUST_INCLUDING_ALGORITHMS_HEADER macro
+// is defined only when CCCL is including an algorithms-related header, giving
+// the compiler a chance to detect and break the cycle of includes.
+#if !_CCCL_COMPILER(NVRTC)
+#  define THRUST_INCLUDING_ALGORITHMS_HEADER
+#  include <memory>
+#  undef THRUST_INCLUDING_ALGORITHMS_HEADER
+#endif // !_CCCL_COMPILER(NVRTC)
+#endif // _CUDA_STD__CCCL_MEMORY_WRAPPER_H

cuda/cccl/headers/include/cuda/std/__cccl/numeric_wrapper.h ADDED Viewed

@@ -0,0 +1,36 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+#ifndef _CUDA_STD__CCCL_NUMERIC_WRAPPER_H
+#define _CUDA_STD__CCCL_NUMERIC_WRAPPER_H
+#include <cuda/std/detail/__config>
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+// When a compiler uses CCCL components as part of its implementation of
+// Standard C++ algorithms, a cycle of included files may result when CCCL code
+// tries to use a standard algorithm. The THRUST_INCLUDING_ALGORITHMS_HEADER macro
+// is defined only when CCCL is including an algorithms-related header, giving
+// the compiler a chance to detect and break the cycle of includes.
+#if !_CCCL_COMPILER(NVRTC)
+#  define THRUST_INCLUDING_ALGORITHMS_HEADER
+#  include <numeric>
+#  undef THRUST_INCLUDING_ALGORITHMS_HEADER
+#endif // !_CCCL_COMPILER(NVRTC)
+#endif // _CUDA_STD__CCCL_NUMERIC_WRAPPER_H

cuda/cccl/headers/include/cuda/std/__chrono/duration.h CHANGED Viewed

@@ -43,19 +43,19 @@ template <class _Rep, class _Period = ratio<1>>
 class _CCCL_TYPE_VISIBILITY_DEFAULT duration;
 template <class _Tp>
-inline const bool __is_duration_v = false;
+inline constexpr bool __is_duration_v = false;
 template <class _Rep, class _Period>
-inline const bool __is_duration_v<duration<_Rep, _Period>> = true;
+inline constexpr bool __is_duration_v<duration<_Rep, _Period>> = true;
 template <class _Rep, class _Period>
-inline const bool __is_duration_v<const duration<_Rep, _Period>> = true;
+inline constexpr bool __is_duration_v<const duration<_Rep, _Period>> = true;
 template <class _Rep, class _Period>
-inline const bool __is_duration_v<volatile duration<_Rep, _Period>> = true;
+inline constexpr bool __is_duration_v<volatile duration<_Rep, _Period>> = true;
 template <class _Rep, class _Period>
-inline const bool __is_duration_v<const volatile duration<_Rep, _Period>> = true;
+inline constexpr bool __is_duration_v<const volatile duration<_Rep, _Period>> = true;
 } // namespace chrono
@@ -190,29 +190,29 @@ class _CCCL_TYPE_VISIBILITY_DEFAULT duration
   struct __no_overflow
   {
   private:
-    static const intmax_t __gcd_n1_n2 = __static_gcd<_R1::num, _R2::num>::value;
-    static const intmax_t __gcd_d1_d2 = __static_gcd<_R1::den, _R2::den>::value;
-    static const intmax_t __n1        = _R1::num / __gcd_n1_n2;
-    static const intmax_t __d1        = _R1::den / __gcd_d1_d2;
-    static const intmax_t __n2        = _R2::num / __gcd_n1_n2;
-    static const intmax_t __d2        = _R2::den / __gcd_d1_d2;
-    static const intmax_t max         = -((intmax_t(1) << (sizeof(intmax_t) * CHAR_BIT - 1)) + 1);
+    static constexpr intmax_t __gcd_n1_n2 = __static_gcd<_R1::num, _R2::num>::value;
+    static constexpr intmax_t __gcd_d1_d2 = __static_gcd<_R1::den, _R2::den>::value;
+    static constexpr intmax_t __n1        = _R1::num / __gcd_n1_n2;
+    static constexpr intmax_t __d1        = _R1::den / __gcd_d1_d2;
+    static constexpr intmax_t __n2        = _R2::num / __gcd_n1_n2;
+    static constexpr intmax_t __d2        = _R2::den / __gcd_d1_d2;
+    static constexpr intmax_t max         = -((intmax_t(1) << (sizeof(intmax_t) * CHAR_BIT - 1)) + 1);
     template <intmax_t _Xp, intmax_t _Yp, bool __overflow>
     struct __mul // __overflow == false
     {
-      static const intmax_t value = _Xp * _Yp;
+      static constexpr intmax_t value = _Xp * _Yp;
     };
     template <intmax_t _Xp, intmax_t _Yp>
     struct __mul<_Xp, _Yp, true>
     {
-      static const intmax_t value = 1;
+      static constexpr intmax_t value = 1;
     };
   public:
-    static const bool value = (__n1 <= max / __d2) && (__n2 <= max / __d1);
-    using type              = ratio<__mul<__n1, __d2, !value>::value, __mul<__n2, __d1, !value>::value>;
+    static constexpr bool value = (__n1 <= max / __d2) && (__n2 <= max / __d1);
+    using type                  = ratio<__mul<__n1, __d2, !value>::value, __mul<__n2, __d1, !value>::value>;
   };
 public:

cuda/cccl/headers/include/cuda/std/__chrono/steady_clock.h CHANGED Viewed

@@ -40,11 +40,11 @@ namespace chrono
 class _CCCL_TYPE_VISIBILITY_DEFAULT steady_clock
 {
 public:
-  using duration                        = nanoseconds;
-  using rep                             = duration::rep;
-  using period                          = duration::period;
-  using time_point                      = ::cuda::std::chrono::time_point<steady_clock, duration>;
-  static constexpr const bool is_steady = true;
+  using duration                  = nanoseconds;
+  using rep                       = duration::rep;
+  using period                    = duration::period;
+  using time_point                = ::cuda::std::chrono::time_point<steady_clock, duration>;
+  static constexpr bool is_steady = true;
   [[nodiscard]] _CCCL_API static time_point now() noexcept;
 };