PyPI - cuda-cccl - Versions diffs - 0.3.0__cp310-cp310-manylinux_2_24_aarch64.whl → 0.3.2__cp310-cp310-manylinux_2_24_aarch64.whl - Mend

cuda-cccl 0.3.0__cp310-cp310-manylinux_2_24_aarch64.whl → 0.3.2__cp310-cp310-manylinux_2_24_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cuda-cccl might be problematic. Click here for more details.

Files changed (294) hide show

cuda/cccl/headers/include/cuda/__driver/driver_api.h CHANGED Viewed

@@ -216,11 +216,10 @@ _CCCL_HOST_API inline void __deviceGetName(char* __name_out, int __len, int __or
   return __result;
 }
-_CCCL_HOST_API inline void __primaryCtxRelease(::CUdevice __dev)
+[[nodiscard]] _CCCL_HOST_API inline ::cudaError_t __primaryCtxReleaseNoThrow(::CUdevice __dev)
 {
   static auto __driver_fn = _CCCLRT_GET_DRIVER_FUNCTION(cuDevicePrimaryCtxRelease);
-  // TODO we might need to ignore failure here
-  ::cuda::__driver::__call_driver_fn(__driver_fn, "Failed to release context for a device", __dev);
+  return static_cast<::cudaError_t>(__driver_fn(__dev));
 }
 [[nodiscard]] _CCCL_HOST_API inline bool __isPrimaryCtxActive(::CUdevice __dev)
@@ -325,6 +324,109 @@ _CCCL_HOST_API void __memsetAsync(void* __dst, _Tp __value, size_t __count, ::CU
   }
 }
+_CCCL_HOST_API inline ::cudaError_t __mempoolCreateNoThrow(::CUmemoryPool* __pool, ::CUmemPoolProps* __props)
+{
+  static auto __driver_fn = _CCCLRT_GET_DRIVER_FUNCTION(cuMemPoolCreate);
+  return static_cast<::cudaError_t>(__driver_fn(__pool, __props));
+}
+_CCCL_HOST_API inline void __mempoolSetAttribute(::CUmemoryPool __pool, ::CUmemPool_attribute __attr, void* __value)
+{
+  static auto __driver_fn = _CCCLRT_GET_DRIVER_FUNCTION(cuMemPoolSetAttribute);
+  ::cuda::__driver::__call_driver_fn(__driver_fn, "Failed to set attribute for a memory pool", __pool, __attr, __value);
+}
+_CCCL_HOST_API inline size_t __mempoolGetAttribute(::CUmemoryPool __pool, ::CUmemPool_attribute __attr)
+{
+  size_t __value          = 0;
+  static auto __driver_fn = _CCCLRT_GET_DRIVER_FUNCTION(cuMemPoolGetAttribute);
+  ::cuda::__driver::__call_driver_fn(__driver_fn, "Failed to get attribute for a memory pool", __pool, __attr, &__value);
+  return __value;
+}
+_CCCL_HOST_API inline void __mempoolDestroy(::CUmemoryPool __pool)
+{
+  static auto __driver_fn = _CCCLRT_GET_DRIVER_FUNCTION(cuMemPoolDestroy);
+  ::cuda::__driver::__call_driver_fn(__driver_fn, "Failed to destroy a memory pool", __pool);
+}
+_CCCL_HOST_API inline ::CUdeviceptr
+__mallocFromPoolAsync(::cuda::std::size_t __bytes, ::CUmemoryPool __pool, ::CUstream __stream)
+{
+  static auto __driver_fn = _CCCLRT_GET_DRIVER_FUNCTION(cuMemAllocFromPoolAsync);
+  ::CUdeviceptr __result  = 0;
+  ::cuda::__driver::__call_driver_fn(
+    __driver_fn, "Failed to allocate memory from a memory pool", &__result, __bytes, __pool, __stream);
+  return __result;
+}
+_CCCL_HOST_API inline void __mempoolTrimTo(::CUmemoryPool __pool, ::cuda::std::size_t __min_bytes_to_keep)
+{
+  static auto __driver_fn = _CCCLRT_GET_DRIVER_FUNCTION(cuMemPoolTrimTo);
+  ::cuda::__driver::__call_driver_fn(__driver_fn, "Failed to trim a memory pool", __pool, __min_bytes_to_keep);
+}
+_CCCL_HOST_API inline ::cudaError_t __freeAsyncNoThrow(::CUdeviceptr __dptr, ::CUstream __stream)
+{
+  static auto __driver_fn = _CCCLRT_GET_DRIVER_FUNCTION(cuMemFreeAsync);
+  return static_cast<::cudaError_t>(__driver_fn(__dptr, __stream));
+}
+_CCCL_HOST_API inline void __mempoolSetAccess(::CUmemoryPool __pool, ::CUmemAccessDesc* __descs, ::size_t __count)
+{
+  static auto __driver_fn = _CCCLRT_GET_DRIVER_FUNCTION(cuMemPoolSetAccess);
+  ::cuda::__driver::__call_driver_fn(__driver_fn, "Failed to set access of a memory pool", __pool, __descs, __count);
+}
+_CCCL_HOST_API inline ::CUmemAccess_flags __mempoolGetAccess(::CUmemoryPool __pool, ::CUmemLocation* __location)
+{
+  static auto __driver_fn = _CCCLRT_GET_DRIVER_FUNCTION(cuMemPoolGetAccess);
+  ::CUmemAccess_flags __flags;
+  ::cuda::__driver::__call_driver_fn(__driver_fn, "Failed to get access of a memory pool", &__flags, __pool, __location);
+  return __flags;
+}
+#  if _CCCL_CTK_AT_LEAST(13, 0)
+_CCCL_HOST_API inline ::CUmemoryPool
+__getDefaultMemPool(CUmemLocation __location, CUmemAllocationType_enum __allocation_type)
+{
+  static auto __driver_fn =
+    _CCCLRT_GET_DRIVER_FUNCTION_VERSIONED(cuMemGetDefaultMemPool, cuMemGetDefaultMemPool, 13, 0);
+  ::CUmemoryPool __result = nullptr;
+  ::cuda::__driver::__call_driver_fn(
+    __driver_fn, "Failed to get default memory pool", &__result, &__location, __allocation_type);
+  return __result;
+}
+#  endif // _CCCL_CTK_AT_LEAST(13, 0)
+_CCCL_HOST_API inline ::CUdeviceptr __mallocManaged(::cuda::std::size_t __bytes, unsigned int __flags)
+{
+  static auto __driver_fn = _CCCLRT_GET_DRIVER_FUNCTION(cuMemAllocManaged);
+  ::CUdeviceptr __result  = 0;
+  ::cuda::__driver::__call_driver_fn(__driver_fn, "Failed to allocate managed memory", &__result, __bytes, __flags);
+  return __result;
+}
+_CCCL_HOST_API inline void* __mallocHost(::cuda::std::size_t __bytes)
+{
+  static auto __driver_fn = _CCCLRT_GET_DRIVER_FUNCTION(cuMemAllocHost);
+  void* __result          = nullptr;
+  ::cuda::__driver::__call_driver_fn(__driver_fn, "Failed to allocate host memory", &__result, __bytes);
+  return __result;
+}
+_CCCL_HOST_API inline ::cudaError_t __freeNoThrow(::CUdeviceptr __dptr)
+{
+  static auto __driver_fn = _CCCLRT_GET_DRIVER_FUNCTION(cuMemFree);
+  return static_cast<::cudaError_t>(__driver_fn(__dptr));
+}
+_CCCL_HOST_API inline ::cudaError_t __freeHostNoThrow(void* __dptr)
+{
+  static auto __driver_fn = _CCCLRT_GET_DRIVER_FUNCTION(cuMemFreeHost);
+  return static_cast<::cudaError_t>(__driver_fn(__dptr));
+}
 // Unified Addressing
 // TODO: we don't want to have these functions here, refactoring expected

cuda/cccl/headers/include/cuda/__event/event.h CHANGED Viewed

@@ -23,12 +23,13 @@
 #if _CCCL_HAS_CTK() && !_CCCL_COMPILER(NVRTC)
+#  include <cuda/__device/device_ref.h>
 #  include <cuda/__driver/driver_api.h>
 #  include <cuda/__event/event_ref.h>
 #  include <cuda/__runtime/ensure_current_context.h>
 #  include <cuda/__utility/no_init.h>
+#  include <cuda/std/__utility/to_underlying.h>
 #  include <cuda/std/cstddef>
-#  include <cuda/std/utility>
 #  include <cuda/std/__cccl/prologue.h>
@@ -36,38 +37,43 @@ _CCCL_BEGIN_NAMESPACE_CUDA
 class timed_event;
+//! @brief Flags to use when creating the event.
+enum class event_flags : unsigned
+{
+  none          = cudaEventDefault,
+  blocking_sync = cudaEventBlockingSync,
+  interprocess  = cudaEventInterprocess,
+};
+[[nodiscard]] _CCCL_HOST_API constexpr event_flags operator|(event_flags __lhs, event_flags __rhs) noexcept
+{
+  return static_cast<event_flags>(::cuda::std::to_underlying(__lhs) | ::cuda::std::to_underlying(__rhs));
+}
 //! @brief An owning wrapper for an untimed `cudaEvent_t`.
 class event : public event_ref
 {
   friend class timed_event;
 public:
-  //! @brief Flags to use when creating the event.
-  enum class flags : unsigned
-  {
-    none          = cudaEventDefault,
-    blocking_sync = cudaEventBlockingSync,
-    interprocess  = cudaEventInterprocess,
-  };
   //! @brief Construct a new `event` object with timing disabled, and record
   //!        the event in the specified stream.
   //!
   //! @throws cuda_error if the event creation fails.
-  explicit event(stream_ref __stream, flags __flags = flags::none);
+  _CCCL_HOST_API explicit event(stream_ref __stream, event_flags __flags = event_flags::none);
   //! @brief Construct a new `event` object with timing disabled. The event can only be recorded on streams from the
   //! specified device.
   //!
   //! @throws cuda_error if the event creation fails.
-  explicit event(device_ref __device, flags __flags = flags::none)
-      : event(__device, static_cast<unsigned int>(__flags) | cudaEventDisableTiming)
+  _CCCL_HOST_API explicit event(device_ref __device, event_flags __flags = event_flags::none)
+      : event(__device, ::cuda::std::to_underlying(__flags) | cudaEventDisableTiming)
   {}
   //! @brief Construct a new `event` object into the moved-from state.
   //!
   //! @post `get()` returns `cudaEvent_t()`.
-  explicit constexpr event(no_init_t) noexcept
+  _CCCL_HOST_API explicit constexpr event(no_init_t) noexcept
       : event_ref(::cudaEvent_t{})
   {}
@@ -76,7 +82,7 @@ public:
   //! @param __other
   //!
   //! @post `__other` is in a moved-from state.
-  constexpr event(event&& __other) noexcept
+  _CCCL_HOST_API constexpr event(event&& __other) noexcept
       : event_ref(::cuda::std::exchange(__other.__event_, {}))
   {}
@@ -86,7 +92,7 @@ public:
   //! @brief Destroy the `event` object
   //!
   //! @note If the event fails to be destroyed, the error is silently ignored.
-  ~event()
+  _CCCL_HOST_API ~event()
   {
     if (__event_ != nullptr)
     {
@@ -101,7 +107,7 @@ public:
   //! @param __other
   //!
   //! @post `__other` is in a moved-from state.
-  event& operator=(event&& __other) noexcept
+  _CCCL_HOST_API event& operator=(event&& __other) noexcept
   {
     event __tmp(::cuda::std::move(__other));
     ::cuda::std::swap(__event_, __tmp.__event_);
@@ -118,7 +124,7 @@ public:
   //! @return event The constructed `event` object
   //!
   //! @note The constructed `event` object takes ownership of the native handle.
-  [[nodiscard]] static event from_native_handle(::cudaEvent_t __evnt) noexcept
+  [[nodiscard]] static _CCCL_HOST_API event from_native_handle(::cudaEvent_t __evnt) noexcept
   {
     return event(__evnt);
   }
@@ -134,26 +140,21 @@ public:
   //! @return cudaEvent_t The native handle being held by the `event` object.
   //!
   //! @post The event object is in a moved-from state.
-  [[nodiscard]] constexpr ::cudaEvent_t release() noexcept
+  [[nodiscard]] _CCCL_HOST_API constexpr ::cudaEvent_t release() noexcept
   {
     return ::cuda::std::exchange(__event_, {});
   }
-  [[nodiscard]] friend constexpr flags operator|(flags __lhs, flags __rhs) noexcept
-  {
-    return static_cast<flags>(static_cast<unsigned>(__lhs) | static_cast<unsigned>(__rhs));
-  }
 private:
   // Use `event::from_native_handle(e)` to construct an owning `event`
   // object from a `cudaEvent_t` handle.
-  explicit constexpr event(::cudaEvent_t __evnt) noexcept
+  _CCCL_HOST_API explicit constexpr event(::cudaEvent_t __evnt) noexcept
       : event_ref(__evnt)
   {}
-  explicit event(stream_ref __stream, unsigned __flags);
+  _CCCL_HOST_API explicit event(stream_ref __stream, unsigned __flags);
-  explicit event(device_ref __device, unsigned __flags)
+  _CCCL_HOST_API explicit event(device_ref __device, unsigned __flags)
       : event_ref(::cudaEvent_t{})
   {
     [[maybe_unused]] __ensure_current_context __ctx_setter(__device);

cuda/cccl/headers/include/cuda/__event/event_ref.h CHANGED Viewed

@@ -56,7 +56,7 @@ public:
   //!
   //! @note: It is the caller's responsibility to ensure the `event_ref` does not
   //! outlive the event denoted by the `cudaEvent_t` handle.
-  constexpr event_ref(::cudaEvent_t __evnt) noexcept
+  _CCCL_HOST_API constexpr event_ref(::cudaEvent_t __evnt) noexcept
       : __event_(__evnt)
   {}
@@ -108,7 +108,7 @@ public:
   //! @brief Retrieve the native `cudaEvent_t` handle.
   //!
   //! @return cudaEvent_t The native handle being held by the event_ref object.
-  [[nodiscard]] constexpr ::cudaEvent_t get() const noexcept
+  [[nodiscard]] _CCCL_HOST_API constexpr ::cudaEvent_t get() const noexcept
   {
     return __event_;
   }
@@ -116,7 +116,7 @@ public:
   //! @brief Checks if the `event_ref` is valid
   //!
   //! @return true if the `event_ref` is valid, false otherwise.
-  [[nodiscard]] explicit constexpr operator bool() const noexcept
+  [[nodiscard]] _CCCL_HOST_API explicit constexpr operator bool() const noexcept
   {
     return __event_ != nullptr;
   }
@@ -129,7 +129,7 @@ public:
   //! @param __lhs The first `event_ref` to compare
   //! @param __rhs The second `event_ref` to compare
   //! @return true if `lhs` and `rhs` refer to the same `cudaEvent_t` object.
-  [[nodiscard]] friend constexpr bool operator==(event_ref __lhs, event_ref __rhs) noexcept
+  [[nodiscard]] friend _CCCL_HOST_API constexpr bool operator==(event_ref __lhs, event_ref __rhs) noexcept
   {
     return __lhs.__event_ == __rhs.__event_;
   }
@@ -142,7 +142,7 @@ public:
   //! @param __lhs The first `event_ref` to compare
   //! @param __rhs The second `event_ref` to compare
   //! @return true if `lhs` and `rhs` refer to different `cudaEvent_t` objects.
-  [[nodiscard]] friend constexpr bool operator!=(event_ref __lhs, event_ref __rhs) noexcept
+  [[nodiscard]] friend _CCCL_HOST_API constexpr bool operator!=(event_ref __lhs, event_ref __rhs) noexcept
   {
     return __lhs.__event_ != __rhs.__event_;
   }

cuda/cccl/headers/include/cuda/__event/timed_event.h CHANGED Viewed

@@ -26,10 +26,12 @@
 #if _CCCL_HAS_CTK() && !_CCCL_COMPILER(NVRTC)
+#  include <cuda/__device/device_ref.h>
 #  include <cuda/__driver/driver_api.h>
 #  include <cuda/__event/event.h>
 #  include <cuda/__utility/no_init.h>
 #  include <cuda/std/__chrono/duration.h>
+#  include <cuda/std/__utility/to_underlying.h>
 #  include <cuda/std/cstddef>
 #  include <cuda/std/__cccl/prologue.h>
@@ -44,20 +46,20 @@ public:
   //!        and record the event on the specified stream.
   //!
   //! @throws cuda_error if the event creation fails.
-  explicit timed_event(stream_ref __stream, flags __flags = flags::none);
+  _CCCL_HOST_API explicit timed_event(stream_ref __stream, event_flags __flags = event_flags::none);
   //! @brief Construct a new `timed_event` object with the specified flags. The event can only be recorded on streams
   //! from the specified device.
   //!
   //! @throws cuda_error if the event creation fails.
-  explicit timed_event(device_ref __device, flags __flags = flags::none)
-      : event(__device, static_cast<unsigned>(__flags))
+  _CCCL_HOST_API explicit timed_event(device_ref __device, event_flags __flags = event_flags::none)
+      : event(__device, ::cuda::std::to_underlying(__flags))
   {}
   //! @brief Construct a new `timed_event` object into the moved-from state.
   //!
   //! @post `get()` returns `cudaEvent_t()`.
-  explicit constexpr timed_event(no_init_t) noexcept
+  _CCCL_HOST_API explicit constexpr timed_event(no_init_t) noexcept
       : event(no_init)
   {}
@@ -73,7 +75,7 @@ public:
   //! @return timed_event The constructed `timed_event` object
   //!
   //! @note The constructed `timed_event` object takes ownership of the native handle.
-  [[nodiscard]] static timed_event from_native_handle(::cudaEvent_t __evnt) noexcept
+  [[nodiscard]] static _CCCL_HOST_API timed_event from_native_handle(::cudaEvent_t __evnt) noexcept
   {
     return timed_event(__evnt);
   }
@@ -94,7 +96,8 @@ public:
   //! @return cuda::std::chrono::nanoseconds The elapsed time in nanoseconds.
   //!
   //! @note The elapsed time has a resolution of approximately 0.5 microseconds.
-  [[nodiscard]] friend ::cuda::std::chrono::nanoseconds operator-(const timed_event& __end, const timed_event& __start)
+  [[nodiscard]] friend _CCCL_HOST_API ::cuda::std::chrono::nanoseconds
+  operator-(const timed_event& __end, const timed_event& __start)
   {
     const auto __ms = ::cuda::__driver::__eventElapsedTime(__start.get(), __end.get());
     return ::cuda::std::chrono::nanoseconds(static_cast<::cuda::std::chrono::nanoseconds::rep>(__ms * 1'000'000.0));
@@ -103,7 +106,7 @@ public:
 private:
   // Use `timed_event::from_native_handle(e)` to construct an owning `timed_event`
   // object from a `cudaEvent_t` handle.
-  explicit constexpr timed_event(::cudaEvent_t __evnt) noexcept
+  _CCCL_HOST_API explicit constexpr timed_event(::cudaEvent_t __evnt) noexcept
       : event(__evnt)
   {}
 };

cuda/cccl/headers/include/cuda/__fwd/devices.h ADDED Viewed

@@ -0,0 +1,44 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+#ifndef _CUDA___FWD_DEVICES_H
+#define _CUDA___FWD_DEVICES_H
+#include <cuda/std/detail/__config>
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+#include <cuda/std/__fwd/span.h>
+#include <cuda/std/__cccl/prologue.h>
+_CCCL_BEGIN_NAMESPACE_CUDA
+class __physical_device;
+class device_ref;
+template <::cudaDeviceAttr _Attr>
+struct __dev_attr;
+struct arch_traits_t;
+class compute_capability;
+enum class arch_id : int;
+inline constexpr int __arch_specific_id_multiplier = 100000;
+_CCCL_END_NAMESPACE_CUDA
+#include <cuda/std/__cccl/epilogue.h>
+#endif // _CUDA___FWD_DEVICES_H

cuda/cccl/headers/include/cuda/__fwd/zip_iterator.h CHANGED Viewed

@@ -42,6 +42,15 @@ inline constexpr bool __is_zip_function = false;
 template <class _Fn>
 inline constexpr bool __is_zip_function<zip_function<_Fn>> = true;
+template <class _Fn, class... _Iterators>
+class zip_transform_iterator;
+template <class>
+inline constexpr bool __is_zip_transform_iterator = false;
+template <class _Fn, class... _Iterators>
+inline constexpr bool __is_zip_transform_iterator<zip_transform_iterator<_Fn, _Iterators...>> = true;
 _CCCL_END_NAMESPACE_CUDA
 #include <cuda/std/__cccl/epilogue.h>

cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h CHANGED Viewed

@@ -23,6 +23,7 @@
 #include <cuda/std/__iterator/concepts.h>
 #include <cuda/std/__iterator/iterator_traits.h>
+#include <cuda/std/__ranges/compressed_movable_box.h>
 #include <cuda/std/__ranges/movable_box.h>
 #include <cuda/std/__type_traits/is_nothrow_copy_constructible.h>
 #include <cuda/std/__type_traits/is_nothrow_move_constructible.h>
@@ -64,8 +65,28 @@ class constant_iterator
 private:
   static_assert(::cuda::std::__integer_like<_Index>, "The index type of cuda::constant_iterator must be integer-like!");
-  ::cuda::std::ranges::__movable_box<_Tp> __value_{::cuda::std::in_place};
-  _Index __index_ = 0;
+  // Not a base because then the friend operators would be ambiguous
+  ::cuda::std::__compressed_movable_box<_Index, _Tp> __store_;
+  [[nodiscard]] _CCCL_API constexpr _Index& __index() noexcept
+  {
+    return __store_.template __get<0>();
+  }
+  [[nodiscard]] _CCCL_API constexpr const _Index& __index() const noexcept
+  {
+    return __store_.template __get<0>();
+  }
+  [[nodiscard]] _CCCL_API constexpr _Tp& __value() noexcept
+  {
+    return __store_.template __get<1>();
+  }
+  [[nodiscard]] _CCCL_API constexpr const _Tp& __value() const noexcept
+  {
+    return __store_.template __get<1>();
+  }
 public:
   using iterator_concept  = ::cuda::std::random_access_iterator_tag;
@@ -78,22 +99,17 @@ public:
   using reference = _Tp;
   using pointer   = void;
-#if _CCCL_HAS_CONCEPTS()
-  _CCCL_HIDE_FROM_ABI constant_iterator()
-    requires ::cuda::std::default_initializable<_Tp>
-  = default;
-#else // ^^^ _CCCL_HAS_CONCEPTS() ^^^ / vvv !_CCCL_HAS_CONCEPTS() vvv
   _CCCL_TEMPLATE(class _Tp2 = _Tp)
   _CCCL_REQUIRES(::cuda::std::default_initializable<_Tp2>)
-  _CCCL_API constexpr constant_iterator() noexcept(::cuda::std::is_nothrow_default_constructible_v<_Tp2>) {}
-#endif // !_CCCL_HAS_CONCEPTS()
+  _CCCL_API constexpr constant_iterator() noexcept(::cuda::std::is_nothrow_default_constructible_v<_Tp2>)
+      : __store_()
+  {}
   //! @brief Creates a @c constant_iterator from a value. The index is set to zero
   //! @param __value The value to store in the @c constant_iterator
   _CCCL_EXEC_CHECK_DISABLE
   _CCCL_API constexpr constant_iterator(_Tp __value) noexcept(::cuda::std::is_nothrow_move_constructible_v<_Tp>)
-      : __value_(::cuda::std::in_place, ::cuda::std::move(__value))
-      , __index_()
+      : __store_(0, ::cuda::std::move(__value))
   {}
   //! @brief Creates @c constant_iterator from a value and an index
@@ -104,32 +120,31 @@ public:
   _CCCL_REQUIRES(::cuda::std::__integer_like<_Index2>)
   _CCCL_API constexpr explicit constant_iterator(_Tp __value, _Index2 __index) noexcept(
     ::cuda::std::is_nothrow_move_constructible_v<_Tp>)
-      : __value_(::cuda::std::in_place, ::cuda::std::move(__value))
-      , __index_(static_cast<_Index>(__index))
+      : __store_(static_cast<_Index>(__index), ::cuda::std::move(__value))
   {}
   //! @brief Returns the current index
   [[nodiscard]] _CCCL_API constexpr difference_type index() const noexcept
   {
-    return static_cast<difference_type>(__index_);
+    return static_cast<difference_type>(__index());
   }
   //! @brief Returns a const reference to the stored value
   [[nodiscard]] _CCCL_API constexpr const _Tp& operator*() const noexcept
   {
-    return *__value_;
+    return __value();
   }
   //! @brief Returns a const reference to the stored value
   [[nodiscard]] _CCCL_API constexpr const _Tp& operator[](difference_type) const noexcept
   {
-    return *__value_;
+    return __value();
   }
   //! @brief Increments the stored index
   _CCCL_API constexpr constant_iterator& operator++() noexcept
   {
-    ++__index_;
+    ++__index();
     return *this;
   }
@@ -147,9 +162,9 @@ public:
   {
     if constexpr (::cuda::std::is_signed_v<_Index>)
     {
-      _CCCL_ASSERT(__index_ > 0, "The index must be greater than or equal to 0");
+      _CCCL_ASSERT(__index() > 0, "The index must be greater than or equal to 0");
     }
-    --__index_;
+    --__index();
     return *this;
   }
@@ -168,9 +183,9 @@ public:
   {
     if constexpr (::cuda::std::is_signed_v<_Index>)
     {
-      _CCCL_ASSERT(__index_ + __n >= 0, "The index must be greater than or equal to 0");
+      _CCCL_ASSERT(__index() + __n >= 0, "The index must be greater than or equal to 0");
     }
-    __index_ += static_cast<_Index>(__n);
+    __index() += static_cast<_Index>(__n);
     return *this;
   }
@@ -200,9 +215,9 @@ public:
   {
     if constexpr (::cuda::std::is_signed_v<_Index>)
     {
-      _CCCL_ASSERT(__index_ - __n >= 0, "The index must be greater than or equal to 0");
+      _CCCL_ASSERT(__index() - __n >= 0, "The index must be greater than or equal to 0");
     }
-    __index_ -= static_cast<_Index>(__n);
+    __index() -= static_cast<_Index>(__n);
     return *this;
   }
@@ -220,14 +235,14 @@ public:
   [[nodiscard]] _CCCL_API friend constexpr difference_type
   operator-(const constant_iterator& __lhs, const constant_iterator& __rhs) noexcept
   {
-    return static_cast<difference_type>(__lhs.__index_) - static_cast<difference_type>(__rhs.__index_);
+    return static_cast<difference_type>(__lhs.__index()) - static_cast<difference_type>(__rhs.__index());
   }
   //! @brief Compares two @c constant_iterator for equality by comparing the index in the sequence
   [[nodiscard]] _CCCL_API friend constexpr bool
   operator==(const constant_iterator& __lhs, const constant_iterator& __rhs) noexcept
   {
-    return __lhs.__index_ == __rhs.__index_;
+    return __lhs.__index() == __rhs.__index();
   }
 #if _CCCL_STD_VER <= 2017
@@ -235,7 +250,7 @@ public:
   [[nodiscard]] _CCCL_API friend constexpr bool
   operator!=(const constant_iterator& __lhs, const constant_iterator& __rhs) noexcept
   {
-    return __lhs.__index_ != __rhs.__index_;
+    return __lhs.__index() != __rhs.__index();
   }
 #endif // _CCCL_STD_VER <= 2017
@@ -244,32 +259,32 @@ public:
   [[nodiscard]] _CCCL_API friend constexpr auto
   operator<=>(const constant_iterator& __lhs, const constant_iterator& __rhs) noexcept
   {
-    return __lhs.__index_ <=> __rhs.__index_;
+    return __lhs.__index() <=> __rhs.__index();
   }
 #else // ^^^ _LIBCUDACXX_HAS_SPACESHIP_OPERATOR() ^^^ / vvv !_LIBCUDACXX_HAS_SPACESHIP_OPERATOR() vvv
   //! @brief Compares two @c constant_iterator for less than by comparing the index in the sequence
   [[nodiscard]] _CCCL_API friend constexpr bool
   operator<(const constant_iterator& __lhs, const constant_iterator& __rhs) noexcept
   {
-    return __lhs.__index_ < __rhs.__index_;
+    return __lhs.__index() < __rhs.__index();
   }
   //! @brief Compares two @c constant_iterator for less equal by comparing the index in the sequence
   [[nodiscard]] _CCCL_API friend constexpr bool
   operator<=(const constant_iterator& __lhs, const constant_iterator& __rhs) noexcept
   {
-    return __lhs.__index_ <= __rhs.__index_;
+    return __lhs.__index() <= __rhs.__index();
   }
   //! @brief Compares two @c constant_iterator for greater than by comparing the index in the sequence
   [[nodiscard]] _CCCL_API friend constexpr bool
   operator>(const constant_iterator& __lhs, const constant_iterator& __rhs) noexcept
   {
-    return __lhs.__index_ > __rhs.__index_;
+    return __lhs.__index() > __rhs.__index();
   }
   //! @brief Compares two @c constant_iterator for greater equal by comparing the index in the sequence
   [[nodiscard]] _CCCL_API friend constexpr bool
   operator>=(const constant_iterator& __lhs, const constant_iterator& __rhs) noexcept
   {
-    return __lhs.__index_ >= __rhs.__index_;
+    return __lhs.__index() >= __rhs.__index();
   }
 #endif // !_LIBCUDACXX_HAS_NO_SPACESHIP_OPERATOR()
 };