cuda-cccl 0.3.1__cp311-cp311-manylinux_2_24_aarch64.whl → 0.3.2__cp311-cp311-manylinux_2_24_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cuda-cccl might be problematic. (The original page linked to more details here; the link target is not preserved in this capture.)

Files changed (185):
  1. cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +354 -572
  2. cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +6 -8
  3. cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +24 -14
  4. cuda/cccl/headers/include/cub/block/block_exchange.cuh +5 -0
  5. cuda/cccl/headers/include/cub/block/block_histogram.cuh +4 -0
  6. cuda/cccl/headers/include/cub/block/block_load.cuh +4 -0
  7. cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +1 -0
  8. cuda/cccl/headers/include/cub/block/block_reduce.cuh +1 -0
  9. cuda/cccl/headers/include/cub/block/block_scan.cuh +12 -2
  10. cuda/cccl/headers/include/cub/block/block_store.cuh +3 -2
  11. cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +34 -30
  12. cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +1 -1
  13. cuda/cccl/headers/include/cub/device/device_for.cuh +118 -40
  14. cuda/cccl/headers/include/cub/device/device_reduce.cuh +6 -7
  15. cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +12 -13
  16. cuda/cccl/headers/include/cub/device/device_transform.cuh +122 -91
  17. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +2 -3
  18. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +4 -3
  19. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +1 -1
  20. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +4 -5
  21. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +0 -1
  22. cuda/cccl/headers/include/cub/device/dispatch/dispatch_topk.cuh +3 -5
  23. cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +13 -5
  24. cuda/cccl/headers/include/cub/device/dispatch/kernels/for_each.cuh +72 -37
  25. cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +22 -27
  26. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +61 -70
  27. cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +24 -17
  28. cuda/cccl/headers/include/cub/warp/warp_load.cuh +6 -6
  29. cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +7 -2
  30. cuda/cccl/headers/include/cub/warp/warp_scan.cuh +7 -3
  31. cuda/cccl/headers/include/cub/warp/warp_store.cuh +1 -0
  32. cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +19 -0
  33. cuda/cccl/headers/include/cuda/__cccl_config +1 -0
  34. cuda/cccl/headers/include/cuda/__cmath/fast_modulo_division.h +3 -74
  35. cuda/cccl/headers/include/cuda/__cmath/mul_hi.h +146 -0
  36. cuda/cccl/headers/include/cuda/__complex/get_real_imag.h +0 -4
  37. cuda/cccl/headers/include/cuda/__device/arch_id.h +176 -0
  38. cuda/cccl/headers/include/cuda/__device/arch_traits.h +239 -317
  39. cuda/cccl/headers/include/cuda/__device/attributes.h +4 -3
  40. cuda/cccl/headers/include/cuda/__device/compute_capability.h +171 -0
  41. cuda/cccl/headers/include/cuda/__device/device_ref.h +0 -10
  42. cuda/cccl/headers/include/cuda/__device/physical_device.h +1 -26
  43. cuda/cccl/headers/include/cuda/__event/event.h +26 -26
  44. cuda/cccl/headers/include/cuda/__event/event_ref.h +5 -5
  45. cuda/cccl/headers/include/cuda/__event/timed_event.h +9 -7
  46. cuda/cccl/headers/include/cuda/__fwd/devices.h +4 -4
  47. cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +46 -31
  48. cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +79 -47
  49. cuda/cccl/headers/include/cuda/__iterator/tabulate_output_iterator.h +59 -36
  50. cuda/cccl/headers/include/cuda/__iterator/transform_input_output_iterator.h +79 -49
  51. cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +74 -48
  52. cuda/cccl/headers/include/cuda/__iterator/transform_output_iterator.h +80 -55
  53. cuda/cccl/headers/include/cuda/__iterator/zip_common.h +2 -12
  54. cuda/cccl/headers/include/cuda/__iterator/zip_iterator.h +15 -19
  55. cuda/cccl/headers/include/cuda/__iterator/zip_transform_iterator.h +59 -60
  56. cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +127 -60
  57. cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +178 -3
  58. cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +38 -8
  59. cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +67 -1
  60. cuda/cccl/headers/include/cuda/__memory/ptr_in_range.h +93 -0
  61. cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +4 -4
  62. cuda/cccl/headers/include/cuda/__memory_resource/properties.h +44 -0
  63. cuda/cccl/headers/include/cuda/__memory_resource/resource.h +1 -1
  64. cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +4 -6
  65. cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2 -1
  66. cuda/cccl/headers/include/cuda/__runtime/ensure_current_context.h +5 -4
  67. cuda/cccl/headers/include/cuda/__stream/stream.h +8 -8
  68. cuda/cccl/headers/include/cuda/__stream/stream_ref.h +17 -16
  69. cuda/cccl/headers/include/cuda/__utility/in_range.h +65 -0
  70. cuda/cccl/headers/include/cuda/cmath +1 -0
  71. cuda/cccl/headers/include/cuda/devices +3 -0
  72. cuda/cccl/headers/include/cuda/memory +1 -0
  73. cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +2 -2
  74. cuda/cccl/headers/include/cuda/std/__algorithm/find.h +1 -1
  75. cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +2 -4
  76. cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +1 -1
  77. cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +7 -15
  78. cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +1 -1
  79. cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +1 -2
  80. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +2 -2
  81. cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +1 -1
  82. cuda/cccl/headers/include/cuda/std/__cccl/algorithm_wrapper.h +36 -0
  83. cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +46 -49
  84. cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +6 -0
  85. cuda/cccl/headers/include/cuda/std/__cccl/host_std_lib.h +52 -0
  86. cuda/cccl/headers/include/cuda/std/__cccl/memory_wrapper.h +36 -0
  87. cuda/cccl/headers/include/cuda/std/__cccl/numeric_wrapper.h +36 -0
  88. cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +3 -2
  89. cuda/cccl/headers/include/cuda/std/__complex/complex.h +3 -2
  90. cuda/cccl/headers/include/cuda/std/__complex/literals.h +14 -34
  91. cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +2 -1
  92. cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +4 -3
  93. cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +2 -2
  94. cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +3 -2
  95. cuda/cccl/headers/include/cuda/std/__functional/bind.h +10 -13
  96. cuda/cccl/headers/include/cuda/std/__functional/function.h +5 -8
  97. cuda/cccl/headers/include/cuda/std/__functional/invoke.h +71 -335
  98. cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +1 -2
  99. cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +3 -3
  100. cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +0 -6
  101. cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +13 -0
  102. cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +13 -0
  103. cuda/cccl/headers/include/cuda/std/__fwd/complex.h +13 -4
  104. cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +23 -0
  105. cuda/cccl/headers/include/cuda/std/__fwd/pair.h +13 -0
  106. cuda/cccl/headers/include/cuda/std/__fwd/string.h +22 -0
  107. cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +14 -0
  108. cuda/cccl/headers/include/cuda/std/__internal/features.h +0 -5
  109. cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +21 -0
  110. cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +5 -5
  111. cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +7 -1
  112. cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +53 -39
  113. cuda/cccl/headers/include/cuda/std/__memory/allocator.h +3 -3
  114. cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +1 -3
  115. cuda/cccl/headers/include/cuda/std/__optional/optional_base.h +1 -0
  116. cuda/cccl/headers/include/cuda/std/__ranges/compressed_movable_box.h +892 -0
  117. cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +2 -2
  118. cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +7 -5
  119. cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +1 -1
  120. cuda/cccl/headers/include/cuda/std/__utility/pair.h +0 -5
  121. cuda/cccl/headers/include/cuda/std/bitset +1 -1
  122. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +15 -12
  123. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +11 -9
  124. cuda/cccl/headers/include/cuda/std/inplace_vector +4 -4
  125. cuda/cccl/headers/include/cuda/std/numbers +5 -0
  126. cuda/cccl/headers/include/cuda/std/string_view +146 -11
  127. cuda/cccl/headers/include/cuda/stream_ref +5 -0
  128. cuda/cccl/headers/include/cuda/utility +1 -0
  129. cuda/cccl/headers/include/nv/target +7 -2
  130. cuda/cccl/headers/include/thrust/allocate_unique.h +1 -1
  131. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +309 -33
  132. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +151 -4
  133. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +60 -3
  134. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +45 -3
  135. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +31 -6
  136. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +29 -16
  137. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +41 -4
  138. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +42 -4
  139. cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +3 -3
  140. cuda/cccl/headers/include/thrust/detail/internal_functional.h +1 -1
  141. cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +1 -1
  142. cuda/cccl/headers/include/thrust/detail/temporary_array.h +1 -1
  143. cuda/cccl/headers/include/thrust/detail/type_traits.h +1 -1
  144. cuda/cccl/headers/include/thrust/device_delete.h +18 -3
  145. cuda/cccl/headers/include/thrust/device_free.h +16 -3
  146. cuda/cccl/headers/include/thrust/device_new.h +29 -8
  147. cuda/cccl/headers/include/thrust/host_vector.h +1 -1
  148. cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +5 -2
  149. cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +1 -1
  150. cuda/cccl/headers/include/thrust/mr/pool.h +1 -1
  151. cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +13 -115
  152. cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +8 -2
  153. cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +7 -7
  154. cuda/compute/__init__.py +2 -0
  155. cuda/compute/_bindings.pyi +43 -1
  156. cuda/compute/_bindings_impl.pyx +156 -7
  157. cuda/compute/algorithms/_scan.py +108 -36
  158. cuda/compute/algorithms/_transform.py +32 -11
  159. cuda/compute/cu12/_bindings_impl.cpython-311-aarch64-linux-gnu.so +0 -0
  160. cuda/compute/cu12/cccl/libcccl.c.parallel.so +0 -0
  161. cuda/compute/cu13/_bindings_impl.cpython-311-aarch64-linux-gnu.so +0 -0
  162. cuda/compute/cu13/cccl/libcccl.c.parallel.so +0 -0
  163. cuda/compute/iterators/__init__.py +2 -0
  164. cuda/compute/iterators/_factories.py +28 -0
  165. cuda/compute/iterators/_iterators.py +206 -1
  166. cuda/compute/numba_utils.py +2 -2
  167. cuda/compute/typing.py +2 -0
  168. {cuda_cccl-0.3.1.dist-info → cuda_cccl-0.3.2.dist-info}/METADATA +1 -1
  169. {cuda_cccl-0.3.1.dist-info → cuda_cccl-0.3.2.dist-info}/RECORD +171 -175
  170. cuda/cccl/headers/include/thrust/detail/algorithm_wrapper.h +0 -37
  171. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.inl +0 -371
  172. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.inl +0 -242
  173. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.inl +0 -137
  174. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.inl +0 -99
  175. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.inl +0 -68
  176. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.inl +0 -86
  177. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.inl +0 -79
  178. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.inl +0 -98
  179. cuda/cccl/headers/include/thrust/detail/device_delete.inl +0 -52
  180. cuda/cccl/headers/include/thrust/detail/device_free.inl +0 -47
  181. cuda/cccl/headers/include/thrust/detail/device_new.inl +0 -61
  182. cuda/cccl/headers/include/thrust/detail/memory_wrapper.h +0 -40
  183. cuda/cccl/headers/include/thrust/detail/numeric_wrapper.h +0 -37
  184. {cuda_cccl-0.3.1.dist-info → cuda_cccl-0.3.2.dist-info}/WHEEL +0 -0
  185. {cuda_cccl-0.3.1.dist-info → cuda_cccl-0.3.2.dist-info}/licenses/LICENSE +0 -0
@@ -26,14 +26,56 @@
26
26
  # pragma system_header
27
27
  #endif // no system header
28
28
 
29
+ #include <thrust/detail/allocator/allocator_traits.h>
30
+ #include <thrust/detail/type_traits.h>
31
+ #include <thrust/detail/type_traits/pointer_traits.h>
32
+ #include <thrust/for_each.h>
33
+ #include <thrust/uninitialized_fill.h>
34
+
35
+ #include <cuda/std/__cccl/memory_wrapper.h>
36
+
29
37
  THRUST_NAMESPACE_BEGIN
30
38
  namespace detail
31
39
  {
32
40
 
41
+ // fill_construct_range has 2 cases:
42
+ // if Allocator has an effectful member function construct:
43
+ // 1. construct via the allocator
44
+ // else
45
+ // 2. construct via uninitialized_fill
46
+
47
+ template <typename Allocator, typename T, typename Arg1>
48
+ inline constexpr bool has_effectful_member_construct2 =
49
+ allocator_traits_detail::has_member_construct2<Allocator, T, Arg1>::value;
50
+
51
+ // std::allocator::construct's only effect is to invoke placement new
52
+ template <typename U, typename T, typename Arg1>
53
+ inline constexpr bool has_effectful_member_construct2<std::allocator<U>, T, Arg1> = false;
54
+
55
+ template <typename Allocator, typename Arg1>
56
+ struct construct2_via_allocator
57
+ {
58
+ Allocator& a;
59
+ Arg1 arg;
60
+
61
+ template <typename T>
62
+ inline _CCCL_HOST_DEVICE void operator()(T& x)
63
+ {
64
+ allocator_traits<Allocator>::construct(a, &x, arg);
65
+ }
66
+ };
33
67
  template <typename Allocator, typename Pointer, typename Size, typename T>
34
- _CCCL_HOST_DEVICE inline void fill_construct_range(Allocator& a, Pointer p, Size n, const T& value);
68
+ _CCCL_HOST_DEVICE void fill_construct_range(Allocator& a, Pointer p, Size n, const T& value)
69
+ {
70
+ if constexpr (has_effectful_member_construct2<Allocator, typename pointer_element<Pointer>::type, T>)
71
+ {
72
+ thrust::for_each_n(allocator_system<Allocator>::get(a), p, n, construct2_via_allocator<Allocator, T>{a, value});
73
+ }
74
+ else
75
+ {
76
+ thrust::uninitialized_fill_n(allocator_system<Allocator>::get(a), p, n, value);
77
+ }
78
+ }
35
79
 
36
80
  } // namespace detail
37
81
  THRUST_NAMESPACE_END
38
-
39
- #include <thrust/detail/allocator/fill_construct_range.inl>
@@ -25,29 +25,54 @@
25
25
  #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
26
26
  # pragma system_header
27
27
  #endif // no system header
28
+
29
+ #include <thrust/detail/allocator/malloc_allocator.h>
28
30
  #include <thrust/detail/allocator/tagged_allocator.h>
31
+ #include <thrust/detail/malloc_and_free.h>
32
+ #include <thrust/detail/raw_pointer_cast.h>
29
33
  #include <thrust/detail/type_traits/pointer_traits.h>
34
+ #include <thrust/system/detail/bad_alloc.h>
35
+ #include <thrust/system/detail/generic/select_system.h>
30
36
 
31
37
  THRUST_NAMESPACE_BEGIN
32
38
  namespace detail
33
39
  {
34
40
 
35
41
  template <typename T, typename System, typename Pointer>
36
- class malloc_allocator : public thrust::detail::tagged_allocator<T, System, Pointer>
42
+ class malloc_allocator : public tagged_allocator<T, System, Pointer>
37
43
  {
38
44
  private:
39
- using super_t = thrust::detail::tagged_allocator<T, System, Pointer>;
45
+ using super_t = tagged_allocator<T, System, Pointer>;
40
46
 
41
47
  public:
42
48
  using pointer = typename super_t::pointer;
43
49
  using size_type = typename super_t::size_type;
44
50
 
45
- pointer allocate(size_type cnt);
51
+ pointer allocate(size_type cnt)
52
+ {
53
+ using thrust::system::detail::generic::select_system;
54
+
55
+ // XXX should use a hypothetical thrust::static_pointer_cast here
56
+ System system;
57
+
58
+ pointer result = thrust::malloc<T>(select_system(system), cnt);
59
+
60
+ if (result.get() == 0)
61
+ {
62
+ throw thrust::system::detail::bad_alloc("malloc_allocator::allocate: malloc failed");
63
+ } // end if
46
64
 
47
- void deallocate(pointer p, size_type n) noexcept;
65
+ return result;
66
+ }
67
+
68
+ void deallocate(pointer p, size_type n) noexcept
69
+ {
70
+ using thrust::system::detail::generic::select_system;
71
+
72
+ System system;
73
+ thrust::free(select_system(system), p);
74
+ }
48
75
  };
49
76
 
50
77
  } // namespace detail
51
78
  THRUST_NAMESPACE_END
52
-
53
- #include <thrust/detail/allocator/malloc_allocator.inl>
@@ -25,9 +25,12 @@
25
25
  #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
26
26
  # pragma system_header
27
27
  #endif // no system header
28
+ #include <thrust/detail/allocator/tagged_allocator.h>
28
29
  #include <thrust/detail/type_traits/pointer_traits.h>
29
30
  #include <thrust/iterator/iterator_traits.h>
30
31
 
32
+ #include <cuda/std/limits>
33
+
31
34
  THRUST_NAMESPACE_BEGIN
32
35
  namespace detail
33
36
  {
@@ -72,31 +75,41 @@ public:
72
75
  using other = tagged_allocator<U, Tag, Pointer>;
73
76
  }; // end rebind
74
77
 
75
- _CCCL_HOST_DEVICE inline tagged_allocator();
78
+ tagged_allocator() = default;
76
79
 
77
- _CCCL_HOST_DEVICE inline tagged_allocator(const tagged_allocator&);
80
+ tagged_allocator(const tagged_allocator&) = default;
78
81
 
79
82
  template <typename U, typename OtherPointer>
80
- _CCCL_HOST_DEVICE inline tagged_allocator(const tagged_allocator<U, Tag, OtherPointer>&);
83
+ _CCCL_HOST_DEVICE tagged_allocator(const tagged_allocator<U, Tag, OtherPointer>&)
84
+ {}
81
85
 
82
- _CCCL_HOST_DEVICE inline ~tagged_allocator();
86
+ ~tagged_allocator() = default;
83
87
 
84
- _CCCL_HOST_DEVICE pointer address(reference x) const;
88
+ _CCCL_HOST_DEVICE pointer address(reference x) const
89
+ {
90
+ return &x;
91
+ }
85
92
 
86
- _CCCL_HOST_DEVICE const_pointer address(const_reference x) const;
93
+ _CCCL_HOST_DEVICE const_pointer address(const_reference x) const
94
+ {
95
+ return &x;
96
+ }
87
97
 
88
- size_type max_size() const;
89
- };
98
+ size_type max_size() const
99
+ {
100
+ return (::cuda::std::numeric_limits<size_type>::max)() / sizeof(T);
101
+ }
90
102
 
91
- template <typename T1, typename Pointer1, typename T2, typename Pointer2, typename Tag>
92
- _CCCL_HOST_DEVICE bool
93
- operator==(const tagged_allocator<T1, Pointer1, Tag>&, const tagged_allocator<T2, Pointer2, Tag>&);
103
+ _CCCL_HOST_DEVICE friend bool operator==(const tagged_allocator&, const tagged_allocator&)
104
+ {
105
+ return true;
106
+ }
94
107
 
95
- template <typename T1, typename Pointer1, typename T2, typename Pointer2, typename Tag>
96
- _CCCL_HOST_DEVICE bool
97
- operator!=(const tagged_allocator<T1, Pointer1, Tag>&, const tagged_allocator<T2, Pointer2, Tag>&);
108
+ _CCCL_HOST_DEVICE friend bool operator!=(const tagged_allocator&, const tagged_allocator&)
109
+ {
110
+ return false;
111
+ }
112
+ };
98
113
 
99
114
  } // namespace detail
100
115
  THRUST_NAMESPACE_END
101
-
102
- #include <thrust/detail/allocator/tagged_allocator.inl>
@@ -25,11 +25,23 @@
25
25
  #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
26
26
  # pragma system_header
27
27
  #endif // no system header
28
+
28
29
  #include <thrust/detail/allocator/allocator_traits.h>
29
30
  #include <thrust/detail/allocator/tagged_allocator.h>
31
+ #include <thrust/detail/allocator/temporary_allocator.h>
30
32
  #include <thrust/detail/execution_policy.h>
33
+ #include <thrust/detail/temporary_buffer.h>
31
34
  #include <thrust/memory.h>
32
35
  #include <thrust/pair.h>
36
+ #include <thrust/system/detail/bad_alloc.h>
37
+
38
+ #include <cuda/std/cassert>
39
+
40
+ #include <nv/target>
41
+
42
+ #if _CCCL_CUDA_COMPILATION() && _CCCL_DEVICE_COMPILATION()
43
+ # include <thrust/system/cuda/detail/terminate.h>
44
+ #endif // _CCCL_CUDA_COMPILATION() && _CCCL_DEVICE_COMPILATION()
33
45
 
34
46
  THRUST_NAMESPACE_BEGIN
35
47
  namespace detail
@@ -60,9 +72,36 @@ public:
60
72
  , m_system(thrust::detail::derived_cast(system))
61
73
  {}
62
74
 
63
- _CCCL_HOST_DEVICE pointer allocate(size_type cnt);
75
+ _CCCL_HOST_DEVICE pointer allocate(size_type cnt)
76
+ {
77
+ pointer_and_size result = thrust::get_temporary_buffer<T>(system(), cnt);
78
+
79
+ // handle failure
80
+ if (result.second < cnt)
81
+ {
82
+ // deallocate and throw
83
+ // note that we pass cnt to deallocate, not a value derived from result.second
84
+ deallocate(result.first, cnt);
85
+
86
+ #if _CCCL_CUDA_COMPILATION()
87
+ NV_IF_TARGET(
88
+ NV_IS_HOST,
89
+ (throw thrust::system::detail::bad_alloc("temporary_buffer::allocate: get_temporary_buffer failed");),
90
+ ( // NV_IS_DEVICE
91
+ thrust::system::cuda::detail::terminate_with_message("temporary_buffer::allocate: "
92
+ "get_temporary_buffer failed");));
93
+ #else
94
+ throw thrust::system::detail::bad_alloc("temporary_buffer::allocate: get_temporary_buffer failed");
95
+ #endif
96
+ } // end if
64
97
 
65
- _CCCL_HOST_DEVICE void deallocate(pointer p, size_type n) noexcept;
98
+ return result.first;
99
+ }
100
+
101
+ _CCCL_HOST_DEVICE void deallocate(pointer p, size_type n) noexcept
102
+ {
103
+ return thrust::return_temporary_buffer(system(), p, n);
104
+ }
66
105
 
67
106
  _CCCL_HOST_DEVICE inline System& system()
68
107
  {
@@ -75,5 +114,3 @@ private:
75
114
 
76
115
  } // namespace detail
77
116
  THRUST_NAMESPACE_END
78
-
79
- #include <thrust/detail/allocator/temporary_allocator.inl>
@@ -26,14 +26,52 @@
26
26
  # pragma system_header
27
27
  #endif // no system header
28
28
 
29
+ #include <thrust/detail/allocator/allocator_traits.h>
30
+ #include <thrust/detail/type_traits.h>
31
+ #include <thrust/detail/type_traits/pointer_traits.h>
32
+ #include <thrust/for_each.h>
33
+ #include <thrust/uninitialized_fill.h>
34
+
29
35
  THRUST_NAMESPACE_BEGIN
30
36
  namespace detail
31
37
  {
38
+ template <typename Allocator>
39
+ struct construct1_via_allocator
40
+ {
41
+ Allocator& a;
32
42
 
33
- template <typename Allocator, typename Pointer, typename Size>
34
- _CCCL_HOST_DEVICE inline void value_initialize_range(Allocator& a, Pointer p, Size n);
43
+ template <typename T>
44
+ inline _CCCL_HOST_DEVICE void operator()(T& x)
45
+ {
46
+ allocator_traits<Allocator>::construct(a, &x);
47
+ }
48
+ };
49
+
50
+ // we need to construct T via the allocator if the Allocator does something interesting or if T's default constructor
51
+ // does something interesting
52
+ template <typename Allocator, typename T>
53
+ inline constexpr bool needs_default_construct_via_allocator =
54
+ allocator_traits_detail::has_member_construct1<Allocator, T>::value
55
+ || !::cuda::std::is_trivially_default_constructible_v<T>;
35
56
 
57
+ // we know that std::allocator::construct's only effect is to call T's
58
+ // default constructor, so we needn't use it for default construction
59
+ // unless T's constructor does something interesting
60
+ template <typename U, typename T>
61
+ inline constexpr bool needs_default_construct_via_allocator<std::allocator<U>, T> =
62
+ !::cuda::std::is_trivially_default_constructible_v<T>;
63
+
64
+ template <typename Allocator, typename Pointer, typename Size>
65
+ _CCCL_HOST_DEVICE void value_initialize_range(Allocator& a, Pointer p, Size n)
66
+ {
67
+ if constexpr (needs_default_construct_via_allocator<Allocator, typename pointer_element<Pointer>::type>)
68
+ {
69
+ thrust::for_each_n(allocator_system<Allocator>::get(a), p, n, construct1_via_allocator<Allocator>{a});
70
+ }
71
+ else
72
+ {
73
+ thrust::uninitialized_fill_n(allocator_system<Allocator>::get(a), p, n, typename pointer_element<Pointer>::type());
74
+ }
75
+ }
36
76
  } // namespace detail
37
77
  THRUST_NAMESPACE_END
38
-
39
- #include <thrust/detail/allocator/value_initialize_range.inl>
@@ -106,9 +106,9 @@ _CCCL_HOST_DEVICE inline thrust::complex<double> ccosh(const thrust::complex<dou
106
106
  else if (ix < 0x4096bbaa)
107
107
  {
108
108
  /* x < 1455: scale to avoid overflow */
109
- thrust::complex<double> z_;
110
- z_ = ldexp_cexp(thrust::complex<double>(fabs(x), y), -1);
111
- return (thrust::complex<double>(z_.real(), z_.imag() * copysign(1.0, x)));
109
+ thrust::complex<double> z_ = ldexp_cexp(thrust::complex<double>(fabs(x), y), -1);
110
+ z_.imag(copysign(z_.imag(), x));
111
+ return z_;
112
112
  }
113
113
  else
114
114
  {
@@ -30,7 +30,6 @@
30
30
  # pragma system_header
31
31
  #endif // no system header
32
32
 
33
- #include <thrust/detail/memory_wrapper.h> // for ::new
34
33
  #include <thrust/detail/raw_reference_cast.h>
35
34
  #include <thrust/detail/static_assert.h>
36
35
  #include <thrust/detail/type_traits.h>
@@ -43,6 +42,7 @@
43
42
  #include <cuda/__iterator/tabulate_output_iterator.h>
44
43
  #include <cuda/__iterator/transform_input_output_iterator.h>
45
44
  #include <cuda/__iterator/transform_output_iterator.h>
45
+ #include <cuda/std/__cccl/memory_wrapper.h> // for ::new
46
46
  #include <cuda/std/type_traits>
47
47
 
48
48
  THRUST_NAMESPACE_BEGIN
@@ -18,10 +18,10 @@
18
18
  # pragma system_header
19
19
  #endif // no system header
20
20
  #include <thrust/detail/allocator/allocator_traits.h>
21
- #include <thrust/detail/memory_wrapper.h>
22
21
  #include <thrust/detail/type_traits.h>
23
22
  #include <thrust/iterator/iterator_traits.h>
24
23
 
24
+ #include <cuda/std/__cccl/memory_wrapper.h>
25
25
  #include <cuda/std/__memory/addressof.h>
26
26
  #include <cuda/std/utility>
27
27
 
@@ -45,10 +45,10 @@ THRUST_NAMESPACE_END
45
45
  #include <thrust/detail/allocator/no_throw_allocator.h>
46
46
  #include <thrust/detail/allocator/temporary_allocator.h>
47
47
  #include <thrust/detail/contiguous_storage.h>
48
- #include <thrust/detail/memory_wrapper.h>
49
48
  #include <thrust/iterator/detail/tagged_iterator.h>
50
49
  #include <thrust/iterator/iterator_traits.h>
51
50
 
51
+ #include <cuda/std/__cccl/memory_wrapper.h>
52
52
  #include <cuda/std/type_traits>
53
53
 
54
54
  THRUST_NAMESPACE_BEGIN
@@ -123,7 +123,7 @@ struct larger_type
123
123
  {};
124
124
 
125
125
  template <class F, class... Us>
126
- using invoke_result = ::cuda::std::__invoke_of<F, Us...>;
126
+ using invoke_result = ::cuda::std::invoke_result<F, Us...>;
127
127
 
128
128
  template <class F, class... Us>
129
129
  using invoke_result_t = typename invoke_result<F, Us...>::type;
@@ -29,10 +29,22 @@
29
29
  #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
30
30
  # pragma system_header
31
31
  #endif // no system header
32
+
33
+ #include <thrust/detail/allocator/destroy_range.h>
34
+ #include <thrust/device_delete.h>
35
+ #include <thrust/device_free.h>
32
36
  #include <thrust/device_ptr.h>
37
+ #include <thrust/execution_policy.h>
33
38
 
34
39
  THRUST_NAMESPACE_BEGIN
35
40
 
41
+ namespace detail
42
+ {
43
+ // define an empty allocator class to use below
44
+ struct device_delete_allocator
45
+ {};
46
+ } // namespace detail
47
+
36
48
  /*! \addtogroup memory_management Memory Management
37
49
  * \{
38
50
  */
@@ -49,11 +61,14 @@ THRUST_NAMESPACE_BEGIN
49
61
  * \see device_new
50
62
  */
51
63
  template <typename T>
52
- inline void device_delete(thrust::device_ptr<T> ptr, const size_t n = 1);
64
+ inline void device_delete(thrust::device_ptr<T> ptr, const size_t n = 1)
65
+ {
66
+ // we don't have an allocator, so there is no need to go through thrust::detail::destroy_range
67
+ thrust::for_each_n(device, ptr, n, detail::gozer{});
68
+ thrust::device_free(ptr);
69
+ }
53
70
 
54
71
  /*! \} // memory_management
55
72
  */
56
73
 
57
74
  THRUST_NAMESPACE_END
58
-
59
- #include <thrust/detail/device_delete.inl>
@@ -29,7 +29,12 @@
29
29
  #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
30
30
  # pragma system_header
31
31
  #endif // no system header
32
+
33
+ #include <thrust/detail/malloc_and_free.h>
34
+ #include <thrust/device_free.h>
32
35
  #include <thrust/device_ptr.h>
36
+ #include <thrust/iterator/iterator_traits.h>
37
+ #include <thrust/system/detail/generic/select_system.h>
33
38
 
34
39
  THRUST_NAMESPACE_BEGIN
35
40
 
@@ -62,11 +67,19 @@ THRUST_NAMESPACE_BEGIN
62
67
  * \see device_ptr
63
68
  * \see device_malloc
64
69
  */
65
- inline void device_free(thrust::device_ptr<void> ptr);
70
+ inline void device_free(thrust::device_ptr<void> ptr)
71
+ {
72
+ using thrust::system::detail::generic::select_system;
73
+
74
+ using system = thrust::iterator_system<thrust::device_ptr<void>>::type;
75
+
76
+ // XXX lower to select_system(system) here
77
+ system s;
78
+
79
+ thrust::free(s, ptr);
80
+ }
66
81
 
67
82
  /*! \} // memory_management
68
83
  */
69
84
 
70
85
  THRUST_NAMESPACE_END
71
-
72
- #include <thrust/detail/device_free.inl>
@@ -30,10 +30,13 @@
30
30
  # pragma system_header
31
31
  #endif // no system header
32
32
 
33
- // #include this for size_t
33
+ #include <thrust/detail/allocator/value_initialize_range.h>
34
+ #include <thrust/device_allocator.h>
35
+ #include <thrust/device_malloc.h>
36
+ #include <thrust/device_new.h>
34
37
  #include <thrust/device_ptr.h>
35
-
36
- #include <cuda/std/cstddef>
38
+ #include <thrust/execution_policy.h>
39
+ #include <thrust/uninitialized_fill.h>
37
40
 
38
41
  THRUST_NAMESPACE_BEGIN
39
42
 
@@ -55,7 +58,15 @@ THRUST_NAMESPACE_BEGIN
55
58
  * \see device_ptr
56
59
  */
57
60
  template <typename T>
58
- device_ptr<T> device_new(device_ptr<void> p, const size_t n = 1);
61
+ device_ptr<T> device_new(device_ptr<void> p, const size_t n = 1)
62
+ {
63
+ auto* dev_ptr = static_cast<T*>(p.get());
64
+ // TODO(bgruber): ideally, we would have an thrust::uninitialized_default_construct. Until then, use vector's
65
+ // infrastructure
66
+ device_allocator<T> alloc; // not needed for allocation, just for construct() called in value_initialize_range()
67
+ detail::value_initialize_range(alloc, dev_ptr, n);
68
+ return device_ptr<T>{dev_ptr};
69
+ }
59
70
 
60
71
  /*! \p device_new implements the placement new operator for types
61
72
  * resident in device memory. \p device_new calls <tt>T</tt>'s copy
@@ -72,7 +83,15 @@ device_ptr<T> device_new(device_ptr<void> p, const size_t n = 1);
72
83
  * \see fill
73
84
  */
74
85
  template <typename T>
75
- device_ptr<T> device_new(device_ptr<void> p, const T& exemplar, const size_t n = 1);
86
+ device_ptr<T> device_new(device_ptr<void> p, const T& exemplar, const size_t n = 1)
87
+ {
88
+ device_ptr<T> result(static_cast<T*>(p.get()));
89
+
90
+ // run copy constructors at p here
91
+ thrust::uninitialized_fill(device, result, result + n, exemplar);
92
+
93
+ return result;
94
+ }
76
95
 
77
96
  /*! \p device_new implements the new operator for types resident in device memory.
78
97
  * It allocates device memory large enough to hold \p n new objects of type \c T.
@@ -81,11 +100,13 @@ device_ptr<T> device_new(device_ptr<void> p, const T& exemplar, const size_t n =
81
100
  * \return A \p device_ptr to the newly allocated region of device memory.
82
101
  */
83
102
  template <typename T>
84
- device_ptr<T> device_new(const size_t n = 1);
103
+ device_ptr<T> device_new(const size_t n = 1)
104
+ {
105
+ // call placement new version of device_new
106
+ return device_new<T>(thrust::device_malloc<T>(n));
107
+ }
85
108
 
86
109
  /*! \} // memory_management
87
110
  */
88
111
 
89
112
  THRUST_NAMESPACE_END
90
-
91
- #include <thrust/detail/device_new.inl>
@@ -30,9 +30,9 @@
30
30
  #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
31
31
  # pragma system_header
32
32
  #endif // no system header
33
- #include <thrust/detail/memory_wrapper.h>
34
33
  #include <thrust/detail/vector_base.h>
35
34
 
35
+ #include <cuda/std/__cccl/memory_wrapper.h>
36
36
  #include <cuda/std/initializer_list>
37
37
  #include <cuda/std/utility>
38
38
 
@@ -104,7 +104,8 @@ inline constexpr bool is_proxy_reference_v<tabulate_output_iterator_proxy<Binary
104
104
  //!
105
105
  //! \see make_tabulate_output_iterator
106
106
  template <typename BinaryFunction, typename System = use_default, typename DifferenceT = ptrdiff_t>
107
- class tabulate_output_iterator : public detail::make_tabulate_output_iterator_base<BinaryFunction, System, DifferenceT>
107
+ class CCCL_DEPRECATED_BECAUSE("Use cuda::tabulate_output_iterator instead") tabulate_output_iterator
108
+ : public detail::make_tabulate_output_iterator_base<BinaryFunction, System, DifferenceT>
108
109
  {
109
110
  public:
110
111
  //! \cond
@@ -138,7 +139,9 @@ private:
138
139
  //! \param fun The \c BinaryFunction invoked whenever assigning to a dereferenced \p tabulate_output_iterator
139
140
  //! \see tabulate_output_iterator
140
141
  template <typename BinaryFunction>
141
- tabulate_output_iterator<BinaryFunction> _CCCL_HOST_DEVICE make_tabulate_output_iterator(BinaryFunction fun)
142
+ CCCL_DEPRECATED_BECAUSE("Use cuda::make_tabulate_output_iterator instead")
143
+ _CCCL_HOST_DEVICE tabulate_output_iterator<BinaryFunction>
144
+ make_tabulate_output_iterator(BinaryFunction fun)
142
145
  {
143
146
  return tabulate_output_iterator<BinaryFunction>(fun);
144
147
  }
@@ -34,7 +34,6 @@
34
34
  #include <thrust/detail/config.h>
35
35
 
36
36
  #include <thrust/binary_search.h>
37
- #include <thrust/detail/algorithm_wrapper.h>
38
37
  #include <thrust/detail/seq.h>
39
38
  #include <thrust/find.h>
40
39
  #include <thrust/host_vector.h>
@@ -44,6 +43,7 @@
44
43
 
45
44
  #include <cuda/std/__algorithm/max.h>
46
45
  #include <cuda/std/__algorithm/min.h>
46
+ #include <cuda/std/__cccl/algorithm_wrapper.h>
47
47
  #include <cuda/std/cassert>
48
48
  #include <cuda/std/cstdint>
49
49
 
@@ -32,12 +32,12 @@
32
32
  # pragma system_header
33
33
  #endif // no system header
34
34
 
35
- #include <thrust/detail/algorithm_wrapper.h>
36
35
  #include <thrust/host_vector.h>
37
36
  #include <thrust/mr/allocator.h>
38
37
  #include <thrust/mr/memory_resource.h>
39
38
  #include <thrust/mr/pool_options.h>
40
39
 
40
+ #include <cuda/std/__cccl/algorithm_wrapper.h>
41
41
  #include <cuda/std/cassert>
42
42
  #include <cuda/std/cstdint>
43
43