cuda-cccl 0.3.0__cp310-cp310-manylinux_2_24_aarch64.whl → 0.3.2__cp310-cp310-manylinux_2_24_aarch64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cuda-cccl might be problematic. Click here for more details.

Files changed (294)
  1. cuda/cccl/cooperative/__init__.py +7 -1
  2. cuda/cccl/cooperative/experimental/__init__.py +21 -5
  3. cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +2 -5
  4. cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +2 -5
  5. cuda/cccl/headers/include/cub/agent/agent_for.cuh +2 -5
  6. cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +354 -572
  7. cuda/cccl/headers/include/cub/agent/agent_merge.cuh +23 -21
  8. cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +21 -3
  9. cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +2 -5
  10. cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +2 -5
  11. cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +2 -5
  12. cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +2 -5
  13. cuda/cccl/headers/include/cub/agent/agent_rle.cuh +2 -5
  14. cuda/cccl/headers/include/cub/agent/agent_scan.cuh +5 -1
  15. cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +2 -5
  16. cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +2 -5
  17. cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +2 -5
  18. cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +2 -5
  19. cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +2 -5
  20. cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +22 -5
  21. cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +6 -8
  22. cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +24 -14
  23. cuda/cccl/headers/include/cub/block/block_exchange.cuh +5 -0
  24. cuda/cccl/headers/include/cub/block/block_histogram.cuh +4 -0
  25. cuda/cccl/headers/include/cub/block/block_load.cuh +4 -0
  26. cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +4 -2
  27. cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +4 -2
  28. cuda/cccl/headers/include/cub/block/block_reduce.cuh +1 -0
  29. cuda/cccl/headers/include/cub/block/block_scan.cuh +12 -2
  30. cuda/cccl/headers/include/cub/block/block_store.cuh +3 -2
  31. cuda/cccl/headers/include/cub/detail/device_memory_resource.cuh +1 -0
  32. cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +34 -30
  33. cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +1 -1
  34. cuda/cccl/headers/include/cub/device/device_for.cuh +118 -40
  35. cuda/cccl/headers/include/cub/device/device_reduce.cuh +6 -7
  36. cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +170 -260
  37. cuda/cccl/headers/include/cub/device/device_transform.cuh +122 -91
  38. cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +6 -7
  39. cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +2 -11
  40. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +12 -29
  41. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +2 -7
  42. cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_nondeterministic.cuh +0 -1
  43. cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +2 -3
  44. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +4 -5
  45. cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +0 -1
  46. cuda/cccl/headers/include/cub/device/dispatch/dispatch_topk.cuh +3 -5
  47. cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +13 -5
  48. cuda/cccl/headers/include/cub/device/dispatch/kernels/for_each.cuh +72 -37
  49. cuda/cccl/headers/include/cub/device/dispatch/kernels/reduce.cuh +2 -5
  50. cuda/cccl/headers/include/cub/device/dispatch/kernels/scan.cuh +2 -5
  51. cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_reduce.cuh +2 -5
  52. cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +22 -27
  53. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +2 -5
  54. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +2 -5
  55. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +2 -5
  56. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +2 -5
  57. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +2 -5
  58. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +8 -0
  59. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +2 -5
  60. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +2 -5
  61. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +2 -5
  62. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +2 -5
  63. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +2 -5
  64. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +2 -5
  65. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +2 -5
  66. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +61 -70
  67. cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +10 -0
  68. cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +24 -17
  69. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +3 -2
  70. cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +3 -2
  71. cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +2 -2
  72. cuda/cccl/headers/include/cub/warp/warp_load.cuh +6 -6
  73. cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +7 -2
  74. cuda/cccl/headers/include/cub/warp/warp_scan.cuh +7 -3
  75. cuda/cccl/headers/include/cub/warp/warp_store.cuh +1 -0
  76. cuda/cccl/headers/include/cuda/__algorithm/common.h +1 -1
  77. cuda/cccl/headers/include/cuda/__algorithm/copy.h +1 -1
  78. cuda/cccl/headers/include/cuda/__algorithm/fill.h +1 -1
  79. cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +19 -0
  80. cuda/cccl/headers/include/cuda/__cccl_config +1 -0
  81. cuda/cccl/headers/include/cuda/__cmath/fast_modulo_division.h +3 -74
  82. cuda/cccl/headers/include/cuda/__cmath/mul_hi.h +146 -0
  83. cuda/cccl/headers/include/cuda/__complex/get_real_imag.h +0 -4
  84. cuda/cccl/headers/include/cuda/__device/all_devices.h +46 -143
  85. cuda/cccl/headers/include/cuda/__device/arch_id.h +176 -0
  86. cuda/cccl/headers/include/cuda/__device/arch_traits.h +247 -323
  87. cuda/cccl/headers/include/cuda/__device/attributes.h +174 -123
  88. cuda/cccl/headers/include/cuda/__device/compute_capability.h +171 -0
  89. cuda/cccl/headers/include/cuda/__device/device_ref.h +27 -49
  90. cuda/cccl/headers/include/cuda/__device/physical_device.h +100 -96
  91. cuda/cccl/headers/include/cuda/__driver/driver_api.h +105 -3
  92. cuda/cccl/headers/include/cuda/__event/event.h +27 -26
  93. cuda/cccl/headers/include/cuda/__event/event_ref.h +5 -5
  94. cuda/cccl/headers/include/cuda/__event/timed_event.h +10 -7
  95. cuda/cccl/headers/include/cuda/__fwd/devices.h +44 -0
  96. cuda/cccl/headers/include/cuda/__fwd/zip_iterator.h +9 -0
  97. cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +46 -31
  98. cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +79 -47
  99. cuda/cccl/headers/include/cuda/__iterator/tabulate_output_iterator.h +59 -36
  100. cuda/cccl/headers/include/cuda/__iterator/transform_input_output_iterator.h +79 -49
  101. cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +74 -48
  102. cuda/cccl/headers/include/cuda/__iterator/transform_output_iterator.h +80 -55
  103. cuda/cccl/headers/include/cuda/__iterator/zip_common.h +148 -0
  104. cuda/cccl/headers/include/cuda/__iterator/zip_iterator.h +21 -137
  105. cuda/cccl/headers/include/cuda/__iterator/zip_transform_iterator.h +592 -0
  106. cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +127 -60
  107. cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +178 -3
  108. cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +38 -8
  109. cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +67 -1
  110. cuda/cccl/headers/include/cuda/__memory/ptr_in_range.h +93 -0
  111. cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +4 -4
  112. cuda/cccl/headers/include/cuda/__memory_resource/properties.h +44 -0
  113. cuda/cccl/headers/include/cuda/__memory_resource/resource.h +1 -1
  114. cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +4 -6
  115. cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2 -1
  116. cuda/cccl/headers/include/cuda/__runtime/ensure_current_context.h +9 -7
  117. cuda/cccl/headers/include/cuda/__stream/stream.h +8 -8
  118. cuda/cccl/headers/include/cuda/__stream/stream_ref.h +18 -16
  119. cuda/cccl/headers/include/cuda/__utility/basic_any.h +1 -1
  120. cuda/cccl/headers/include/cuda/__utility/in_range.h +65 -0
  121. cuda/cccl/headers/include/cuda/algorithm +1 -1
  122. cuda/cccl/headers/include/cuda/cmath +1 -0
  123. cuda/cccl/headers/include/cuda/devices +13 -0
  124. cuda/cccl/headers/include/cuda/iterator +1 -0
  125. cuda/cccl/headers/include/cuda/memory +1 -0
  126. cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +2 -2
  127. cuda/cccl/headers/include/cuda/std/__algorithm/find.h +1 -1
  128. cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +2 -4
  129. cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +1 -1
  130. cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +7 -15
  131. cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +1 -1
  132. cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +1 -2
  133. cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +2 -2
  134. cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +1 -1
  135. cuda/cccl/headers/include/cuda/std/__bit/countl.h +8 -1
  136. cuda/cccl/headers/include/cuda/std/__bit/countr.h +2 -2
  137. cuda/cccl/headers/include/cuda/std/__bit/reference.h +11 -11
  138. cuda/cccl/headers/include/cuda/std/__cccl/algorithm_wrapper.h +36 -0
  139. cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +46 -49
  140. cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +6 -0
  141. cuda/cccl/headers/include/cuda/std/__cccl/host_std_lib.h +52 -0
  142. cuda/cccl/headers/include/cuda/std/__cccl/memory_wrapper.h +36 -0
  143. cuda/cccl/headers/include/cuda/std/__cccl/numeric_wrapper.h +36 -0
  144. cuda/cccl/headers/include/cuda/std/__chrono/duration.h +16 -16
  145. cuda/cccl/headers/include/cuda/std/__chrono/steady_clock.h +5 -5
  146. cuda/cccl/headers/include/cuda/std/__chrono/system_clock.h +5 -5
  147. cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +3 -2
  148. cuda/cccl/headers/include/cuda/std/__complex/complex.h +3 -2
  149. cuda/cccl/headers/include/cuda/std/__complex/literals.h +14 -34
  150. cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +2 -1
  151. cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +4 -3
  152. cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +2 -2
  153. cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +3 -2
  154. cuda/cccl/headers/include/cuda/std/__floating_point/fp.h +1 -1
  155. cuda/cccl/headers/include/cuda/std/__functional/bind.h +10 -13
  156. cuda/cccl/headers/include/cuda/std/__functional/function.h +5 -8
  157. cuda/cccl/headers/include/cuda/std/__functional/invoke.h +71 -335
  158. cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +1 -2
  159. cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +3 -3
  160. cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +0 -6
  161. cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +13 -0
  162. cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +13 -0
  163. cuda/cccl/headers/include/cuda/std/__fwd/complex.h +13 -4
  164. cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +23 -0
  165. cuda/cccl/headers/include/cuda/std/__fwd/pair.h +13 -0
  166. cuda/cccl/headers/include/cuda/std/__fwd/string.h +22 -0
  167. cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +14 -0
  168. cuda/cccl/headers/include/cuda/std/__internal/features.h +0 -5
  169. cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +21 -0
  170. cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +5 -5
  171. cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +7 -1
  172. cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +53 -39
  173. cuda/cccl/headers/include/cuda/std/__memory/allocator.h +3 -3
  174. cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +1 -3
  175. cuda/cccl/headers/include/cuda/std/__optional/optional_base.h +1 -0
  176. cuda/cccl/headers/include/cuda/std/__ranges/compressed_movable_box.h +892 -0
  177. cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +2 -2
  178. cuda/cccl/headers/include/cuda/std/__tuple_dir/make_tuple_types.h +23 -1
  179. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +4 -0
  180. cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like_ext.h +4 -0
  181. cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +7 -5
  182. cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +1 -1
  183. cuda/cccl/headers/include/cuda/std/__utility/pair.h +0 -5
  184. cuda/cccl/headers/include/cuda/std/bitset +1 -1
  185. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +15 -12
  186. cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +11 -9
  187. cuda/cccl/headers/include/cuda/std/inplace_vector +4 -4
  188. cuda/cccl/headers/include/cuda/std/numbers +5 -0
  189. cuda/cccl/headers/include/cuda/std/string_view +155 -13
  190. cuda/cccl/headers/include/cuda/std/version +1 -4
  191. cuda/cccl/headers/include/cuda/stream_ref +5 -0
  192. cuda/cccl/headers/include/cuda/utility +1 -0
  193. cuda/cccl/headers/include/nv/target +7 -2
  194. cuda/cccl/headers/include/thrust/allocate_unique.h +1 -1
  195. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +309 -33
  196. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +151 -4
  197. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +60 -3
  198. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +45 -3
  199. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +31 -6
  200. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +29 -16
  201. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +41 -4
  202. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +42 -4
  203. cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +3 -3
  204. cuda/cccl/headers/include/thrust/detail/integer_math.h +3 -20
  205. cuda/cccl/headers/include/thrust/detail/internal_functional.h +1 -1
  206. cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +1 -1
  207. cuda/cccl/headers/include/thrust/detail/temporary_array.h +1 -1
  208. cuda/cccl/headers/include/thrust/detail/type_traits.h +1 -1
  209. cuda/cccl/headers/include/thrust/device_delete.h +18 -3
  210. cuda/cccl/headers/include/thrust/device_free.h +16 -3
  211. cuda/cccl/headers/include/thrust/device_new.h +29 -8
  212. cuda/cccl/headers/include/thrust/host_vector.h +1 -1
  213. cuda/cccl/headers/include/thrust/iterator/iterator_traits.h +11 -0
  214. cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +5 -2
  215. cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +1 -1
  216. cuda/cccl/headers/include/thrust/mr/pool.h +1 -1
  217. cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +33 -0
  218. cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +13 -115
  219. cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +8 -2
  220. cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +7 -7
  221. cuda/cccl/parallel/experimental/__init__.py +21 -74
  222. cuda/compute/__init__.py +79 -0
  223. cuda/{cccl/parallel/experimental → compute}/_bindings.pyi +43 -1
  224. cuda/{cccl/parallel/experimental → compute}/_bindings_impl.pyx +157 -8
  225. cuda/{cccl/parallel/experimental → compute}/algorithms/_histogram.py +2 -2
  226. cuda/{cccl/parallel/experimental → compute}/algorithms/_merge_sort.py +2 -2
  227. cuda/{cccl/parallel/experimental → compute}/algorithms/_radix_sort.py +3 -3
  228. cuda/{cccl/parallel/experimental → compute}/algorithms/_reduce.py +2 -2
  229. cuda/{cccl/parallel/experimental → compute}/algorithms/_scan.py +112 -40
  230. cuda/{cccl/parallel/experimental → compute}/algorithms/_segmented_reduce.py +2 -2
  231. cuda/{cccl/parallel/experimental → compute}/algorithms/_three_way_partition.py +2 -2
  232. cuda/{cccl/parallel/experimental → compute}/algorithms/_transform.py +36 -15
  233. cuda/{cccl/parallel/experimental → compute}/algorithms/_unique_by_key.py +2 -2
  234. cuda/compute/cu12/_bindings_impl.cpython-310-aarch64-linux-gnu.so +0 -0
  235. cuda/{cccl/parallel/experimental → compute}/cu12/cccl/libcccl.c.parallel.so +0 -0
  236. cuda/compute/cu13/_bindings_impl.cpython-310-aarch64-linux-gnu.so +0 -0
  237. cuda/{cccl/parallel/experimental → compute}/cu13/cccl/libcccl.c.parallel.so +0 -0
  238. cuda/{cccl/parallel/experimental → compute}/iterators/__init__.py +2 -0
  239. cuda/{cccl/parallel/experimental → compute}/iterators/_factories.py +36 -8
  240. cuda/{cccl/parallel/experimental → compute}/iterators/_iterators.py +206 -1
  241. cuda/{cccl/parallel/experimental → compute}/numba_utils.py +2 -2
  242. cuda/{cccl/parallel/experimental → compute}/struct.py +2 -2
  243. cuda/{cccl/parallel/experimental → compute}/typing.py +2 -0
  244. cuda/coop/__init__.py +8 -0
  245. cuda/{cccl/cooperative/experimental → coop}/_nvrtc.py +3 -2
  246. cuda/{cccl/cooperative/experimental → coop}/_scan_op.py +3 -3
  247. cuda/{cccl/cooperative/experimental → coop}/_types.py +2 -2
  248. cuda/{cccl/cooperative/experimental → coop}/_typing.py +1 -1
  249. cuda/{cccl/cooperative/experimental → coop}/block/__init__.py +6 -6
  250. cuda/{cccl/cooperative/experimental → coop}/block/_block_exchange.py +4 -4
  251. cuda/{cccl/cooperative/experimental → coop}/block/_block_load_store.py +6 -6
  252. cuda/{cccl/cooperative/experimental → coop}/block/_block_merge_sort.py +4 -4
  253. cuda/{cccl/cooperative/experimental → coop}/block/_block_radix_sort.py +6 -6
  254. cuda/{cccl/cooperative/experimental → coop}/block/_block_reduce.py +6 -6
  255. cuda/{cccl/cooperative/experimental → coop}/block/_block_scan.py +7 -7
  256. cuda/coop/warp/__init__.py +9 -0
  257. cuda/{cccl/cooperative/experimental → coop}/warp/_warp_merge_sort.py +3 -3
  258. cuda/{cccl/cooperative/experimental → coop}/warp/_warp_reduce.py +6 -6
  259. cuda/{cccl/cooperative/experimental → coop}/warp/_warp_scan.py +4 -4
  260. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/METADATA +1 -1
  261. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/RECORD +275 -276
  262. cuda/cccl/cooperative/experimental/warp/__init__.py +0 -9
  263. cuda/cccl/headers/include/cub/device/dispatch/dispatch_advance_iterators.cuh +0 -111
  264. cuda/cccl/headers/include/thrust/detail/algorithm_wrapper.h +0 -37
  265. cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.inl +0 -371
  266. cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.inl +0 -242
  267. cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.inl +0 -137
  268. cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.inl +0 -99
  269. cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.inl +0 -68
  270. cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.inl +0 -86
  271. cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.inl +0 -79
  272. cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.inl +0 -98
  273. cuda/cccl/headers/include/thrust/detail/device_delete.inl +0 -52
  274. cuda/cccl/headers/include/thrust/detail/device_free.inl +0 -47
  275. cuda/cccl/headers/include/thrust/detail/device_new.inl +0 -61
  276. cuda/cccl/headers/include/thrust/detail/memory_wrapper.h +0 -40
  277. cuda/cccl/headers/include/thrust/detail/numeric_wrapper.h +0 -37
  278. cuda/cccl/parallel/experimental/.gitignore +0 -4
  279. cuda/cccl/parallel/experimental/cu12/_bindings_impl.cpython-310-aarch64-linux-gnu.so +0 -0
  280. cuda/cccl/parallel/experimental/cu13/_bindings_impl.cpython-310-aarch64-linux-gnu.so +0 -0
  281. /cuda/{cccl/parallel/experimental → compute}/_bindings.py +0 -0
  282. /cuda/{cccl/parallel/experimental → compute}/_caching.py +0 -0
  283. /cuda/{cccl/parallel/experimental → compute}/_cccl_interop.py +0 -0
  284. /cuda/{cccl/parallel/experimental → compute}/_utils/__init__.py +0 -0
  285. /cuda/{cccl/parallel/experimental → compute}/_utils/protocols.py +0 -0
  286. /cuda/{cccl/parallel/experimental → compute}/_utils/temp_storage_buffer.py +0 -0
  287. /cuda/{cccl/parallel/experimental → compute}/algorithms/__init__.py +0 -0
  288. /cuda/{cccl/parallel/experimental → compute}/cccl/.gitkeep +0 -0
  289. /cuda/{cccl/parallel/experimental → compute}/iterators/_zip_iterator.py +0 -0
  290. /cuda/{cccl/parallel/experimental → compute}/op.py +0 -0
  291. /cuda/{cccl/cooperative/experimental → coop}/_caching.py +0 -0
  292. /cuda/{cccl/cooperative/experimental → coop}/_common.py +0 -0
  293. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/WHEEL +0 -0
  294. {cuda_cccl-0.3.0.dist-info → cuda_cccl-0.3.2.dist-info}/licenses/LICENSE +0 -0
@@ -28,6 +28,7 @@
28
28
  #include <cuda/std/__concepts/totally_ordered.h>
29
29
  #include <cuda/std/__iterator/iterator_traits.h>
30
30
  #include <cuda/std/__mdspan/submdspan_helper.h>
31
+ #include <cuda/std/__ranges/compressed_movable_box.h>
31
32
  #include <cuda/std/__type_traits/is_nothrow_copy_constructible.h>
32
33
  #include <cuda/std/__type_traits/is_nothrow_default_constructible.h>
33
34
  #include <cuda/std/__type_traits/is_nothrow_move_constructible.h>
@@ -57,12 +58,32 @@ private:
57
58
  static_assert(::cuda::std::__integer_like<_Stride> || ::cuda::std::__integral_constant_like<_Stride>,
58
59
  "The stride of a strided_iterator must either be an integer-like or integral-constant-like.");
59
60
 
60
- _Iter __iter_{};
61
- _Stride __stride_{};
62
-
63
61
  template <class, class>
64
62
  friend class strided_iterator;
65
63
 
64
+ // Not a base because then the friend operators would be ambiguous
65
+ ::cuda::std::__compressed_movable_box<_Iter, _Stride> __store_;
66
+
67
+ [[nodiscard]] _CCCL_API constexpr _Iter& __iter() noexcept
68
+ {
69
+ return __store_.template __get<0>();
70
+ }
71
+
72
+ [[nodiscard]] _CCCL_API constexpr const _Iter& __iter() const noexcept
73
+ {
74
+ return __store_.template __get<0>();
75
+ }
76
+
77
+ [[nodiscard]] _CCCL_API constexpr _Stride& __stride() noexcept
78
+ {
79
+ return __store_.template __get<1>();
80
+ }
81
+
82
+ [[nodiscard]] _CCCL_API constexpr const _Stride& __stride() const noexcept
83
+ {
84
+ return __store_.template __get<1>();
85
+ }
86
+
66
87
  public:
67
88
  using iterator_concept = ::cuda::std::random_access_iterator_tag;
68
89
  using iterator_category = ::cuda::std::random_access_iterator_tag;
@@ -78,7 +99,12 @@ public:
78
99
  //! @note _Iter must be default initializable because it is a random_access_iterator and thereby semiregular
79
100
  //! _Stride must be integer-like or integral_constant_like which requires default constructability
80
101
  _CCCL_EXEC_CHECK_DISABLE
81
- _CCCL_HIDE_FROM_ABI strided_iterator() = default;
102
+ _CCCL_TEMPLATE(class _Iter2 = _Iter, class _Stride2 = _Stride)
103
+ _CCCL_REQUIRES(::cuda::std::default_initializable<_Iter2> _CCCL_AND ::cuda::std::default_initializable<_Stride2>)
104
+ _CCCL_API constexpr strided_iterator() noexcept(::cuda::std::is_nothrow_default_constructible_v<_Iter2>
105
+ && ::cuda::std::is_nothrow_default_constructible_v<_Stride2>)
106
+ : __store_()
107
+ {}
82
108
 
83
109
  //! @brief Constructs a @c strided_iterator from a base iterator
84
110
  //! @param __iter The base iterator
@@ -90,8 +116,7 @@ public:
90
116
  _CCCL_REQUIRES(::cuda::std::__integral_constant_like<_Stride2>)
91
117
  _CCCL_API constexpr explicit strided_iterator(_Iter __iter) noexcept(
92
118
  ::cuda::std::is_nothrow_move_constructible_v<_Iter> && ::cuda::std::is_nothrow_default_constructible_v<_Stride2>)
93
- : __iter_(::cuda::std::move(__iter))
94
- , __stride_()
119
+ : __store_(::cuda::std::move(__iter))
95
120
  {}
96
121
 
97
122
  //! @brief Constructs a @c strided_iterator from a base iterator and a stride
@@ -100,21 +125,20 @@ public:
100
125
  _CCCL_EXEC_CHECK_DISABLE
101
126
  _CCCL_API constexpr explicit strided_iterator(_Iter __iter, _Stride __stride) noexcept(
102
127
  ::cuda::std::is_nothrow_move_constructible_v<_Iter> && ::cuda::std::is_nothrow_move_constructible_v<_Stride>)
103
- : __iter_(::cuda::std::move(__iter))
104
- , __stride_(::cuda::std::move(__stride))
128
+ : __store_(::cuda::std::move(__iter), ::cuda::std::move(__stride))
105
129
  {}
106
130
 
107
131
  //! @brief Returns a const reference to the stored iterator
108
132
  [[nodiscard]] _CCCL_API constexpr const _Iter& base() const& noexcept
109
133
  {
110
- return __iter_;
134
+ return __iter();
111
135
  }
112
136
 
113
137
  //! @brief Extracts the stored iterator
114
138
  _CCCL_EXEC_CHECK_DISABLE
115
139
  [[nodiscard]] _CCCL_API constexpr _Iter base() && noexcept(::cuda::std::is_nothrow_move_constructible_v<_Iter>)
116
140
  {
117
- return ::cuda::std::move(__iter_);
141
+ return ::cuda::std::move(__iter());
118
142
  }
119
143
 
120
144
  static constexpr bool __noexcept_stride =
@@ -124,32 +148,33 @@ public:
124
148
  _CCCL_EXEC_CHECK_DISABLE
125
149
  [[nodiscard]] _CCCL_API constexpr difference_type stride() const noexcept(__noexcept_stride)
126
150
  {
127
- return static_cast<difference_type>(::cuda::std::__de_ice(__stride_));
151
+ return static_cast<difference_type>(::cuda::std::__de_ice(__stride()));
128
152
  }
129
153
 
130
154
  //! @brief Dereferences the stored base iterator
131
155
  _CCCL_EXEC_CHECK_DISABLE
132
- [[nodiscard]] _CCCL_API constexpr decltype(auto) operator*() noexcept(noexcept(*__iter_))
156
+ [[nodiscard]] _CCCL_API constexpr decltype(auto) operator*() noexcept(noexcept(*::cuda::std::declval<_Iter&>()))
133
157
  {
134
- return *__iter_;
158
+ return *__iter();
135
159
  }
136
160
 
137
161
  //! @brief Dereferences the stored base iterator
138
162
  _CCCL_EXEC_CHECK_DISABLE
139
163
  _CCCL_TEMPLATE(class _Iter2 = _Iter)
140
164
  _CCCL_REQUIRES(::cuda::std::__dereferenceable<const _Iter2>)
141
- [[nodiscard]] _CCCL_API constexpr decltype(auto) operator*() const noexcept(noexcept(*__iter_))
165
+ [[nodiscard]] _CCCL_API constexpr decltype(auto) operator*() const
166
+ noexcept(noexcept(*::cuda::std::declval<const _Iter2&>()))
142
167
  {
143
- return *__iter_;
168
+ return *__iter();
144
169
  }
145
170
 
146
171
  //! @brief Subscripts the stored base iterator with a given offset times the stride
147
172
  //! @param __n The offset
148
173
  _CCCL_EXEC_CHECK_DISABLE
149
174
  [[nodiscard]] _CCCL_API constexpr decltype(auto)
150
- operator[](difference_type __n) noexcept(__noexcept_stride && noexcept(__iter_[__n]))
175
+ operator[](difference_type __n) noexcept(__noexcept_stride && noexcept(::cuda::std::declval<_Iter&>()[__n]))
151
176
  {
152
- return __iter_[__n * stride()];
177
+ return __iter()[__n * stride()];
153
178
  }
154
179
 
155
180
  //! @brief Subscripts the stored base iterator with a given offset times the stride
@@ -158,47 +183,49 @@ public:
158
183
  _CCCL_TEMPLATE(class _Iter2 = _Iter)
159
184
  _CCCL_REQUIRES(::cuda::std::__dereferenceable<const _Iter2>)
160
185
  [[nodiscard]] _CCCL_API constexpr decltype(auto) operator[](difference_type __n) const
161
- noexcept(__noexcept_stride && noexcept(__iter_[__n]))
186
+ noexcept(__noexcept_stride && noexcept(::cuda::std::declval<const _Iter2&>()[__n]))
162
187
  {
163
- return __iter_[__n * stride()];
188
+ return __iter()[__n * stride()];
164
189
  }
165
190
 
166
191
  //! @brief Increments the stored base iterator by the stride
167
- // Note: we cannot use __iter_ += stride() in the noexcept clause because that breaks gcc < 9
192
+ // Note: we cannot use __iter() += stride() in the noexcept clause because that breaks gcc < 9
168
193
  _CCCL_EXEC_CHECK_DISABLE
169
- _CCCL_API constexpr strided_iterator& operator++() noexcept(__noexcept_stride && noexcept(__iter_ += 1))
194
+ _CCCL_API constexpr strided_iterator&
195
+ operator++() noexcept(__noexcept_stride && noexcept(::cuda::std::declval<_Iter&>() += 1))
170
196
  {
171
- __iter_ += stride();
197
+ __iter() += stride();
172
198
  return *this;
173
199
  }
174
200
 
175
201
  //! @brief Increments the stored base iterator by the stride
176
202
  _CCCL_EXEC_CHECK_DISABLE
177
203
  _CCCL_API constexpr auto operator++(int) noexcept(
178
- noexcept(__noexcept_stride && noexcept(__iter_ += 1))
204
+ noexcept(__noexcept_stride && noexcept(::cuda::std::declval<_Iter&>() += 1))
179
205
  && ::cuda::std::is_nothrow_copy_constructible_v<_Iter> && ::cuda::std::is_nothrow_copy_constructible_v<_Stride>)
180
206
  {
181
207
  auto __tmp = *this;
182
- __iter_ += stride();
208
+ __iter() += stride();
183
209
  return __tmp;
184
210
  }
185
211
 
186
212
  //! @brief Decrements the stored base iterator by the stride
187
213
  _CCCL_EXEC_CHECK_DISABLE
188
- _CCCL_API constexpr strided_iterator& operator--() noexcept(__noexcept_stride && noexcept(__iter_ -= 1))
214
+ _CCCL_API constexpr strided_iterator&
215
+ operator--() noexcept(__noexcept_stride && noexcept(::cuda::std::declval<_Iter&>() -= 1))
189
216
  {
190
- __iter_ -= stride();
217
+ __iter() -= stride();
191
218
  return *this;
192
219
  }
193
220
 
194
221
  //! @brief Decrements the stored base iterator by the stride
195
222
  _CCCL_EXEC_CHECK_DISABLE
196
223
  _CCCL_API constexpr strided_iterator operator--(int) noexcept(
197
- noexcept(__noexcept_stride && noexcept(__iter_ -= 1))
224
+ noexcept(__noexcept_stride && noexcept(::cuda::std::declval<_Iter&>() -= 1))
198
225
  && ::cuda::std::is_nothrow_copy_constructible_v<_Iter> && ::cuda::std::is_nothrow_copy_constructible_v<_Stride>)
199
226
  {
200
227
  auto __tmp = *this;
201
- __iter_ -= stride();
228
+ __iter() -= stride();
202
229
  return __tmp;
203
230
  }
204
231
 
@@ -207,9 +234,9 @@ public:
207
234
  //! @note Increments the base iterator by @c __n times the stride
208
235
  _CCCL_EXEC_CHECK_DISABLE
209
236
  _CCCL_API constexpr strided_iterator&
210
- operator+=(difference_type __n) noexcept(__noexcept_stride && noexcept(__iter_ += 1))
237
+ operator+=(difference_type __n) noexcept(__noexcept_stride && noexcept(::cuda::std::declval<_Iter&>() += 1))
211
238
  {
212
- __iter_ += stride() * __n;
239
+ __iter() += stride() * __n;
213
240
  return *this;
214
241
  }
215
242
 
@@ -218,10 +245,10 @@ public:
218
245
  //! @param __n The number of steps to increment
219
246
  _CCCL_EXEC_CHECK_DISABLE
220
247
  [[nodiscard]] _CCCL_API friend constexpr strided_iterator
221
- operator+(strided_iterator __iter, difference_type __n) noexcept(noexcept(__iter_ += __n))
248
+ operator+(const strided_iterator& __iter, difference_type __n) noexcept(
249
+ ::cuda::std::is_nothrow_copy_constructible_v<_Iter> && noexcept(::cuda::std::declval<const _Iter&>() + __n))
222
250
  {
223
- __iter += __n;
224
- return __iter;
251
+ return strided_iterator{__iter.__iter() + __iter.stride() * __n, __iter.__stride()};
225
252
  }
226
253
 
227
254
  //! @brief Returns a copy of a @c strided_iterator incremented by a given number of steps
@@ -229,9 +256,10 @@ public:
229
256
  //! @param __iter The @c strided_iterator to advance
230
257
  _CCCL_EXEC_CHECK_DISABLE
231
258
  [[nodiscard]] _CCCL_API friend constexpr strided_iterator
232
- operator+(difference_type __n, strided_iterator __iter) noexcept(noexcept(__iter_ + __n))
259
+ operator+(difference_type __n, const strided_iterator& __iter) noexcept(
260
+ ::cuda::std::is_nothrow_copy_constructible_v<_Iter> && noexcept(::cuda::std::declval<const _Iter&>() + __n))
233
261
  {
234
- return __iter + __n;
262
+ return strided_iterator{__iter.__iter() + __iter.stride() * __n, __iter.__stride()};
235
263
  }
236
264
 
237
265
  //! @brief Decrements a @c strided_iterator by a given number of steps
@@ -239,9 +267,9 @@ public:
239
267
  //! @note Decrements the base iterator by @c __n times the stride
240
268
  _CCCL_EXEC_CHECK_DISABLE
241
269
  _CCCL_API constexpr strided_iterator&
242
- operator-=(difference_type __n) noexcept(__noexcept_stride && noexcept(__iter_ -= 1))
270
+ operator-=(difference_type __n) noexcept(__noexcept_stride && noexcept(::cuda::std::declval<_Iter&>() -= 1))
243
271
  {
244
- __iter_ -= stride() * __n;
272
+ __iter() -= stride() * __n;
245
273
  return *this;
246
274
  }
247
275
 
@@ -250,21 +278,25 @@ public:
250
278
  //! @param __iter The @c strided_iterator to decrement
251
279
  _CCCL_EXEC_CHECK_DISABLE
252
280
  [[nodiscard]] _CCCL_API friend constexpr strided_iterator
253
- operator-(strided_iterator __iter, difference_type __n) noexcept(noexcept(__iter_ -= __n))
281
+ operator-(const strided_iterator& __iter, difference_type __n) noexcept(
282
+ ::cuda::std::is_nothrow_copy_constructible_v<_Iter> && noexcept(::cuda::std::declval<const _Iter&>() - __n))
254
283
  {
255
- __iter -= __n;
256
- return __iter;
284
+ return strided_iterator{__iter.__iter() - __iter.stride() * __n, __iter.__stride()};
257
285
  }
258
286
 
287
+ template <class _Iter2, class _OtherIter>
288
+ static constexpr bool __noexcept_difference =
289
+ noexcept(::cuda::std::declval<const _Iter2&>() - ::cuda::std::declval<const _OtherIter&>());
290
+
259
291
  //! @brief Returns the distance between two @c strided_iterator's in units of the stride
260
292
  _CCCL_EXEC_CHECK_DISABLE
261
293
  _CCCL_TEMPLATE(class _OtherIter, class _OtherStride)
262
294
  _CCCL_REQUIRES(::cuda::std::sized_sentinel_for<_OtherIter, _Iter>)
263
295
  [[nodiscard]] _CCCL_API friend constexpr difference_type
264
- operator-(const strided_iterator& __x, const strided_iterator<_OtherIter, _OtherStride>& __y) noexcept(
265
- noexcept(::cuda::std::declval<_Iter>() - ::cuda::std::declval<_OtherIter>()))
296
+ operator-(const strided_iterator& __x, const strided_iterator<_OtherIter, _OtherStride>& __y) //
297
+ noexcept(__noexcept_difference<_Iter, _OtherIter>)
266
298
  {
267
- const difference_type __diff = __x.__iter_ - __y.base();
299
+ const difference_type __diff = __x.__iter() - __y.base();
268
300
  _CCCL_ASSERT(__x.stride() == __y.stride(), "Taking the difference of two strided_iterators with different stride");
269
301
  _CCCL_ASSERT(__diff % __x.stride() == 0, "Underlying iterator difference must be divisible by the stride");
270
302
  return __diff / __x.stride();
@@ -278,7 +310,7 @@ public:
278
310
  operator==(const strided_iterator& __x, const strided_iterator<_OtherIter, _OtherStride>& __y) noexcept(
279
311
  noexcept(::cuda::std::declval<const _Iter&>() == ::cuda::std::declval<const _OtherIter&>()))
280
312
  {
281
- return __x.__iter_ == __y.base();
313
+ return __x.__iter() == __y.base();
282
314
  }
283
315
 
284
316
  #if _CCCL_STD_VER <= 2017
@@ -290,7 +322,7 @@ public:
290
322
  operator!=(const strided_iterator& __x, const strided_iterator<_OtherIter, _OtherStride>& __y) noexcept(
291
323
  noexcept(::cuda::std::declval<const _Iter&>() == ::cuda::std::declval<const _OtherIter&>()))
292
324
  {
293
- return __x.__iter_ != __y.base();
325
+ return __x.__iter() != __y.base();
294
326
  }
295
327
  #endif // _CCCL_STD_VER <= 2017
296
328
 
@@ -304,7 +336,7 @@ public:
304
336
  operator<=>(const strided_iterator& __x, const strided_iterator<_OtherIter, _OtherStride>& __y) noexcept(
305
337
  noexcept(::cuda::std::declval<const _Iter&>() <=> ::cuda::std::declval<const _OtherIter&>()))
306
338
  {
307
- return __x.__iter_ <=> __y.base();
339
+ return __x.__iter() <=> __y.base();
308
340
  }
309
341
  #else // ^^^ _LIBCUDACXX_HAS_SPACESHIP_OPERATOR() ^^^ / vvv !_LIBCUDACXX_HAS_SPACESHIP_OPERATOR() vvv
310
342
 
@@ -316,7 +348,7 @@ public:
316
348
  operator<(const strided_iterator& __x, const strided_iterator<_OtherIter, _OtherStride>& __y) noexcept(
317
349
  noexcept(::cuda::std::declval<const _Iter&>() < ::cuda::std::declval<const _OtherIter&>()))
318
350
  {
319
- return __x.__iter_ < __y.base();
351
+ return __x.__iter() < __y.base();
320
352
  }
321
353
 
322
354
  //! @brief Compares two @c strided_iterator's for greater than by comparing the stored iterators
@@ -24,12 +24,14 @@
24
24
  #include <cuda/std/__functional/invoke.h>
25
25
  #include <cuda/std/__iterator/concepts.h>
26
26
  #include <cuda/std/__iterator/iterator_traits.h>
27
+ #include <cuda/std/__ranges/compressed_movable_box.h>
27
28
  #include <cuda/std/__ranges/movable_box.h>
28
29
  #include <cuda/std/__type_traits/conditional.h>
29
30
  #include <cuda/std/__type_traits/is_nothrow_copy_constructible.h>
30
31
  #include <cuda/std/__type_traits/is_nothrow_default_constructible.h>
31
32
  #include <cuda/std/__type_traits/is_nothrow_move_constructible.h>
32
33
  #include <cuda/std/__type_traits/is_same.h>
34
+ #include <cuda/std/__type_traits/is_signed.h>
33
35
  #include <cuda/std/__type_traits/remove_cvref.h>
34
36
  #include <cuda/std/__utility/forward.h>
35
37
  #include <cuda/std/__utility/move.h>
@@ -113,8 +115,30 @@ template <class _Fn, class _Index>
113
115
  class tabulate_output_iterator
114
116
  {
115
117
  private:
116
- ::cuda::std::ranges::__movable_box<_Fn> __func_;
117
- _Index __index_ = 0;
118
+ // Not a base because then the friend operators would be ambiguous
119
+ ::cuda::std::__compressed_movable_box<_Index, _Fn> __store_;
120
+
121
+ [[nodiscard]] _CCCL_API constexpr _Index& __index() noexcept
122
+ {
123
+ return __store_.template __get<0>();
124
+ }
125
+
126
+ [[nodiscard]] _CCCL_API constexpr const _Index& __index() const noexcept
127
+ {
128
+ return __store_.template __get<0>();
129
+ }
130
+
131
+ [[nodiscard]] _CCCL_API constexpr _Fn& __func() noexcept
132
+ {
133
+ return __store_.template __get<1>();
134
+ }
135
+
136
+ [[nodiscard]] _CCCL_API constexpr const _Fn& __func() const noexcept
137
+ {
138
+ return __store_.template __get<1>();
139
+ }
140
+
141
+ static_assert(::cuda::std::is_signed_v<_Index>, "tabulate_output_iterator: _Index must be a signed integer");
118
142
 
119
143
  public:
120
144
  using iterator_concept = ::cuda::std::random_access_iterator_tag;
@@ -124,45 +148,39 @@ public:
124
148
  using pointer = void;
125
149
  using reference = void;
126
150
 
127
- #if _CCCL_HAS_CONCEPTS()
128
- _CCCL_EXEC_CHECK_DISABLE
129
- _CCCL_HIDE_FROM_ABI tabulate_output_iterator()
130
- requires ::cuda::std::default_initializable<_Fn>
131
- = default;
132
- #else // ^^^ _CCCL_HAS_CONCEPTS() ^^^ / vvv !_CCCL_HAS_CONCEPTS() vvv
133
151
  _CCCL_EXEC_CHECK_DISABLE
134
152
  _CCCL_TEMPLATE(class _Fn2 = _Fn)
135
153
  _CCCL_REQUIRES(::cuda::std::default_initializable<_Fn2>)
136
- _CCCL_API constexpr tabulate_output_iterator() noexcept(::cuda::std::is_nothrow_default_constructible_v<_Fn2>) {}
137
- #endif // ^^^ !_CCCL_HAS_CONCEPTS() ^^^
154
+ _CCCL_API constexpr tabulate_output_iterator() noexcept(::cuda::std::is_nothrow_default_constructible_v<_Fn2>)
155
+ : __store_()
156
+ {}
138
157
 
139
158
  //! @brief Constructs a @c tabulate_output_iterator with a given functor and an optional index
140
159
  //! @param __func the output function
141
160
  //! @param __index the position in the output sequence
142
161
  _CCCL_API constexpr tabulate_output_iterator(_Fn __func, _Index __index = 0) noexcept(
143
162
  ::cuda::std::is_nothrow_move_constructible_v<_Fn>)
144
- : __func_(::cuda::std::in_place, ::cuda::std::move(__func))
145
- , __index_(__index)
163
+ : __store_(__index, ::cuda::std::move(__func))
146
164
  {}
147
165
 
148
166
  //! @brief Returns the stored index
149
167
  [[nodiscard]] _CCCL_API constexpr difference_type index() const noexcept
150
168
  {
151
- return __index_;
169
+ return __index();
152
170
  }
153
171
 
154
172
  //! @brief Dereferences the @c tabulate_output_iterator
155
173
  //! @returns A proxy that applies the stored function and index on assignment
156
174
  [[nodiscard]] _CCCL_API constexpr auto operator*() const noexcept
157
175
  {
158
- return __tabulate_proxy<_Fn, _Index>{const_cast<_Fn&>(*__func_), __index_};
176
+ return __tabulate_proxy<_Fn, _Index>{const_cast<_Fn&>(__func()), __index()};
159
177
  }
160
178
 
161
179
  //! @brief Dereferences the @c tabulate_output_iterator
162
180
  //! @returns A proxy that applies the stored function and index on assignment
163
181
  [[nodiscard]] _CCCL_API constexpr auto operator*() noexcept
164
182
  {
165
- return __tabulate_proxy<_Fn, _Index>{*__func_, __index_};
183
+ return __tabulate_proxy<_Fn, _Index>{__func(), __index()};
166
184
  }
167
185
 
168
186
  //! @brief Subscripts the @c tabulate_output_iterator with a given offset
@@ -170,7 +188,7 @@ public:
170
188
  //! @returns A proxy that applies the stored function and index on assignment
171
189
  [[nodiscard]] _CCCL_API constexpr auto operator[](difference_type __n) const noexcept
172
190
  {
173
- return __tabulate_proxy<_Fn, _Index>{const_cast<_Fn&>(*__func_), __index_ + __n};
191
+ return __tabulate_proxy<_Fn, _Index>{const_cast<_Fn&>(__func()), __index() + __n};
174
192
  }
175
193
 
176
194
  //! @brief Subscripts the @c tabulate_output_iterator with a given offset
@@ -178,13 +196,13 @@ public:
178
196
  //! @returns A proxy that applies the stored function and index on assignment
179
197
  [[nodiscard]] _CCCL_API constexpr auto operator[](difference_type __n) noexcept
180
198
  {
181
- return __tabulate_proxy<_Fn, _Index>{*__func_, __index_ + __n};
199
+ return __tabulate_proxy<_Fn, _Index>{__func(), __index() + __n};
182
200
  }
183
201
 
184
202
  //! @brief Increments the @c tabulate_output_iterator by incrementing the stored index
185
203
  _CCCL_API constexpr tabulate_output_iterator& operator++() noexcept
186
204
  {
187
- ++__index_;
205
+ ++__index();
188
206
  return *this;
189
207
  }
190
208
 
@@ -193,14 +211,14 @@ public:
193
211
  operator++(int) noexcept(::cuda::std::is_nothrow_copy_constructible_v<_Fn>)
194
212
  {
195
213
  tabulate_output_iterator __tmp = *this;
196
- ++__index_;
214
+ ++__index();
197
215
  return __tmp;
198
216
  }
199
217
 
200
218
  //! @brief Decrements the @c tabulate_output_iterator by decrementing the stored index
201
219
  _CCCL_API constexpr tabulate_output_iterator& operator--() noexcept
202
220
  {
203
- --__index_;
221
+ --__index();
204
222
  return *this;
205
223
  }
206
224
 
@@ -209,56 +227,61 @@ public:
209
227
  operator--(int) noexcept(::cuda::std::is_nothrow_copy_constructible_v<_Fn>)
210
228
  {
211
229
  tabulate_output_iterator __tmp = *this;
212
- --__index_;
230
+ --__index();
213
231
  return __tmp;
214
232
  }
215
233
 
216
234
  //! @brief Returns a copy of this @c tabulate_output_iterator advanced by a given number of elements
217
235
  //! @param __n The number of elements to advance
218
- [[nodiscard]] _CCCL_API constexpr tabulate_output_iterator operator+(difference_type __n) const
236
+ template <int = 0> // Template because the compiler will complain about a non-literal return type if _Fn is not a literal type
237
+ [[nodiscard]] _CCCL_API friend constexpr tabulate_output_iterator
238
+ operator+(const tabulate_output_iterator& __iter, difference_type __n) //
219
239
  noexcept(::cuda::std::is_nothrow_copy_constructible_v<_Fn>)
220
240
  {
221
- return tabulate_output_iterator{*__func_, __index_ + __n};
241
+ return tabulate_output_iterator{__iter.__func(), __iter.__index() + __n};
222
242
  }
223
243
 
224
244
  //! @brief Returns a copy of a @c tabulate_output_iterator advanced by a given number of elements
225
245
  //! @param __n The number of elements to advance
226
246
  //! @param __iter The original @c tabulate_output_iterator
247
+ template <int = 0> // Template because the compiler will complain about a non-literal return type if _Fn is not a literal type
227
248
  [[nodiscard]] _CCCL_API friend constexpr tabulate_output_iterator
228
249
  operator+(difference_type __n, const tabulate_output_iterator& __iter) //
229
250
  noexcept(::cuda::std::is_nothrow_copy_constructible_v<_Fn>)
230
251
  {
231
- return __iter + __n;
252
+ return tabulate_output_iterator{__iter.__func(), __iter.__index() + __n};
232
253
  }
233
254
 
234
255
  //! @brief Advances the @c tabulate_output_iterator by a given number of elements
235
256
  //! @param __n The number of elements to advance
236
257
  _CCCL_API constexpr tabulate_output_iterator& operator+=(difference_type __n) noexcept
237
258
  {
238
- __index_ += __n;
259
+ __index() += __n;
239
260
  return *this;
240
261
  }
241
262
 
242
263
  //! @brief Returns a copy of this @c tabulate_output_iterator decremented by a given number of elements
243
264
  //! @param __n The number of elements to decrement
244
- [[nodiscard]] _CCCL_API constexpr tabulate_output_iterator operator-(difference_type __n) const
265
+ template <int = 0> // Template because the compiler will complain about a non-literal return type if _Fn is not a literal type
266
+ [[nodiscard]] _CCCL_API friend constexpr tabulate_output_iterator
267
+ operator-(const tabulate_output_iterator& __iter, difference_type __n) //
245
268
  noexcept(::cuda::std::is_nothrow_copy_constructible_v<_Fn>)
246
269
  {
247
- return tabulate_output_iterator{*__func_, __index_ - __n};
270
+ return tabulate_output_iterator{__iter.__func(), __iter.__index() - __n};
248
271
  }
249
272
 
250
273
  //! @brief Returns the distance between two @c tabulate_output_iterator 's
251
274
  [[nodiscard]] _CCCL_API friend constexpr difference_type
252
275
  operator-(const tabulate_output_iterator& __lhs, const tabulate_output_iterator& __rhs) noexcept
253
276
  {
254
- return __rhs.__index_ - __lhs.__index_;
277
+ return __rhs.__index() - __lhs.__index();
255
278
  }
256
279
 
257
280
  //! @brief Decrements the @c tabulate_output_iterator by a given number of elements
258
281
  //! @param __n The number of elements to decrement
259
282
  _CCCL_API constexpr tabulate_output_iterator& operator-=(difference_type __n) noexcept
260
283
  {
261
- __index_ -= __n;
284
+ __index() -= __n;
262
285
  return *this;
263
286
  }
264
287
 
@@ -266,7 +289,7 @@ public:
266
289
  [[nodiscard]] _CCCL_API friend constexpr bool
267
290
  operator==(const tabulate_output_iterator& __lhs, const tabulate_output_iterator& __rhs) noexcept
268
291
  {
269
- return __lhs.__index_ == __rhs.__index_;
292
+ return __lhs.__index() == __rhs.__index();
270
293
  }
271
294
 
272
295
  #if _CCCL_STD_VER <= 2017
@@ -274,7 +297,7 @@ public:
274
297
  [[nodiscard]] _CCCL_API friend constexpr bool
275
298
  operator!=(const tabulate_output_iterator& __lhs, const tabulate_output_iterator& __rhs) noexcept
276
299
  {
277
- return __lhs.__index_ != __rhs.__index_;
300
+ return __lhs.__index() != __rhs.__index();
278
301
  }
279
302
  #endif // _CCCL_STD_VER <= 2017
280
303
 
@@ -283,35 +306,35 @@ public:
283
306
  [[nodiscard]] _CCCL_API friend constexpr strong_ordering
284
307
  operator<=>(const tabulate_output_iterator& __lhs, const tabulate_output_iterator& __rhs) noexcept
285
308
  {
286
- return __lhs.__index_ <=> __rhs.__index_;
309
+ return __lhs.__index() <=> __rhs.__index();
287
310
  }
288
311
  #else // ^^^ _LIBCUDACXX_HAS_SPACESHIP_OPERATOR() ^^^ / vvv !_LIBCUDACXX_HAS_SPACESHIP_OPERATOR() vvv
289
312
  //! @brief Compares two @c tabulate_output_iterator for less than by comparing their indices
290
313
  [[nodiscard]] _CCCL_API friend constexpr bool
291
314
  operator<(const tabulate_output_iterator& __lhs, const tabulate_output_iterator& __rhs) noexcept
292
315
  {
293
- return __lhs.__index_ < __rhs.__index_;
316
+ return __lhs.__index() < __rhs.__index();
294
317
  }
295
318
 
296
319
  //! @brief Compares two @c tabulate_output_iterator for less equal by comparing their indices
297
320
  [[nodiscard]] _CCCL_API friend constexpr bool
298
321
  operator<=(const tabulate_output_iterator& __lhs, const tabulate_output_iterator& __rhs) noexcept
299
322
  {
300
- return __lhs.__index_ <= __rhs.__index_;
323
+ return __lhs.__index() <= __rhs.__index();
301
324
  }
302
325
 
303
326
  //! @brief Compares two @c tabulate_output_iterator for greater than by comparing their indices
304
327
  [[nodiscard]] _CCCL_API friend constexpr bool
305
328
  operator>(const tabulate_output_iterator& __lhs, const tabulate_output_iterator& __rhs) noexcept
306
329
  {
307
- return __lhs.__index_ > __rhs.__index_;
330
+ return __lhs.__index() > __rhs.__index();
308
331
  }
309
332
 
310
333
  //! @brief Compares two @c tabulate_output_iterator for greater equal by comparing their indices
311
334
  [[nodiscard]] _CCCL_API friend constexpr bool
312
335
  operator>=(const tabulate_output_iterator& __lhs, const tabulate_output_iterator& __rhs) noexcept
313
336
  {
314
- return __lhs.__index_ >= __rhs.__index_;
337
+ return __lhs.__index() >= __rhs.__index();
315
338
  }
316
339
  #endif // !_LIBCUDACXX_HAS_SPACESHIP_OPERATOR()
317
340
  };