cuda-cccl 0.1.3.1.0.dev1486__cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cuda-cccl might be problematic. Click here for more details.
- cuda/cccl/__init__.py +14 -0
- cuda/cccl/cooperative/__init__.py +3 -0
- cuda/cccl/cooperative/experimental/__init__.py +8 -0
- cuda/cccl/cooperative/experimental/_caching.py +48 -0
- cuda/cccl/cooperative/experimental/_common.py +276 -0
- cuda/cccl/cooperative/experimental/_nvrtc.py +91 -0
- cuda/cccl/cooperative/experimental/_scan_op.py +181 -0
- cuda/cccl/cooperative/experimental/_types.py +953 -0
- cuda/cccl/cooperative/experimental/_typing.py +107 -0
- cuda/cccl/cooperative/experimental/block/__init__.py +33 -0
- cuda/cccl/cooperative/experimental/block/_block_load_store.py +215 -0
- cuda/cccl/cooperative/experimental/block/_block_merge_sort.py +125 -0
- cuda/cccl/cooperative/experimental/block/_block_radix_sort.py +214 -0
- cuda/cccl/cooperative/experimental/block/_block_reduce.py +294 -0
- cuda/cccl/cooperative/experimental/block/_block_scan.py +983 -0
- cuda/cccl/cooperative/experimental/warp/__init__.py +9 -0
- cuda/cccl/cooperative/experimental/warp/_warp_merge_sort.py +98 -0
- cuda/cccl/cooperative/experimental/warp/_warp_reduce.py +153 -0
- cuda/cccl/cooperative/experimental/warp/_warp_scan.py +78 -0
- cuda/cccl/headers/__init__.py +7 -0
- cuda/cccl/headers/include/__init__.py +1 -0
- cuda/cccl/headers/include/cub/agent/agent_adjacent_difference.cuh +261 -0
- cuda/cccl/headers/include/cub/agent/agent_batch_memcpy.cuh +1181 -0
- cuda/cccl/headers/include/cub/agent/agent_for.cuh +84 -0
- cuda/cccl/headers/include/cub/agent/agent_histogram.cuh +919 -0
- cuda/cccl/headers/include/cub/agent/agent_merge.cuh +227 -0
- cuda/cccl/headers/include/cub/agent/agent_merge_sort.cuh +752 -0
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_downsweep.cuh +766 -0
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_histogram.cuh +286 -0
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_onesweep.cuh +704 -0
- cuda/cccl/headers/include/cub/agent/agent_radix_sort_upsweep.cuh +557 -0
- cuda/cccl/headers/include/cub/agent/agent_reduce.cuh +678 -0
- cuda/cccl/headers/include/cub/agent/agent_reduce_by_key.cuh +804 -0
- cuda/cccl/headers/include/cub/agent/agent_rle.cuh +997 -0
- cuda/cccl/headers/include/cub/agent/agent_scan.cuh +561 -0
- cuda/cccl/headers/include/cub/agent/agent_scan_by_key.cuh +473 -0
- cuda/cccl/headers/include/cub/agent/agent_segmented_radix_sort.cuh +292 -0
- cuda/cccl/headers/include/cub/agent/agent_select_if.cuh +1032 -0
- cuda/cccl/headers/include/cub/agent/agent_sub_warp_merge_sort.cuh +342 -0
- cuda/cccl/headers/include/cub/agent/agent_three_way_partition.cuh +592 -0
- cuda/cccl/headers/include/cub/agent/agent_unique_by_key.cuh +614 -0
- cuda/cccl/headers/include/cub/agent/single_pass_scan_operators.cuh +1346 -0
- cuda/cccl/headers/include/cub/block/block_adjacent_difference.cuh +965 -0
- cuda/cccl/headers/include/cub/block/block_discontinuity.cuh +1217 -0
- cuda/cccl/headers/include/cub/block/block_exchange.cuh +1306 -0
- cuda/cccl/headers/include/cub/block/block_histogram.cuh +420 -0
- cuda/cccl/headers/include/cub/block/block_load.cuh +1259 -0
- cuda/cccl/headers/include/cub/block/block_merge_sort.cuh +787 -0
- cuda/cccl/headers/include/cub/block/block_radix_rank.cuh +1218 -0
- cuda/cccl/headers/include/cub/block/block_radix_sort.cuh +2193 -0
- cuda/cccl/headers/include/cub/block/block_raking_layout.cuh +150 -0
- cuda/cccl/headers/include/cub/block/block_reduce.cuh +629 -0
- cuda/cccl/headers/include/cub/block/block_run_length_decode.cuh +437 -0
- cuda/cccl/headers/include/cub/block/block_scan.cuh +2600 -0
- cuda/cccl/headers/include/cub/block/block_shuffle.cuh +346 -0
- cuda/cccl/headers/include/cub/block/block_store.cuh +1246 -0
- cuda/cccl/headers/include/cub/block/radix_rank_sort_operations.cuh +620 -0
- cuda/cccl/headers/include/cub/block/specializations/block_histogram_atomic.cuh +86 -0
- cuda/cccl/headers/include/cub/block/specializations/block_histogram_sort.cuh +240 -0
- cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking.cuh +252 -0
- cuda/cccl/headers/include/cub/block/specializations/block_reduce_raking_commutative_only.cuh +238 -0
- cuda/cccl/headers/include/cub/block/specializations/block_reduce_warp_reductions.cuh +259 -0
- cuda/cccl/headers/include/cub/block/specializations/block_scan_raking.cuh +790 -0
- cuda/cccl/headers/include/cub/block/specializations/block_scan_warp_scans.cuh +538 -0
- cuda/cccl/headers/include/cub/config.cuh +60 -0
- cuda/cccl/headers/include/cub/cub.cuh +112 -0
- cuda/cccl/headers/include/cub/detail/array_utils.cuh +77 -0
- cuda/cccl/headers/include/cub/detail/choose_offset.cuh +155 -0
- cuda/cccl/headers/include/cub/detail/detect_cuda_runtime.cuh +93 -0
- cuda/cccl/headers/include/cub/detail/device_double_buffer.cuh +96 -0
- cuda/cccl/headers/include/cub/detail/fast_modulo_division.cuh +246 -0
- cuda/cccl/headers/include/cub/detail/launcher/cuda_driver.cuh +120 -0
- cuda/cccl/headers/include/cub/detail/launcher/cuda_runtime.cuh +74 -0
- cuda/cccl/headers/include/cub/detail/mdspan_utils.cuh +118 -0
- cuda/cccl/headers/include/cub/detail/ptx-json/README.md +71 -0
- cuda/cccl/headers/include/cub/detail/ptx-json/array.h +68 -0
- cuda/cccl/headers/include/cub/detail/ptx-json/json.h +61 -0
- cuda/cccl/headers/include/cub/detail/ptx-json/object.h +100 -0
- cuda/cccl/headers/include/cub/detail/ptx-json/string.h +71 -0
- cuda/cccl/headers/include/cub/detail/ptx-json/value.h +93 -0
- cuda/cccl/headers/include/cub/detail/ptx-json-parser.h +63 -0
- cuda/cccl/headers/include/cub/detail/rfa.cuh +724 -0
- cuda/cccl/headers/include/cub/detail/strong_load.cuh +189 -0
- cuda/cccl/headers/include/cub/detail/strong_store.cuh +220 -0
- cuda/cccl/headers/include/cub/detail/temporary_storage.cuh +355 -0
- cuda/cccl/headers/include/cub/detail/type_traits.cuh +206 -0
- cuda/cccl/headers/include/cub/detail/uninitialized_copy.cuh +72 -0
- cuda/cccl/headers/include/cub/detail/unsafe_bitcast.cuh +56 -0
- cuda/cccl/headers/include/cub/device/device_adjacent_difference.cuh +596 -0
- cuda/cccl/headers/include/cub/device/device_copy.cuh +187 -0
- cuda/cccl/headers/include/cub/device/device_for.cuh +994 -0
- cuda/cccl/headers/include/cub/device/device_histogram.cuh +1507 -0
- cuda/cccl/headers/include/cub/device/device_memcpy.cuh +195 -0
- cuda/cccl/headers/include/cub/device/device_merge.cuh +202 -0
- cuda/cccl/headers/include/cub/device/device_merge_sort.cuh +979 -0
- cuda/cccl/headers/include/cub/device/device_partition.cuh +664 -0
- cuda/cccl/headers/include/cub/device/device_radix_sort.cuh +3431 -0
- cuda/cccl/headers/include/cub/device/device_reduce.cuh +1387 -0
- cuda/cccl/headers/include/cub/device/device_run_length_encode.cuh +368 -0
- cuda/cccl/headers/include/cub/device/device_scan.cuh +1901 -0
- cuda/cccl/headers/include/cub/device/device_segmented_radix_sort.cuh +1496 -0
- cuda/cccl/headers/include/cub/device/device_segmented_reduce.cuh +1512 -0
- cuda/cccl/headers/include/cub/device/device_segmented_sort.cuh +2811 -0
- cuda/cccl/headers/include/cub/device/device_select.cuh +1224 -0
- cuda/cccl/headers/include/cub/device/device_transform.cuh +313 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_adjacent_difference.cuh +314 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_advance_iterators.cuh +109 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_batch_memcpy.cuh +718 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_common.cuh +45 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_for.cuh +197 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_histogram.cuh +1051 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge.cuh +305 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_merge_sort.cuh +473 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_radix_sort.cuh +1748 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce.cuh +1316 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_by_key.cuh +625 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_reduce_deterministic.cuh +502 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_rle.cuh +548 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan.cuh +497 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_scan_by_key.cuh +598 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_segmented_sort.cuh +1374 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_select_if.cuh +838 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce.cuh +341 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_streaming_reduce_by_key.cuh +439 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_three_way_partition.cuh +552 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_transform.cuh +397 -0
- cuda/cccl/headers/include/cub/device/dispatch/dispatch_unique_by_key.cuh +543 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/for_each.cuh +218 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/histogram.cuh +505 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/merge_sort.cuh +338 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/radix_sort.cuh +799 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/reduce.cuh +523 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/scan.cuh +194 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/segmented_reduce.cuh +330 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/transform.cuh +437 -0
- cuda/cccl/headers/include/cub/device/dispatch/kernels/unique_by_key.cuh +176 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +70 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh +121 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_for.cuh +63 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_histogram.cuh +278 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge.cuh +91 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_merge_sort.cuh +118 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_radix_sort.cuh +1068 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce.cuh +397 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh +945 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +675 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan.cuh +555 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +1013 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +249 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_select_if.cuh +1587 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +407 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_transform.cuh +283 -0
- cuda/cccl/headers/include/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +874 -0
- cuda/cccl/headers/include/cub/grid/grid_even_share.cuh +215 -0
- cuda/cccl/headers/include/cub/grid/grid_mapping.cuh +106 -0
- cuda/cccl/headers/include/cub/grid/grid_queue.cuh +202 -0
- cuda/cccl/headers/include/cub/iterator/arg_index_input_iterator.cuh +256 -0
- cuda/cccl/headers/include/cub/iterator/cache_modified_input_iterator.cuh +238 -0
- cuda/cccl/headers/include/cub/iterator/cache_modified_output_iterator.cuh +252 -0
- cuda/cccl/headers/include/cub/iterator/tex_obj_input_iterator.cuh +322 -0
- cuda/cccl/headers/include/cub/thread/thread_load.cuh +347 -0
- cuda/cccl/headers/include/cub/thread/thread_operators.cuh +629 -0
- cuda/cccl/headers/include/cub/thread/thread_reduce.cuh +504 -0
- cuda/cccl/headers/include/cub/thread/thread_scan.cuh +340 -0
- cuda/cccl/headers/include/cub/thread/thread_search.cuh +198 -0
- cuda/cccl/headers/include/cub/thread/thread_simd.cuh +406 -0
- cuda/cccl/headers/include/cub/thread/thread_sort.cuh +101 -0
- cuda/cccl/headers/include/cub/thread/thread_store.cuh +364 -0
- cuda/cccl/headers/include/cub/util_allocator.cuh +921 -0
- cuda/cccl/headers/include/cub/util_arch.cuh +163 -0
- cuda/cccl/headers/include/cub/util_cpp_dialect.cuh +95 -0
- cuda/cccl/headers/include/cub/util_debug.cuh +207 -0
- cuda/cccl/headers/include/cub/util_device.cuh +779 -0
- cuda/cccl/headers/include/cub/util_macro.cuh +91 -0
- cuda/cccl/headers/include/cub/util_math.cuh +115 -0
- cuda/cccl/headers/include/cub/util_namespace.cuh +176 -0
- cuda/cccl/headers/include/cub/util_policy_wrapper_t.cuh +55 -0
- cuda/cccl/headers/include/cub/util_ptx.cuh +513 -0
- cuda/cccl/headers/include/cub/util_temporary_storage.cuh +122 -0
- cuda/cccl/headers/include/cub/util_type.cuh +1111 -0
- cuda/cccl/headers/include/cub/util_vsmem.cuh +251 -0
- cuda/cccl/headers/include/cub/version.cuh +89 -0
- cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_shfl.cuh +329 -0
- cuda/cccl/headers/include/cub/warp/specializations/warp_exchange_smem.cuh +177 -0
- cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_shfl.cuh +729 -0
- cuda/cccl/headers/include/cub/warp/specializations/warp_reduce_smem.cuh +405 -0
- cuda/cccl/headers/include/cub/warp/specializations/warp_scan_shfl.cuh +688 -0
- cuda/cccl/headers/include/cub/warp/specializations/warp_scan_smem.cuh +437 -0
- cuda/cccl/headers/include/cub/warp/warp_exchange.cuh +405 -0
- cuda/cccl/headers/include/cub/warp/warp_load.cuh +614 -0
- cuda/cccl/headers/include/cub/warp/warp_merge_sort.cuh +169 -0
- cuda/cccl/headers/include/cub/warp/warp_reduce.cuh +822 -0
- cuda/cccl/headers/include/cub/warp/warp_scan.cuh +1156 -0
- cuda/cccl/headers/include/cub/warp/warp_store.cuh +520 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/access_property.h +169 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/access_property_encoding.h +172 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr.h +210 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/annotated_ptr_base.h +100 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/apply_access_property.h +84 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/associate_access_property.h +127 -0
- cuda/cccl/headers/include/cuda/__annotated_ptr/createpolicy.h +209 -0
- cuda/cccl/headers/include/cuda/__atomic/atomic.h +145 -0
- cuda/cccl/headers/include/cuda/__barrier/aligned_size.h +61 -0
- cuda/cccl/headers/include/cuda/__barrier/async_contract_fulfillment.h +39 -0
- cuda/cccl/headers/include/cuda/__barrier/barrier.h +66 -0
- cuda/cccl/headers/include/cuda/__barrier/barrier_arrive_tx.h +100 -0
- cuda/cccl/headers/include/cuda/__barrier/barrier_block_scope.h +454 -0
- cuda/cccl/headers/include/cuda/__barrier/barrier_expect_tx.h +72 -0
- cuda/cccl/headers/include/cuda/__barrier/barrier_native_handle.h +45 -0
- cuda/cccl/headers/include/cuda/__barrier/barrier_thread_scope.h +61 -0
- cuda/cccl/headers/include/cuda/__bit/bit_reverse.h +171 -0
- cuda/cccl/headers/include/cuda/__bit/bitfield.h +122 -0
- cuda/cccl/headers/include/cuda/__bit/bitmask.h +88 -0
- cuda/cccl/headers/include/cuda/__cccl_config +36 -0
- cuda/cccl/headers/include/cuda/__cmath/ceil_div.h +126 -0
- cuda/cccl/headers/include/cuda/__cmath/ilog.h +195 -0
- cuda/cccl/headers/include/cuda/__cmath/ipow.h +107 -0
- cuda/cccl/headers/include/cuda/__cmath/isqrt.h +80 -0
- cuda/cccl/headers/include/cuda/__cmath/neg.h +47 -0
- cuda/cccl/headers/include/cuda/__cmath/pow2.h +74 -0
- cuda/cccl/headers/include/cuda/__cmath/round_down.h +104 -0
- cuda/cccl/headers/include/cuda/__cmath/round_up.h +106 -0
- cuda/cccl/headers/include/cuda/__cmath/uabs.h +57 -0
- cuda/cccl/headers/include/cuda/__execution/determinism.h +90 -0
- cuda/cccl/headers/include/cuda/__execution/require.h +67 -0
- cuda/cccl/headers/include/cuda/__execution/tune.h +62 -0
- cuda/cccl/headers/include/cuda/__functional/address_stability.h +131 -0
- cuda/cccl/headers/include/cuda/__functional/for_each_canceled.h +279 -0
- cuda/cccl/headers/include/cuda/__functional/get_device_address.h +58 -0
- cuda/cccl/headers/include/cuda/__functional/maximum.h +58 -0
- cuda/cccl/headers/include/cuda/__functional/minimum.h +58 -0
- cuda/cccl/headers/include/cuda/__functional/proclaim_return_type.h +108 -0
- cuda/cccl/headers/include/cuda/__fwd/barrier.h +38 -0
- cuda/cccl/headers/include/cuda/__fwd/barrier_native_handle.h +42 -0
- cuda/cccl/headers/include/cuda/__fwd/get_stream.h +38 -0
- cuda/cccl/headers/include/cuda/__fwd/pipeline.h +37 -0
- cuda/cccl/headers/include/cuda/__iterator/constant_iterator.h +261 -0
- cuda/cccl/headers/include/cuda/__iterator/counting_iterator.h +407 -0
- cuda/cccl/headers/include/cuda/__iterator/discard_iterator.h +314 -0
- cuda/cccl/headers/include/cuda/__iterator/strided_iterator.h +323 -0
- cuda/cccl/headers/include/cuda/__iterator/transform_iterator.h +481 -0
- cuda/cccl/headers/include/cuda/__latch/latch.h +44 -0
- cuda/cccl/headers/include/cuda/__mdspan/host_device_accessor.h +457 -0
- cuda/cccl/headers/include/cuda/__mdspan/host_device_mdspan.h +63 -0
- cuda/cccl/headers/include/cuda/__mdspan/restrict_accessor.h +123 -0
- cuda/cccl/headers/include/cuda/__mdspan/restrict_mdspan.h +51 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/check_preconditions.h +79 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/completion_mechanism.h +47 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_bulk_shared_global.h +60 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_fallback.h +72 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/cp_async_shared_global.h +98 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/dispatch_memcpy_async.h +162 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/is_local_smem_barrier.h +49 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async.h +179 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_barrier.h +99 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_async_tx.h +99 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/memcpy_completion.h +170 -0
- cuda/cccl/headers/include/cuda/__memcpy_async/try_get_barrier_handle.h +59 -0
- cuda/cccl/headers/include/cuda/__memory/address_space.h +86 -0
- cuda/cccl/headers/include/cuda/__memory_resource/get_memory_resource.h +94 -0
- cuda/cccl/headers/include/cuda/__memory_resource/get_property.h +158 -0
- cuda/cccl/headers/include/cuda/__memory_resource/properties.h +73 -0
- cuda/cccl/headers/include/cuda/__memory_resource/resource.h +129 -0
- cuda/cccl/headers/include/cuda/__memory_resource/resource_ref.h +653 -0
- cuda/cccl/headers/include/cuda/__numeric/narrow.h +108 -0
- cuda/cccl/headers/include/cuda/__numeric/overflow_cast.h +57 -0
- cuda/cccl/headers/include/cuda/__numeric/overflow_result.h +43 -0
- cuda/cccl/headers/include/cuda/__nvtx/nvtx.h +101 -0
- cuda/cccl/headers/include/cuda/__nvtx/nvtx3.h +2982 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/barrier_cluster.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/bfind.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/bmsk.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/clusterlaunchcontrol.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk.h +44 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_commit_group.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_tensor.h +45 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_bulk_wait_group.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_async_mbarrier_arrive.h +42 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk.h +60 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/cp_reduce_async_bulk_tensor.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/elect_sync.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/exit.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/fence.h +49 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/barrier_cluster.h +115 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bfind.h +190 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/bmsk.h +54 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/clusterlaunchcontrol.h +240 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk.h +193 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_commit_group.h +25 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_multicast.h +52 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor.h +957 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_gather_scatter.h +288 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_multicast.h +596 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_bulk_wait_group.h +46 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive.h +26 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_async_mbarrier_arrive_noinc.h +26 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk.h +1445 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_bf16.h +132 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_f16.h +117 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_tensor.h +601 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/elect_sync.h +36 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/exit.h +25 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence.h +208 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_mbarrier_init.h +31 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_alias.h +25 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async.h +58 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_async_generic_sync_restrict.h +62 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_proxy_tensormap_generic.h +101 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/fence_sync_restrict.h +62 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/get_sreg.h +949 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/getctarank.h +32 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/ld.h +15074 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive.h +385 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_expect_tx.h +176 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_arrive_no_complete.h +34 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_expect_tx.h +94 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_init.h +27 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait.h +137 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_test_wait_parity.h +138 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait.h +280 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/mbarrier_try_wait_parity.h +282 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_ld_reduce.h +2148 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_red.h +1272 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/multimem_st.h +228 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/prmt.h +230 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/red_async.h +430 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shl.h +96 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/shr.h +168 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st.h +1830 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_async.h +123 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/st_bulk.h +31 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_alloc.h +105 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_commit.h +81 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_cp.h +612 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_fence.h +44 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_ld.h +4446 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma.h +4061 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_mma_ws.h +6438 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_shift.h +36 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_st.h +4582 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tcgen05_wait.h +44 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_cp_fenceproxy.h +67 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/tensormap_replace.h +750 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/generated/trap.h +25 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/get_sreg.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/getctarank.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/ld.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_arrive.h +45 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_expect_tx.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_init.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/mbarrier_wait.h +46 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_ld_reduce.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_red.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/multimem_st.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/prmt.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/red_async.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/shfl_sync.h +275 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/shl.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/shr.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/st.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/st_async.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/st_bulk.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_alloc.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_commit.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_cp.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_fence.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_ld.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_mma_ws.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_shift.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_st.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tcgen05_wait.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_cp_fenceproxy.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/tensormap_replace.h +43 -0
- cuda/cccl/headers/include/cuda/__ptx/instructions/trap.h +41 -0
- cuda/cccl/headers/include/cuda/__ptx/ptx_dot_variants.h +230 -0
- cuda/cccl/headers/include/cuda/__ptx/ptx_helper_functions.h +151 -0
- cuda/cccl/headers/include/cuda/__semaphore/counting_semaphore.h +53 -0
- cuda/cccl/headers/include/cuda/__stream/get_stream.h +97 -0
- cuda/cccl/headers/include/cuda/__stream/stream_ref.h +165 -0
- cuda/cccl/headers/include/cuda/__type_traits/is_floating_point.h +47 -0
- cuda/cccl/headers/include/cuda/__warp/lane_mask.h +326 -0
- cuda/cccl/headers/include/cuda/__warp/warp_match_all.h +66 -0
- cuda/cccl/headers/include/cuda/__warp/warp_shuffle.h +249 -0
- cuda/cccl/headers/include/cuda/access_property +26 -0
- cuda/cccl/headers/include/cuda/annotated_ptr +29 -0
- cuda/cccl/headers/include/cuda/atomic +27 -0
- cuda/cccl/headers/include/cuda/barrier +262 -0
- cuda/cccl/headers/include/cuda/bit +29 -0
- cuda/cccl/headers/include/cuda/cmath +35 -0
- cuda/cccl/headers/include/cuda/discard_memory +61 -0
- cuda/cccl/headers/include/cuda/functional +31 -0
- cuda/cccl/headers/include/cuda/iterator +31 -0
- cuda/cccl/headers/include/cuda/latch +27 -0
- cuda/cccl/headers/include/cuda/mdspan +28 -0
- cuda/cccl/headers/include/cuda/memory +28 -0
- cuda/cccl/headers/include/cuda/memory_resource +41 -0
- cuda/cccl/headers/include/cuda/numeric +28 -0
- cuda/cccl/headers/include/cuda/pipeline +579 -0
- cuda/cccl/headers/include/cuda/ptx +118 -0
- cuda/cccl/headers/include/cuda/semaphore +31 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/adjacent_find.h +60 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/all_of.h +46 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/any_of.h +46 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/binary_search.h +52 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/clamp.h +48 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/comp.h +64 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/comp_ref_type.h +85 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/copy.h +143 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/copy_backward.h +79 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/copy_if.h +47 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/copy_n.h +74 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/count.h +49 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/count_if.h +49 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/equal.h +129 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/equal_range.h +101 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/fill.h +58 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/fill_n.h +51 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/find.h +64 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/find_end.h +225 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/find_first_of.h +73 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/find_if.h +46 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/find_if_not.h +46 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/for_each.h +42 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/for_each_n.h +48 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/generate.h +41 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/generate_n.h +46 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/half_positive.h +49 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/includes.h +92 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/is_heap.h +51 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/is_heap_until.h +83 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/is_partitioned.h +58 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/is_permutation.h +252 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted.h +50 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/is_sorted_until.h +69 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/iter_swap.h +82 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/iterator_operations.h +188 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/lexicographical_compare.h +68 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/lower_bound.h +83 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/make_heap.h +72 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/make_projected.h +96 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/max.h +62 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/max_element.h +70 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/merge.h +89 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/min.h +62 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/min_element.h +88 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/minmax.h +71 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/minmax_element.h +141 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/mismatch.h +83 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/move.h +88 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/move_backward.h +84 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/next_permutation.h +89 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/none_of.h +46 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort.h +102 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/partial_sort_copy.h +122 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/partition.h +121 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/partition_copy.h +59 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/partition_point.h +61 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/pop_heap.h +95 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/prev_permutation.h +89 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/push_heap.h +103 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/ranges_iterator_concept.h +65 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min.h +99 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/ranges_min_element.h +69 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/remove.h +55 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy.h +47 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/remove_copy_if.h +47 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/remove_if.h +56 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/replace.h +45 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy.h +54 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/replace_copy_if.h +50 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/replace_if.h +45 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/reverse.h +81 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/reverse_copy.h +43 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/rotate.h +264 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/rotate_copy.h +40 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/search.h +185 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/search_n.h +163 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/set_difference.h +95 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/set_intersection.h +123 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/set_symmetric_difference.h +135 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/set_union.h +129 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/shift_left.h +84 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/shift_right.h +144 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/sift_down.h +139 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/sort_heap.h +72 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/swap_ranges.h +78 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/transform.h +59 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/unique.h +77 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/unique_copy.h +156 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_iter.h +96 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/unwrap_range.h +127 -0
- cuda/cccl/headers/include/cuda/std/__algorithm/upper_bound.h +83 -0
- cuda/cccl/headers/include/cuda/std/__algorithm_ +26 -0
- cuda/cccl/headers/include/cuda/std/__atomic/api/common.h +192 -0
- cuda/cccl/headers/include/cuda/std/__atomic/api/owned.h +138 -0
- cuda/cccl/headers/include/cuda/std/__atomic/api/reference.h +118 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions/common.h +58 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_local.h +218 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_derived.h +401 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated.h +3971 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions/cuda_ptx_generated_helper.h +177 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions/host.h +211 -0
- cuda/cccl/headers/include/cuda/std/__atomic/functions.h +33 -0
- cuda/cccl/headers/include/cuda/std/__atomic/order.h +159 -0
- cuda/cccl/headers/include/cuda/std/__atomic/platform/msvc_to_builtins.h +654 -0
- cuda/cccl/headers/include/cuda/std/__atomic/platform.h +93 -0
- cuda/cccl/headers/include/cuda/std/__atomic/scopes.h +105 -0
- cuda/cccl/headers/include/cuda/std/__atomic/types/base.h +250 -0
- cuda/cccl/headers/include/cuda/std/__atomic/types/common.h +105 -0
- cuda/cccl/headers/include/cuda/std/__atomic/types/locked.h +225 -0
- cuda/cccl/headers/include/cuda/std/__atomic/types/reference.h +73 -0
- cuda/cccl/headers/include/cuda/std/__atomic/types/small.h +228 -0
- cuda/cccl/headers/include/cuda/std/__atomic/types.h +52 -0
- cuda/cccl/headers/include/cuda/std/__atomic/wait/notify_wait.h +95 -0
- cuda/cccl/headers/include/cuda/std/__atomic/wait/polling.h +65 -0
- cuda/cccl/headers/include/cuda/std/__barrier/barrier.h +227 -0
- cuda/cccl/headers/include/cuda/std/__barrier/empty_completion.h +37 -0
- cuda/cccl/headers/include/cuda/std/__barrier/poll_tester.h +84 -0
- cuda/cccl/headers/include/cuda/std/__bit/bit_cast.h +77 -0
- cuda/cccl/headers/include/cuda/std/__bit/byteswap.h +183 -0
- cuda/cccl/headers/include/cuda/std/__bit/countl.h +167 -0
- cuda/cccl/headers/include/cuda/std/__bit/countr.h +185 -0
- cuda/cccl/headers/include/cuda/std/__bit/endian.h +39 -0
- cuda/cccl/headers/include/cuda/std/__bit/has_single_bit.h +43 -0
- cuda/cccl/headers/include/cuda/std/__bit/integral.h +124 -0
- cuda/cccl/headers/include/cuda/std/__bit/popcount.h +154 -0
- cuda/cccl/headers/include/cuda/std/__bit/reference.h +1274 -0
- cuda/cccl/headers/include/cuda/std/__bit/rotate.h +94 -0
- cuda/cccl/headers/include/cuda/std/__cccl/architecture.h +78 -0
- cuda/cccl/headers/include/cuda/std/__cccl/assert.h +146 -0
- cuda/cccl/headers/include/cuda/std/__cccl/attributes.h +207 -0
- cuda/cccl/headers/include/cuda/std/__cccl/builtin.h +1343 -0
- cuda/cccl/headers/include/cuda/std/__cccl/compiler.h +216 -0
- cuda/cccl/headers/include/cuda/std/__cccl/cuda_capabilities.h +43 -0
- cuda/cccl/headers/include/cuda/std/__cccl/cuda_toolkit.h +53 -0
- cuda/cccl/headers/include/cuda/std/__cccl/deprecated.h +69 -0
- cuda/cccl/headers/include/cuda/std/__cccl/diagnostic.h +129 -0
- cuda/cccl/headers/include/cuda/std/__cccl/dialect.h +124 -0
- cuda/cccl/headers/include/cuda/std/__cccl/epilogue.h +326 -0
- cuda/cccl/headers/include/cuda/std/__cccl/exceptions.h +35 -0
- cuda/cccl/headers/include/cuda/std/__cccl/execution_space.h +68 -0
- cuda/cccl/headers/include/cuda/std/__cccl/extended_data_types.h +129 -0
- cuda/cccl/headers/include/cuda/std/__cccl/is_non_narrowing_convertible.h +73 -0
- cuda/cccl/headers/include/cuda/std/__cccl/os.h +48 -0
- cuda/cccl/headers/include/cuda/std/__cccl/preprocessor.h +1234 -0
- cuda/cccl/headers/include/cuda/std/__cccl/prologue.h +267 -0
- cuda/cccl/headers/include/cuda/std/__cccl/ptx_isa.h +176 -0
- cuda/cccl/headers/include/cuda/std/__cccl/rtti.h +72 -0
- cuda/cccl/headers/include/cuda/std/__cccl/sequence_access.h +87 -0
- cuda/cccl/headers/include/cuda/std/__cccl/system_header.h +38 -0
- cuda/cccl/headers/include/cuda/std/__cccl/unreachable.h +31 -0
- cuda/cccl/headers/include/cuda/std/__cccl/version.h +26 -0
- cuda/cccl/headers/include/cuda/std/__cccl/visibility.h +112 -0
- cuda/cccl/headers/include/cuda/std/__charconv/chars_format.h +81 -0
- cuda/cccl/headers/include/cuda/std/__charconv/from_chars_result.h +56 -0
- cuda/cccl/headers/include/cuda/std/__charconv/to_chars.h +148 -0
- cuda/cccl/headers/include/cuda/std/__charconv/to_chars_result.h +56 -0
- cuda/cccl/headers/include/cuda/std/__charconv_ +30 -0
- cuda/cccl/headers/include/cuda/std/__cmath/abs.h +240 -0
- cuda/cccl/headers/include/cuda/std/__cmath/copysign.h +187 -0
- cuda/cccl/headers/include/cuda/std/__cmath/exponential_functions.h +620 -0
- cuda/cccl/headers/include/cuda/std/__cmath/fpclassify.h +207 -0
- cuda/cccl/headers/include/cuda/std/__cmath/gamma.h +181 -0
- cuda/cccl/headers/include/cuda/std/__cmath/hyperbolic_functions.h +250 -0
- cuda/cccl/headers/include/cuda/std/__cmath/hypot.h +213 -0
- cuda/cccl/headers/include/cuda/std/__cmath/inverse_hyperbolic_functions.h +250 -0
- cuda/cccl/headers/include/cuda/std/__cmath/inverse_trigonometric_functions.h +323 -0
- cuda/cccl/headers/include/cuda/std/__cmath/isfinite.h +163 -0
- cuda/cccl/headers/include/cuda/std/__cmath/isinf.h +201 -0
- cuda/cccl/headers/include/cuda/std/__cmath/isnan.h +176 -0
- cuda/cccl/headers/include/cuda/std/__cmath/isnormal.h +129 -0
- cuda/cccl/headers/include/cuda/std/__cmath/lerp.h +106 -0
- cuda/cccl/headers/include/cuda/std/__cmath/logarithms.h +503 -0
- cuda/cccl/headers/include/cuda/std/__cmath/min_max.h +236 -0
- cuda/cccl/headers/include/cuda/std/__cmath/nvbf16.h +58 -0
- cuda/cccl/headers/include/cuda/std/__cmath/nvfp16.h +58 -0
- cuda/cccl/headers/include/cuda/std/__cmath/roots.h +180 -0
- cuda/cccl/headers/include/cuda/std/__cmath/rounding_functions.h +877 -0
- cuda/cccl/headers/include/cuda/std/__cmath/signbit.h +155 -0
- cuda/cccl/headers/include/cuda/std/__cmath/traits.h +170 -0
- cuda/cccl/headers/include/cuda/std/__cmath/trigonometric_functions.h +292 -0
- cuda/cccl/headers/include/cuda/std/__complex/nvbf16.h +351 -0
- cuda/cccl/headers/include/cuda/std/__complex/nvfp16.h +350 -0
- cuda/cccl/headers/include/cuda/std/__complex/vector_support.h +135 -0
- cuda/cccl/headers/include/cuda/std/__concepts/arithmetic.h +56 -0
- cuda/cccl/headers/include/cuda/std/__concepts/assignable.h +64 -0
- cuda/cccl/headers/include/cuda/std/__concepts/boolean_testable.h +63 -0
- cuda/cccl/headers/include/cuda/std/__concepts/class_or_enum.h +46 -0
- cuda/cccl/headers/include/cuda/std/__concepts/common_reference_with.h +69 -0
- cuda/cccl/headers/include/cuda/std/__concepts/common_with.h +82 -0
- cuda/cccl/headers/include/cuda/std/__concepts/concept_macros.h +274 -0
- cuda/cccl/headers/include/cuda/std/__concepts/constructible.h +107 -0
- cuda/cccl/headers/include/cuda/std/__concepts/convertible_to.h +71 -0
- cuda/cccl/headers/include/cuda/std/__concepts/copyable.h +60 -0
- cuda/cccl/headers/include/cuda/std/__concepts/derived_from.h +57 -0
- cuda/cccl/headers/include/cuda/std/__concepts/destructible.h +76 -0
- cuda/cccl/headers/include/cuda/std/__concepts/different_from.h +38 -0
- cuda/cccl/headers/include/cuda/std/__concepts/equality_comparable.h +100 -0
- cuda/cccl/headers/include/cuda/std/__concepts/invocable.h +80 -0
- cuda/cccl/headers/include/cuda/std/__concepts/movable.h +58 -0
- cuda/cccl/headers/include/cuda/std/__concepts/predicate.h +54 -0
- cuda/cccl/headers/include/cuda/std/__concepts/regular.h +54 -0
- cuda/cccl/headers/include/cuda/std/__concepts/relation.h +77 -0
- cuda/cccl/headers/include/cuda/std/__concepts/same_as.h +42 -0
- cuda/cccl/headers/include/cuda/std/__concepts/semiregular.h +54 -0
- cuda/cccl/headers/include/cuda/std/__concepts/swappable.h +206 -0
- cuda/cccl/headers/include/cuda/std/__concepts/totally_ordered.h +101 -0
- cuda/cccl/headers/include/cuda/std/__cstddef/byte.h +113 -0
- cuda/cccl/headers/include/cuda/std/__cstddef/types.h +52 -0
- cuda/cccl/headers/include/cuda/std/__cstdlib/abs.h +57 -0
- cuda/cccl/headers/include/cuda/std/__cstdlib/aligned_alloc.h +66 -0
- cuda/cccl/headers/include/cuda/std/__cstdlib/div.h +96 -0
- cuda/cccl/headers/include/cuda/std/__cstdlib/malloc.h +69 -0
- cuda/cccl/headers/include/cuda/std/__cuda/api_wrapper.h +62 -0
- cuda/cccl/headers/include/cuda/std/__cuda/ensure_current_device.h +72 -0
- cuda/cccl/headers/include/cuda/std/__exception/cuda_error.h +143 -0
- cuda/cccl/headers/include/cuda/std/__exception/terminate.h +73 -0
- cuda/cccl/headers/include/cuda/std/__execution/env.h +436 -0
- cuda/cccl/headers/include/cuda/std/__expected/bad_expected_access.h +127 -0
- cuda/cccl/headers/include/cuda/std/__expected/expected.h +2002 -0
- cuda/cccl/headers/include/cuda/std/__expected/expected_base.h +1078 -0
- cuda/cccl/headers/include/cuda/std/__expected/unexpect.h +37 -0
- cuda/cccl/headers/include/cuda/std/__expected/unexpected.h +178 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/arithmetic.h +56 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/cast.h +809 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/cccl_fp.h +125 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/common_type.h +48 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/constants.h +172 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/conversion_rank_order.h +103 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/format.h +162 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/fp.h +39 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/mask.h +64 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/native_type.h +81 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/nvfp_types.h +58 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/overflow_handler.h +139 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/properties.h +229 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/storage.h +248 -0
- cuda/cccl/headers/include/cuda/std/__floating_point/traits.h +172 -0
- cuda/cccl/headers/include/cuda/std/__functional/binary_function.h +63 -0
- cuda/cccl/headers/include/cuda/std/__functional/binary_negate.h +65 -0
- cuda/cccl/headers/include/cuda/std/__functional/bind.h +352 -0
- cuda/cccl/headers/include/cuda/std/__functional/bind_back.h +88 -0
- cuda/cccl/headers/include/cuda/std/__functional/bind_front.h +73 -0
- cuda/cccl/headers/include/cuda/std/__functional/binder1st.h +75 -0
- cuda/cccl/headers/include/cuda/std/__functional/binder2nd.h +75 -0
- cuda/cccl/headers/include/cuda/std/__functional/compose.h +69 -0
- cuda/cccl/headers/include/cuda/std/__functional/default_searcher.h +75 -0
- cuda/cccl/headers/include/cuda/std/__functional/function.h +1277 -0
- cuda/cccl/headers/include/cuda/std/__functional/hash.h +650 -0
- cuda/cccl/headers/include/cuda/std/__functional/identity.h +61 -0
- cuda/cccl/headers/include/cuda/std/__functional/invoke.h +560 -0
- cuda/cccl/headers/include/cuda/std/__functional/is_transparent.h +43 -0
- cuda/cccl/headers/include/cuda/std/__functional/mem_fn.h +67 -0
- cuda/cccl/headers/include/cuda/std/__functional/mem_fun_ref.h +214 -0
- cuda/cccl/headers/include/cuda/std/__functional/not_fn.h +121 -0
- cuda/cccl/headers/include/cuda/std/__functional/operations.h +534 -0
- cuda/cccl/headers/include/cuda/std/__functional/perfect_forward.h +127 -0
- cuda/cccl/headers/include/cuda/std/__functional/pointer_to_binary_function.h +65 -0
- cuda/cccl/headers/include/cuda/std/__functional/pointer_to_unary_function.h +64 -0
- cuda/cccl/headers/include/cuda/std/__functional/ranges_operations.h +113 -0
- cuda/cccl/headers/include/cuda/std/__functional/reference_wrapper.h +113 -0
- cuda/cccl/headers/include/cuda/std/__functional/unary_function.h +62 -0
- cuda/cccl/headers/include/cuda/std/__functional/unary_negate.h +67 -0
- cuda/cccl/headers/include/cuda/std/__functional/unwrap_ref.h +56 -0
- cuda/cccl/headers/include/cuda/std/__functional/weak_result_type.h +278 -0
- cuda/cccl/headers/include/cuda/std/__fwd/allocator.h +35 -0
- cuda/cccl/headers/include/cuda/std/__fwd/array.h +36 -0
- cuda/cccl/headers/include/cuda/std/__fwd/char_traits.h +49 -0
- cuda/cccl/headers/include/cuda/std/__fwd/complex.h +34 -0
- cuda/cccl/headers/include/cuda/std/__fwd/fp.h +37 -0
- cuda/cccl/headers/include/cuda/std/__fwd/get.h +123 -0
- cuda/cccl/headers/include/cuda/std/__fwd/hash.h +34 -0
- cuda/cccl/headers/include/cuda/std/__fwd/iterator_traits.h +40 -0
- cuda/cccl/headers/include/cuda/std/__fwd/mdspan.h +73 -0
- cuda/cccl/headers/include/cuda/std/__fwd/memory_resource.h +37 -0
- cuda/cccl/headers/include/cuda/std/__fwd/pair.h +34 -0
- cuda/cccl/headers/include/cuda/std/__fwd/reference_wrapper.h +34 -0
- cuda/cccl/headers/include/cuda/std/__fwd/span.h +38 -0
- cuda/cccl/headers/include/cuda/std/__fwd/string.h +83 -0
- cuda/cccl/headers/include/cuda/std/__fwd/string_view.h +59 -0
- cuda/cccl/headers/include/cuda/std/__fwd/subrange.h +55 -0
- cuda/cccl/headers/include/cuda/std/__fwd/tuple.h +34 -0
- cuda/cccl/headers/include/cuda/std/__internal/cpp_dialect.h +44 -0
- cuda/cccl/headers/include/cuda/std/__internal/features.h +71 -0
- cuda/cccl/headers/include/cuda/std/__internal/namespaces.h +102 -0
- cuda/cccl/headers/include/cuda/std/__iterator/access.h +132 -0
- cuda/cccl/headers/include/cuda/std/__iterator/advance.h +230 -0
- cuda/cccl/headers/include/cuda/std/__iterator/back_insert_iterator.h +103 -0
- cuda/cccl/headers/include/cuda/std/__iterator/bounded_iter.h +264 -0
- cuda/cccl/headers/include/cuda/std/__iterator/concepts.h +608 -0
- cuda/cccl/headers/include/cuda/std/__iterator/counted_iterator.h +469 -0
- cuda/cccl/headers/include/cuda/std/__iterator/data.h +63 -0
- cuda/cccl/headers/include/cuda/std/__iterator/default_sentinel.h +36 -0
- cuda/cccl/headers/include/cuda/std/__iterator/distance.h +126 -0
- cuda/cccl/headers/include/cuda/std/__iterator/empty.h +54 -0
- cuda/cccl/headers/include/cuda/std/__iterator/erase_if_container.h +53 -0
- cuda/cccl/headers/include/cuda/std/__iterator/front_insert_iterator.h +98 -0
- cuda/cccl/headers/include/cuda/std/__iterator/incrementable_traits.h +152 -0
- cuda/cccl/headers/include/cuda/std/__iterator/indirectly_comparable.h +55 -0
- cuda/cccl/headers/include/cuda/std/__iterator/insert_iterator.h +105 -0
- cuda/cccl/headers/include/cuda/std/__iterator/istream_iterator.h +141 -0
- cuda/cccl/headers/include/cuda/std/__iterator/istreambuf_iterator.h +161 -0
- cuda/cccl/headers/include/cuda/std/__iterator/iter_move.h +161 -0
- cuda/cccl/headers/include/cuda/std/__iterator/iter_swap.h +163 -0
- cuda/cccl/headers/include/cuda/std/__iterator/iterator.h +44 -0
- cuda/cccl/headers/include/cuda/std/__iterator/iterator_traits.h +935 -0
- cuda/cccl/headers/include/cuda/std/__iterator/mergeable.h +72 -0
- cuda/cccl/headers/include/cuda/std/__iterator/move_iterator.h +401 -0
- cuda/cccl/headers/include/cuda/std/__iterator/move_sentinel.h +73 -0
- cuda/cccl/headers/include/cuda/std/__iterator/next.h +102 -0
- cuda/cccl/headers/include/cuda/std/__iterator/ostream_iterator.h +99 -0
- cuda/cccl/headers/include/cuda/std/__iterator/ostreambuf_iterator.h +101 -0
- cuda/cccl/headers/include/cuda/std/__iterator/permutable.h +54 -0
- cuda/cccl/headers/include/cuda/std/__iterator/prev.h +92 -0
- cuda/cccl/headers/include/cuda/std/__iterator/projected.h +61 -0
- cuda/cccl/headers/include/cuda/std/__iterator/readable_traits.h +185 -0
- cuda/cccl/headers/include/cuda/std/__iterator/reverse_access.h +146 -0
- cuda/cccl/headers/include/cuda/std/__iterator/reverse_iterator.h +615 -0
- cuda/cccl/headers/include/cuda/std/__iterator/size.h +69 -0
- cuda/cccl/headers/include/cuda/std/__iterator/sortable.h +55 -0
- cuda/cccl/headers/include/cuda/std/__iterator/unreachable_sentinel.h +88 -0
- cuda/cccl/headers/include/cuda/std/__iterator/wrap_iter.h +259 -0
- cuda/cccl/headers/include/cuda/std/__latch/latch.h +88 -0
- cuda/cccl/headers/include/cuda/std/__limits/numeric_limits.h +617 -0
- cuda/cccl/headers/include/cuda/std/__limits/numeric_limits_ext.h +781 -0
- cuda/cccl/headers/include/cuda/std/__linalg/conj_if_needed.h +78 -0
- cuda/cccl/headers/include/cuda/std/__linalg/conjugate_transposed.h +55 -0
- cuda/cccl/headers/include/cuda/std/__linalg/conjugated.h +140 -0
- cuda/cccl/headers/include/cuda/std/__linalg/scaled.h +134 -0
- cuda/cccl/headers/include/cuda/std/__linalg/transposed.h +328 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/aligned_accessor.h +100 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/concepts.h +139 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/default_accessor.h +74 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/empty_base.h +363 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/extents.h +765 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/layout_left.h +317 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/layout_right.h +310 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/layout_stride.h +615 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h +512 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_extents.h +193 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_helper.h +190 -0
- cuda/cccl/headers/include/cuda/std/__mdspan/submdspan_mapping.h +347 -0
- cuda/cccl/headers/include/cuda/std/__memory/addressof.h +64 -0
- cuda/cccl/headers/include/cuda/std/__memory/align.h +87 -0
- cuda/cccl/headers/include/cuda/std/__memory/allocate_at_least.h +81 -0
- cuda/cccl/headers/include/cuda/std/__memory/allocation_guard.h +100 -0
- cuda/cccl/headers/include/cuda/std/__memory/allocator.h +320 -0
- cuda/cccl/headers/include/cuda/std/__memory/allocator_arg_t.h +84 -0
- cuda/cccl/headers/include/cuda/std/__memory/allocator_destructor.h +59 -0
- cuda/cccl/headers/include/cuda/std/__memory/allocator_traits.h +569 -0
- cuda/cccl/headers/include/cuda/std/__memory/assume_aligned.h +60 -0
- cuda/cccl/headers/include/cuda/std/__memory/builtin_new_allocator.h +87 -0
- cuda/cccl/headers/include/cuda/std/__memory/compressed_pair.h +231 -0
- cuda/cccl/headers/include/cuda/std/__memory/construct_at.h +248 -0
- cuda/cccl/headers/include/cuda/std/__memory/destruct_n.h +91 -0
- cuda/cccl/headers/include/cuda/std/__memory/is_sufficiently_aligned.h +43 -0
- cuda/cccl/headers/include/cuda/std/__memory/pointer_traits.h +260 -0
- cuda/cccl/headers/include/cuda/std/__memory/temporary_buffer.h +92 -0
- cuda/cccl/headers/include/cuda/std/__memory/uninitialized_algorithms.h +686 -0
- cuda/cccl/headers/include/cuda/std/__memory/unique_ptr.h +771 -0
- cuda/cccl/headers/include/cuda/std/__memory/uses_allocator.h +55 -0
- cuda/cccl/headers/include/cuda/std/__memory/voidify.h +41 -0
- cuda/cccl/headers/include/cuda/std/__memory_ +34 -0
- cuda/cccl/headers/include/cuda/std/__new/allocate.h +126 -0
- cuda/cccl/headers/include/cuda/std/__new/bad_alloc.h +57 -0
- cuda/cccl/headers/include/cuda/std/__new/launder.h +49 -0
- cuda/cccl/headers/include/cuda/std/__new_ +29 -0
- cuda/cccl/headers/include/cuda/std/__numeric/accumulate.h +57 -0
- cuda/cccl/headers/include/cuda/std/__numeric/adjacent_difference.h +72 -0
- cuda/cccl/headers/include/cuda/std/__numeric/exclusive_scan.h +66 -0
- cuda/cccl/headers/include/cuda/std/__numeric/gcd_lcm.h +80 -0
- cuda/cccl/headers/include/cuda/std/__numeric/inclusive_scan.h +73 -0
- cuda/cccl/headers/include/cuda/std/__numeric/inner_product.h +62 -0
- cuda/cccl/headers/include/cuda/std/__numeric/iota.h +42 -0
- cuda/cccl/headers/include/cuda/std/__numeric/midpoint.h +100 -0
- cuda/cccl/headers/include/cuda/std/__numeric/partial_sum.h +70 -0
- cuda/cccl/headers/include/cuda/std/__numeric/reduce.h +61 -0
- cuda/cccl/headers/include/cuda/std/__numeric/transform_exclusive_scan.h +51 -0
- cuda/cccl/headers/include/cuda/std/__numeric/transform_inclusive_scan.h +65 -0
- cuda/cccl/headers/include/cuda/std/__numeric/transform_reduce.h +72 -0
- cuda/cccl/headers/include/cuda/std/__ranges/access.h +304 -0
- cuda/cccl/headers/include/cuda/std/__ranges/all.h +97 -0
- cuda/cccl/headers/include/cuda/std/__ranges/concepts.h +313 -0
- cuda/cccl/headers/include/cuda/std/__ranges/counted.h +90 -0
- cuda/cccl/headers/include/cuda/std/__ranges/dangling.h +54 -0
- cuda/cccl/headers/include/cuda/std/__ranges/data.h +136 -0
- cuda/cccl/headers/include/cuda/std/__ranges/empty.h +111 -0
- cuda/cccl/headers/include/cuda/std/__ranges/empty_view.h +77 -0
- cuda/cccl/headers/include/cuda/std/__ranges/enable_borrowed_range.h +41 -0
- cuda/cccl/headers/include/cuda/std/__ranges/enable_view.h +77 -0
- cuda/cccl/headers/include/cuda/std/__ranges/from_range.h +36 -0
- cuda/cccl/headers/include/cuda/std/__ranges/iota_view.h +271 -0
- cuda/cccl/headers/include/cuda/std/__ranges/movable_box.h +410 -0
- cuda/cccl/headers/include/cuda/std/__ranges/owning_view.h +161 -0
- cuda/cccl/headers/include/cuda/std/__ranges/range_adaptor.h +114 -0
- cuda/cccl/headers/include/cuda/std/__ranges/rbegin.h +175 -0
- cuda/cccl/headers/include/cuda/std/__ranges/ref_view.h +121 -0
- cuda/cccl/headers/include/cuda/std/__ranges/rend.h +182 -0
- cuda/cccl/headers/include/cuda/std/__ranges/repeat_view.h +343 -0
- cuda/cccl/headers/include/cuda/std/__ranges/single_view.h +156 -0
- cuda/cccl/headers/include/cuda/std/__ranges/size.h +200 -0
- cuda/cccl/headers/include/cuda/std/__ranges/subrange.h +513 -0
- cuda/cccl/headers/include/cuda/std/__ranges/take_while_view.h +263 -0
- cuda/cccl/headers/include/cuda/std/__ranges/transform_view.h +531 -0
- cuda/cccl/headers/include/cuda/std/__ranges/unwrap_end.h +53 -0
- cuda/cccl/headers/include/cuda/std/__ranges/view_interface.h +181 -0
- cuda/cccl/headers/include/cuda/std/__ranges/views.h +38 -0
- cuda/cccl/headers/include/cuda/std/__semaphore/atomic_semaphore.h +233 -0
- cuda/cccl/headers/include/cuda/std/__semaphore/counting_semaphore.h +51 -0
- cuda/cccl/headers/include/cuda/std/__string/char_traits.h +191 -0
- cuda/cccl/headers/include/cuda/std/__string/constexpr_c_functions.h +591 -0
- cuda/cccl/headers/include/cuda/std/__string/helper_functions.h +299 -0
- cuda/cccl/headers/include/cuda/std/__string/string_view.h +244 -0
- cuda/cccl/headers/include/cuda/std/__string_ +29 -0
- cuda/cccl/headers/include/cuda/std/__system_error/errc.h +51 -0
- cuda/cccl/headers/include/cuda/std/__system_error_ +26 -0
- cuda/cccl/headers/include/cuda/std/__thread/threading_support.h +105 -0
- cuda/cccl/headers/include/cuda/std/__thread/threading_support_cuda.h +47 -0
- cuda/cccl/headers/include/cuda/std/__thread/threading_support_external.h +41 -0
- cuda/cccl/headers/include/cuda/std/__thread/threading_support_pthread.h +144 -0
- cuda/cccl/headers/include/cuda/std/__thread/threading_support_win32.h +87 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/ignore.h +51 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/make_tuple_types.h +98 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/sfinae_helpers.h +236 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/structured_bindings.h +216 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_element.h +70 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_indices.h +44 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like.h +90 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_like_ext.h +73 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_size.h +79 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/tuple_types.h +35 -0
- cuda/cccl/headers/include/cuda/std/__tuple_dir/vector_types.h +242 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/add_const.h +40 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/add_cv.h +40 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/add_lvalue_reference.h +62 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/add_pointer.h +65 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/add_rvalue_reference.h +62 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/add_volatile.h +40 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/aligned_storage.h +149 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/aligned_union.h +62 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/alignment_of.h +41 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/always_false.h +35 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/can_extract_key.h +69 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/common_reference.h +262 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/common_type.h +174 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/conditional.h +65 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/conjunction.h +67 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/copy_cv.h +50 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/copy_cvref.h +148 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/decay.h +83 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/dependent_type.h +35 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/disjunction.h +77 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/enable_if.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/extent.h +68 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/fold.h +47 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/has_unique_object_representation.h +47 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/has_virtual_destructor.h +51 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/integral_constant.h +62 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_abstract.h +40 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_aggregate.h +44 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_allocator.h +46 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_arithmetic.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_array.h +62 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_assignable.h +78 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_base_of.h +83 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_bounded_array.h +44 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_callable.h +60 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_char_like_type.h +38 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_class.h +68 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_compound.h +54 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_const.h +56 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_constant_evaluated.h +51 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_constructible.h +174 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_convertible.h +214 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_assignable.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_copy_constructible.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_core_convertible.h +47 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_corresponding_member.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_default_constructible.h +40 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_destructible.h +115 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_empty.h +73 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_enum.h +68 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_arithmetic.h +38 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_extended_floating_point.h +81 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_final.h +56 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_floating_point.h +53 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_function.h +61 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_fundamental.h +56 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_implicitly_default_constructible.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_integer.h +45 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_integral.h +123 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_layout_compatible.h +45 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_literal_type.h +59 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_member_function_pointer.h +79 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_member_object_pointer.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_member_pointer.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_move_assignable.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_move_constructible.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_assignable.h +70 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_constructible.h +84 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_convertible.h +59 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_assignable.h +60 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_copy_constructible.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_default_constructible.h +54 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_destructible.h +79 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_assignable.h +60 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_nothrow_move_constructible.h +42 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_null_pointer.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_object.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_one_of.h +37 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_pod.h +62 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer.h +60 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_base_of.h +87 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_pointer_interconvertible_with_class.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_polymorphic.h +63 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_primary_template.h +119 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_reference.h +95 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_reference_wrapper.h +50 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_referenceable.h +55 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_same.h +84 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_scalar.h +60 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_scoped_enum.h +49 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_signed.h +65 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_signed_integer.h +59 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_standard_layout.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_swappable.h +203 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivial.h +56 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_assignable.h +70 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_constructible.h +82 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_assignable.h +60 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copy_constructible.h +61 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_copyable.h +56 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_default_constructible.h +55 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_destructible.h +73 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_assignable.h +60 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_trivially_move_constructible.h +58 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_unbounded_array.h +43 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_union.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned.h +66 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_unsigned_integer.h +59 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_valid_expansion.h +41 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_void.h +55 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/is_volatile.h +56 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/lazy.h +35 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/make_const_lvalue_ref.h +36 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/make_nbit_int.h +107 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/make_signed.h +140 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/make_unsigned.h +151 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/maybe_const.h +36 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/nat.h +39 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/negation.h +44 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/num_bits.h +123 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/promote.h +163 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/rank.h +60 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/reference_constructs_from_temporary.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/reference_converts_from_temporary.h +56 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_all_extents.h +66 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_const.h +59 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_const_ref.h +37 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_cv.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_cvref.h +57 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_extent.h +65 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_pointer.h +73 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_reference.h +72 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/remove_volatile.h +58 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/result_of.h +47 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/type_identity.h +40 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/type_list.h +1069 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/type_set.h +132 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/underlying_type.h +66 -0
- cuda/cccl/headers/include/cuda/std/__type_traits/void_t.h +34 -0
- cuda/cccl/headers/include/cuda/std/__utility/as_const.h +52 -0
- cuda/cccl/headers/include/cuda/std/__utility/auto_cast.h +32 -0
- cuda/cccl/headers/include/cuda/std/__utility/cmp.h +116 -0
- cuda/cccl/headers/include/cuda/std/__utility/convert_to_integral.h +103 -0
- cuda/cccl/headers/include/cuda/std/__utility/declval.h +63 -0
- cuda/cccl/headers/include/cuda/std/__utility/exception_guard.h +162 -0
- cuda/cccl/headers/include/cuda/std/__utility/exchange.h +46 -0
- cuda/cccl/headers/include/cuda/std/__utility/forward.h +59 -0
- cuda/cccl/headers/include/cuda/std/__utility/forward_like.h +56 -0
- cuda/cccl/headers/include/cuda/std/__utility/in_place.h +77 -0
- cuda/cccl/headers/include/cuda/std/__utility/integer_sequence.h +251 -0
- cuda/cccl/headers/include/cuda/std/__utility/monostate.h +99 -0
- cuda/cccl/headers/include/cuda/std/__utility/move.h +75 -0
- cuda/cccl/headers/include/cuda/std/__utility/pair.h +808 -0
- cuda/cccl/headers/include/cuda/std/__utility/piecewise_construct.h +37 -0
- cuda/cccl/headers/include/cuda/std/__utility/pod_tuple.h +763 -0
- cuda/cccl/headers/include/cuda/std/__utility/priority_tag.h +40 -0
- cuda/cccl/headers/include/cuda/std/__utility/rel_ops.h +63 -0
- cuda/cccl/headers/include/cuda/std/__utility/swap.h +65 -0
- cuda/cccl/headers/include/cuda/std/__utility/to_underlying.h +40 -0
- cuda/cccl/headers/include/cuda/std/__utility/typeid.h +425 -0
- cuda/cccl/headers/include/cuda/std/__utility/unreachable.h +37 -0
- cuda/cccl/headers/include/cuda/std/array +527 -0
- cuda/cccl/headers/include/cuda/std/atomic +823 -0
- cuda/cccl/headers/include/cuda/std/barrier +43 -0
- cuda/cccl/headers/include/cuda/std/bit +35 -0
- cuda/cccl/headers/include/cuda/std/bitset +1026 -0
- cuda/cccl/headers/include/cuda/std/cassert +28 -0
- cuda/cccl/headers/include/cuda/std/ccomplex +15 -0
- cuda/cccl/headers/include/cuda/std/cfloat +59 -0
- cuda/cccl/headers/include/cuda/std/chrono +26 -0
- cuda/cccl/headers/include/cuda/std/climits +61 -0
- cuda/cccl/headers/include/cuda/std/cmath +25 -0
- cuda/cccl/headers/include/cuda/std/complex +25 -0
- cuda/cccl/headers/include/cuda/std/concepts +48 -0
- cuda/cccl/headers/include/cuda/std/cstddef +28 -0
- cuda/cccl/headers/include/cuda/std/cstdint +178 -0
- cuda/cccl/headers/include/cuda/std/cstdlib +30 -0
- cuda/cccl/headers/include/cuda/std/cstring +111 -0
- cuda/cccl/headers/include/cuda/std/ctime +147 -0
- cuda/cccl/headers/include/cuda/std/detail/__config +45 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/__config +258 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/algorithm +2692 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/chrono +3689 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/cmath +685 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/complex +1610 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/iosfwd +128 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/optional +1786 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/stdexcept +120 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/tuple +1378 -0
- cuda/cccl/headers/include/cuda/std/detail/libcxx/include/variant +2160 -0
- cuda/cccl/headers/include/cuda/std/execution +27 -0
- cuda/cccl/headers/include/cuda/std/expected +30 -0
- cuda/cccl/headers/include/cuda/std/functional +56 -0
- cuda/cccl/headers/include/cuda/std/initializer_list +36 -0
- cuda/cccl/headers/include/cuda/std/inplace_vector +2171 -0
- cuda/cccl/headers/include/cuda/std/iterator +70 -0
- cuda/cccl/headers/include/cuda/std/latch +34 -0
- cuda/cccl/headers/include/cuda/std/limits +28 -0
- cuda/cccl/headers/include/cuda/std/linalg +30 -0
- cuda/cccl/headers/include/cuda/std/mdspan +38 -0
- cuda/cccl/headers/include/cuda/std/memory +39 -0
- cuda/cccl/headers/include/cuda/std/numbers +335 -0
- cuda/cccl/headers/include/cuda/std/numeric +41 -0
- cuda/cccl/headers/include/cuda/std/optional +25 -0
- cuda/cccl/headers/include/cuda/std/ranges +68 -0
- cuda/cccl/headers/include/cuda/std/ratio +417 -0
- cuda/cccl/headers/include/cuda/std/semaphore +31 -0
- cuda/cccl/headers/include/cuda/std/source_location +83 -0
- cuda/cccl/headers/include/cuda/std/span +640 -0
- cuda/cccl/headers/include/cuda/std/string_view +814 -0
- cuda/cccl/headers/include/cuda/std/tuple +26 -0
- cuda/cccl/headers/include/cuda/std/type_traits +176 -0
- cuda/cccl/headers/include/cuda/std/utility +70 -0
- cuda/cccl/headers/include/cuda/std/variant +25 -0
- cuda/cccl/headers/include/cuda/std/version +245 -0
- cuda/cccl/headers/include/cuda/stream_ref +54 -0
- cuda/cccl/headers/include/cuda/type_traits +27 -0
- cuda/cccl/headers/include/cuda/version +16 -0
- cuda/cccl/headers/include/cuda/warp +28 -0
- cuda/cccl/headers/include/cuda/work_stealing +26 -0
- cuda/cccl/headers/include/nv/detail/__preprocessor +169 -0
- cuda/cccl/headers/include/nv/detail/__target_macros +599 -0
- cuda/cccl/headers/include/nv/target +229 -0
- cuda/cccl/headers/include/thrust/addressof.h +22 -0
- cuda/cccl/headers/include/thrust/adjacent_difference.h +254 -0
- cuda/cccl/headers/include/thrust/advance.h +59 -0
- cuda/cccl/headers/include/thrust/allocate_unique.h +299 -0
- cuda/cccl/headers/include/thrust/binary_search.h +1910 -0
- cuda/cccl/headers/include/thrust/complex.h +859 -0
- cuda/cccl/headers/include/thrust/copy.h +506 -0
- cuda/cccl/headers/include/thrust/count.h +245 -0
- cuda/cccl/headers/include/thrust/detail/adjacent_difference.inl +95 -0
- cuda/cccl/headers/include/thrust/detail/algorithm_wrapper.h +37 -0
- cuda/cccl/headers/include/thrust/detail/alignment.h +81 -0
- cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.h +350 -0
- cuda/cccl/headers/include/thrust/detail/allocator/allocator_traits.inl +371 -0
- cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.h +45 -0
- cuda/cccl/headers/include/thrust/detail/allocator/copy_construct_range.inl +242 -0
- cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.h +39 -0
- cuda/cccl/headers/include/thrust/detail/allocator/destroy_range.inl +137 -0
- cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.h +39 -0
- cuda/cccl/headers/include/thrust/detail/allocator/fill_construct_range.inl +99 -0
- cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.h +53 -0
- cuda/cccl/headers/include/thrust/detail/allocator/malloc_allocator.inl +68 -0
- cuda/cccl/headers/include/thrust/detail/allocator/no_throw_allocator.h +76 -0
- cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.h +102 -0
- cuda/cccl/headers/include/thrust/detail/allocator/tagged_allocator.inl +86 -0
- cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.h +79 -0
- cuda/cccl/headers/include/thrust/detail/allocator/temporary_allocator.inl +81 -0
- cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.h +39 -0
- cuda/cccl/headers/include/thrust/detail/allocator/value_initialize_range.inl +98 -0
- cuda/cccl/headers/include/thrust/detail/allocator_aware_execution_policy.h +99 -0
- cuda/cccl/headers/include/thrust/detail/binary_search.inl +525 -0
- cuda/cccl/headers/include/thrust/detail/caching_allocator.h +47 -0
- cuda/cccl/headers/include/thrust/detail/complex/arithmetic.h +255 -0
- cuda/cccl/headers/include/thrust/detail/complex/c99math.h +64 -0
- cuda/cccl/headers/include/thrust/detail/complex/catrig.h +875 -0
- cuda/cccl/headers/include/thrust/detail/complex/catrigf.h +589 -0
- cuda/cccl/headers/include/thrust/detail/complex/ccosh.h +233 -0
- cuda/cccl/headers/include/thrust/detail/complex/ccoshf.h +161 -0
- cuda/cccl/headers/include/thrust/detail/complex/cexp.h +195 -0
- cuda/cccl/headers/include/thrust/detail/complex/cexpf.h +173 -0
- cuda/cccl/headers/include/thrust/detail/complex/clog.h +223 -0
- cuda/cccl/headers/include/thrust/detail/complex/clogf.h +210 -0
- cuda/cccl/headers/include/thrust/detail/complex/complex.inl +263 -0
- cuda/cccl/headers/include/thrust/detail/complex/cpow.h +50 -0
- cuda/cccl/headers/include/thrust/detail/complex/cproj.h +81 -0
- cuda/cccl/headers/include/thrust/detail/complex/csinh.h +228 -0
- cuda/cccl/headers/include/thrust/detail/complex/csinhf.h +168 -0
- cuda/cccl/headers/include/thrust/detail/complex/csqrt.h +178 -0
- cuda/cccl/headers/include/thrust/detail/complex/csqrtf.h +174 -0
- cuda/cccl/headers/include/thrust/detail/complex/ctanh.h +208 -0
- cuda/cccl/headers/include/thrust/detail/complex/ctanhf.h +133 -0
- cuda/cccl/headers/include/thrust/detail/complex/math_private.h +138 -0
- cuda/cccl/headers/include/thrust/detail/complex/stream.h +73 -0
- cuda/cccl/headers/include/thrust/detail/config/compiler.h +38 -0
- cuda/cccl/headers/include/thrust/detail/config/config.h +43 -0
- cuda/cccl/headers/include/thrust/detail/config/cpp_dialect.h +78 -0
- cuda/cccl/headers/include/thrust/detail/config/device_system.h +55 -0
- cuda/cccl/headers/include/thrust/detail/config/host_system.h +48 -0
- cuda/cccl/headers/include/thrust/detail/config/memory_resource.h +41 -0
- cuda/cccl/headers/include/thrust/detail/config/namespace.h +162 -0
- cuda/cccl/headers/include/thrust/detail/config/simple_defines.h +48 -0
- cuda/cccl/headers/include/thrust/detail/config.h +36 -0
- cuda/cccl/headers/include/thrust/detail/contiguous_storage.h +228 -0
- cuda/cccl/headers/include/thrust/detail/contiguous_storage.inl +273 -0
- cuda/cccl/headers/include/thrust/detail/copy.h +72 -0
- cuda/cccl/headers/include/thrust/detail/copy.inl +129 -0
- cuda/cccl/headers/include/thrust/detail/copy_if.h +62 -0
- cuda/cccl/headers/include/thrust/detail/copy_if.inl +102 -0
- cuda/cccl/headers/include/thrust/detail/count.h +55 -0
- cuda/cccl/headers/include/thrust/detail/count.inl +89 -0
- cuda/cccl/headers/include/thrust/detail/device_delete.inl +52 -0
- cuda/cccl/headers/include/thrust/detail/device_free.inl +47 -0
- cuda/cccl/headers/include/thrust/detail/device_malloc.inl +60 -0
- cuda/cccl/headers/include/thrust/detail/device_new.inl +61 -0
- cuda/cccl/headers/include/thrust/detail/device_ptr.inl +48 -0
- cuda/cccl/headers/include/thrust/detail/equal.inl +93 -0
- cuda/cccl/headers/include/thrust/detail/event_error.h +160 -0
- cuda/cccl/headers/include/thrust/detail/execute_with_allocator.h +80 -0
- cuda/cccl/headers/include/thrust/detail/execute_with_allocator_fwd.h +61 -0
- cuda/cccl/headers/include/thrust/detail/execution_policy.h +80 -0
- cuda/cccl/headers/include/thrust/detail/extrema.inl +184 -0
- cuda/cccl/headers/include/thrust/detail/fill.inl +86 -0
- cuda/cccl/headers/include/thrust/detail/find.inl +113 -0
- cuda/cccl/headers/include/thrust/detail/for_each.inl +84 -0
- cuda/cccl/headers/include/thrust/detail/function.h +49 -0
- cuda/cccl/headers/include/thrust/detail/functional/actor.h +214 -0
- cuda/cccl/headers/include/thrust/detail/functional/operators.h +386 -0
- cuda/cccl/headers/include/thrust/detail/gather.inl +173 -0
- cuda/cccl/headers/include/thrust/detail/generate.inl +86 -0
- cuda/cccl/headers/include/thrust/detail/get_iterator_value.h +62 -0
- cuda/cccl/headers/include/thrust/detail/inner_product.inl +118 -0
- cuda/cccl/headers/include/thrust/detail/integer_math.h +130 -0
- cuda/cccl/headers/include/thrust/detail/internal_functional.h +285 -0
- cuda/cccl/headers/include/thrust/detail/logical.inl +113 -0
- cuda/cccl/headers/include/thrust/detail/malloc_and_free.h +92 -0
- cuda/cccl/headers/include/thrust/detail/malloc_and_free_fwd.h +45 -0
- cuda/cccl/headers/include/thrust/detail/memory_algorithms.h +209 -0
- cuda/cccl/headers/include/thrust/detail/memory_wrapper.h +40 -0
- cuda/cccl/headers/include/thrust/detail/merge.inl +276 -0
- cuda/cccl/headers/include/thrust/detail/mismatch.inl +94 -0
- cuda/cccl/headers/include/thrust/detail/mpl/math.h +164 -0
- cuda/cccl/headers/include/thrust/detail/numeric_wrapper.h +37 -0
- cuda/cccl/headers/include/thrust/detail/overlapped_copy.h +124 -0
- cuda/cccl/headers/include/thrust/detail/partition.inl +378 -0
- cuda/cccl/headers/include/thrust/detail/pointer.h +217 -0
- cuda/cccl/headers/include/thrust/detail/pointer.inl +172 -0
- cuda/cccl/headers/include/thrust/detail/preprocessor.h +652 -0
- cuda/cccl/headers/include/thrust/detail/random_bijection.h +177 -0
- cuda/cccl/headers/include/thrust/detail/range/head_flags.h +116 -0
- cuda/cccl/headers/include/thrust/detail/range/tail_flags.h +130 -0
- cuda/cccl/headers/include/thrust/detail/raw_pointer_cast.h +52 -0
- cuda/cccl/headers/include/thrust/detail/raw_reference_cast.h +189 -0
- cuda/cccl/headers/include/thrust/detail/reduce.inl +377 -0
- cuda/cccl/headers/include/thrust/detail/reference.h +500 -0
- cuda/cccl/headers/include/thrust/detail/reference_forward_declaration.h +35 -0
- cuda/cccl/headers/include/thrust/detail/remove.inl +213 -0
- cuda/cccl/headers/include/thrust/detail/replace.inl +231 -0
- cuda/cccl/headers/include/thrust/detail/reverse.inl +88 -0
- cuda/cccl/headers/include/thrust/detail/scan.inl +518 -0
- cuda/cccl/headers/include/thrust/detail/scatter.inl +157 -0
- cuda/cccl/headers/include/thrust/detail/seq.h +54 -0
- cuda/cccl/headers/include/thrust/detail/sequence.inl +109 -0
- cuda/cccl/headers/include/thrust/detail/set_operations.inl +981 -0
- cuda/cccl/headers/include/thrust/detail/shuffle.inl +86 -0
- cuda/cccl/headers/include/thrust/detail/sort.inl +373 -0
- cuda/cccl/headers/include/thrust/detail/static_assert.h +58 -0
- cuda/cccl/headers/include/thrust/detail/static_map.h +167 -0
- cuda/cccl/headers/include/thrust/detail/swap_ranges.inl +65 -0
- cuda/cccl/headers/include/thrust/detail/tabulate.inl +62 -0
- cuda/cccl/headers/include/thrust/detail/temporary_array.h +153 -0
- cuda/cccl/headers/include/thrust/detail/temporary_array.inl +138 -0
- cuda/cccl/headers/include/thrust/detail/temporary_buffer.h +81 -0
- cuda/cccl/headers/include/thrust/detail/transform.inl +250 -0
- cuda/cccl/headers/include/thrust/detail/transform_reduce.inl +69 -0
- cuda/cccl/headers/include/thrust/detail/transform_scan.inl +161 -0
- cuda/cccl/headers/include/thrust/detail/trivial_sequence.h +131 -0
- cuda/cccl/headers/include/thrust/detail/tuple_meta_transform.h +61 -0
- cuda/cccl/headers/include/thrust/detail/type_deduction.h +62 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/has_member_function.h +47 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/has_nested_type.h +43 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/is_call_possible.h +167 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/is_commutative.h +69 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/is_metafunction_defined.h +39 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/is_thrust_pointer.h +60 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/iterator/is_discard_iterator.h +44 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/iterator/is_output_iterator.h +46 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/minimum_type.h +89 -0
- cuda/cccl/headers/include/thrust/detail/type_traits/pointer_traits.h +332 -0
- cuda/cccl/headers/include/thrust/detail/type_traits.h +136 -0
- cuda/cccl/headers/include/thrust/detail/uninitialized_copy.inl +90 -0
- cuda/cccl/headers/include/thrust/detail/uninitialized_fill.inl +86 -0
- cuda/cccl/headers/include/thrust/detail/unique.inl +373 -0
- cuda/cccl/headers/include/thrust/detail/use_default.h +34 -0
- cuda/cccl/headers/include/thrust/detail/util/align.h +59 -0
- cuda/cccl/headers/include/thrust/detail/vector_base.h +630 -0
- cuda/cccl/headers/include/thrust/detail/vector_base.inl +1242 -0
- cuda/cccl/headers/include/thrust/device_allocator.h +134 -0
- cuda/cccl/headers/include/thrust/device_delete.h +59 -0
- cuda/cccl/headers/include/thrust/device_free.h +72 -0
- cuda/cccl/headers/include/thrust/device_make_unique.h +56 -0
- cuda/cccl/headers/include/thrust/device_malloc.h +108 -0
- cuda/cccl/headers/include/thrust/device_malloc_allocator.h +190 -0
- cuda/cccl/headers/include/thrust/device_new.h +91 -0
- cuda/cccl/headers/include/thrust/device_new_allocator.h +179 -0
- cuda/cccl/headers/include/thrust/device_ptr.h +202 -0
- cuda/cccl/headers/include/thrust/device_reference.h +986 -0
- cuda/cccl/headers/include/thrust/device_vector.h +574 -0
- cuda/cccl/headers/include/thrust/distance.h +43 -0
- cuda/cccl/headers/include/thrust/equal.h +247 -0
- cuda/cccl/headers/include/thrust/execution_policy.h +384 -0
- cuda/cccl/headers/include/thrust/extrema.h +657 -0
- cuda/cccl/headers/include/thrust/fill.h +201 -0
- cuda/cccl/headers/include/thrust/find.h +382 -0
- cuda/cccl/headers/include/thrust/for_each.h +261 -0
- cuda/cccl/headers/include/thrust/functional.h +396 -0
- cuda/cccl/headers/include/thrust/gather.h +464 -0
- cuda/cccl/headers/include/thrust/generate.h +193 -0
- cuda/cccl/headers/include/thrust/host_vector.h +576 -0
- cuda/cccl/headers/include/thrust/inner_product.h +264 -0
- cuda/cccl/headers/include/thrust/iterator/constant_iterator.h +219 -0
- cuda/cccl/headers/include/thrust/iterator/counting_iterator.h +335 -0
- cuda/cccl/headers/include/thrust/iterator/detail/any_assign.h +48 -0
- cuda/cccl/headers/include/thrust/iterator/detail/any_system_tag.h +43 -0
- cuda/cccl/headers/include/thrust/iterator/detail/device_system_tag.h +38 -0
- cuda/cccl/headers/include/thrust/iterator/detail/host_system_tag.h +38 -0
- cuda/cccl/headers/include/thrust/iterator/detail/iterator_adaptor_base.h +81 -0
- cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_system.h +51 -0
- cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_to_traversal.h +62 -0
- cuda/cccl/headers/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h +57 -0
- cuda/cccl/headers/include/thrust/iterator/detail/iterator_facade_category.h +199 -0
- cuda/cccl/headers/include/thrust/iterator/detail/iterator_traversal_tags.h +50 -0
- cuda/cccl/headers/include/thrust/iterator/detail/minimum_system.h +53 -0
- cuda/cccl/headers/include/thrust/iterator/detail/normal_iterator.h +69 -0
- cuda/cccl/headers/include/thrust/iterator/detail/retag.h +104 -0
- cuda/cccl/headers/include/thrust/iterator/detail/tagged_iterator.h +81 -0
- cuda/cccl/headers/include/thrust/iterator/detail/tuple_of_iterator_references.h +174 -0
- cuda/cccl/headers/include/thrust/iterator/discard_iterator.h +164 -0
- cuda/cccl/headers/include/thrust/iterator/iterator_adaptor.h +251 -0
- cuda/cccl/headers/include/thrust/iterator/iterator_categories.h +215 -0
- cuda/cccl/headers/include/thrust/iterator/iterator_facade.h +660 -0
- cuda/cccl/headers/include/thrust/iterator/iterator_traits.h +245 -0
- cuda/cccl/headers/include/thrust/iterator/offset_iterator.h +192 -0
- cuda/cccl/headers/include/thrust/iterator/permutation_iterator.h +204 -0
- cuda/cccl/headers/include/thrust/iterator/retag.h +74 -0
- cuda/cccl/headers/include/thrust/iterator/reverse_iterator.h +221 -0
- cuda/cccl/headers/include/thrust/iterator/shuffle_iterator.h +184 -0
- cuda/cccl/headers/include/thrust/iterator/strided_iterator.h +152 -0
- cuda/cccl/headers/include/thrust/iterator/tabulate_output_iterator.h +149 -0
- cuda/cccl/headers/include/thrust/iterator/transform_input_output_iterator.h +226 -0
- cuda/cccl/headers/include/thrust/iterator/transform_iterator.h +351 -0
- cuda/cccl/headers/include/thrust/iterator/transform_output_iterator.h +190 -0
- cuda/cccl/headers/include/thrust/iterator/zip_iterator.h +357 -0
- cuda/cccl/headers/include/thrust/logical.h +290 -0
- cuda/cccl/headers/include/thrust/memory.h +395 -0
- cuda/cccl/headers/include/thrust/merge.h +725 -0
- cuda/cccl/headers/include/thrust/mismatch.h +261 -0
- cuda/cccl/headers/include/thrust/mr/allocator.h +229 -0
- cuda/cccl/headers/include/thrust/mr/device_memory_resource.h +41 -0
- cuda/cccl/headers/include/thrust/mr/disjoint_pool.h +526 -0
- cuda/cccl/headers/include/thrust/mr/disjoint_sync_pool.h +118 -0
- cuda/cccl/headers/include/thrust/mr/disjoint_tls_pool.h +68 -0
- cuda/cccl/headers/include/thrust/mr/fancy_pointer_resource.h +67 -0
- cuda/cccl/headers/include/thrust/mr/host_memory_resource.h +38 -0
- cuda/cccl/headers/include/thrust/mr/memory_resource.h +217 -0
- cuda/cccl/headers/include/thrust/mr/new.h +100 -0
- cuda/cccl/headers/include/thrust/mr/polymorphic_adaptor.h +63 -0
- cuda/cccl/headers/include/thrust/mr/pool.h +526 -0
- cuda/cccl/headers/include/thrust/mr/pool_options.h +174 -0
- cuda/cccl/headers/include/thrust/mr/sync_pool.h +114 -0
- cuda/cccl/headers/include/thrust/mr/tls_pool.h +65 -0
- cuda/cccl/headers/include/thrust/mr/universal_memory_resource.h +29 -0
- cuda/cccl/headers/include/thrust/mr/validator.h +56 -0
- cuda/cccl/headers/include/thrust/pair.h +102 -0
- cuda/cccl/headers/include/thrust/partition.h +1383 -0
- cuda/cccl/headers/include/thrust/per_device_resource.h +98 -0
- cuda/cccl/headers/include/thrust/random/detail/discard_block_engine.inl +184 -0
- cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine.inl +155 -0
- cuda/cccl/headers/include/thrust/random/detail/linear_congruential_engine_discard.h +104 -0
- cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine.inl +151 -0
- cuda/cccl/headers/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h +53 -0
- cuda/cccl/headers/include/thrust/random/detail/mod.h +101 -0
- cuda/cccl/headers/include/thrust/random/detail/normal_distribution.inl +187 -0
- cuda/cccl/headers/include/thrust/random/detail/normal_distribution_base.h +160 -0
- cuda/cccl/headers/include/thrust/random/detail/random_core_access.h +63 -0
- cuda/cccl/headers/include/thrust/random/detail/subtract_with_carry_engine.inl +201 -0
- cuda/cccl/headers/include/thrust/random/detail/uniform_int_distribution.inl +198 -0
- cuda/cccl/headers/include/thrust/random/detail/uniform_real_distribution.inl +198 -0
- cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine.inl +183 -0
- cuda/cccl/headers/include/thrust/random/detail/xor_combine_engine_max.h +217 -0
- cuda/cccl/headers/include/thrust/random/discard_block_engine.h +240 -0
- cuda/cccl/headers/include/thrust/random/linear_congruential_engine.h +289 -0
- cuda/cccl/headers/include/thrust/random/linear_feedback_shift_engine.h +217 -0
- cuda/cccl/headers/include/thrust/random/normal_distribution.h +257 -0
- cuda/cccl/headers/include/thrust/random/subtract_with_carry_engine.h +247 -0
- cuda/cccl/headers/include/thrust/random/uniform_int_distribution.h +261 -0
- cuda/cccl/headers/include/thrust/random/uniform_real_distribution.h +258 -0
- cuda/cccl/headers/include/thrust/random/xor_combine_engine.h +255 -0
- cuda/cccl/headers/include/thrust/random.h +120 -0
- cuda/cccl/headers/include/thrust/reduce.h +1112 -0
- cuda/cccl/headers/include/thrust/remove.h +768 -0
- cuda/cccl/headers/include/thrust/replace.h +827 -0
- cuda/cccl/headers/include/thrust/reverse.h +213 -0
- cuda/cccl/headers/include/thrust/scan.h +1671 -0
- cuda/cccl/headers/include/thrust/scatter.h +446 -0
- cuda/cccl/headers/include/thrust/sequence.h +277 -0
- cuda/cccl/headers/include/thrust/set_operations.h +3026 -0
- cuda/cccl/headers/include/thrust/shuffle.h +182 -0
- cuda/cccl/headers/include/thrust/sort.h +1320 -0
- cuda/cccl/headers/include/thrust/swap.h +147 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/adjacent_difference.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/assign_value.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/binary_search.h +32 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/copy.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/copy_if.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/count.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/equal.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/execution_policy.h +90 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/extrema.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/fill.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/find.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/for_each.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/gather.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/generate.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/get_value.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/inner_product.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/iter_swap.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/logical.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/malloc_and_free.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/memory.inl +60 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/merge.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/mismatch.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/par.h +62 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/partition.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/per_device_resource.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/reduce.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/reduce_by_key.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/remove.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/replace.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/reverse.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/scan.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/scan_by_key.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/scatter.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/sequence.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/set_operations.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/sort.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/swap_ranges.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/tabulate.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/temporary_buffer.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/transform.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/transform_reduce.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/transform_scan.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_copy.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/uninitialized_fill.h +29 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/unique.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/unique_by_key.h +30 -0
- cuda/cccl/headers/include/thrust/system/cpp/detail/vector.inl +130 -0
- cuda/cccl/headers/include/thrust/system/cpp/execution_policy.h +161 -0
- cuda/cccl/headers/include/thrust/system/cpp/memory.h +109 -0
- cuda/cccl/headers/include/thrust/system/cpp/memory_resource.h +75 -0
- cuda/cccl/headers/include/thrust/system/cpp/pointer.h +119 -0
- cuda/cccl/headers/include/thrust/system/cpp/vector.h +99 -0
- cuda/cccl/headers/include/thrust/system/cuda/config.h +123 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/adjacent_difference.h +219 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/assign_value.h +124 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/binary_search.h +29 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/cdp_dispatch.h +72 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/copy.h +129 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/copy_if.h +255 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/core/agent_launcher.h +289 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/core/load_iterator.h +58 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/core/make_load_iterator.h +60 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/core/triple_chevron_launch.h +191 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/core/util.h +630 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/count.h +75 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/cross_system.h +243 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/dispatch.h +210 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/equal.h +64 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/error.inl +96 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/execution_policy.h +113 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/extrema.h +476 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/fill.h +82 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/find.h +272 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/for_each.h +83 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/gather.h +91 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/generate.h +85 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/get_value.h +65 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/inner_product.h +75 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/internal/copy_cross_system.h +204 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/internal/copy_device_to_device.h +98 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/iter_swap.h +69 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/logical.h +29 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/make_unsigned_special.h +61 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/malloc_and_free.h +121 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/memory.inl +57 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/merge.h +228 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/mismatch.h +217 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/par.h +237 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/par_to_seq.h +95 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/parallel_for.h +81 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/partition.h +405 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/per_device_resource.h +72 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/reduce.h +961 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/reduce_by_key.h +1000 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/remove.h +107 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/replace.h +164 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/reverse.h +88 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/scan.h +342 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/scan_by_key.h +415 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/scatter.h +79 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/sequence.h +29 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/set_operations.h +1736 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/sort.h +482 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/swap_ranges.h +75 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/tabulate.h +75 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/temporary_buffer.h +132 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/terminate.h +53 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/transform.h +403 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/transform_reduce.h +143 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/transform_scan.h +119 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_copy.h +94 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/uninitialized_fill.h +91 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/unique.h +648 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/unique_by_key.h +311 -0
- cuda/cccl/headers/include/thrust/system/cuda/detail/util.h +251 -0
- cuda/cccl/headers/include/thrust/system/cuda/error.h +175 -0
- cuda/cccl/headers/include/thrust/system/cuda/execution_policy.h +39 -0
- cuda/cccl/headers/include/thrust/system/cuda/memory.h +122 -0
- cuda/cccl/headers/include/thrust/system/cuda/memory_resource.h +122 -0
- cuda/cccl/headers/include/thrust/system/cuda/pointer.h +140 -0
- cuda/cccl/headers/include/thrust/system/cuda/vector.h +108 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/adjacent_difference.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/assign_value.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/binary_search.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/copy.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/copy_if.h +52 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/count.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/equal.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/extrema.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/fill.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/find.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/for_each.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/gather.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/generate.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/get_value.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/inner_product.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/iter_swap.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/logical.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/malloc_and_free.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/merge.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/mismatch.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/partition.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/per_device_resource.h +48 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/reduce.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/reduce_by_key.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/remove.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/replace.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/reverse.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/scan.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/scan_by_key.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/scatter.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/sequence.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/set_operations.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/sort.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/swap_ranges.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/tabulate.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/temporary_buffer.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/transform.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/transform_reduce.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/transform_scan.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_copy.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/uninitialized_fill.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/unique.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/adl/unique_by_key.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/bad_alloc.h +64 -0
- cuda/cccl/headers/include/thrust/system/detail/errno.h +125 -0
- cuda/cccl/headers/include/thrust/system/detail/error_category.inl +302 -0
- cuda/cccl/headers/include/thrust/system/detail/error_code.inl +173 -0
- cuda/cccl/headers/include/thrust/system/detail/error_condition.inl +121 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.h +59 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/adjacent_difference.inl +85 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.h +167 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/binary_search.inl +391 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/copy.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/copy.inl +70 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.h +64 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/copy_if.inl +152 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/count.h +54 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/count.inl +90 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/equal.h +55 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/equal.inl +66 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/extrema.h +72 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/extrema.inl +258 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/fill.h +60 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/find.h +55 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/find.inl +143 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/for_each.h +64 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/gather.h +79 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/gather.inl +102 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/generate.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/generate.inl +63 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.h +66 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/inner_product.inl +78 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/logical.h +65 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/memory.h +70 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/memory.inl +83 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/merge.h +105 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/merge.inl +154 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.h +55 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/mismatch.inl +74 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/partition.h +135 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/partition.inl +213 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/per_device_resource.h +49 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/reduce.h +77 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/reduce.inl +106 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.h +89 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/reduce_by_key.inl +192 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/remove.h +92 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/remove.inl +127 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/replace.h +101 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/replace.inl +181 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/reverse.h +54 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/reverse.inl +72 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.h +78 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scalar/binary_search.inl +141 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scan.h +78 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scan.inl +91 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.h +132 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scan_by_key.inl +238 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scatter.h +79 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/scatter.inl +91 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/select_system.h +96 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/sequence.h +55 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/sequence.inl +95 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.h +288 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/set_operations.inl +482 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.h +60 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/shuffle.inl +131 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/sort.h +119 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/sort.inl +181 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.h +50 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/swap_ranges.inl +82 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.h +47 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/tabulate.inl +60 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/tag.h +53 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.h +60 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/temporary_buffer.inl +88 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/transform.h +109 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/transform.inl +185 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.h +56 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/transform_reduce.inl +62 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.h +86 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/transform_scan.inl +119 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_copy.inl +172 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/uninitialized_fill.inl +121 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/unique.h +77 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/unique.inl +119 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.h +87 -0
- cuda/cccl/headers/include/thrust/system/detail/generic/unique_by_key.inl +132 -0
- cuda/cccl/headers/include/thrust/system/detail/internal/decompose.h +123 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/adjacent_difference.h +76 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/assign_value.h +48 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/binary_search.h +142 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/copy.h +55 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/copy.inl +125 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/copy_backward.h +55 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/copy_if.h +77 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/count.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/equal.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/execution_policy.h +78 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/extrema.h +116 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/fill.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/find.h +68 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/for_each.h +80 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/gather.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/general_copy.h +129 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/generate.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/get_value.h +49 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/inner_product.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/insertion_sort.h +147 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/iter_swap.h +51 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/logical.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/malloc_and_free.h +56 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/merge.h +81 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/merge.inl +151 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/mismatch.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/partition.h +309 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/per_device_resource.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/reduce.h +70 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/reduce_by_key.h +104 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/remove.h +185 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/replace.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/reverse.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/scan.h +160 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/scan_by_key.h +151 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/scatter.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/sequence.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/set_operations.h +212 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/sort.h +65 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/sort.inl +187 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.h +61 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_merge_sort.inl +362 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.h +54 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_primitive_sort.inl +130 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.h +54 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/stable_radix_sort.inl +592 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/swap_ranges.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/tabulate.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/temporary_buffer.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/transform.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/transform_reduce.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/transform_scan.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/trivial_copy.h +64 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_copy.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/uninitialized_fill.h +29 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/unique.h +121 -0
- cuda/cccl/headers/include/thrust/system/detail/sequential/unique_by_key.h +112 -0
- cuda/cccl/headers/include/thrust/system/detail/system_error.inl +108 -0
- cuda/cccl/headers/include/thrust/system/error_code.h +512 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/adjacent_difference.h +54 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/assign_value.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/binary_search.h +77 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/copy.h +50 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/copy.inl +74 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.h +56 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/copy_if.inl +59 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/count.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.h +50 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/default_decomposition.inl +65 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/equal.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/execution_policy.h +113 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/extrema.h +66 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/fill.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/find.h +53 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/for_each.h +56 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/for_each.inl +87 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/gather.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/generate.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/get_value.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/inner_product.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/iter_swap.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/logical.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/malloc_and_free.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/memory.inl +93 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/merge.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/mismatch.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/par.h +62 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/partition.h +88 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/partition.inl +102 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/per_device_resource.h +29 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/pragma_omp.h +54 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce.h +54 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce.inl +78 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.h +64 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce_by_key.inl +65 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.h +59 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reduce_intervals.inl +103 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/remove.h +72 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/remove.inl +87 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/replace.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/reverse.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/scan.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/scan_by_key.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/scatter.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/sequence.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/set_operations.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/sort.h +60 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/sort.inl +259 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/swap_ranges.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/tabulate.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/temporary_buffer.h +29 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/transform.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/transform_reduce.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/transform_scan.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_copy.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/uninitialized_fill.h +30 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/unique.h +60 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/unique.inl +71 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.h +67 -0
- cuda/cccl/headers/include/thrust/system/omp/detail/unique_by_key.inl +75 -0
- cuda/cccl/headers/include/thrust/system/omp/execution_policy.h +160 -0
- cuda/cccl/headers/include/thrust/system/omp/memory.h +111 -0
- cuda/cccl/headers/include/thrust/system/omp/memory_resource.h +75 -0
- cuda/cccl/headers/include/thrust/system/omp/pointer.h +120 -0
- cuda/cccl/headers/include/thrust/system/omp/vector.h +99 -0
- cuda/cccl/headers/include/thrust/system/system_error.h +184 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/adjacent_difference.h +54 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/assign_value.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/binary_search.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/copy.h +50 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/copy.inl +73 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.h +47 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/copy_if.inl +136 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/count.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/equal.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/execution_policy.h +92 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/extrema.h +66 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/fill.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/find.h +49 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.h +51 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/for_each.inl +91 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/gather.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/generate.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/get_value.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/inner_product.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/iter_swap.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/logical.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/malloc_and_free.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/memory.inl +94 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/merge.h +77 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/merge.inl +327 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/mismatch.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/par.h +62 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/partition.h +84 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/partition.inl +98 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/per_device_resource.h +29 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.h +54 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/reduce.inl +137 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.h +61 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_by_key.inl +400 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/reduce_intervals.h +140 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/remove.h +76 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/remove.inl +87 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/replace.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/reverse.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/scan.h +59 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/scan.inl +312 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/scan_by_key.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/scatter.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/sequence.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/set_operations.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/sort.h +60 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/sort.inl +295 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/swap_ranges.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/tabulate.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/temporary_buffer.h +29 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/transform.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/transform_reduce.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/transform_scan.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_copy.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/uninitialized_fill.h +30 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/unique.h +60 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/unique.inl +71 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.h +67 -0
- cuda/cccl/headers/include/thrust/system/tbb/detail/unique_by_key.inl +75 -0
- cuda/cccl/headers/include/thrust/system/tbb/execution_policy.h +160 -0
- cuda/cccl/headers/include/thrust/system/tbb/memory.h +111 -0
- cuda/cccl/headers/include/thrust/system/tbb/memory_resource.h +75 -0
- cuda/cccl/headers/include/thrust/system/tbb/pointer.h +120 -0
- cuda/cccl/headers/include/thrust/system/tbb/vector.h +99 -0
- cuda/cccl/headers/include/thrust/system_error.h +57 -0
- cuda/cccl/headers/include/thrust/tabulate.h +125 -0
- cuda/cccl/headers/include/thrust/transform.h +903 -0
- cuda/cccl/headers/include/thrust/transform_reduce.h +190 -0
- cuda/cccl/headers/include/thrust/transform_scan.h +442 -0
- cuda/cccl/headers/include/thrust/tuple.h +142 -0
- cuda/cccl/headers/include/thrust/type_traits/integer_sequence.h +261 -0
- cuda/cccl/headers/include/thrust/type_traits/is_contiguous_iterator.h +182 -0
- cuda/cccl/headers/include/thrust/type_traits/is_execution_policy.h +65 -0
- cuda/cccl/headers/include/thrust/type_traits/is_operator_less_or_greater_function_object.h +184 -0
- cuda/cccl/headers/include/thrust/type_traits/is_operator_plus_function_object.h +116 -0
- cuda/cccl/headers/include/thrust/type_traits/is_trivially_relocatable.h +306 -0
- cuda/cccl/headers/include/thrust/type_traits/logical_metafunctions.h +42 -0
- cuda/cccl/headers/include/thrust/type_traits/unwrap_contiguous_iterator.h +93 -0
- cuda/cccl/headers/include/thrust/uninitialized_copy.h +300 -0
- cuda/cccl/headers/include/thrust/uninitialized_fill.h +268 -0
- cuda/cccl/headers/include/thrust/unique.h +1090 -0
- cuda/cccl/headers/include/thrust/universal_allocator.h +90 -0
- cuda/cccl/headers/include/thrust/universal_ptr.h +34 -0
- cuda/cccl/headers/include/thrust/universal_vector.h +71 -0
- cuda/cccl/headers/include/thrust/version.h +93 -0
- cuda/cccl/headers/include/thrust/zip_function.h +176 -0
- cuda/cccl/headers/include_paths.py +72 -0
- cuda/cccl/parallel/__init__.py +3 -0
- cuda/cccl/parallel/experimental/__init__.py +3 -0
- cuda/cccl/parallel/experimental/_bindings.py +24 -0
- cuda/cccl/parallel/experimental/_bindings.pyi +388 -0
- cuda/cccl/parallel/experimental/_bindings_impl.cpython-311-x86_64-linux-gnu.so +0 -0
- cuda/cccl/parallel/experimental/_bindings_impl.pyx +2158 -0
- cuda/cccl/parallel/experimental/_caching.py +71 -0
- cuda/cccl/parallel/experimental/_cccl_interop.py +371 -0
- cuda/cccl/parallel/experimental/_utils/__init__.py +0 -0
- cuda/cccl/parallel/experimental/_utils/protocols.py +132 -0
- cuda/cccl/parallel/experimental/algorithms/__init__.py +28 -0
- cuda/cccl/parallel/experimental/algorithms/_merge_sort.py +172 -0
- cuda/cccl/parallel/experimental/algorithms/_radix_sort.py +244 -0
- cuda/cccl/parallel/experimental/algorithms/_reduce.py +136 -0
- cuda/cccl/parallel/experimental/algorithms/_scan.py +179 -0
- cuda/cccl/parallel/experimental/algorithms/_segmented_reduce.py +183 -0
- cuda/cccl/parallel/experimental/algorithms/_transform.py +213 -0
- cuda/cccl/parallel/experimental/algorithms/_unique_by_key.py +179 -0
- cuda/cccl/parallel/experimental/cccl/.gitkeep +0 -0
- cuda/cccl/parallel/experimental/cccl/libcccl.c.parallel.so +0 -0
- cuda/cccl/parallel/experimental/iterators/__init__.py +157 -0
- cuda/cccl/parallel/experimental/iterators/_iterators.py +650 -0
- cuda/cccl/parallel/experimental/numba_utils.py +6 -0
- cuda/cccl/parallel/experimental/struct.py +150 -0
- cuda/cccl/parallel/experimental/typing.py +27 -0
- cuda/cccl/py.typed +0 -0
- cuda_cccl-0.1.3.1.0.dev1486.dist-info/METADATA +29 -0
- cuda_cccl-0.1.3.1.0.dev1486.dist-info/RECORD +1819 -0
- cuda_cccl-0.1.3.1.0.dev1486.dist-info/WHEEL +6 -0
- cuda_cccl-0.1.3.1.0.dev1486.dist-info/licenses/LICENSE +1 -0
|
@@ -0,0 +1,2158 @@
|
|
|
1
|
+
# distutils: language = c++
|
|
2
|
+
# cython: language_level=3
|
|
3
|
+
# cython: linetrace=True
|
|
4
|
+
|
|
5
|
+
# Python signatures are declared in the companion Python stub file _bindings.pyi
|
|
6
|
+
# Make sure to update PYI with change to Python API to ensure that Python
|
|
7
|
+
# static type checker tools like mypy green-lights cuda.cccl.parallel
|
|
8
|
+
|
|
9
|
+
from libc.string cimport memset, memcpy
|
|
10
|
+
from libc.stdint cimport uint8_t, uint32_t, uint64_t, int64_t, uintptr_t
|
|
11
|
+
from cpython.bytes cimport PyBytes_FromStringAndSize
|
|
12
|
+
|
|
13
|
+
from cpython.buffer cimport (
|
|
14
|
+
Py_buffer, PyBUF_SIMPLE, PyBUF_ANY_CONTIGUOUS,
|
|
15
|
+
PyBuffer_Release, PyObject_CheckBuffer, PyObject_GetBuffer
|
|
16
|
+
)
|
|
17
|
+
from cpython.pycapsule cimport (
|
|
18
|
+
PyCapsule_CheckExact, PyCapsule_IsValid, PyCapsule_GetPointer
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
import ctypes
|
|
22
|
+
|
|
23
|
+
# Declarations taken from the CUDA driver API header <cuda.h>.
# Only the handle types named below are declared here.
cdef extern from "<cuda.h>":
    # Structs declared without fields; they are only used through the
    # pointer typedefs that follow.
    cdef struct OpaqueCUstream_st
    cdef struct OpaqueCUkernel_st
    cdef struct OpaqueCUlibrary_st

    # Result code type, declared to Cython as a plain int.
    ctypedef int CUresult
    # Handle types: pointers to the field-less structs above.
    ctypedef OpaqueCUstream_st *CUstream
    ctypedef OpaqueCUkernel_st *CUkernel
    ctypedef OpaqueCUlibrary_st *CUlibrary
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Declarations of the enums and structs from the C header "cccl/c/types.h"
# that the extension classes in this module wrap.
cdef extern from "cccl/c/types.h":
    # Type identifiers; wrapped for Python by Enumeration_CCCLType.
    ctypedef enum cccl_type_enum:
        CCCL_INT8
        CCCL_INT16
        CCCL_INT32
        CCCL_INT64
        CCCL_UINT8
        CCCL_UINT16
        CCCL_UINT32
        CCCL_UINT64
        CCCL_FLOAT32
        CCCL_FLOAT64
        CCCL_STORAGE

    # Operator kinds; wrapped for Python by Enumeration_OpKind.
    ctypedef enum cccl_op_kind_t:
        CCCL_STATELESS
        CCCL_STATEFUL

    # Iterator kinds; wrapped for Python by Enumeration_IteratorKind.
    ctypedef enum cccl_iterator_kind_t:
        CCCL_POINTER
        CCCL_ITERATOR

    # Type descriptor: size, alignment and type identifier.
    # Filled in by the TypeInfo class.
    cdef struct cccl_type_info:
        size_t size
        size_t alignment
        cccl_type_enum type

    # Operation descriptor. Filled in by the Op class: `name` and `ltoir`
    # point into Python bytes objects owned by Op, `size`/`alignment`/`state`
    # describe the state blob.
    cdef struct cccl_op_t:
        cccl_op_kind_t type
        const char* name
        const char* ltoir
        size_t ltoir_size
        size_t size
        size_t alignment
        void *state

    # Value descriptor: a type descriptor plus a pointer to the value bytes.
    # Filled in by the Value class.
    cdef struct cccl_value_t:
        cccl_type_info type
        void *state

    # Increment argument of the host advance function; the same storage can
    # be read as a signed or as an unsigned 64-bit offset.
    cdef union cccl_increment_t:
        int64_t signed_offset
        uint64_t unsigned_offset

    # Pointer to a host function taking (void *, cccl_increment_t) and
    # returning nothing; declared callable without the GIL.
    ctypedef void (*cccl_host_op_fn_ptr_t)(void *, cccl_increment_t) nogil

    # Iterator descriptor; wrapped by the Iterator class.
    cdef struct cccl_iterator_t:
        size_t size
        size_t alignment
        cccl_iterator_kind_t type
        cccl_op_t advance
        cccl_op_t dereference
        cccl_type_info value_type
        void *state
        cccl_host_op_fn_ptr_t host_advance

    # Sort orders; wrapped for Python by Enumeration_SortOrder.
    ctypedef enum cccl_sort_order_t:
        CCCL_ASCENDING
        CCCL_DESCENDING
|
|
93
|
+
|
|
94
|
+
cdef void arg_type_check(
    str arg_name,
    object expected_type,
    object arg
) except *:
    """
    Raise TypeError unless `arg` is an instance of `expected_type`.

    `arg_name` is used only to build the error message.
    """
    if isinstance(arg, expected_type):
        return
    actual_type = type(arg)
    raise TypeError(
        f"Expected {arg_name} to have type '{expected_type}', "
        f"got '{actual_type}'"
    )
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
cdef class IntEnumerationMember:
    """
    Represents enumeration member which records the enumeration it is a part of
    for type-checking.
    """
    cdef object parent_class
    cdef str enum_name
    cdef str attr_name
    cdef int attr_value

    def __cinit__(self, object parent_class, str enum_name, str attr_name, int attr_value):
        self.attr_value = attr_value
        self.attr_name = attr_name
        self.enum_name = enum_name
        self.parent_class = parent_class

    cdef str get_repr_str(self):
        # Shared text for __repr__ and __str__: "<EnumName.MEMBER: value>"
        return "<{}.{}: {}>".format(self.enum_name, self.attr_name, self.attr_value)

    def __repr__(self):
        return self.get_repr_str()

    def __str__(self):
        return self.get_repr_str()

    @property
    def parent_class(self):
        "Type of parental enumeration"
        return self.parent_class

    @property
    def name(self):
        "Name of the enumeration member"
        return self.attr_name

    @property
    def value(self):
        return self.attr_value

    def __int__(self):
        return int(self.attr_value)

    def __hash__(self):
        # Hash over the same fields that __eq__ compares, plus the exact type
        return hash((type(self), self.parent_class, <object>self.attr_value))

    def __eq__(self, other):
        cdef IntEnumerationMember rhs
        # Only objects of exactly the same type can compare equal
        if type(other) != type(self):
            return False
        rhs = <IntEnumerationMember>other
        if self.attr_value != rhs.attr_value:
            return False
        return self.parent_class == rhs.parent_class
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
cdef class IntEnumerationBase:
    # Base class of the enumeration containers; stores the enumeration's
    # display name, which the derived classes replace in their own __cinit__.
    cdef str enum_name

    def __cinit__(self):
        self.enum_name = "Undefined"

    @property
    def __name__(self):
        return self.enum_name

    def __repr__(self):
        return "<enum '{}'>".format(self.enum_name)

    def __str__(self):
        return "<enum '{}'>".format(self.enum_name)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
cdef class Enumeration_CCCLType(IntEnumerationBase):
    "Enumeration of CCCL types"
    cdef IntEnumerationMember _int8
    cdef IntEnumerationMember _int16
    cdef IntEnumerationMember _int32
    cdef IntEnumerationMember _int64
    cdef IntEnumerationMember _uint8
    cdef IntEnumerationMember _uint16
    cdef IntEnumerationMember _uint32
    cdef IntEnumerationMember _uint64
    cdef IntEnumerationMember _float32
    cdef IntEnumerationMember _float64
    cdef IntEnumerationMember _storage

    def __cinit__(self):
        # The name must be set first: the factories below read self.enum_name
        self.enum_name = "TypeEnum"
        self._int8 = self.make_INT8()
        self._int16 = self.make_INT16()
        self._int32 = self.make_INT32()
        self._int64 = self.make_INT64()
        self._uint8 = self.make_UINT8()
        self._uint16 = self.make_UINT16()
        self._uint32 = self.make_UINT32()
        self._uint64 = self.make_UINT64()
        self._float32 = self.make_FLOAT32()
        self._float64 = self.make_FLOAT64()
        self._storage = self.make_STORAGE()

    # Factories: each builds the member for one cccl_type_enum value

    cdef IntEnumerationMember make_INT8(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "INT8", cccl_type_enum.CCCL_INT8
        )

    cdef IntEnumerationMember make_INT16(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "INT16", cccl_type_enum.CCCL_INT16
        )

    cdef IntEnumerationMember make_INT32(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "INT32", cccl_type_enum.CCCL_INT32
        )

    cdef IntEnumerationMember make_INT64(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "INT64", cccl_type_enum.CCCL_INT64
        )

    cdef IntEnumerationMember make_UINT8(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "UINT8", cccl_type_enum.CCCL_UINT8
        )

    cdef IntEnumerationMember make_UINT16(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "UINT16", cccl_type_enum.CCCL_UINT16
        )

    cdef IntEnumerationMember make_UINT32(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "UINT32", cccl_type_enum.CCCL_UINT32
        )

    cdef IntEnumerationMember make_UINT64(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "UINT64", cccl_type_enum.CCCL_UINT64
        )

    cdef IntEnumerationMember make_FLOAT32(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "FLOAT32", cccl_type_enum.CCCL_FLOAT32
        )

    cdef IntEnumerationMember make_FLOAT64(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "FLOAT64", cccl_type_enum.CCCL_FLOAT64
        )

    cdef IntEnumerationMember make_STORAGE(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "STORAGE", cccl_type_enum.CCCL_STORAGE
        )

    # Read-only accessors returning the members created in __cinit__

    @property
    def INT8(self):
        return self._int8

    @property
    def INT16(self):
        return self._int16

    @property
    def INT32(self):
        return self._int32

    @property
    def INT64(self):
        return self._int64

    @property
    def UINT8(self):
        return self._uint8

    @property
    def UINT16(self):
        return self._uint16

    @property
    def UINT32(self):
        return self._uint32

    @property
    def UINT64(self):
        return self._uint64

    @property
    def FLOAT32(self):
        return self._float32

    @property
    def FLOAT64(self):
        return self._float64

    @property
    def STORAGE(self):
        return self._storage
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
cdef class Enumeration_OpKind(IntEnumerationBase):
    "Enumeration of operator kinds"
    cdef IntEnumerationMember _stateless
    cdef IntEnumerationMember _stateful

    def __cinit__(self):
        # The name must be set first: the factories below read self.enum_name
        self.enum_name = "OpKindEnum"
        self._stateless = self.make_STATELESS()
        self._stateful = self.make_STATEFUL()

    @property
    def STATELESS(self):
        return self._stateless

    @property
    def STATEFUL(self):
        return self._stateful

    cdef IntEnumerationMember make_STATELESS(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "STATELESS", cccl_op_kind_t.CCCL_STATELESS
        )

    cdef IntEnumerationMember make_STATEFUL(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "STATEFUL", cccl_op_kind_t.CCCL_STATEFUL
        )
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
cdef class Enumeration_IteratorKind(IntEnumerationBase):
    "Enumeration of iterator kinds"
    cdef IntEnumerationMember _pointer
    cdef IntEnumerationMember _iterator

    def __cinit__(self):
        # The name must be set first: the factories below read self.enum_name
        self.enum_name = "IteratorKindEnum"
        self._pointer = self.make_POINTER()
        self._iterator = self.make_ITERATOR()

    @property
    def POINTER(self):
        return self._pointer

    @property
    def ITERATOR(self):
        return self._iterator

    cdef IntEnumerationMember make_POINTER(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "POINTER", cccl_iterator_kind_t.CCCL_POINTER
        )

    cdef IntEnumerationMember make_ITERATOR(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "ITERATOR", cccl_iterator_kind_t.CCCL_ITERATOR
        )
|
|
425
|
+
|
|
426
|
+
cdef class Enumeration_SortOrder(IntEnumerationBase):
    "Enumeration of sort orders (ascending or descending)"
    cdef IntEnumerationMember _ascending
    cdef IntEnumerationMember _descending

    def __cinit__(self):
        # The name must be set first: the factories below read self.enum_name
        self.enum_name = "SortOrder"
        self._ascending = self.make_ASCENDING()
        self._descending = self.make_DESCENDING()

    @property
    def ASCENDING(self):
        return self._ascending

    @property
    def DESCENDING(self):
        return self._descending

    cdef IntEnumerationMember make_ASCENDING(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "ASCENDING", cccl_sort_order_t.CCCL_ASCENDING
        )

    cdef IntEnumerationMember make_DESCENDING(self):
        return IntEnumerationMember(
            type(self), self.enum_name, "DESCENDING", cccl_sort_order_t.CCCL_DESCENDING
        )
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
# Module-level instances of the enumeration classes. Members are reached as
# attributes of these objects (for example `OpKind.STATELESS`, which is used
# as a default argument by Op.__cinit__).
TypeEnum = Enumeration_CCCLType()
OpKind = Enumeration_OpKind()
IteratorKind = Enumeration_IteratorKind()
SortOrder = Enumeration_SortOrder()
|
|
467
|
+
|
|
468
|
+
cpdef bint is_TypeEnum(IntEnumerationMember attr):
    """
    Return True if attribute is a member of TypeEnum enumeration.

    The typed signature also admits `None`; `None` belongs to no enumeration,
    so it yields False instead of having its C-level fields read.
    """
    if attr is None:
        return False
    return attr.parent_class is Enumeration_CCCLType
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
cpdef bint is_OpKind(IntEnumerationMember attr):
    """
    Return True if attribute is a member of OpKind enumeration.

    The typed signature also admits `None`; `None` belongs to no enumeration,
    so it yields False instead of having its C-level fields read.
    """
    if attr is None:
        return False
    return attr.parent_class is Enumeration_OpKind
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
cpdef bint is_IteratorKind(IntEnumerationMember attr):
    """
    Return True if attribute is a member of IteratorKind enumeration.

    The typed signature also admits `None`; `None` belongs to no enumeration,
    so it yields False instead of having its C-level fields read.
    """
    if attr is None:
        return False
    return attr.parent_class is Enumeration_IteratorKind
|
|
481
|
+
|
|
482
|
+
cpdef bint is_SortOrder(IntEnumerationMember attr):
    """
    Return True if attribute is a member of SortOrder enumeration.

    The typed signature also admits `None`; `None` belongs to no enumeration,
    so it yields False instead of having its C-level fields read.
    """
    if attr is None:
        return False
    return attr.parent_class is Enumeration_SortOrder
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
cdef void _validate_alignment(int alignment) except *:
    """
    Alignment must be positive integer and a power of two
    that can be represented by uint32_t type.

    Raises:
        ValueError: if `alignment` is smaller than 1 or is not a power of two.
    """
    cdef uint32_t val
    if alignment < 1:
        # Zero is rejected too, so the requirement is "positive",
        # not just "non-negative".
        raise ValueError(
            "Alignment must be positive, "
            f"got {alignment}."
        )
    val = <uint32_t>alignment
    # A power of two has a single bit set, so clearing its lowest set bit
    # must leave zero.
    if (val & (val - 1)) != 0:
        raise ValueError(
            "Alignment must be a power of two, "
            f"got {alignment}"
        )
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
cdef class Op:
    """
    Represents CCCL Operation

    Args:
        name (str):
            Name of the operation
        operator_type (OpKind):
            Whether operator is stateless or stateful
        ltoir (bytes):
            The LTOIR for the operation compiled for device
        state (bytes, optional):
            State for the stateful operation.
        state_alignment (int, optional):
            Alignment of the state struct. Default: `1`.
    """
    # need Python owner of memory used for operator name
    cdef bytes op_encoded_name
    cdef bytes ltoir_bytes
    cdef bytes state_bytes
    cdef cccl_op_t op_data


    cdef void _set_members(self, cccl_op_kind_t op_type, str name, bytes lto_ir, bytes state, int state_alignment):
        # Start from an all-zero struct, then fill in every field
        memset(&self.op_data, 0, sizeof(cccl_op_t))
        # Reference Python objects in the class to ensure lifetime
        self.op_encoded_name = name.encode("utf-8")
        self.ltoir_bytes = lto_ir
        self.state_bytes = state
        # set fields of op_data struct
        self.op_data.type = op_type
        self.op_data.name = <const char *>self.op_encoded_name
        self.op_data.ltoir = <const char *>lto_ir
        self.op_data.ltoir_size = len(lto_ir)
        self.op_data.size = len(state)
        self.op_data.alignment = state_alignment
        self.op_data.state = <void *><const char *>state


    def __cinit__(self, /, *, name = None, operator_type = OpKind.STATELESS, ltoir = None, state = None, state_alignment = 1):
        # With neither name nor LTOIR given, build an empty operation
        if name is None and ltoir is None:
            name = ""
            ltoir = b""
        if state is None:
            state = b""
        arg_type_check(arg_name="name", expected_type=str, arg=name)
        arg_type_check(arg_name="ltoir", expected_type=bytes, arg=ltoir)
        arg_type_check(arg_name="state", expected_type=bytes, arg=state)
        arg_type_check(arg_name="state_alignment", expected_type=int, arg=state_alignment)
        arg_type_check(arg_name="operator_type", expected_type=IntEnumerationMember, arg=operator_type)
        if not is_OpKind(operator_type):
            raise TypeError(
                f"The operator_type argument should be an enumerator of operator kinds"
            )
        _validate_alignment(state_alignment)
        self._set_members(
            <cccl_op_kind_t> operator_type.value,
            <str> name,
            <bytes> ltoir,
            <bytes> state,
            <int> state_alignment
        )


    cdef void set_state(self, bytes state):
        # The length is taken before any member changes, and is stored with
        # the new pointer so that size and state always describe the same
        # bytes object.
        cdef size_t state_nbytes = len(state)
        self.state_bytes = state
        self.op_data.size = state_nbytes
        self.op_data.state = <void *><const char *>state

    @property
    def state(self):
        return self.state_bytes

    @state.setter
    def state(self, bytes new_value):
        self.set_state(<bytes>new_value)

    @property
    def name(self):
        return self.op_encoded_name.decode("utf-8")

    @property
    def ltoir(self):
        return self.ltoir_bytes

    @property
    def state_alignment(self):
        return self.op_data.alignment

    @property
    def state_typenum(self):
        return self.op_data.type

    def as_bytes(self):
        "Debugging utility to view memory content of library struct"
        cdef uint8_t[:] mem_view = bytearray(sizeof(self.op_data))
        memcpy(&mem_view[0], &self.op_data, sizeof(self.op_data))
        return bytes(mem_view)
|
|
603
|
+
|
|
604
|
+
|
|
605
|
+
cdef class TypeInfo:
    """
    Represents CCCL type info structure

    Args:
        size (int):
            Size of the type in bytes.
        alignment (int):
            Alignment of the type in bytes.
        type_enum (IntEnumerationMember):
            Enumeration member identifying the type.
    """
    cdef cccl_type_info type_info

    def __cinit__(self, int size, int alignment, IntEnumerationMember type_enum):
        # Checks run in argument order; the struct is written only after
        # all of them pass.
        if size <= 0:
            raise ValueError("Size argument must be positive")
        _validate_alignment(alignment)
        if not is_TypeEnum(type_enum):
            raise TypeError(
                "The type argument should be enum of CCCL types"
            )
        self.type_info.type = <cccl_type_enum> type_enum.value
        self.type_info.alignment = alignment
        self.type_info.size = size

    @property
    def size(self):
        return self.type_info.size

    @property
    def alignment(self):
        return self.type_info.alignment

    @property
    def typenum(self):
        return self.type_info.type

    def as_bytes(self):
        "Debugging utility to view memory content of library struct"
        cdef uint8_t[:] raw = bytearray(sizeof(self.type_info))
        memcpy(&raw[0], &self.type_info, sizeof(self.type_info))
        return bytes(raw)
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
cdef class Value:
    """
    Represents CCCL value structure

    Args:
        value_type (TypeInfo):
            type descriptor
        state (object):
            state of the value type. Object is expected to
            implement Python buffer protocol and be able to provide
            simple contiguous array of type `uint8_t`.
    """
    cdef uint8_t[::1] state_obj
    cdef TypeInfo value_type
    cdef cccl_value_t value_data

    def __cinit__(self, TypeInfo value_type, uint8_t[::1] state):
        # Keep both arguments referenced by the object, then fill the struct
        self.value_type = value_type
        self.state_obj = state
        self.value_data.type = value_type.type_info
        self.value_data.state = <void *>&state[0]

    @property
    def type(self):
        return self.value_type

    @property
    def state(self):
        return self.state_obj

    @state.setter
    def state(self, uint8_t[::1] new_value):
        # A replacement must have the same length as the current state
        if len(self.state_obj) != len(new_value):
            raise ValueError("Size mismatch")
        self.state_obj = new_value
        self.value_data.state = <void *>&self.state_obj[0]

    def as_bytes(self):
        "Debugging utility to view memory of native struct"
        cdef uint8_t[:] raw = bytearray(sizeof(self.value_data))
        memcpy(&raw[0], &self.value_data, sizeof(self.value_data))
        return bytes(raw)
|
|
693
|
+
|
|
694
|
+
|
|
695
|
+
cdef void ensure_buffer(object o) except *:
    "Raise TypeError unless `o` supports the Python buffer protocol"
    if PyObject_CheckBuffer(o):
        return
    actual_type = type(o)
    raise TypeError(
        "Object with buffer protocol expected, "
        f"got {actual_type}"
    )
|
|
701
|
+
|
|
702
|
+
|
|
703
|
+
cdef void * get_buffer_pointer(object o, size_t *size):
    """
    Return the address of the memory that `o` exposes through the Python
    buffer protocol.

    If `size` is not NULL, the buffer length in bytes is written to `size[0]`
    (0 when the buffer could not be obtained).

    The buffer view is released before returning, so the returned address is
    only meaningful while the caller keeps `o` alive.

    Raises:
        RuntimeError: if a simple contiguous buffer can not be obtained.
    """
    cdef int status = 0
    cdef void *ptr = NULL
    cdef Py_buffer view

    status = PyObject_GetBuffer(o, &view, PyBUF_SIMPLE | PyBUF_ANY_CONTIGUOUS)
    if status != 0: # pragma: no cover
        # `size` is optional here exactly as on the success path below
        if size is not NULL:
            size[0] = 0
        raise RuntimeError(
            "Can not access simple contiguous buffer"
        )

    ptr = view.buf
    if size is not NULL:
        size[0] = <size_t>view.len
    PyBuffer_Release(&view)

    return ptr
|
|
721
|
+
|
|
722
|
+
|
|
723
|
+
cdef void * ctypes_typed_pointer_payload_ptr(object ctypes_typed_ptr):
    "Get pointer to the value buffer represented by ctypes.pointer(ctypes_val)"
    cdef size_t nbytes = 0
    cdef size_t *slot = NULL
    ensure_buffer(ctypes_typed_ptr)
    # The object's own buffer stores an address; read that address and
    # return it as the result.
    slot = <size_t *>get_buffer_pointer(ctypes_typed_ptr, &nbytes)
    return <void *>(slot[0])
|
|
730
|
+
|
|
731
|
+
|
|
732
|
+
cdef void * ctypes_value_ptr(object ctypes_cdata):
    "Get pointer to the value buffer behind ctypes_val"
    cdef size_t nbytes = 0
    cdef void *data = NULL
    ensure_buffer(ctypes_cdata)
    data = get_buffer_pointer(ctypes_cdata, &nbytes)
    return data
|
|
737
|
+
|
|
738
|
+
|
|
739
|
+
cdef inline void * int_as_ptr(size_t ptr_val):
    "Reinterpret an unsigned integer as an untyped pointer"
    cdef void *as_pointer = <void *>ptr_val
    return as_pointer
|
|
741
|
+
|
|
742
|
+
|
|
743
|
+
cdef class StateBase:
    # Pairs a raw address with a Python object reference. Both are exposed
    # read-only to Python; only Cython code can change them via set_state.
    cdef void *ptr
    cdef object ref

    def __cinit__(self):
        self.ref = None
        self.ptr = NULL

    cdef inline void set_state(self, void *ptr, object ref):
        self.ref = ref
        self.ptr = ptr

    @property
    def pointer(self):
        # The address, returned as an unsigned integer
        cdef size_t address = <size_t>self.ptr
        return address

    @property
    def reference(self):
        return self.ref
|
|
762
|
+
|
|
763
|
+
|
|
764
|
+
cdef class Pointer(StateBase):
    "Represents the pointer value"

    def __cinit__(self, arg):
        # Accepted arguments:
        #   int               - used as the address, no object is retained
        #   ctypes typed ptr  - the address it holds is used, `arg` is retained
        #   ctypes.c_void_p   - its value is used (NULL allowed), `arg` is retained
        # Anything else raises TypeError.
        cdef void *ptr
        cdef object ref

        if isinstance(arg, int):
            ptr = int_as_ptr(arg)
            ref = None
        elif isinstance(arg, ctypes._Pointer):
            ptr = ctypes_typed_pointer_payload_ptr(arg)
            ref = arg
        elif isinstance(arg, ctypes.c_void_p):
            # ctypes reports a NULL c_void_p as value None, which can not be
            # converted to an integer; map it to address 0
            ptr = int_as_ptr(arg.value or 0)
            ref = arg
        else:
            raise TypeError(
                "Expect ctypes pointer, integers, or PointerProxy, "
                f"got type {type(arg)}"
            )
        self.set_state(ptr, ref)
|
|
786
|
+
|
|
787
|
+
|
|
788
|
+
def make_pointer_object(ptr, owner):
    """
    Create a Pointer for address `ptr` that keeps a reference to `owner`.

    Args:
        ptr (int or ctypes.c_void_p):
            The address. A NULL `ctypes.c_void_p` gives address 0.
        owner (object):
            Object stored as the reference of the returned Pointer.

    Raises:
        TypeError: if `ptr` is neither an integer nor a `ctypes.c_void_p`.
    """
    cdef Pointer res = Pointer(0)

    if isinstance(ptr, int):
        res.ptr = int_as_ptr(ptr)
    elif isinstance(ptr, ctypes.c_void_p):
        # ctypes reports a NULL c_void_p as value None, which can not be
        # converted to an integer; map it to address 0
        res.ptr = int_as_ptr(ptr.value or 0)
    else:
        raise TypeError(
            "First argument must be an integer, or ctypes.c_void_p, "
            f"got {type(ptr)}"
        )
    res.ref = owner
    return res
|
|
802
|
+
|
|
803
|
+
|
|
804
|
+
cdef class IteratorState(StateBase):
    "Represents blob referenced by pointer"
    cdef size_t state_nbytes

    def __cinit__(self, arg):
        cdef size_t nbytes = 0
        cdef void *payload = NULL
        cdef object owner = None

        super().__init__()
        if isinstance(arg, ctypes._Pointer):
            # ctypes typed pointer: use the address it holds, retain the
            # pointed-to ctypes object, and take the size of that object
            payload = ctypes_typed_pointer_payload_ptr(arg)
            owner = arg.contents
            nbytes = ctypes.sizeof(owner)
        elif PyObject_CheckBuffer(arg):
            # Buffer object: use its memory directly and retain the object
            payload = get_buffer_pointer(arg, &nbytes)
            owner = arg
        else:
            raise TypeError(
                "Expected a ctypes pointer with content, or object of type bytes or bytearray, "
                f"got type {type(arg)}"
            )
        self.state_nbytes = nbytes
        self.set_state(payload, owner)

    cdef inline size_t get_size(self):
        return self.state_nbytes

    @property
    def size(self):
        return self.state_nbytes

    def __getbuffer__(self, Py_buffer *buffer, int flags):
        cdef Py_ssize_t nbytes = <Py_ssize_t>self.state_nbytes
        # Describe the blob as a one-dimensional array of unsigned bytes
        buffer.obj = self
        buffer.buf = <char *>self.ptr
        buffer.len = nbytes
        buffer.itemsize = 1
        buffer.format = "B" # unsigned char
        buffer.readonly = 0
        buffer.ndim = 1
        buffer.shape = <Py_ssize_t *>&self.state_nbytes
        buffer.strides = &buffer.itemsize
        buffer.suboffsets = NULL
        buffer.internal = NULL

    def __releasebuffer__(self, Py_buffer *buffer):
        # Nothing was allocated in __getbuffer__, so there is nothing to free
        pass
|
|
852
|
+
|
|
853
|
+
|
|
854
|
+
# Capsule name that is_function_pointer_capsule and
# get_function_pointer_from_capsule pass to the PyCapsule API.
cdef const char *function_ptr_capsule_name = "void (void *, cccl_increment_t)";
|
|
855
|
+
|
|
856
|
+
cdef bint is_function_pointer_capsule(object o) noexcept:
    """
    Returns non-zero if input is a valid capsule with
    name 'void (void *, cccl_increment_t)'.
    """
    # Anything that is not exactly a capsule is rejected before the name check
    if not PyCapsule_CheckExact(o):
        return False
    return PyCapsule_IsValid(o, function_ptr_capsule_name)
|
|
865
|
+
|
|
866
|
+
|
|
867
|
+
cdef inline void* get_function_pointer_from_capsule(object cap) except *:
    "Return the pointer stored in capsule `cap`, looked up under function_ptr_capsule_name"
    cdef void *stored = PyCapsule_GetPointer(cap, function_ptr_capsule_name)
    return stored
|
|
869
|
+
|
|
870
|
+
|
|
871
|
+
cdef cccl_host_op_fn_ptr_t unbox_host_advance_fn(object host_fn_obj) except *:
    """
    Convert a Python object describing a host function into a C function pointer.

    Accepted inputs are a ctypes function pointer, an integer address, a
    `ctypes.c_void_p` (a NULL one gives a NULL function pointer), or a capsule
    accepted by `is_function_pointer_capsule`.

    Raises:
        TypeError: if `host_fn_obj` is none of the accepted kinds.
    """
    cdef void *fn_ptr = NULL
    if isinstance(host_fn_obj, ctypes._CFuncPtr):
        # the _CFuncPtr object encapsulates a pointer to the function pointer
        fn_ptr = ctypes_typed_pointer_payload_ptr(host_fn_obj)
        return <cccl_host_op_fn_ptr_t>fn_ptr

    if isinstance(host_fn_obj, int):
        fn_ptr = <void *><uintptr_t>host_fn_obj
        return <cccl_host_op_fn_ptr_t>fn_ptr

    if isinstance(host_fn_obj, ctypes.c_void_p):
        # ctypes reports a NULL c_void_p as value None, which can not be
        # converted to an integer; map it to address 0
        fn_ptr = <void *><uintptr_t>(host_fn_obj.value or 0)
        return <cccl_host_op_fn_ptr_t>fn_ptr

    if is_function_pointer_capsule(host_fn_obj):
        fn_ptr = get_function_pointer_from_capsule(host_fn_obj)
        return <cccl_host_op_fn_ptr_t>fn_ptr

    raise TypeError(
        "Expected ctypes function pointer, ctypes.c_void_p, integer or a named capsule, "
        f"got {type(host_fn_obj)}"
    )
|
|
894
|
+
|
|
895
|
+
|
|
896
|
+
cdef class Iterator:
    """
    Represents CCCL iterator.

    Args:
        alignment (int):
            Alignment of the iterator state
        iterator_type (IntEnumerationMember):
            The type of iterator, `IteratorKind.POINTER` or
            `IteratorKind.ITERATOR`
        advance_fn (Op):
            Descriptor for user-defined `advance` function
            compiled for device
        dereference_fn (Op):
            Descriptor for user-defined `dereference` or `assign`
            function compiled for device
        value_type (TypeInfo):
            Descriptor of the type addressed by the iterator
        state (object, optional):
            Python object for the state of the iterator. For iterators of
            type `ITERATOR` the constructor accepts an `IteratorState`
            instance. For iterators of type `POINTER` the constructor accepts
            a Python `int` or a `Pointer` instance.
            Value `None` represents absence of iterator state.
        host_advance_fn (object, optional):
            Python object for host callable function to advance state by a given
            increment. The argument may only be set for iterators of type
            `IteratorKind.ITERATOR` and raise an exception otherwise. Supported
            types are `int` or `ctypes.c_void_p` (raw pointer), ctypes function
            pointer, or a Python capsule with name `"void *(void *, cccl_increment_t)"`.

    Raises:
        TypeError: if `iterator_type` is not an iterator kind, or `state` has
            a type not accepted for the given kind.
        ValueError: if `host_advance_fn` is given for a `POINTER` iterator.
    """
    # Python-level owners of the operation descriptors copied into iter_data
    cdef Op advance
    cdef Op dereference
    # Object stored alongside iter_data.state (None when state is an int or absent)
    cdef object state_obj
    # Object the host advance function pointer was unboxed from
    cdef object host_advance_obj
    # C struct handed to the CCCL C library
    cdef cccl_iterator_t iter_data

    def __cinit__(self,
        int alignment,
        IntEnumerationMember iterator_type,
        Op advance_fn,
        Op dereference_fn,
        TypeInfo value_type,
        state=None,
        host_advance_fn=None
    ):
        cdef cccl_iterator_kind_t it_kind
        _validate_alignment(alignment)
        if not is_IteratorKind(iterator_type):
            raise TypeError("iterator_type must describe iterator kind")
        it_kind = iterator_type.value
        if it_kind == cccl_iterator_kind_t.CCCL_POINTER:
            if state is None:
                self.state_obj = None
                self.iter_data.size = 0
                self.iter_data.state = NULL
            elif isinstance(state, int):
                self.state_obj = None
                self.iter_data.size = 0
                self.iter_data.state = int_as_ptr(state)
            elif isinstance(state, Pointer):
                self.state_obj = state.reference
                self.iter_data.size = 0
                self.iter_data.state = (<Pointer>state).ptr
            else:
                raise TypeError(
                    "Expect for Iterator of kind POINTER, state must have type Pointer or int, "
                    f"got {type(state)}"
                )
            if host_advance_fn is not None:
                raise ValueError(
                    "host_advance_fn must be set to None for iterators of kind POINTER"
                )
            self.iter_data.host_advance = NULL
            self.host_advance_obj = None
        elif it_kind == cccl_iterator_kind_t.CCCL_ITERATOR:
            if state is None:
                self.state_obj = None
                self.iter_data.size = 0
                self.iter_data.state = NULL
            elif isinstance(state, IteratorState):
                self.state_obj = state.reference
                self.iter_data.size = (<IteratorState>state).size
                self.iter_data.state = (<IteratorState>state).ptr
            else:
                raise TypeError(
                    "For Iterator of kind ITERATOR, state must have type IteratorState, "
                    f"got type {type(state)}"
                )
            if host_advance_fn is not None:
                self.iter_data.host_advance = unbox_host_advance_fn(host_advance_fn)
                self.host_advance_obj = host_advance_fn
            else:
                self.iter_data.host_advance = NULL
                self.host_advance_obj = None
        else:  # pragma: no cover
            raise ValueError("Unrecognized iterator kind")
        self.advance = advance_fn
        self.dereference = dereference_fn
        self.iter_data.alignment = alignment
        self.iter_data.type = <cccl_iterator_kind_t> it_kind
        self.iter_data.advance = self.advance.op_data
        self.iter_data.dereference = self.dereference.op_data
        self.iter_data.value_type = value_type.type_info

    @property
    def advance_op(self):
        "The `Op` descriptor of the advance function"
        return self.advance

    @property
    def dereference_or_assign_op(self):
        "The `Op` descriptor of the dereference/assign function"
        return self.dereference

    @property
    def state(self):
        """
        Iterator state: the pointer value as an integer for `POINTER`
        iterators, otherwise the stored state object (possibly `None`).
        """
        if self.iter_data.type == cccl_iterator_kind_t.CCCL_POINTER:
            return <size_t>self.iter_data.state
        else:
            return self.state_obj

    @state.setter
    def state(self, new_value):
        cdef cccl_iterator_kind_t it_kind = self.iter_data.type
        if it_kind == cccl_iterator_kind_t.CCCL_POINTER:
            # POINTER iterators always carry a zero state size
            if isinstance(new_value, Pointer):
                self.state_obj = (<Pointer>new_value).ref
                self.iter_data.size = 0
                self.iter_data.state = (<Pointer>new_value).ptr
            elif isinstance(new_value, int):
                self.state_obj = None
                self.iter_data.size = 0
                self.iter_data.state = int_as_ptr(new_value)
            elif new_value is None:
                self.state_obj = None
                self.iter_data.size = 0
                self.iter_data.state = NULL
            else:
                raise TypeError(
                    "For iterator with type POINTER, state value must have type int or type Pointer, "
                    f"got type {type(new_value)}"
                )
        elif it_kind == cccl_iterator_kind_t.CCCL_ITERATOR:
            if isinstance(new_value, IteratorState):
                self.state_obj = new_value.reference
                self.iter_data.size = (<IteratorState>new_value).size
                self.iter_data.state = (<IteratorState>new_value).ptr
            elif isinstance(new_value, Pointer):
                # A Pointer carries no size, so the previously recorded size
                # is reused. Validate it BEFORE touching any member so that a
                # rejected assignment leaves the iterator unchanged.
                if self.iter_data.size == 0:
                    raise ValueError("Assigning incomplete state value to iterator without state size information")
                self.state_obj = new_value.reference
                self.iter_data.state = (<Pointer>new_value).ptr
            elif PyObject_CheckBuffer(new_value):
                self.iter_data.state = get_buffer_pointer(new_value, &self.iter_data.size)
                self.state_obj = new_value
            elif new_value is None:
                self.state_obj = None
                self.iter_data.size = 0
                self.iter_data.state = NULL
            else:
                raise TypeError(
                    "For iterator with type ITERATOR, state value must have type IteratorState or type bytes, "
                    f"got type {type(new_value)}"
                )
        else:
            raise TypeError("The new value should be an integer for iterators of POINTER kind, and bytes for ITERATOR kind")

    @property
    def type(self):
        "Iterator kind as an `IteratorKind` member"
        cdef cccl_iterator_kind_t it_kind = self.iter_data.type
        if it_kind == cccl_iterator_kind_t.CCCL_POINTER:
            return IteratorKind.POINTER
        else:
            return IteratorKind.ITERATOR

    def is_kind_pointer(self):
        "True if this iterator is of kind `POINTER`"
        cdef cccl_iterator_kind_t it_kind = self.iter_data.type
        return (it_kind == cccl_iterator_kind_t.CCCL_POINTER)

    def is_kind_iterator(self):
        "True if this iterator is of kind `ITERATOR`"
        cdef cccl_iterator_kind_t it_kind = self.iter_data.type
        return (it_kind == cccl_iterator_kind_t.CCCL_ITERATOR)

    def as_bytes(self):
        "Debugging utility returning a bytes copy of the library struct"
        cdef uint8_t[:] mem_view = bytearray(sizeof(self.iter_data))
        memcpy(&mem_view[0], &self.iter_data, sizeof(self.iter_data))
        return bytes(mem_view)

    @property
    def host_advance_fn(self):
        "Object the host advance function was set from, or `None`"
        return self.host_advance_obj

    @host_advance_fn.setter
    def host_advance_fn(self, func):
        if (self.iter_data.type == cccl_iterator_kind_t.CCCL_ITERATOR):
            if func is not None:
                self.iter_data.host_advance = unbox_host_advance_fn(func)
                self.host_advance_obj = func
            else:
                self.iter_data.host_advance = NULL
                self.host_advance_obj = None
        else:
            raise ValueError(
                "host_advance_fn can only be set for iterators of kind ITERATOR"
            )
|
|
1102
|
+
|
|
1103
|
+
|
|
1104
|
+
cdef class CommonData:
    """
    Bundle of build settings shared by the build-result classes: the
    compute capability pair and four include/toolkit paths.

    The paths are kept as UTF-8 encoded `bytes` so that `const char *`
    views of them can be passed to C functions; those views are backed by
    the `bytes` members of this instance.
    """
    cdef int cc_major
    cdef int cc_minor
    cdef bytes encoded_cub_path
    cdef bytes encoded_thrust_path
    cdef bytes encoded_libcudacxx_path
    cdef bytes encoded_ctk_path

    def __cinit__(
        self,
        int cc_major,
        int cc_minor,
        str cub_path,
        str thrust_path,
        str libcudacxx_path,
        str ctk_path
    ):
        # Compute capability first, then the encoded paths
        self.cc_major = cc_major
        self.cc_minor = cc_minor

        self.encoded_cub_path = cub_path.encode("utf-8")
        self.encoded_thrust_path = thrust_path.encode("utf-8")
        self.encoded_libcudacxx_path = libcudacxx_path.encode("utf-8")
        self.encoded_ctk_path = ctk_path.encode("utf-8")

    # ---- C-level accessors ----

    cdef inline int get_cc_major(self):
        return self.cc_major

    cdef inline int get_cc_minor(self):
        return self.cc_minor

    cdef inline const char * cub_path_get_c_str(self):
        return <const char *>self.encoded_cub_path

    cdef inline const char * thrust_path_get_c_str(self):
        return <const char *>self.encoded_thrust_path

    cdef inline const char * libcudacxx_path_get_c_str(self):
        return <const char *>self.encoded_libcudacxx_path

    cdef inline const char * ctk_path_get_c_str(self):
        return <const char *>self.encoded_ctk_path

    # ---- Python-level read-only views ----

    @property
    def compute_capability(self):
        "Tuple `(cc_major, cc_minor)`"
        return (self.cc_major, self.cc_minor)

    @property
    def cub_path(self):
        "CUB path decoded back to `str`"
        return self.encoded_cub_path.decode("utf-8")

    @property
    def ctk_path(self):
        "CTK path decoded back to `str`"
        return self.encoded_ctk_path.decode("utf-8")

    @property
    def thrust_path(self):
        "Thrust path decoded back to `str`"
        return self.encoded_thrust_path.decode("utf-8")

    @property
    def libcudacxx_path(self):
        "libcudacxx path decoded back to `str`"
        return self.encoded_libcudacxx_path.decode("utf-8")
|
|
1157
|
+
|
|
1158
|
+
# --------------
|
|
1159
|
+
# DeviceReduce
|
|
1160
|
+
# --------------
|
|
1161
|
+
|
|
1162
|
+
cdef extern from "cccl/c/reduce.h":
    # Only the cubin blob fields of the build result are declared here.
    cdef struct cccl_device_reduce_build_result_t 'cccl_device_reduce_build_result_t':
        const char* cubin
        size_t cubin_size

    # Parameter names below mirror how this module calls the functions.
    cdef CUresult cccl_device_reduce_build(
        cccl_device_reduce_build_result_t* build_ptr,
        cccl_iterator_t d_in,
        cccl_iterator_t d_out,
        cccl_op_t op,
        cccl_value_t init,
        int cc_major,
        int cc_minor,
        const char* cub_path,
        const char* thrust_path,
        const char* libcudacxx_path,
        const char* ctk_path
    ) nogil

    cdef CUresult cccl_device_reduce(
        cccl_device_reduce_build_result_t build,
        void * d_temp_storage,
        size_t * temp_storage_bytes,
        cccl_iterator_t d_in,
        cccl_iterator_t d_out,
        uint64_t num_items,
        cccl_op_t op,
        cccl_value_t init,
        CUstream stream
    ) nogil

    cdef CUresult cccl_device_reduce_cleanup(
        cccl_device_reduce_build_result_t* build_ptr
    ) nogil
|
|
1191
|
+
|
|
1192
|
+
|
|
1193
|
+
cdef class DeviceReduceBuildResult:
    """
    Owns a `cccl_device_reduce_build_result_t`.

    Construction calls `cccl_device_reduce_build`; deallocation calls
    `cccl_device_reduce_cleanup`; `compute` calls `cccl_device_reduce`.

    NOTE(review): the typed arguments are not declared `not None`; confirm
    callers never pass `None` for them.
    """
    cdef cccl_device_reduce_build_result_t build_data

    def __cinit__(
        DeviceReduceBuildResult self,
        Iterator d_in,
        Iterator d_out,
        Op op,
        Value h_init,
        CommonData common_data
    ):
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        # C strings are backed by bytes members of common_data
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()
        memset(&self.build_data, 0, sizeof(cccl_device_reduce_build_result_t))

        with nogil:
            status = cccl_device_reduce_build(
                &self.build_data,
                d_in.iter_data,
                d_out.iter_data,
                op.op_data,
                h_init.value_data,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(
                f"Failed building reduce, error code: {status}"
            )

    def __dealloc__(DeviceReduceBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_reduce_cleanup(&self.build_data)
        # Failure is only reported, not raised
        if (status != 0):
            print(f"Return code {status} encountered during reduce result cleanup")

    cpdef size_t compute(
        DeviceReduceBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_in,
        Iterator d_out,
        size_t num_items,
        Op op,
        Value h_init,
        stream
    ):
        """
        Run `cccl_device_reduce` with this build.

        Args:
            temp_storage_ptr: integer address of temporary storage; a falsy
                value is passed to the library as NULL.
            temp_storage_bytes: size of the temporary storage in bytes.
            d_in, d_out: input and output iterators.
            num_items: number of items.
            op: reduction operation descriptor.
            h_init: initial value.
            stream: integer stream handle; a falsy value is passed as NULL.

        Returns:
            The storage size as left by the library in the variable passed
            by pointer. Returned as `size_t` so that sizes of 2 GiB and
            above are not truncated.

        Raises:
            RuntimeError: if the library returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_reduce(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_in.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                op.op_data,
                h_init.value_data,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing reduce, error code: {status}"
            )
        return storage_sz

    def _get_cubin(self):
        "Return the first `cubin_size` bytes of the build's cubin as `bytes`"
        return self.build_data.cubin[:self.build_data.cubin_size]
|
|
1275
|
+
|
|
1276
|
+
# ------------
|
|
1277
|
+
# DeviceScan
|
|
1278
|
+
# ------------
|
|
1279
|
+
|
|
1280
|
+
|
|
1281
|
+
cdef extern from "cccl/c/scan.h":
    ctypedef bint _Bool

    # Only the cubin blob fields of the build result are declared here.
    cdef struct cccl_device_scan_build_result_t 'cccl_device_scan_build_result_t':
        const char* cubin
        size_t cubin_size

    # Parameter names below mirror how this module calls the functions.
    cdef CUresult cccl_device_scan_build(
        cccl_device_scan_build_result_t* build_ptr,
        cccl_iterator_t d_in,
        cccl_iterator_t d_out,
        cccl_op_t op,
        cccl_value_t init,
        _Bool force_inclusive,
        int cc_major,
        int cc_minor,
        const char* cub_path,
        const char* thrust_path,
        const char* libcudacxx_path,
        const char* ctk_path
    ) nogil

    cdef CUresult cccl_device_exclusive_scan(
        cccl_device_scan_build_result_t build,
        void * d_temp_storage,
        size_t * temp_storage_bytes,
        cccl_iterator_t d_in,
        cccl_iterator_t d_out,
        uint64_t num_items,
        cccl_op_t op,
        cccl_value_t init,
        CUstream stream
    ) nogil

    cdef CUresult cccl_device_inclusive_scan(
        cccl_device_scan_build_result_t build,
        void * d_temp_storage,
        size_t * temp_storage_bytes,
        cccl_iterator_t d_in,
        cccl_iterator_t d_out,
        uint64_t num_items,
        cccl_op_t op,
        cccl_value_t init,
        CUstream stream
    ) nogil

    cdef CUresult cccl_device_scan_cleanup(
        cccl_device_scan_build_result_t* build_ptr
    ) nogil
|
|
1325
|
+
|
|
1326
|
+
|
|
1327
|
+
cdef class DeviceScanBuildResult:
    """
    Owns a `cccl_device_scan_build_result_t`.

    Construction calls `cccl_device_scan_build`; deallocation calls
    `cccl_device_scan_cleanup`; `compute_inclusive` / `compute_exclusive`
    call `cccl_device_inclusive_scan` / `cccl_device_exclusive_scan`.

    NOTE(review): the typed arguments are not declared `not None`; confirm
    callers never pass `None` for them.
    """
    cdef cccl_device_scan_build_result_t build_data

    def __cinit__(
        DeviceScanBuildResult self,
        Iterator d_in,
        Iterator d_out,
        Op op,
        Value h_init,
        bint force_inclusive,
        CommonData common_data
    ):
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        # C strings are backed by bytes members of common_data
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()
        memset(&self.build_data, 0, sizeof(cccl_device_scan_build_result_t))

        with nogil:
            status = cccl_device_scan_build(
                &self.build_data,
                d_in.iter_data,
                d_out.iter_data,
                op.op_data,
                h_init.value_data,
                force_inclusive,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(f"Error {status} building scan")

    def __dealloc__(DeviceScanBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_scan_cleanup(&self.build_data)
        # Failure is only reported, not raised
        if (status != 0):
            print(f"Return code {status} encountered during scan result cleanup")

    cpdef size_t compute_inclusive(
        DeviceScanBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_in,
        Iterator d_out,
        size_t num_items,
        Op op,
        Value h_init,
        stream
    ):
        """
        Run `cccl_device_inclusive_scan` with this build.

        Falsy `temp_storage_ptr` / `stream` are passed to the library as
        NULL. Returns the storage size as left by the library in the
        variable passed by pointer; the return type is `size_t` so that
        sizes of 2 GiB and above are not truncated.

        Raises:
            RuntimeError: if the library returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_inclusive_scan(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_in.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                op.op_data,
                h_init.value_data,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing inclusive scan, error code: {status}"
            )
        return storage_sz

    cpdef size_t compute_exclusive(
        DeviceScanBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_in,
        Iterator d_out,
        size_t num_items,
        Op op,
        Value h_init,
        stream
    ):
        """
        Run `cccl_device_exclusive_scan` with this build.

        Falsy `temp_storage_ptr` / `stream` are passed to the library as
        NULL. Returns the storage size as left by the library in the
        variable passed by pointer; the return type is `size_t` so that
        sizes of 2 GiB and above are not truncated.

        Raises:
            RuntimeError: if the library returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_exclusive_scan(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_in.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                op.op_data,
                h_init.value_data,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing exclusive scan, error code: {status}"
            )
        return storage_sz

    def _get_cubin(self):
        "Return the first `cubin_size` bytes of the build's cubin as `bytes`"
        return self.build_data.cubin[:self.build_data.cubin_size]
|
|
1443
|
+
|
|
1444
|
+
# -----------------------
|
|
1445
|
+
# DeviceSegmentedReduce
|
|
1446
|
+
# -----------------------
|
|
1447
|
+
|
|
1448
|
+
|
|
1449
|
+
cdef extern from "cccl/c/segmented_reduce.h":
    # Only the cubin blob fields of the build result are declared here.
    cdef struct cccl_device_segmented_reduce_build_result_t 'cccl_device_segmented_reduce_build_result_t':
        const char* cubin
        size_t cubin_size

    # Parameter names below mirror how this module calls the functions.
    cdef CUresult cccl_device_segmented_reduce_build(
        cccl_device_segmented_reduce_build_result_t* bld_ptr,
        cccl_iterator_t d_in,
        cccl_iterator_t d_out,
        cccl_iterator_t start_offsets,
        cccl_iterator_t end_offsets,
        cccl_op_t op,
        cccl_value_t init,
        int cc_major,
        int cc_minor,
        const char* cub_path,
        const char* thrust_path,
        const char* libcudacxx_path,
        const char* ctk_path
    ) nogil

    cdef CUresult cccl_device_segmented_reduce(
        cccl_device_segmented_reduce_build_result_t build,
        void * d_temp_storage,
        size_t * temp_storage_bytes,
        cccl_iterator_t d_in,
        cccl_iterator_t d_out,
        uint64_t num_items,
        cccl_iterator_t start_offsets,
        cccl_iterator_t end_offsets,
        cccl_op_t op,
        cccl_value_t init,
        CUstream stream
    ) nogil

    cdef CUresult cccl_device_segmented_reduce_cleanup(
        cccl_device_segmented_reduce_build_result_t* bld_ptr
    ) nogil
|
|
1482
|
+
|
|
1483
|
+
|
|
1484
|
+
cdef class DeviceSegmentedReduceBuildResult:
    """
    Owns a `cccl_device_segmented_reduce_build_result_t`.

    Construction calls `cccl_device_segmented_reduce_build`; deallocation
    calls `cccl_device_segmented_reduce_cleanup`; `compute` calls
    `cccl_device_segmented_reduce`.

    NOTE(review): the typed arguments are not declared `not None`; confirm
    callers never pass `None` for them.
    """
    cdef cccl_device_segmented_reduce_build_result_t build_data

    def __cinit__(
        DeviceSegmentedReduceBuildResult self,
        Iterator d_in,
        Iterator d_out,
        Iterator start_offsets,
        Iterator end_offsets,
        Op op,
        Value h_init,
        CommonData common_data
    ):
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        # C strings are backed by bytes members of common_data
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        memset(&self.build_data, 0, sizeof(cccl_device_segmented_reduce_build_result_t))
        with nogil:
            status = cccl_device_segmented_reduce_build(
                &self.build_data,
                d_in.iter_data,
                d_out.iter_data,
                start_offsets.iter_data,
                end_offsets.iter_data,
                op.op_data,
                h_init.value_data,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(
                f"Failed building segmented_reduce, error code: {status}"
            )

    def __dealloc__(DeviceSegmentedReduceBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_segmented_reduce_cleanup(&self.build_data)
        # Failure is only reported, not raised
        if (status != 0):
            print(f"Return code {status} encountered during segmented_reduce result cleanup")

    cpdef size_t compute(
        DeviceSegmentedReduceBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_in,
        Iterator d_out,
        size_t num_items,
        Iterator start_offsets,
        Iterator end_offsets,
        Op op,
        Value h_init,
        stream
    ):
        """
        Run `cccl_device_segmented_reduce` with this build.

        Falsy `temp_storage_ptr` / `stream` are passed to the library as
        NULL. Returns the storage size as left by the library in the
        variable passed by pointer; the return type is `size_t` so that
        sizes of 2 GiB and above are not truncated.

        Raises:
            RuntimeError: if the library returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL

        with nogil:
            status = cccl_device_segmented_reduce(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_in.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                start_offsets.iter_data,
                end_offsets.iter_data,
                op.op_data,
                h_init.value_data,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing segmented_reduce, error code: {status}"
            )
        return storage_sz

    def _get_cubin(self):
        "Return the first `cubin_size` bytes of the build's cubin as `bytes`"
        return self.build_data.cubin[:self.build_data.cubin_size]
|
|
1574
|
+
# -----------------
|
|
1575
|
+
# DeviceMergeSort
|
|
1576
|
+
# -----------------
|
|
1577
|
+
|
|
1578
|
+
|
|
1579
|
+
cdef extern from "cccl/c/merge_sort.h":
    # Only the cubin blob fields of the build result are declared here.
    cdef struct cccl_device_merge_sort_build_result_t 'cccl_device_merge_sort_build_result_t':
        const char* cubin
        size_t cubin_size

    # Parameter names below mirror how this module calls the functions.
    cdef CUresult cccl_device_merge_sort_build(
        cccl_device_merge_sort_build_result_t *bld_ptr,
        cccl_iterator_t d_in_keys,
        cccl_iterator_t d_in_items,
        cccl_iterator_t d_out_keys,
        cccl_iterator_t d_out_items,
        cccl_op_t op,
        int cc_major,
        int cc_minor,
        const char* cub_path,
        const char* thrust_path,
        const char* libcudacxx_path,
        const char* ctk_path
    ) nogil

    cdef CUresult cccl_device_merge_sort(
        cccl_device_merge_sort_build_result_t build,
        void * d_temp_storage,
        size_t * temp_storage_bytes,
        cccl_iterator_t d_in_keys,
        cccl_iterator_t d_in_items,
        cccl_iterator_t d_out_keys,
        cccl_iterator_t d_out_items,
        uint64_t num_items,
        cccl_op_t op,
        CUstream stream
    ) nogil

    cdef CUresult cccl_device_merge_sort_cleanup(
        cccl_device_merge_sort_build_result_t* bld_ptr
    ) nogil
|
|
1610
|
+
|
|
1611
|
+
|
|
1612
|
+
cdef class DeviceMergeSortBuildResult:
    """
    Owns a `cccl_device_merge_sort_build_result_t`.

    Construction calls `cccl_device_merge_sort_build`; deallocation calls
    `cccl_device_merge_sort_cleanup`; `compute` calls
    `cccl_device_merge_sort`.

    NOTE(review): the typed arguments are not declared `not None`; confirm
    callers never pass `None` for them.
    """
    cdef cccl_device_merge_sort_build_result_t build_data

    def __cinit__(
        DeviceMergeSortBuildResult self,
        Iterator d_in_keys,
        Iterator d_in_items,
        Iterator d_out_keys,
        Iterator d_out_items,
        Op op,
        CommonData common_data
    ):
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        # C strings are backed by bytes members of common_data
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        memset(&self.build_data, 0, sizeof(cccl_device_merge_sort_build_result_t))
        with nogil:
            status = cccl_device_merge_sort_build(
                &self.build_data,
                d_in_keys.iter_data,
                d_in_items.iter_data,
                d_out_keys.iter_data,
                d_out_items.iter_data,
                op.op_data,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(
                f"Failed building merge_sort, error code: {status}"
            )

    def __dealloc__(DeviceMergeSortBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_merge_sort_cleanup(&self.build_data)
        # Failure is only reported, not raised
        if (status != 0):
            print(f"Return code {status} encountered during merge_sort result cleanup")

    cpdef size_t compute(
        DeviceMergeSortBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_in_keys,
        Iterator d_in_items,
        Iterator d_out_keys,
        Iterator d_out_items,
        size_t num_items,
        Op op,
        stream
    ):
        """
        Run `cccl_device_merge_sort` with this build.

        Falsy `temp_storage_ptr` / `stream` are passed to the library as
        NULL. Returns the storage size as left by the library in the
        variable passed by pointer; the return type is `size_t` so that
        sizes of 2 GiB and above are not truncated.

        Raises:
            RuntimeError: if the library returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL
        with nogil:
            status = cccl_device_merge_sort(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_in_keys.iter_data,
                d_in_items.iter_data,
                d_out_keys.iter_data,
                d_out_items.iter_data,
                <uint64_t>num_items,
                op.op_data,
                c_stream
            )
        if status != 0:
            raise RuntimeError(
                f"Failed executing merge_sort, error code: {status}"
            )
        return storage_sz

    def _get_cubin(self):
        "Return the first `cubin_size` bytes of the build's cubin as `bytes`"
        return self.build_data.cubin[:self.build_data.cubin_size]
|
|
1698
|
+
|
|
1699
|
+
|
|
1700
|
+
# -------------------
|
|
1701
|
+
# DeviceUniqueByKey
|
|
1702
|
+
# -------------------
|
|
1703
|
+
|
|
1704
|
+
cdef extern from "cccl/c/unique_by_key.h":
    # Only the cubin blob fields of the build result are declared here.
    cdef struct cccl_device_unique_by_key_build_result_t 'cccl_device_unique_by_key_build_result_t':
        const char* cubin
        size_t cubin_size

    # Trailing build parameters are named after how this module passes them.
    cdef CUresult cccl_device_unique_by_key_build(
        cccl_device_unique_by_key_build_result_t *build_ptr,
        cccl_iterator_t d_keys_in,
        cccl_iterator_t d_values_in,
        cccl_iterator_t d_keys_out,
        cccl_iterator_t d_values_out,
        cccl_iterator_t d_num_selected_out,
        cccl_op_t comparison_op,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    ) nogil

    cdef CUresult cccl_device_unique_by_key(
        cccl_device_unique_by_key_build_result_t build,
        void *d_storage_ptr,
        size_t *d_storage_nbytes,
        cccl_iterator_t d_keys_in,
        cccl_iterator_t d_values_in,
        cccl_iterator_t d_keys_out,
        cccl_iterator_t d_values_out,
        cccl_iterator_t d_num_selected_out,
        cccl_op_t comparison_op,
        size_t num_items,
        CUstream stream
    ) nogil

    cdef CUresult cccl_device_unique_by_key_cleanup(
        cccl_device_unique_by_key_build_result_t *build_ptr
    ) nogil
|
|
1738
|
+
|
|
1739
|
+
|
|
1740
|
+
cdef class DeviceUniqueByKeyBuildResult:
    """Holds a ``cccl_device_unique_by_key_build_result_t``.

    The result is built in ``__cinit__`` via ``cccl_device_unique_by_key_build``
    and passed to ``cccl_device_unique_by_key_cleanup`` in ``__dealloc__``.
    """
    cdef cccl_device_unique_by_key_build_result_t build_data

    def __cinit__(
        DeviceUniqueByKeyBuildResult self,
        Iterator d_keys_in,
        Iterator d_values_in,
        Iterator d_keys_out,
        Iterator d_values_out,
        Iterator d_num_selected_out,
        Op comparison_op,
        CommonData common_data
    ):
        """Build the unique_by_key algorithm.

        Raises:
            RuntimeError: if the build call returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        # Zero the struct first so that __dealloc__ sees a well-defined value
        # even when the build below fails and raises.
        memset(&self.build_data, 0, sizeof(cccl_device_unique_by_key_build_result_t))
        with nogil:
            status = cccl_device_unique_by_key_build(
                &self.build_data,
                d_keys_in.iter_data,
                d_values_in.iter_data,
                d_keys_out.iter_data,
                d_values_out.iter_data,
                d_num_selected_out.iter_data,
                comparison_op.op_data,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(
                f"Failed building unique_by_key, error code: {status}"
            )

    def __dealloc__(DeviceUniqueByKeyBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_unique_by_key_cleanup(&self.build_data)
        # Failures are only reported here, not raised.
        if (status != 0):
            print(f"Return code {status} encountered during unique_by_key result cleanup")

    cpdef size_t compute(
        DeviceUniqueByKeyBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_keys_in,
        Iterator d_values_in,
        Iterator d_keys_out,
        Iterator d_values_out,
        Iterator d_num_selected_out,
        Op comparison_op,
        size_t num_items,
        stream
    ):
        """Run ``cccl_device_unique_by_key`` with this build result.

        ``temp_storage_ptr`` and ``stream`` are integer addresses; a falsy
        value is passed to the C call as NULL.

        Returns:
            The value of the storage-size variable after the call. It is
            passed to the C function by pointer.
            NOTE(review): presumably the required temp-storage size in bytes
            when ``temp_storage_ptr`` is NULL - confirm against the C header.

        Raises:
            RuntimeError: if the C call returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef CUstream c_stream = (<CUstream><uintptr_t>(stream)) if stream else NULL

        with nogil:
            status = cccl_device_unique_by_key(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_keys_in.iter_data,
                d_values_in.iter_data,
                d_keys_out.iter_data,
                d_values_out.iter_data,
                d_num_selected_out.iter_data,
                comparison_op.op_data,
                <uint64_t>num_items,
                c_stream
            )

        if status != 0:
            raise RuntimeError(
                f"Failed executing unique_by_key, error code: {status}"
            )
        # Returned as size_t: a C int return type would truncate byte counts
        # larger than INT_MAX.
        return storage_sz

    def _get_cubin(self):
        """Return the first ``cubin_size`` bytes of ``build_data.cubin``."""
        return self.build_data.cubin[:self.build_data.cubin_size]
|
|
1831
|
+
|
|
1832
|
+
# -----------------
|
|
1833
|
+
# DeviceRadixSort
|
|
1834
|
+
# -----------------
|
|
1835
|
+
|
|
1836
|
+
cdef extern from "cccl/c/radix_sort.h" nogil:
    # Build result for radix_sort. The two fields declared here are the ones
    # this file reads (see ``_get_cubin``).
    cdef struct cccl_device_radix_sort_build_result_t 'cccl_device_radix_sort_build_result_t':
        const char* cubin
        size_t cubin_size

    # Build step. The trailing arguments are named after the values passed at
    # the call site in DeviceRadixSortBuildResult.__cinit__.
    cdef CUresult cccl_device_radix_sort_build(
        cccl_device_radix_sort_build_result_t *build_ptr,
        cccl_sort_order_t sort_order,
        cccl_iterator_t d_keys_in,
        cccl_iterator_t d_values_in,
        cccl_op_t decomposer,
        const char* decomposer_return_type,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    )

    # Execution step; takes the build result by value.
    cdef CUresult cccl_device_radix_sort(
        cccl_device_radix_sort_build_result_t build,
        void *d_storage_ptr,
        size_t *d_storage_nbytes,
        cccl_iterator_t d_keys_in,
        cccl_iterator_t d_keys_out,
        cccl_iterator_t d_values_in,
        cccl_iterator_t d_values_out,
        cccl_op_t decomposer,
        size_t num_items,
        int begin_bit,
        int end_bit,
        bint is_overwrite_okay,
        int* selector,
        CUstream stream
    )

    # Cleanup step for a build result.
    cdef CUresult cccl_device_radix_sort_cleanup(
        cccl_device_radix_sort_build_result_t *build_ptr
    )
|
|
1871
|
+
|
|
1872
|
+
|
|
1873
|
+
cdef class DeviceRadixSortBuildResult:
    """Holds a ``cccl_device_radix_sort_build_result_t``.

    The result is built in ``__cinit__`` via ``cccl_device_radix_sort_build``
    and passed to ``cccl_device_radix_sort_cleanup`` in ``__dealloc__``.
    """
    cdef cccl_device_radix_sort_build_result_t build_data

    def __cinit__(
        DeviceRadixSortBuildResult self,
        cccl_sort_order_t order,
        Iterator d_keys_in,
        Iterator d_values_in,
        Op decomposer_op,
        const char* decomposer_return_type,
        CommonData common_data
    ):
        """Build the radix_sort algorithm for the given sort ``order``.

        Raises:
            RuntimeError: if the build call returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        # Zero the struct first so that __dealloc__ sees a well-defined value
        # even when the build below fails and raises.
        memset(&self.build_data, 0, sizeof(cccl_device_radix_sort_build_result_t))
        with nogil:
            status = cccl_device_radix_sort_build(
                &self.build_data,
                order,
                d_keys_in.iter_data,
                d_values_in.iter_data,
                decomposer_op.op_data,
                decomposer_return_type,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            raise RuntimeError(
                f"Failed building radix_sort, error code: {status}"
            )

    def __dealloc__(DeviceRadixSortBuildResult self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_radix_sort_cleanup(&self.build_data)
        # Failures are only reported here, not raised.
        if (status != 0):
            print(f"Return code {status} encountered during radix_sort result cleanup")

    cpdef tuple compute(
        DeviceRadixSortBuildResult self,
        temp_storage_ptr,
        temp_storage_bytes,
        Iterator d_keys_in,
        Iterator d_keys_out,
        Iterator d_values_in,
        Iterator d_values_out,
        Op decomposer_op,
        size_t num_items,
        int begin_bit,
        int end_bit,
        bint is_overwrite_okay,
        selector,
        stream
    ):
        """Run ``cccl_device_radix_sort`` with this build result.

        ``temp_storage_ptr`` and ``stream`` are integer addresses; a falsy
        value is passed to the C call as NULL.

        Returns:
            ``(storage_sz, selector)``: the values of the storage-size and
            selector variables after the call. Both are passed to the C
            function by pointer.
            NOTE(review): presumably the required temp-storage size and the
            index of the buffer holding the sorted output - confirm against
            the C header.

        Raises:
            RuntimeError: if the C call returns a non-zero status.
        """
        cdef CUresult status = -1
        # uintptr_t is the address-sized integer type used by the sibling
        # classes for the same casts.
        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
        cdef size_t storage_sz = <size_t>temp_storage_bytes
        cdef int selector_int = <int>selector
        cdef CUstream c_stream = (<CUstream><uintptr_t>(stream)) if stream else NULL

        with nogil:
            status = cccl_device_radix_sort(
                self.build_data,
                storage_ptr,
                &storage_sz,
                d_keys_in.iter_data,
                d_keys_out.iter_data,
                d_values_in.iter_data,
                d_values_out.iter_data,
                decomposer_op.op_data,
                <uint64_t>num_items,
                begin_bit,
                end_bit,
                is_overwrite_okay,
                &selector_int,
                c_stream
            )

        if status != 0:
            # The sort order is fixed at build time and may be either
            # direction, so the message does not name one.
            raise RuntimeError(
                f"Failed executing radix_sort, error code: {status}"
            )
        return <object>storage_sz, <object>selector_int

    def _get_cubin(self):
        """Return the first ``cubin_size`` bytes of ``build_data.cubin``."""
        return self.build_data.cubin[:self.build_data.cubin_size]
|
|
1970
|
+
|
|
1971
|
+
# --------------------------------------------
|
|
1972
|
+
# DeviceUnaryTransform/DeviceBinaryTransform
|
|
1973
|
+
# --------------------------------------------
|
|
1974
|
+
cdef extern from "cccl/c/transform.h" nogil:
    # Build result shared by the unary and binary transform entry points.
    # The two fields declared here are the ones this file reads.
    cdef struct cccl_device_transform_build_result_t:
        const char* cubin
        size_t cubin_size

    # Unary build step. The trailing arguments are named after the values
    # passed at the call site in DeviceUnaryTransform.__cinit__.
    cdef CUresult cccl_device_unary_transform_build(
        cccl_device_transform_build_result_t *build_ptr,
        cccl_iterator_t d_in,
        cccl_iterator_t d_out,
        cccl_op_t op,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    )

    # Unary execution step; takes the build result by value.
    cdef CUresult cccl_device_unary_transform(
        cccl_device_transform_build_result_t build,
        cccl_iterator_t d_in,
        cccl_iterator_t d_out,
        uint64_t num_items,
        cccl_op_t op,
        CUstream stream
    )

    # Binary build step. The trailing arguments are named after the values
    # passed at the call site in DeviceBinaryTransform.__cinit__.
    cdef CUresult cccl_device_binary_transform_build(
        cccl_device_transform_build_result_t* build_ptr,
        cccl_iterator_t d_in1,
        cccl_iterator_t d_in2,
        cccl_iterator_t d_out,
        cccl_op_t op,
        int cc_major,
        int cc_minor,
        const char *cub_path,
        const char *thrust_path,
        const char *libcudacxx_path,
        const char *ctk_path
    )

    # Binary execution step; takes the build result by value.
    cdef CUresult cccl_device_binary_transform(
        cccl_device_transform_build_result_t build,
        cccl_iterator_t d_in1,
        cccl_iterator_t d_in2,
        cccl_iterator_t d_out,
        uint64_t num_items,
        cccl_op_t op,
        CUstream stream
    )

    # Cleanup step used for both unary and binary build results.
    cdef CUresult cccl_device_transform_cleanup(
        cccl_device_transform_build_result_t *build_ptr
    )
|
|
2016
|
+
|
|
2017
|
+
|
|
2018
|
+
cdef class DeviceUnaryTransform:
    """Holds a ``cccl_device_transform_build_result_t`` for a unary transform.

    The result is built in ``__cinit__`` via
    ``cccl_device_unary_transform_build`` and passed to
    ``cccl_device_transform_cleanup`` in ``__dealloc__``.
    """
    cdef cccl_device_transform_build_result_t build_data

    def __cinit__(
        self,
        Iterator d_in,
        Iterator d_out,
        Op op,
        CommonData common_data
    ):
        """Build the unary transform.

        Raises:
            RuntimeError: if the build call returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        # Zero the struct first so that __dealloc__ sees a well-defined value
        # even when the build below fails and raises.
        memset(&self.build_data, 0, sizeof(cccl_device_transform_build_result_t))

        with nogil:
            status = cccl_device_unary_transform_build(
                &self.build_data,
                d_in.iter_data,
                d_out.iter_data,
                op.op_data,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            # The status code is appended so a failure can be diagnosed; the
            # original message prefix is kept unchanged.
            raise RuntimeError(
                f"Failed to build unary transform, error code: {status}"
            )

    def __dealloc__(DeviceUnaryTransform self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_transform_cleanup(&self.build_data)
        # Failures are only reported here, not raised.
        if (status != 0):
            print(f"Return code {status} encountered during unary transform result cleanup")

    cpdef void compute(
        DeviceUnaryTransform self,
        Iterator d_in,
        Iterator d_out,
        size_t num_items,
        Op op,
        stream
    ):
        """Run ``cccl_device_unary_transform`` with this build result.

        ``stream`` is an integer address; a falsy value is passed to the C
        call as NULL.

        Raises:
            RuntimeError: if the C call returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef CUstream c_stream = (<CUstream><uintptr_t>(stream)) if stream else NULL
        with nogil:
            status = cccl_device_unary_transform(
                self.build_data,
                d_in.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                op.op_data,
                c_stream
            )
        if (status != 0):
            raise RuntimeError(
                f"Failed to compute unary transform, error code: {status}"
            )

    def _get_cubin(self):
        """Return the first ``cubin_size`` bytes of ``build_data.cubin``."""
        return self.build_data.cubin[:self.build_data.cubin_size]
|
|
2086
|
+
|
|
2087
|
+
|
|
2088
|
+
cdef class DeviceBinaryTransform:
    """Holds a ``cccl_device_transform_build_result_t`` for a binary transform.

    The result is built in ``__cinit__`` via
    ``cccl_device_binary_transform_build`` and passed to
    ``cccl_device_transform_cleanup`` in ``__dealloc__``.
    """
    cdef cccl_device_transform_build_result_t build_data

    def __cinit__(
        self,
        Iterator d_in1,
        Iterator d_in2,
        Iterator d_out,
        Op op,
        CommonData common_data
    ):
        """Build the binary transform.

        Raises:
            RuntimeError: if the build call returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef int cc_major = common_data.get_cc_major()
        cdef int cc_minor = common_data.get_cc_minor()
        cdef const char *cub_path = common_data.cub_path_get_c_str()
        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
        cdef const char *ctk_path = common_data.ctk_path_get_c_str()

        # Zero the struct first so that __dealloc__ sees a well-defined value
        # even when the build below fails and raises.
        memset(&self.build_data, 0, sizeof(cccl_device_transform_build_result_t))

        with nogil:
            status = cccl_device_binary_transform_build(
                &self.build_data,
                d_in1.iter_data,
                d_in2.iter_data,
                d_out.iter_data,
                op.op_data,
                cc_major,
                cc_minor,
                cub_path,
                thrust_path,
                libcudacxx_path,
                ctk_path,
            )
        if status != 0:
            # The status code is appended so a failure can be diagnosed; the
            # original message prefix is kept unchanged.
            raise RuntimeError(
                f"Failed to build binary transform, error code: {status}"
            )

    def __dealloc__(DeviceBinaryTransform self):
        cdef CUresult status = -1
        with nogil:
            status = cccl_device_transform_cleanup(&self.build_data)
        # Failures are only reported here, not raised.
        if (status != 0):
            print(f"Return code {status} encountered during binary transform result cleanup")

    cpdef void compute(
        DeviceBinaryTransform self,
        Iterator d_in1,
        Iterator d_in2,
        Iterator d_out,
        size_t num_items,
        Op op,
        stream
    ):
        """Run ``cccl_device_binary_transform`` with this build result.

        ``stream`` is an integer address; a falsy value is passed to the C
        call as NULL.

        Raises:
            RuntimeError: if the C call returns a non-zero status.
        """
        cdef CUresult status = -1
        cdef CUstream c_stream = (<CUstream><uintptr_t>(stream)) if stream else NULL
        with nogil:
            status = cccl_device_binary_transform(
                self.build_data,
                d_in1.iter_data,
                d_in2.iter_data,
                d_out.iter_data,
                <uint64_t>num_items,
                op.op_data,
                c_stream
            )
        if (status != 0):
            raise RuntimeError(
                f"Failed to compute binary transform, error code: {status}"
            )

    def _get_cubin(self):
        """Return the first ``cubin_size`` bytes of ``build_data.cubin``."""
        return self.build_data.cubin[:self.build_data.cubin_size]
|