PyPI - cuda-cccl - Versions diffs - 0.1.3.2.0.dev438__cp312-cp312-manylinux_2_24_aarch64.whl → 0.3.0__cp312-cp312-manylinux_2_24_aarch64.whl - Mend

cuda-cccl 0.1.3.2.0.dev438__cp312-cp312-manylinux_2_24_aarch64.whl → 0.3.0__cp312-cp312-manylinux_2_24_aarch64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cuda-cccl might be problematic. More details are available via the link on the original page.

Files changed (60)

cuda/cccl/headers/include/cuda/std/__floating_point/cast.h CHANGED Viewed

@@ -76,25 +76,25 @@ template <class _To, class _From>
 #if _CCCL_HAS_NVFP8_E8M0()
     else if constexpr (is_same_v<_To, __nv_fp8_e8m0>)
     {
-      return ::cuda::std::__fp_from_storage<__nv_fp8_e8m0>(::__nv_cvt_float_to_e8m0(__v, __NV_NOSAT, cudaRoundZero));
+      return ::cuda::std::__fp_from_storage<__nv_fp8_e8m0>(::__nv_cvt_float_to_e8m0(__v, __NV_NOSAT, ::cudaRoundZero));
     }
 #endif // _CCCL_HAS_NVFP8_E8M0()
 #if _CCCL_HAS_NVFP6_E2M3()
     else if constexpr (is_same_v<_To, __nv_fp6_e2m3>)
     {
-      return ::cuda::std::__fp_from_storage<__nv_fp6_e2m3>(::__nv_cvt_float_to_fp6(__v, __NV_E2M3, cudaRoundNearest));
+      return ::cuda::std::__fp_from_storage<__nv_fp6_e2m3>(::__nv_cvt_float_to_fp6(__v, __NV_E2M3, ::cudaRoundNearest));
     }
 #endif // _CCCL_HAS_NVFP6_E2M3()
 #if _CCCL_HAS_NVFP6_E3M2()
     else if constexpr (is_same_v<_To, __nv_fp6_e3m2>)
     {
-      return ::cuda::std::__fp_from_storage<__nv_fp6_e3m2>(::__nv_cvt_float_to_fp6(__v, __NV_E3M2, cudaRoundNearest));
+      return ::cuda::std::__fp_from_storage<__nv_fp6_e3m2>(::__nv_cvt_float_to_fp6(__v, __NV_E3M2, ::cudaRoundNearest));
     }
 #endif // _CCCL_HAS_NVFP6_E3M2()
 #if _CCCL_HAS_NVFP4_E2M1()
     else if constexpr (is_same_v<_To, __nv_fp4_e2m1>)
     {
-      return ::cuda::std::__fp_from_storage<__nv_fp4_e2m1>(::__nv_cvt_float_to_fp4(__v, __NV_E2M1, cudaRoundNearest));
+      return ::cuda::std::__fp_from_storage<__nv_fp4_e2m1>(::__nv_cvt_float_to_fp4(__v, __NV_E2M1, ::cudaRoundNearest));
     }
 #endif // _CCCL_HAS_NVFP4_E2M1()
     else
@@ -145,25 +145,28 @@ template <class _To, class _From>
 #if _CCCL_HAS_NVFP8_E8M0()
     else if constexpr (is_same_v<_To, __nv_fp8_e8m0>)
     {
-      return ::cuda::std::__fp_from_storage<__nv_fp8_e8m0>(::__nv_cvt_double_to_e8m0(__v, __NV_NOSAT, cudaRoundZero));
+      return ::cuda::std::__fp_from_storage<__nv_fp8_e8m0>(::__nv_cvt_double_to_e8m0(__v, __NV_NOSAT, ::cudaRoundZero));
     }
 #endif // _CCCL_HAS_NVFP8_E8M0()
 #if _CCCL_HAS_NVFP6_E2M3()
     else if constexpr (is_same_v<_To, __nv_fp6_e2m3>)
     {
-      return ::cuda::std::__fp_from_storage<__nv_fp6_e2m3>(::__nv_cvt_double_to_fp6(__v, __NV_E2M3, cudaRoundNearest));
+      return ::cuda::std::__fp_from_storage<__nv_fp6_e2m3>(
+        ::__nv_cvt_double_to_fp6(__v, __NV_E2M3, ::cudaRoundNearest));
     }
 #endif // _CCCL_HAS_NVFP6_E2M3()
 #if _CCCL_HAS_NVFP6_E3M2()
     else if constexpr (is_same_v<_To, __nv_fp6_e3m2>)
     {
-      return ::cuda::std::__fp_from_storage<__nv_fp6_e3m2>(::__nv_cvt_double_to_fp6(__v, __NV_E3M2, cudaRoundNearest));
+      return ::cuda::std::__fp_from_storage<__nv_fp6_e3m2>(
+        ::__nv_cvt_double_to_fp6(__v, __NV_E3M2, ::cudaRoundNearest));
     }
 #endif // _CCCL_HAS_NVFP6_E3M2()
 #if _CCCL_HAS_NVFP4_E2M1()
     else if constexpr (is_same_v<_To, __nv_fp4_e2m1>)
     {
-      return ::cuda::std::__fp_from_storage<__nv_fp4_e2m1>(::__nv_cvt_double_to_fp4(__v, __NV_E2M1, cudaRoundNearest));
+      return ::cuda::std::__fp_from_storage<__nv_fp4_e2m1>(
+        ::__nv_cvt_double_to_fp4(__v, __NV_E2M1, ::cudaRoundNearest));
     }
 #endif // _CCCL_HAS_NVFP4_E2M1()
     else
@@ -352,28 +355,28 @@ template <class _To, class _From>
     else if constexpr (is_same_v<_To, __nv_fp8_e8m0>)
     {
       return ::cuda::std::__fp_from_storage<__nv_fp8_e8m0>(
-        ::__nv_cvt_bfloat16raw_to_e8m0(__v, __NV_NOSAT, cudaRoundZero));
+        ::__nv_cvt_bfloat16raw_to_e8m0(__v, __NV_NOSAT, ::cudaRoundZero));
     }
 #  endif // _CCCL_HAS_NVFP8_E8M0()
 #  if _CCCL_HAS_NVFP6_E2M3()
     else if constexpr (is_same_v<_To, __nv_fp6_e2m3>)
     {
       return ::cuda::std::__fp_from_storage<__nv_fp6_e2m3>(
-        ::__nv_cvt_bfloat16raw_to_fp6(__v, __NV_E2M3, cudaRoundNearest));
+        ::__nv_cvt_bfloat16raw_to_fp6(__v, __NV_E2M3, ::cudaRoundNearest));
     }
 #  endif // _CCCL_HAS_NVFP6_E2M3()
 #  if _CCCL_HAS_NVFP6_E3M2()
     else if constexpr (is_same_v<_To, __nv_fp6_e3m2>)
     {
       return ::cuda::std::__fp_from_storage<__nv_fp6_e3m2>(
-        ::__nv_cvt_bfloat16raw_to_fp6(__v, __NV_E3M2, cudaRoundNearest));
+        ::__nv_cvt_bfloat16raw_to_fp6(__v, __NV_E3M2, ::cudaRoundNearest));
     }
 #  endif // _CCCL_HAS_NVFP6_E3M2()
 #  if _CCCL_HAS_NVFP4_E2M1()
     else if constexpr (is_same_v<_To, __nv_fp4_e2m1>)
     {
       return ::cuda::std::__fp_from_storage<__nv_fp4_e2m1>(
-        ::__nv_cvt_bfloat16raw_to_fp4(__v, __NV_E2M1, cudaRoundNearest));
+        ::__nv_cvt_bfloat16raw_to_fp4(__v, __NV_E2M1, ::cudaRoundNearest));
     }
 #  endif // _CCCL_HAS_NVFP4_E2M1()
     else

cuda/cccl/headers/include/cuda/std/__floating_point/cuda_fp_types.h CHANGED Viewed

@@ -55,6 +55,9 @@ _CCCL_DIAG_SUPPRESS_MSVC(4100) // unreferenced formal parameter
 _CCCL_DIAG_POP
 #endif // _CCCL_HAS_NVFP4()
+// crt/device_fp128_functions.h is available in CUDA 12.8+.
+// _CCCL_HAS_FLOAT128() checks the *compiler* compatibility with __float128.
+// We also need to check the toolkit version to ensure the compatibility with nvc++.
 #if _CCCL_HAS_FLOAT128() && _CCCL_DEVICE_COMPILATION() && _CCCL_CTK_AT_LEAST(12, 8)
 #  if !_CCCL_COMPILER(NVRTC)
 _CCCL_DIAG_PUSH

cuda/cccl/headers/include/cuda/std/__mdspan/mdspan.h CHANGED Viewed

@@ -439,7 +439,8 @@ public:
   [[nodiscard]] _CCCL_API constexpr bool is_exhaustive() const
     noexcept(noexcept(::cuda::std::declval<const mapping_type&>().is_exhaustive()))
   {
-    return mapping().is_exhaustive();
+    auto __tmp = mapping(); // workaround for clang with nodiscard
+    return __tmp.is_exhaustive();
   }
   [[nodiscard]] _CCCL_API constexpr bool is_strided() const
     noexcept(noexcept(::cuda::std::declval<const mapping_type&>().is_strided()))

cuda/cccl/headers/include/cuda/std/__type_traits/promote.h CHANGED Viewed

@@ -20,10 +20,8 @@
 #  pragma system_header
 #endif // no system header
-#include <cuda/std/__type_traits/integral_constant.h>
 #include <cuda/std/__type_traits/is_same.h>
 #include <cuda/std/__utility/declval.h>
-#include <cuda/std/cstddef>
 #include <cuda/std/__cccl/prologue.h>
@@ -49,6 +47,9 @@ struct __numeric_type
   _CCCL_API inline static double __test(unsigned long long);
   _CCCL_API inline static double __test(double);
   _CCCL_API inline static long double __test(long double);
+#if _CCCL_HAS_FLOAT128()
+  _CCCL_API inline static __float128 __test(__float128);
+#endif // _CCCL_HAS_FLOAT128()
   using type              = decltype(__test(declval<_Tp>()));
   static const bool value = !is_same_v<type, void>;

cuda/cccl/headers/include/thrust/system/cuda/detail/tabulate.h CHANGED Viewed

@@ -39,37 +39,23 @@
 #if _CCCL_HAS_CUDA_COMPILER()
 #  include <thrust/system/cuda/config.h>
-#  include <thrust/distance.h>
-#  include <thrust/system/cuda/detail/parallel_for.h>
+#  include <thrust/system/cuda/detail/transform.h>
 #  include <thrust/system/cuda/execution_policy.h>
+#  include <cuda/__functional/address_stability.h>
+#  include <cuda/std/iterator>
 THRUST_NAMESPACE_BEGIN
 namespace cuda_cub
 {
-namespace __tabulate
-{
-template <class Iterator, class TabulateOp>
-struct functor
-{
-  Iterator items;
-  TabulateOp op;
-  template <typename Size>
-  void _CCCL_DEVICE operator()(Size idx)
-  {
-    items[idx] = op(idx);
-  }
-};
-} // namespace __tabulate
 template <class Derived, class Iterator, class TabulateOp>
 void _CCCL_HOST_DEVICE tabulate(execution_policy<Derived>& policy, Iterator first, Iterator last, TabulateOp tabulate_op)
 {
-  using size_type = thrust::detail::it_difference_t<Iterator>;
-  size_type count = ::cuda::std::distance(first, last);
-  cuda_cub::parallel_for(policy, __tabulate::functor<Iterator, TabulateOp>{first, tabulate_op}, count);
+  using size_type  = ::cuda::std::iter_difference_t<Iterator>;
+  const auto count = ::cuda::std::distance(first, last);
+  cuda_cub::transform_n(
+    policy, ::cuda::counting_iterator<size_type>{}, count, first, ::cuda::proclaim_copyable_arguments(tabulate_op));
 }
 } // namespace cuda_cub
 THRUST_NAMESPACE_END
 #endif

cuda/cccl/headers/include/thrust/type_traits/unwrap_contiguous_iterator.h CHANGED Viewed

@@ -25,72 +25,39 @@
 THRUST_NAMESPACE_BEGIN
-namespace detail
-{
-// Type traits for contiguous iterators:
-template <typename Iterator>
-struct contiguous_iterator_traits
-{
-  static_assert(thrust::is_contiguous_iterator_v<Iterator>,
-                "contiguous_iterator_traits requires a contiguous iterator.");
-  using raw_pointer =
-    typename thrust::detail::pointer_traits<decltype(&*::cuda::std::declval<Iterator>())>::raw_pointer;
-};
-} // namespace detail
-//! Converts a contiguous iterator type to its underlying raw pointer type.
-template <typename ContiguousIterator>
-using unwrap_contiguous_iterator_t = typename detail::contiguous_iterator_traits<ContiguousIterator>::raw_pointer;
 //! Converts a contiguous iterator to its underlying raw pointer.
+_CCCL_EXEC_CHECK_DISABLE
 template <typename ContiguousIterator>
 _CCCL_HOST_DEVICE auto unwrap_contiguous_iterator(ContiguousIterator it)
-  -> unwrap_contiguous_iterator_t<ContiguousIterator>
 {
   static_assert(thrust::is_contiguous_iterator_v<ContiguousIterator>,
                 "unwrap_contiguous_iterator called with non-contiguous iterator.");
   return thrust::raw_pointer_cast(&*it);
 }
-namespace detail
-{
-// Implementation for non-contiguous iterators -- passthrough.
-template <typename Iterator, bool IsContiguous = thrust::is_contiguous_iterator_v<Iterator>>
-struct try_unwrap_contiguous_iterator_impl
-{
-  using type = Iterator;
-  static _CCCL_HOST_DEVICE type get(Iterator it)
-  {
-    return it;
-  }
-};
+//! Converts a contiguous iterator type to its underlying raw pointer type.
+template <typename ContiguousIterator>
+using unwrap_contiguous_iterator_t = decltype(unwrap_contiguous_iterator(::cuda::std::declval<ContiguousIterator>()));
-// Implementation for contiguous iterators -- unwraps to raw pointer.
+//! Takes an iterator and, if it is contiguous, unwraps it to the raw pointer it represents. Otherwise returns the
+//! iterator unmodified.
+_CCCL_EXEC_CHECK_DISABLE
 template <typename Iterator>
-struct try_unwrap_contiguous_iterator_impl<Iterator, true /*is_contiguous*/>
+_CCCL_HOST_DEVICE auto try_unwrap_contiguous_iterator(Iterator it)
 {
-  using type = unwrap_contiguous_iterator_t<Iterator>;
-  static _CCCL_HOST_DEVICE type get(Iterator it)
+  if constexpr (thrust::is_contiguous_iterator_v<Iterator>)
   {
     return unwrap_contiguous_iterator(it);
   }
-};
-} // namespace detail
+  else
+  {
+    return it;
+  }
+}
 //! Takes an iterator type and, if it is contiguous, yields the raw pointer type it represents. Otherwise returns the
 //! iterator type unmodified.
 template <typename Iterator>
-using try_unwrap_contiguous_iterator_t = typename detail::try_unwrap_contiguous_iterator_impl<Iterator>::type;
-//! Takes an iterator and, if it is contiguous, unwraps it to the raw pointer it represents. Otherwise returns the
-//! iterator unmodified.
-template <typename Iterator>
-_CCCL_HOST_DEVICE auto try_unwrap_contiguous_iterator(Iterator it) -> try_unwrap_contiguous_iterator_t<Iterator>
-{
-  return detail::try_unwrap_contiguous_iterator_impl<Iterator>::get(it);
-}
+using try_unwrap_contiguous_iterator_t = decltype(try_unwrap_contiguous_iterator(::cuda::std::declval<Iterator>()));
 THRUST_NAMESPACE_END

cuda/cccl/parallel/experimental/__init__.py CHANGED Viewed

@@ -17,12 +17,14 @@ from .algorithms import (
     make_radix_sort,
     make_reduce_into,
     make_segmented_reduce,
+    make_three_way_partition,
     make_unary_transform,
     make_unique_by_key,
     merge_sort,
     radix_sort,
     reduce_into,
     segmented_reduce,
+    three_way_partition,
     unary_transform,
     unique_by_key,
 )
@@ -56,6 +58,7 @@ __all__ = [
     "make_radix_sort",
     "make_reduce_into",
     "make_segmented_reduce",
+    "make_three_way_partition",
     "make_unary_transform",
     "make_unique_by_key",
     "merge_sort",
@@ -66,6 +69,7 @@ __all__ = [
     "segmented_reduce",
     "SortOrder",
     "TransformIterator",
+    "three_way_partition",
     "TransformOutputIterator",
     "unary_transform",
     "unique_by_key",

cuda/cccl/parallel/experimental/_bindings.pyi CHANGED Viewed

@@ -390,6 +390,7 @@ class DeviceHistogramBuildResult:
         num_rows: int,
         row_stride_samples: int,
         is_evenly_segmented: bool,
+        info: CommonData,
     ): ...
     def compute_even(
         self,
@@ -403,3 +404,30 @@ class DeviceHistogramBuildResult:
         row_stride_samples: int,
         stream,
     ) -> None: ...
+# ---------------------
+# DeviceThreeWayPartition
+# ---------------------
+class DeviceThreeWayPartitionBuildResult:
+    def __init__(
+        self,
+        d_in: Iterator,
+        d_first_part_out: Iterator,
+        d_second_part_out: Iterator,
+        d_unselected_out: Iterator,
+        d_num_selected_out: Iterator,
+        select_first_part_op: Op,
+        select_second_part_op: Op,
+        info: CommonData,
+    ): ...
+    def compute(
+        self,
+        d_in: Iterator,
+        d_first_part_out: Iterator,
+        d_second_part_out: Iterator,
+        d_unselected_out: Iterator,
+        d_num_selected_out: Iterator,
+        num_items: int,
+        stream,
+    ) -> int: ...

cuda/cccl/parallel/experimental/_bindings_impl.pyx CHANGED Viewed

@@ -1982,3 +1982,143 @@ cdef class DeviceHistogramBuildResult:
             <const char*>self.build_data.cubin,
             self.build_data.cubin_size
         )
+# ----------------------------------
+# DeviceThreeWayPartitionBuildResult
+# ----------------------------------
+cdef extern from "cccl/c/three_way_partition.h":
+    cdef struct cccl_device_three_way_partition_build_result_t 'cccl_device_three_way_partition_build_result_t':
+        const char* cubin
+        size_t cubin_size
+    cdef CUresult cccl_device_three_way_partition_build(
+        cccl_device_three_way_partition_build_result_t *build_ptr,
+        cccl_iterator_t d_in,
+        cccl_iterator_t d_first_part_out,
+        cccl_iterator_t d_second_part_out,
+        cccl_iterator_t d_unselected_out,
+        cccl_iterator_t d_num_selected_out,
+        cccl_op_t select_first_part_op,
+        cccl_op_t select_second_part_op,
+        int, int, const char *, const char *, const char *, const char *
+    ) nogil
+    CUresult cccl_device_three_way_partition(
+        cccl_device_three_way_partition_build_result_t build,
+        void* d_temp_storage,
+        size_t* temp_storage_bytes,
+        cccl_iterator_t d_in,
+        cccl_iterator_t d_first_part_out,
+        cccl_iterator_t d_second_part_out,
+        cccl_iterator_t d_unselected_out,
+        cccl_iterator_t d_num_selected_out,
+        cccl_op_t select_first_part_op,
+        cccl_op_t select_second_part_op,
+        int64_t num_items,
+        CUstream stream
+    ) nogil
+    cdef CUresult cccl_device_three_way_partition_cleanup(
+        cccl_device_three_way_partition_build_result_t *build_ptr
+    ) nogil
+cdef class DeviceThreeWayPartitionBuildResult:
+    cdef cccl_device_three_way_partition_build_result_t build_data
+    def __dealloc__(DeviceThreeWayPartitionBuildResult self):
+        cdef CUresult status = -1
+        with nogil:
+            status = cccl_device_three_way_partition_cleanup(&self.build_data)
+        if (status != 0):
+            print(f"Return code {status} encountered during three_way_partition result cleanup")
+    def __cinit__(
+        DeviceThreeWayPartitionBuildResult self,
+        Iterator d_in,
+        Iterator d_first_part_out,
+        Iterator d_second_part_out,
+        Iterator d_unselected_out,
+        Iterator d_num_selected_out,
+        Op select_first_part_op,
+        Op select_second_part_op,
+        CommonData common_data
+    ):
+        cdef CUresult status = -1
+        cdef int cc_major = common_data.get_cc_major()
+        cdef int cc_minor = common_data.get_cc_minor()
+        cdef const char *cub_path = common_data.cub_path_get_c_str()
+        cdef const char *thrust_path = common_data.thrust_path_get_c_str()
+        cdef const char *libcudacxx_path = common_data.libcudacxx_path_get_c_str()
+        cdef const char *ctk_path = common_data.ctk_path_get_c_str()
+        memset(&self.build_data, 0, sizeof(cccl_device_three_way_partition_build_result_t))
+        with nogil:
+            status = cccl_device_three_way_partition_build(
+                &self.build_data,
+                d_in.iter_data,
+                d_first_part_out.iter_data,
+                d_second_part_out.iter_data,
+                d_unselected_out.iter_data,
+                d_num_selected_out.iter_data,
+                select_first_part_op.op_data,
+                select_second_part_op.op_data,
+                cc_major,
+                cc_minor,
+                cub_path,
+                thrust_path,
+                libcudacxx_path,
+                ctk_path,
+            )
+        if status != 0:
+            raise RuntimeError(
+                f"Failed building three_way_partition, error code: {status}"
+            )
+    cpdef int compute(
+        DeviceThreeWayPartitionBuildResult self,
+        temp_storage_ptr,
+        temp_storage_bytes,
+        Iterator d_in,
+        Iterator d_first_part_out,
+        Iterator d_second_part_out,
+        Iterator d_unselected_out,
+        Iterator d_num_selected_out,
+        Op select_first_part_op,
+        Op select_second_part_op,
+        size_t num_items,
+        stream
+    ):
+        cdef CUresult status = -1
+        cdef void *storage_ptr = (<void *><uintptr_t>temp_storage_ptr) if temp_storage_ptr else NULL
+        cdef size_t storage_sz = <size_t>temp_storage_bytes
+        cdef CUstream c_stream = <CUstream><uintptr_t>(stream) if stream else NULL
+        with nogil:
+            status = cccl_device_three_way_partition(
+                self.build_data,
+                storage_ptr,
+                &storage_sz,
+                d_in.iter_data,
+                d_first_part_out.iter_data,
+                d_second_part_out.iter_data,
+                d_unselected_out.iter_data,
+                d_num_selected_out.iter_data,
+                select_first_part_op.op_data,
+                select_second_part_op.op_data,
+                <uint64_t>num_items,
+                c_stream
+            )
+        if status != 0:
+            raise RuntimeError(
+                f"Failed executing three_way_partition, error code: {status}"
+            )
+        return storage_sz
+    def _get_cubin(self):
+        return PyBytes_FromStringAndSize(
+            <const char*>self.build_data.cubin,
+            self.build_data.cubin_size
+        )

cuda/cccl/parallel/experimental/algorithms/__init__.py CHANGED Viewed

@@ -18,6 +18,8 @@ from ._scan import make_exclusive_scan as make_exclusive_scan
 from ._scan import make_inclusive_scan as make_inclusive_scan
 from ._segmented_reduce import make_segmented_reduce as make_segmented_reduce
 from ._segmented_reduce import segmented_reduce
+from ._three_way_partition import make_three_way_partition as make_three_way_partition
+from ._three_way_partition import three_way_partition as three_way_partition
 from ._transform import binary_transform, unary_transform
 from ._transform import make_binary_transform as make_binary_transform
 from ._transform import make_unary_transform as make_unary_transform
@@ -45,6 +47,8 @@ __all__ = [
     "make_segmented_reduce",
     "unique_by_key",
     "make_unique_by_key",
+    "three_way_partition",
+    "make_three_way_partition",
     "DoubleBuffer",
     "SortOrder",
 ]

cuda/cccl/parallel/experimental/algorithms/_reduce.py CHANGED Viewed

@@ -3,8 +3,6 @@
 #
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-from __future__ import annotations  # TODO: required for Python 3.7 docs env
 from typing import Callable, Union
 import numba

cuda/cccl/parallel/experimental/algorithms/_scan.py CHANGED Viewed

@@ -3,8 +3,6 @@
 #
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-from __future__ import annotations  # TODO: required for Python 3.7 docs env
 from typing import Callable, Union
 import numba