PyPI - singlestoredb - Versions diffs - 1.12.3__cp38-abi3-win32.whl → 1.13.0__cp38-abi3-win32.whl - Mend

singlestoredb 1.12.3__cp38-abi3-win32.whl → 1.13.0__cp38-abi3-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of singlestoredb might be problematic. Click here for more details.

Files changed (32) hide show

_singlestoredb_accel.pyd +0 -0
singlestoredb/__init__.py +1 -1
singlestoredb/apps/__init__.py +1 -0
singlestoredb/apps/_config.py +6 -0
singlestoredb/apps/_connection_info.py +8 -0
singlestoredb/apps/_python_udfs.py +85 -0
singlestoredb/config.py +14 -2
singlestoredb/functions/__init__.py +11 -1
singlestoredb/functions/decorator.py +102 -252
singlestoredb/functions/dtypes.py +545 -198
singlestoredb/functions/ext/asgi.py +288 -90
singlestoredb/functions/ext/json.py +29 -36
singlestoredb/functions/ext/mmap.py +1 -1
singlestoredb/functions/ext/rowdat_1.py +50 -70
singlestoredb/functions/signature.py +816 -144
singlestoredb/functions/typing.py +41 -0
singlestoredb/functions/utils.py +342 -0
singlestoredb/http/connection.py +3 -1
singlestoredb/management/manager.py +6 -1
singlestoredb/management/utils.py +2 -2
singlestoredb/mysql/connection.py +17 -11
singlestoredb/tests/ext_funcs/__init__.py +476 -237
singlestoredb/tests/test_basics.py +2 -0
singlestoredb/tests/test_ext_func.py +192 -3
singlestoredb/tests/test_udf.py +101 -131
singlestoredb/tests/test_udf_returns.py +459 -0
{singlestoredb-1.12.3.dist-info → singlestoredb-1.13.0.dist-info}/METADATA +2 -1
{singlestoredb-1.12.3.dist-info → singlestoredb-1.13.0.dist-info}/RECORD +32 -28
{singlestoredb-1.12.3.dist-info → singlestoredb-1.13.0.dist-info}/LICENSE +0 -0
{singlestoredb-1.12.3.dist-info → singlestoredb-1.13.0.dist-info}/WHEEL +0 -0
{singlestoredb-1.12.3.dist-info → singlestoredb-1.13.0.dist-info}/entry_points.txt +0 -0
{singlestoredb-1.12.3.dist-info → singlestoredb-1.13.0.dist-info}/top_level.txt +0 -0

singlestoredb/functions/typing.py ADDED Viewed

@@ -0,0 +1,41 @@
+from typing import Any
+from typing import Iterable
+from typing import Tuple
+from typing import TypeVar
+try:
+    from typing import TypeVarTuple  # type: ignore
+    from typing import Unpack  # type: ignore
+except ImportError:
+    # Python 3.8 and earlier do not have TypeVarTuple
+    from typing_extensions import TypeVarTuple  # type: ignore
+    from typing_extensions import Unpack  # type: ignore
+T = TypeVar('T', bound=Iterable[Any])  # Generic type for iterable types
+#
+# Masked types are used for pairs of vectors where the first element is the
+# vector and the second element is a boolean mask indicating which elements
+# are NULL. The boolean mask is a vector of the same length as the first
+# element, where True indicates that the corresponding element in the first
+# element is NULL.
+#
+# This is needed for vector types that do not support NULL values, such as
+# numpy arrays and pandas Series.
+#
+class Masked(Tuple[T, T]):
+    def __new__(cls, *args: T) -> 'Masked[Tuple[T, T]]':  # type: ignore
+        return tuple.__new__(cls, (args[0], args[1]))  # type: ignore
+Ts = TypeVarTuple('Ts')
+class Table(Tuple[Unpack[Ts]]):
+    """Return type for a table valued function."""
+    def __new__(cls, *args: Unpack[Ts]) -> 'Table[Tuple[Unpack[Ts]]]':  # type: ignore
+        return tuple.__new__(cls, args)  # type: ignore

singlestoredb/functions/utils.py ADDED Viewed

@@ -0,0 +1,342 @@
+import dataclasses
+import inspect
+import struct
+import sys
+import types
+import typing
+from enum import Enum
+from typing import Any
+from typing import Dict
+from typing import Iterable
+from .typing import Masked
+if sys.version_info >= (3, 10):
+    _UNION_TYPES = {typing.Union, types.UnionType}
+else:
+    _UNION_TYPES = {typing.Union}
+is_dataclass = dataclasses.is_dataclass
+def is_masked(obj: Any) -> bool:
+    """Check if an object is a Masked type."""
+    origin = typing.get_origin(obj)
+    if origin is not None:
+        return origin is Masked or \
+            (inspect.isclass(origin) and issubclass(origin, Masked))
+    return False
+def is_union(x: Any) -> bool:
+    """Check if the object is a Union."""
+    return typing.get_origin(x) in _UNION_TYPES
+def get_annotations(obj: Any) -> Dict[str, Any]:
+    """Get the annotations of an object."""
+    return typing.get_type_hints(obj)
+def get_module(obj: Any) -> str:
+    """Get the module of an object."""
+    module = getattr(obj, '__module__', '').split('.')
+    if module:
+        return module[0]
+    return ''
+def get_type_name(obj: Any) -> str:
+    """Get the type name of an object."""
+    if hasattr(obj, '__name__'):
+        return obj.__name__
+    if hasattr(obj, '__class__'):
+        return obj.__class__.__name__
+    return ''
+def is_numpy(obj: Any) -> bool:
+    """Check if an object is a numpy array."""
+    if str(obj).startswith('numpy.ndarray['):
+        return True
+    if inspect.isclass(obj):
+        if get_module(obj) == 'numpy':
+            return get_type_name(obj) == 'ndarray'
+    origin = typing.get_origin(obj)
+    if get_module(origin) == 'numpy':
+        if get_type_name(origin) == 'ndarray':
+            return True
+    dtype = type(obj)
+    if get_module(dtype) == 'numpy':
+        return get_type_name(dtype) == 'ndarray'
+    return False
+def is_dataframe(obj: Any) -> bool:
+    """Check if an object is a DataFrame."""
+    # Cheating here a bit so we don't have to import pandas / polars / pyarrow:
+    # unless we absolutely need to
+    if get_module(obj) == 'pandas':
+        return get_type_name(obj) == 'DataFrame'
+    if get_module(obj) == 'polars':
+        return get_type_name(obj) == 'DataFrame'
+    if get_module(obj) == 'pyarrow':
+        return get_type_name(obj) == 'Table'
+    return False
+def is_vector(obj: Any, include_masks: bool = False) -> bool:
+    """Check if an object is a vector type."""
+    return is_pandas_series(obj) \
+        or is_polars_series(obj) \
+        or is_pyarrow_array(obj) \
+        or is_numpy(obj) \
+        or is_masked(obj)
+def get_data_format(obj: Any) -> str:
+    """Return the data format of the DataFrame / Table / vector."""
+    # Cheating here a bit so we don't have to import pandas / polars / pyarrow
+    # unless we absolutely need to
+    if get_module(obj) == 'pandas':
+        return 'pandas'
+    if get_module(obj) == 'polars':
+        return 'polars'
+    if get_module(obj) == 'pyarrow':
+        return 'arrow'
+    if get_module(obj) == 'numpy':
+        return 'numpy'
+    if isinstance(obj, list):
+        return 'list'
+    return 'scalar'
+def is_pandas_series(obj: Any) -> bool:
+    """Check if an object is a pandas Series."""
+    if is_union(obj):
+        obj = typing.get_args(obj)[0]
+    return (
+        get_module(obj) == 'pandas' and
+        get_type_name(obj) == 'Series'
+    )
+def is_polars_series(obj: Any) -> bool:
+    """Check if an object is a polars Series."""
+    if is_union(obj):
+        obj = typing.get_args(obj)[0]
+    return (
+        get_module(obj) == 'polars' and
+        get_type_name(obj) == 'Series'
+    )
+def is_pyarrow_array(obj: Any) -> bool:
+    """Check if an object is a pyarrow Array."""
+    if is_union(obj):
+        obj = typing.get_args(obj)[0]
+    return (
+        get_module(obj) == 'pyarrow' and
+        get_type_name(obj) == 'Array'
+    )
+def is_typeddict(obj: Any) -> bool:
+    """Check if an object is a TypedDict."""
+    if hasattr(typing, 'is_typeddict'):
+        return typing.is_typeddict(obj)  # noqa: TYP006
+    return False
+def is_namedtuple(obj: Any) -> bool:
+    """Check if an object is a named tuple."""
+    if inspect.isclass(obj):
+        return (
+                issubclass(obj, tuple) and
+                hasattr(obj, '_asdict') and
+                hasattr(obj, '_fields')
+        )
+    return (
+            isinstance(obj, tuple) and
+            hasattr(obj, '_asdict') and
+            hasattr(obj, '_fields')
+    )
+def is_pydantic(obj: Any) -> bool:
+    """Check if an object is a pydantic model."""
+    if not inspect.isclass(obj):
+        return False
+    # We don't want to import pydantic here, so we check if
+    # the class is a subclass
+    return bool([
+        x for x in inspect.getmro(obj)
+        if get_module(x) == 'pydantic'
+        and get_type_name(x) == 'BaseModel'
+    ])
+class VectorTypes(str, Enum):
+    """Enum for vector types."""
+    F16 = 'f16'
+    F32 = 'f32'
+    F64 = 'f64'
+    I8 = 'i8'
+    I16 = 'i16'
+    I32 = 'i32'
+    I64 = 'i64'
+def unpack_vector(
+    obj: Any,
+    element_type: VectorTypes = VectorTypes.F32,
+) -> Iterable[Any]:
+    """
+    Unpack a vector from bytes.
+    Parameters
+    ----------
+    obj : Any
+        The object to unpack.
+    element_type : VectorTypes
+        The type of the elements in the vector.
+        Can be one of 'f32', 'f64', 'i8', 'i16', 'i32', or 'i64'.
+        Default is 'f32'.
+    Returns
+    -------
+    Iterable[Any]
+        The unpacked vector.
+    """
+    if isinstance(obj, (bytes, bytearray, list, tuple)):
+        if element_type == 'f32':
+            n = len(obj) // 4
+            fmt = 'f'
+        elif element_type == 'f64':
+            n = len(obj) // 8
+            fmt = 'd'
+        elif element_type == 'i8':
+            n = len(obj)
+            fmt = 'b'
+        elif element_type == 'i16':
+            n = len(obj) // 2
+            fmt = 'h'
+        elif element_type == 'i32':
+            n = len(obj) // 4
+            fmt = 'i'
+        elif element_type == 'i64':
+            n = len(obj) // 8
+            fmt = 'q'
+        else:
+            raise ValueError(f'unsupported element type: {element_type}')
+        if isinstance(obj, (bytes, bytearray)):
+            return struct.unpack(f'<{n}{fmt}', obj)
+        return tuple([struct.unpack(f'<{n}{fmt}', x) for x in obj])
+    if element_type == 'f32':
+        np_type = 'f4'
+    elif element_type == 'f64':
+        np_type = 'f8'
+    elif element_type == 'i8':
+        np_type = 'i1'
+    elif element_type == 'i16':
+        np_type = 'i2'
+    elif element_type == 'i32':
+        np_type = 'i4'
+    elif element_type == 'i64':
+        np_type = 'i8'
+    else:
+        raise ValueError(f'unsupported element type: {element_type}')
+    if is_numpy(obj):
+        import numpy as np
+        return np.array([np.frombuffer(x, dtype=np_type) for x in obj])
+    if is_pandas_series(obj):
+        import numpy as np
+        import pandas as pd
+        return pd.Series([np.frombuffer(x, dtype=np_type) for x in obj])
+    if is_polars_series(obj):
+        import numpy as np
+        import polars as pl
+        return pl.Series([np.frombuffer(x, dtype=np_type) for x in obj])
+    if is_pyarrow_array(obj):
+        import numpy as np
+        import pyarrow as pa
+        return pa.array([np.frombuffer(x, dtype=np_type) for x in obj])
+    raise ValueError(
+        f'unsupported object type: {type(obj)}',
+    )
+def pack_vector(
+    obj: Any,
+    element_type: VectorTypes = VectorTypes.F32,
+) -> bytes:
+    """
+    Pack a vector into bytes.
+    Parameters
+    ----------
+    obj : Any
+        The object to pack.
+    element_type : VectorTypes
+        The type of the elements in the vector.
+        Can be one of 'f32', 'f64', 'i8', 'i16', 'i32', or 'i64'.
+        Default is 'f32'.
+    Returns
+    -------
+    bytes
+        The packed vector.
+    """
+    if element_type == 'f32':
+        fmt = 'f'
+    elif element_type == 'f64':
+        fmt = 'd'
+    elif element_type == 'i8':
+        fmt = 'b'
+    elif element_type == 'i16':
+        fmt = 'h'
+    elif element_type == 'i32':
+        fmt = 'i'
+    elif element_type == 'i64':
+        fmt = 'q'
+    else:
+        raise ValueError(f'unsupported element type: {element_type}')
+    if isinstance(obj, (list, tuple)):
+        return struct.pack(f'<{len(obj)}{fmt}', *obj)
+    elif is_numpy(obj):
+        return obj.tobytes()
+    elif is_pandas_series(obj):
+        # TODO: Nested vectors
+        import pandas as pd
+        return pd.Series(obj).to_numpy().tobytes()
+    elif is_polars_series(obj):
+        # TODO: Nested vectors
+        import polars as pl
+        return pl.Series(obj).to_numpy().tobytes()
+    elif is_pyarrow_array(obj):
+        # TODO: Nested vectors
+        import pyarrow as pa
+        return pa.array(obj).to_numpy().tobytes()
+    raise ValueError(
+        f'unsupported object type: {type(obj)}',
+    )

singlestoredb/http/connection.py CHANGED Viewed

@@ -648,7 +648,9 @@ class Cursor(connection.Cursor):
                         if 'UNSIGNED' in data_type:
                             flags = 32
                         if data_type.endswith('BLOB') or data_type.endswith('BINARY'):
-                            converter = functools.partial(b64decode_converter, converter)
+                            converter = functools.partial(
+                                b64decode_converter, converter,  # type: ignore
+                            )
                             charset = 63  # BINARY
                         if type_code == 0:  # DECIMAL
                             type_code = types.ColumnType.get_code('NEWDECIMAL')

singlestoredb/management/manager.py CHANGED Viewed

@@ -62,6 +62,7 @@ class Manager(object):
         )
         if not new_access_token:
             raise ManagementError(msg='No management token was configured.')
         self._is_jwt = not access_token and new_access_token and is_jwt(new_access_token)
         self._sess = requests.Session()
         self._sess.headers.update({
@@ -70,10 +71,14 @@ class Manager(object):
             'Accept': 'application/json',
             'User-Agent': f'SingleStoreDB-Python/{client_version}',
         })
         self._base_url = urljoin(
-            base_url or type(self).default_base_url,
+            base_url
+            or config.get_option('management.base_url')
+            or type(self).default_base_url,
             version or type(self).default_version,
         ) + '/'
         self._params: Dict[str, str] = {}
         if organization_id:
             self._params['organizationID'] = organization_id

singlestoredb/management/utils.py CHANGED Viewed

@@ -30,7 +30,7 @@ JSONList = List[JSON]
 T = TypeVar('T')
 if sys.version_info < (3, 10):
-    PathLike = Union[str, os.PathLike]
+    PathLike = Union[str, os.PathLike]  # type: ignore
     PathLikeABC = os.PathLike
 else:
     PathLike = Union[str, os.PathLike[str]]
@@ -73,7 +73,7 @@ def ttl_property(ttl: datetime.timedelta) -> Callable[[Any], Any]:
     """Property with a time-to-live."""
     def wrapper(func: Callable[[Any], Any]) -> Any:
         out = TTLProperty(func, ttl=ttl)
-        return functools.wraps(func)(out)
+        return functools.wraps(func)(out)  # type: ignore
     return wrapper

singlestoredb/mysql/connection.py CHANGED Viewed

@@ -989,18 +989,11 @@ class Connection(BaseConnection):
     def set_character_set(self, charset, collation=None):
         """
-        Set session charaset (and collation) on the server.
+        Set charaset (and collation) on the server.
-        Send "SET [COLLATION|CHARACTER_SET]_SERVER = [collation|charset]" query.
+        Send "SET NAMES charset [COLLATE collation]" query.
         Update Connection.encoding based on charset.
-        If charset/collation are being set to utf8mb4, the corresponding global
-        variables (COLLATION_SERVER and CHARACTER_SET_SERVER) must be also set
-        to utf8mb4. This is true by default for SingleStore 8.7+. For previuous
-        versions or non-default setting user must manully run the query
-        `SET global collation_connection = utf8mb4_general_ci`
-        replacing utf8mb4_general_ci with {collation}.
         Parameters
         ----------
         charset : str
@@ -1013,9 +1006,9 @@ class Connection(BaseConnection):
         encoding = charset_by_name(charset).encoding
         if collation:
-            query = f'SET COLLATION_SERVER={collation}'
+            query = f'SET NAMES {charset} COLLATE {collation}'
         else:
-            query = f'SET CHARACTER_SET_SERVER={charset}'
+            query = f'SET NAMES {charset}'
         self._execute_command(COMMAND.COM_QUERY, query)
         self._read_packet()
         self.charset = charset
@@ -1119,6 +1112,19 @@ class Connection(BaseConnection):
             self._get_server_information()
             self._request_authentication()
+            # Send "SET NAMES" query on init for:
+            # - Ensure charaset (and collation) is set to the server.
+            #   - collation_id in handshake packet may be ignored.
+            # - If collation is not specified, we don't know what is server's
+            #   default collation for the charset. For example, default collation
+            #   of utf8mb4 is:
+            #   - MySQL 5.7, MariaDB 10.x: utf8mb4_general_ci
+            #   - MySQL 8.0: utf8mb4_0900_ai_ci
+            #
+            # Reference:
+            # - https://github.com/PyMySQL/PyMySQL/issues/1092
+            # - https://github.com/wagtail/wagtail/issues/9477
+            # - https://zenn.dev/methane/articles/2023-mysql-collation (Japanese)
             self.set_character_set(self.charset, self.collation)
             if self.sql_mode is not None:

singlestoredb 1.12.3__cp38-abi3-win32.whl → 1.13.0__cp38-abi3-win32.whl

Potentially problematic release.

singlestoredb 1.12.3__cp38-abi3-win32.whl → 1.13.0__cp38-abi3-win32.whl