PyPI - chdb - Versions diffs - 3.6.0__cp38-abi3-macosx_10_15_x86_64.whl - Mend

chdb 3.6.0__cp38-abi3-macosx_10_15_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of chdb might be problematic. Click here for more details.

Files changed (35) hide show

chdb/__init__.py +134 -0
chdb/__main__.py +38 -0
chdb/_chdb.abi3.so +0 -0
chdb/dataframe/__init__.py +19 -0
chdb/dataframe/query.py +356 -0
chdb/dbapi/__init__.py +79 -0
chdb/dbapi/connections.py +100 -0
chdb/dbapi/constants/FIELD_TYPE.py +31 -0
chdb/dbapi/constants/__init__.py +0 -0
chdb/dbapi/converters.py +293 -0
chdb/dbapi/cursors.py +351 -0
chdb/dbapi/err.py +61 -0
chdb/dbapi/times.py +20 -0
chdb/libpybind11nonlimitedapi_chdb_3.10.dylib +0 -0
chdb/libpybind11nonlimitedapi_chdb_3.11.dylib +0 -0
chdb/libpybind11nonlimitedapi_chdb_3.12.dylib +0 -0
chdb/libpybind11nonlimitedapi_chdb_3.13.dylib +0 -0
chdb/libpybind11nonlimitedapi_chdb_3.8.dylib +0 -0
chdb/libpybind11nonlimitedapi_chdb_3.9.dylib +0 -0
chdb/libpybind11nonlimitedapi_stubs.dylib +0 -0
chdb/rwabc.py +65 -0
chdb/session/__init__.py +3 -0
chdb/session/state.py +124 -0
chdb/state/__init__.py +3 -0
chdb/state/sqlitelike.py +505 -0
chdb/udf/__init__.py +3 -0
chdb/udf/udf.py +106 -0
chdb/utils/__init__.py +9 -0
chdb/utils/trace.py +74 -0
chdb/utils/types.py +234 -0
chdb-3.6.0.dist-info/LICENSE.txt +203 -0
chdb-3.6.0.dist-info/METADATA +554 -0
chdb-3.6.0.dist-info/RECORD +35 -0
chdb-3.6.0.dist-info/WHEEL +5 -0
chdb-3.6.0.dist-info/top_level.txt +2 -0

chdb/dbapi/cursors.py ADDED Viewed

@@ -0,0 +1,351 @@
+from . import err
+import re
+# Regular expression for :meth:`Cursor.executemany`.
+# executemany only supports simple bulk insert.
+# You can use it to load large dataset.
+# Capture groups:
+#   1 - the statement head: INSERT/REPLACE ... VALUE(S) plus trailing whitespace
+#   2 - one parenthesised tuple consisting only of placeholders
+#       (``%s``, ``%(name)s`` or ``?``) separated by commas
+#   3 - optional whitespace followed by an optional ``ON DUPLICATE ...`` tail
+# Matching is case-insensitive and ``.`` also matches newlines.
+RE_INSERT_VALUES = re.compile(
+    r"\s*((?:INSERT|REPLACE)\b.+\bVALUES?\s*)"
+    + r"(\(\s*(?:%s|%\(.+\)s|\?)\s*(?:,\s*(?:%s|%\(.+\)s|\?)\s*)*\))"
+    + r"(\s*(?:ON DUPLICATE.*)?);?\s*\Z",
+    re.IGNORECASE | re.DOTALL,
+)
+class Cursor(object):
+    """
+    This is the object you use to interact with the database.
+    Do not create an instance of a Cursor yourself. Call
+    connections.Connection.cursor().
+    See `Cursor <https://www.python.org/dev/peps/pep-0249/#cursor-objects>`_ in
+    the specification.
+    Placeholders: ``?`` and ``%s`` are filled in order from a tuple/list,
+    ``%(name)s`` is filled from a dict. Placeholders that appear inside a
+    quoted literal are never substituted.
+    """
+    #: Max statement size which :meth:`executemany` generates.
+    #:
+    #: Default value is 1024000.
+    max_stmt_length = 1024000
+    def __init__(self, connection):
+        """Bind to ``connection`` and open a cursor on ``connection._conn``."""
+        self.connection = connection
+        self._cursor = connection._conn.cursor()
+        self.description = None
+        self.rowcount = -1
+        self.arraysize = 1
+        self.lastrowid = None
+        self._executed = None
+    def __enter__(self):
+        return self
+    def __exit__(self, *exc_info):
+        # Returning None lets any in-flight exception propagate.
+        self.close()
+    def __iter__(self):
+        # Yield rows until fetchone() returns None.
+        return iter(self.fetchone, None)
+    def callproc(self, procname, args=()):
+        """No-op kept for DB-API compatibility.
+        Nothing is sent to the engine; ``procname`` is ignored.
+        procname -- string, name of procedure (unused)
+        args -- Sequence of parameters (returned unchanged)
+        Returns the original args.
+        """
+        return args
+    def close(self):
+        """Close the underlying cursor."""
+        self._cursor.close()
+    def _get_db(self):
+        """Return the owning connection, raising if the cursor has none."""
+        if not self.connection:
+            raise err.ProgrammingError("Cursor closed")
+        return self.connection
+    def _escape_args(self, args, conn):
+        """Escape ``args`` with ``conn.escape``, preserving tuple/dict shape."""
+        if isinstance(args, (tuple, list)):
+            return tuple(conn.escape(arg) for arg in args)
+        elif isinstance(args, dict):
+            return {key: conn.escape(val) for (key, val) in args.items()}
+        else:
+            # If it's not a dictionary let's try escaping it anyway.
+            # Worst case it will throw a Value error
+            return conn.escape(args)
+    def _scan_placeholders(self, query):
+        """Locate placeholders in ``query`` outside quoted literals.
+        Returns a list of ``(position, length, name)`` tuples in query order;
+        ``name`` is None for ``?`` / ``%s`` and the key for ``%(key)s``.
+        """
+        found = []
+        i = 0
+        query_len = len(query)
+        quote_char = None
+        while i < query_len:
+            char = query[i]
+            if quote_char is not None:
+                if char == "\\":
+                    # A backslash escapes the next character, whatever it is,
+                    # so '\\' ends the literal while '\'' does not.
+                    i += 2
+                    continue
+                if char == quote_char:
+                    quote_char = None
+            elif char in ("'", '"'):
+                quote_char = char
+            elif char == "?":
+                found.append((i, 1, None))
+            elif char == "%":
+                if query.startswith("s", i + 1):
+                    found.append((i, 2, None))
+                    i += 2
+                    continue
+                if query.startswith("(", i + 1):
+                    end = query.find(")s", i + 2)
+                    if end != -1:
+                        found.append((i, end + 2 - i, query[i + 2:end]))
+                        i = end + 2
+                        continue
+            i += 1
+        return found
+    def _substitute(self, query, placeholders, escaped_args, strict=False):
+        """Splice already-escaped values into ``query``.
+        Positional placeholders consume a tuple/list in order; named ones are
+        looked up in a dict. A placeholder that cannot be filled is left as
+        written, unless ``strict`` is true, in which case any unfilled
+        placeholder or unused positional value raises ProgrammingError.
+        """
+        by_name = isinstance(escaped_args, dict)
+        pieces = []
+        copied_upto = 0
+        used = 0
+        for pos, length, name in placeholders:
+            if name is None:
+                fillable = not by_name and used < len(escaped_args)
+            else:
+                fillable = by_name and name in escaped_args
+            if not fillable:
+                if strict:
+                    raise err.ProgrammingError(
+                        "wrong number of parameters for query placeholders"
+                    )
+                continue
+            if name is None:
+                value = escaped_args[used]
+                used += 1
+            else:
+                value = escaped_args[name]
+            pieces.append(query[copied_upto:pos])
+            pieces.append(str(value))
+            copied_upto = pos + length
+        if strict and not by_name and used != len(escaped_args):
+            raise err.ProgrammingError(
+                "wrong number of parameters for query placeholders"
+            )
+        pieces.append(query[copied_upto:])
+        return "".join(pieces)
+    @staticmethod
+    def _combine_rowcounts(counts):
+        """Sum the known row counts; -1 when none of them is known.
+        ``execute`` reports -1 for "undetermined", which must not be added up.
+        """
+        known = [count for count in counts if count is not None and count >= 0]
+        return sum(known) if known else -1
+    def _format_query(self, query, args, conn):
+        """Format query with arguments supporting ?, %s and %(name)s placeholders."""
+        if args is None or ('?' not in query and '%' not in query):
+            return query
+        escaped_args = self._escape_args(args, conn)
+        if not isinstance(escaped_args, (tuple, list, dict)):
+            # A lone scalar is treated as a one-element parameter tuple.
+            escaped_args = (escaped_args,)
+        return self._substitute(
+            query, self._scan_placeholders(query), escaped_args
+        )
+    def mogrify(self, query, args=None):
+        """
+        Returns the exact string that is sent to the database by calling the
+        execute() method.
+        This method follows the extension to the DB API 2.0 followed by Psycopg.
+        """
+        conn = self._get_db()
+        return self._format_query(query, args, conn)
+    def execute(self, query, args=None):
+        """Execute a query
+        :param str query: Query to execute.
+        :param args: parameters used with query. (optional)
+        :type args: tuple, list or dict
+        :return: Number of rows in the result, or -1 when there is none
+        :rtype: int
+        If args is a list or tuple, ? can be used as a placeholder in the query.
+        If args is a dict, %(name)s can be used as a placeholder in the query.
+        Also supports %s placeholder for backward compatibility.
+        """
+        query = self._format_query(query, args, self.connection)
+        self._cursor.execute(query)
+        # Get description from column names and types
+        if hasattr(self._cursor, "_column_names") and self._cursor._column_names:
+            self.description = [
+                (name, type_info, None, None, None, None, None)
+                for name, type_info in zip(
+                    self._cursor._column_names, self._cursor._column_types
+                )
+            ]
+            self.rowcount = (
+                len(self._cursor._current_table) if self._cursor._current_table else -1
+            )
+        else:
+            self.description = None
+            self.rowcount = -1
+        self._executed = query
+        return self.rowcount
+    def executemany(self, query, args):
+        # type: (str, list) -> int
+        """Run several data against one query
+        :param query: query to execute on server
+        :param args:  Sequence of sequences or mappings.  It is used as parameter.
+        :return: Number of rows affected if known, -1 if undetermined,
+            0 when ``args`` is empty.
+        This method improves performance on multiple-row INSERT and
+        REPLACE. Otherwise, it is equivalent to looping over args with
+        execute().
+        """
+        if not args:
+            return 0
+        m = RE_INSERT_VALUES.match(query)
+        if m:
+            # ``% ()`` collapses ``%%`` to ``%`` in the statement head and
+            # raises on any other ``%`` directive found there.
+            q_prefix = m.group(1) % ()
+            q_values = m.group(2).rstrip()
+            q_postfix = m.group(3) or ""
+            assert q_values[0] == "(" and q_values[-1] == ")"
+            return self._do_execute_many(
+                q_prefix,
+                q_values,
+                q_postfix,
+                args,
+                self.max_stmt_length,
+                self._get_db().encoding,
+            )
+        counts = [self.execute(query, arg) for arg in args]
+        self.rowcount = self._combine_rowcounts(counts)
+        return self.rowcount
+    def _find_placeholder_positions(self, query):
+        """Return ``(position, length)`` for each ``?`` / ``%s`` outside quotes."""
+        return [
+            (pos, length)
+            for pos, length, name in self._scan_placeholders(query)
+            if name is None
+        ]
+    def _do_execute_many(
+        self, prefix, values, postfix, args, max_stmt_length, encoding
+    ):
+        """Send ``args`` as multi-row statements of at most ``max_stmt_length`` bytes.
+        Each element of ``args`` is rendered through the ``values`` template
+        and appended, comma separated, to ``prefix``; a statement is flushed
+        through :meth:`execute` once adding another row would exceed the limit.
+        Raises ProgrammingError when a row does not match the placeholders.
+        """
+        conn = self._get_db()
+        if isinstance(prefix, str):
+            prefix = prefix.encode(encoding)
+        if isinstance(postfix, str):
+            postfix = postfix.encode(encoding)
+        separator = ",".encode(encoding)
+        # Only text templates can be scanned; a bytes template is sent as-is.
+        placeholders = (
+            self._scan_placeholders(values) if isinstance(values, str) else []
+        )
+        def render(arg):
+            if placeholders:
+                escaped = self._escape_args(arg, conn)
+                if not isinstance(escaped, (tuple, list, dict)):
+                    escaped = (escaped,)
+                row = self._substitute(values, placeholders, escaped, strict=True)
+            else:
+                # Nothing to fill in: the template itself is the row.
+                row = values
+            if isinstance(row, str):
+                row = row.encode(encoding, "surrogateescape")
+            return row
+        counts = []
+        sql = None
+        for arg in args:
+            row = render(arg)
+            if sql is None:
+                sql = bytearray(prefix)
+                sql += row
+            elif len(sql) + len(row) + len(postfix) + 2 > max_stmt_length:  # +2 for comma
+                counts.append(self.execute(bytes(sql) + postfix))
+                sql = bytearray(prefix)
+                sql += row
+            else:
+                sql += separator
+                sql += row
+        if sql is None:
+            # No rows were supplied, so nothing was sent.
+            self.rowcount = 0
+            return 0
+        counts.append(self.execute(bytes(sql) + postfix))
+        self.rowcount = self._combine_rowcounts(counts)
+        return self.rowcount
+    def _check_executed(self):
+        """Raise ProgrammingError unless a statement has been executed."""
+        if not self._executed:
+            raise err.ProgrammingError("execute() first")
+    def fetchone(self):
+        """Fetch the next row"""
+        self._check_executed()
+        return self._cursor.fetchone()
+    def fetchmany(self, size=None):
+        """Fetch several rows; ``size`` defaults to :attr:`arraysize`."""
+        self._check_executed()
+        if size is None:
+            size = self.arraysize
+        return self._cursor.fetchmany(size)
+    def fetchall(self):
+        """Fetch all the rows"""
+        self._check_executed()
+        return self._cursor.fetchall()
+    def nextset(self):
+        """Get the next query set"""
+        # Not support for now
+        return None
+    def setinputsizes(self, *args):
+        """Does nothing, required by DB API."""
+    def setoutputsizes(self, *args):
+        """Does nothing, required by DB API."""

chdb/dbapi/err.py ADDED Viewed

@@ -0,0 +1,61 @@
+class StandardError(Exception):
+    """Root of every exception defined by the chdb DB-API layer."""
+class Warning(StandardError):
+    """Signals a noteworthy but non-fatal condition, such as data being
+    truncated during an insert."""
+class Error(StandardError):
+    """Common base for all failures; everything except Warning derives
+    from it."""
+class InterfaceError(Error):
+    """A problem in the database interface layer, as opposed to the
+    database itself."""
+class DatabaseError(Error):
+    """A problem originating in the database."""
+class DataError(DatabaseError):
+    """The data being processed is at fault: division by zero, a numeric
+    value out of range, and similar."""
+class OperationalError(DatabaseError):
+    """A failure in the database's operation that the programmer may not
+    control: unexpected disconnect, unknown data source name, a
+    transaction that could not be processed, a memory allocation error
+    during processing, and so on."""
+class IntegrityError(DatabaseError):
+    """Relational integrity was violated, for example by a failed foreign
+    key check or a duplicate key."""
+class InternalError(DatabaseError):
+    """The database hit an internal error, for example an invalidated
+    cursor or an out-of-sync transaction."""
+class ProgrammingError(DatabaseError):
+    """The caller made a mistake: table missing or already present, SQL
+    syntax error, wrong number of parameters, and the like."""
+class NotSupportedError(DatabaseError):
+    """A method or database API was used that the database does not
+    support, e.g. calling .rollback() on a connection without
+    transaction support or with transactions switched off."""

chdb/dbapi/times.py ADDED Viewed

@@ -0,0 +1,20 @@
+from time import localtime
+from datetime import date, datetime, time, timedelta
+# Type-constructor names in the DB-API 2.0 (PEP 249) naming convention,
+# each bound directly to the matching standard-library datetime class.
+Date = date
+Time = time
+TimeDelta = timedelta
+Timestamp = datetime
+def DateFromTicks(ticks):
+    """Return the local-time calendar date for ``ticks`` seconds since the epoch."""
+    local = localtime(ticks)
+    return date(local.tm_year, local.tm_mon, local.tm_mday)
+def TimeFromTicks(ticks):
+    """Return the local wall-clock time for ``ticks`` seconds since the epoch."""
+    local = localtime(ticks)
+    return time(local.tm_hour, local.tm_min, local.tm_sec)
+def TimestampFromTicks(ticks):
+    """Return the local datetime, at whole-second precision, for ``ticks``
+    seconds since the epoch."""
+    local = localtime(ticks)
+    return datetime(
+        local.tm_year,
+        local.tm_mon,
+        local.tm_mday,
+        local.tm_hour,
+        local.tm_min,
+        local.tm_sec,
+    )

chdb/libpybind11nonlimitedapi_chdb_3.10.dylib ADDED Viewed

Binary file

chdb/libpybind11nonlimitedapi_chdb_3.11.dylib ADDED Viewed

Binary file

chdb/libpybind11nonlimitedapi_chdb_3.12.dylib ADDED Viewed

Binary file

chdb/libpybind11nonlimitedapi_chdb_3.13.dylib ADDED Viewed

Binary file

chdb/libpybind11nonlimitedapi_chdb_3.8.dylib ADDED Viewed

Binary file

chdb/libpybind11nonlimitedapi_chdb_3.9.dylib ADDED Viewed

Binary file

chdb/libpybind11nonlimitedapi_stubs.dylib ADDED Viewed

Binary file

chdb/rwabc.py ADDED Viewed

@@ -0,0 +1,65 @@
+from abc import ABC, abstractmethod
+from typing import List, Any
+class PyReader(ABC):
+    """Abstract source of column data; subclasses implement :meth:`read`."""
+    def __init__(self, data: Any):
+        """
+        Store the backing data on the instance.
+        Args:
+            data (Any): Whatever the concrete reader reads from; no particular
+                type or layout is imposed here.
+        """
+        self.data = data
+    @abstractmethod
+    def read(self, col_names: List[str], count: int) -> List[Any]:
+        """
+        Fetch up to ``count`` rows for the requested columns.
+        Args:
+            col_names (List[str]): Names of the columns to read.
+            count (int): Upper bound on the number of rows returned.
+        Returns:
+            List[Any]: One sequence of values per requested column.
+        """
+        ...
+class PyWriter(ABC):
+    """Abstract sink for column data; subclasses implement :meth:`write`
+    and :meth:`finalize`."""
+    def __init__(self, col_names: List[str], types: List[type], data: Any):
+        """
+        Record the column layout and seed data, and start with no blocks.
+        Args:
+            col_names (List[str]): Names of the columns.
+            types (List[type]): Type of each column, parallel to ``col_names``.
+            data (Any): Seed data for the writer; no particular type or
+                layout is imposed here.
+        """
+        self.blocks = []
+        self.data = data
+        self.types = types
+        self.col_names = col_names
+    @abstractmethod
+    def write(self, col_names: List[str], columns: List[List[Any]]) -> None:
+        """
+        Store the given columns into blocks. Subclasses must implement this.
+        Args:
+            col_names (List[str]): Names of the columns being written.
+            columns (List[List[Any]]): The column values, one list per column.
+        """
+        ...
+    @abstractmethod
+    def finalize(self) -> bytes:
+        """
+        Build the final output from the blocks. Subclasses must implement this.
+        Returns:
+            bytes: The serialized result.
+        """
+        ...

chdb/session/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .state import Session
+# Session is the only name exported by ``from chdb.session import *``.
+__all__ = ["Session"]

chdb/session/state.py ADDED Viewed

@@ -0,0 +1,124 @@
+import warnings
+import chdb
+from ..state import sqlitelike as chdb_stateful
+from ..state.sqlitelike import StreamingResult
+# Module-level record of the active Session and the path it was opened with.
+# Session.__init__ assigns both; Session.close() resets them to None.
+g_session = None
+g_session_path = None
+class Session:
+    """
+    Session will keep the state of query.
+    If path is None, the session uses ":memory:" as its database path.
+    You can also pass in a path to create a database at that path where will keep your data.
+    You can also use a connection string to pass in the path and other parameters.
+    Examples:
+        - ":memory:" (for in-memory database)
+        - "test.db" (for relative path)
+        - "file:test.db" (same as above)
+        - "/path/to/test.db" (for absolute path)
+        - "file:/path/to/test.db" (same as above)
+        - "file:test.db?param1=value1&param2=value2" (for relative path with query params)
+        - "file::memory:?verbose&log-level=test" (for in-memory database with query params)
+        - "///path/to/test.db?param1=value1&param2=value2" (for absolute path)
+    Connection string args handling:
+        Connection string can contain query params like "file:test.db?param1=value1&param2=value2"
+        "param1=value1" will be passed to ClickHouse engine as start up args.
+        For more details, see `clickhouse local --help --verbose`
+        Some special args handling:
+        - "mode=ro" would be "--readonly=1" for clickhouse (read-only mode)
+    Important:
+        - There can be only one session at a time. If you want to create a new session, you need to close the existing one.
+        - Creating a new session will close the existing one.
+    """
+    def __init__(self, path=None):
+        """Open a session on ``path``, closing any session that is still active."""
+        # Set first so close() / __del__ are safe even if construction fails below.
+        self._conn = None
+        global g_session, g_session_path
+        if g_session is not None:
+            warnings.warn(
+                "There is already an active session. Creating a new session will close the existing one. "
+                "It is recommended to close the existing session before creating a new one. "
+                f"Closing the existing session {g_session_path}"
+            )
+            g_session.close()
+            g_session_path = None
+        if path is None:
+            self._path = ":memory:"
+        else:
+            self._path = path
+        if chdb.g_udf_path != "":
+            self._udf_path = chdb.g_udf_path
+            # add udf_path to conn_str here.
+            # - the `user_scripts_path` will be the value of `udf_path`
+            # - the `user_defined_executable_functions_config` will be `user_scripts_path/*.xml`
+            # Both of them will be added to the conn_str in the Connection class
+            if "?" in self._path:
+                self._conn_str = f"{self._path}&udf_path={self._udf_path}"
+            else:
+                self._conn_str = f"{self._path}?udf_path={self._udf_path}"
+        else:
+            self._udf_path = ""
+            self._conn_str = f"{self._path}"
+        self._conn = chdb_stateful.Connection(self._conn_str)
+        g_session = self
+        g_session_path = self._path
+    def __del__(self):
+        self.close()
+    def __enter__(self):
+        return self
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.close()
+    def close(self):
+        """Close the connection if open; safe to call more than once."""
+        global g_session, g_session_path
+        # getattr keeps this safe on an object whose __init__ never ran.
+        conn = getattr(self, "_conn", None)
+        if conn is not None:
+            conn.close()
+            self._conn = None
+        # Only the active session may clear the module-level record; a stale
+        # session being closed or garbage-collected must not erase the entry
+        # that belongs to its replacement.
+        if g_session is self:
+            g_session = None
+            g_session_path = None
+    def cleanup(self):
+        """Best-effort close() that suppresses ordinary exceptions."""
+        try:
+            self.close()
+        except Exception:  # noqa
+            pass
+    @staticmethod
+    def _coerce_debug_format(fmt):
+        """Return ``fmt``, replacing the unsupported "Debug" with "CSV" after a warning."""
+        if fmt == "Debug":
+            warnings.warn(
+                """Debug format is not supported in Session.query
+Please try use parameters in connection string instead:
+Eg: conn = connect(f"db_path?verbose&log-level=test")"""
+            )
+            return "CSV"
+        return fmt
+    def query(self, sql, fmt="CSV", udf_path=""):
+        """
+        Execute a query.
+        ``fmt`` is the output format; "Debug" falls back to "CSV" with a warning.
+        ``udf_path`` is accepted but not used by this method.
+        """
+        return self._conn.query(sql, self._coerce_debug_format(fmt))
+    # alias sql = query
+    sql = query
+    def send_query(self, sql, fmt="CSV") -> StreamingResult:
+        """
+        Execute a streaming query.
+        ``fmt`` is the output format; "Debug" falls back to "CSV" with a warning.
+        """
+        return self._conn.send_query(sql, self._coerce_debug_format(fmt))

chdb/state/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .sqlitelike import connect
+# connect is the only name exported by ``from chdb.state import *``.
+__all__ = ["connect"]