chdb 3.0.1__cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl → 3.1.1__cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of chdb has been flagged as potentially problematic. See the package's registry advisory page for more details.

chdb/__init__.py CHANGED
@@ -19,7 +19,7 @@ _process_result_format_funs = {
19
19
  # UDF script path will be f"{g_udf_path}/{func_name}.py"
20
20
  g_udf_path = ""
21
21
 
22
- chdb_version = ('3', '0', '1')
22
+ chdb_version = ('3', '1', '1')
23
23
  if sys.version_info[:2] >= (3, 7):
24
24
  # get the path of the current file
25
25
  current_path = os.path.dirname(os.path.abspath(__file__))
Binary file
chdb/dbapi/connections.py CHANGED
@@ -57,7 +57,7 @@ class Connection(object):
57
57
  return Cursor(self)
58
58
  return Cursor(self)
59
59
 
60
- def query(self, sql, fmt="ArrowStream"):
60
+ def query(self, sql, fmt="CSV"):
61
61
  """Execute a query and return the raw result."""
62
62
  if self._closed:
63
63
  raise err.InterfaceError("Connection closed")
chdb/dbapi/cursors.py CHANGED
@@ -5,10 +5,11 @@ import re
5
5
  # executemany only supports simple bulk insert.
6
6
  # You can use it to load large dataset.
7
7
  RE_INSERT_VALUES = re.compile(
8
- r"\s*((?:INSERT|REPLACE)\b.+\bVALUES?\s*)" +
9
- r"(\(\s*(?:%s|%\(.+\)s)\s*(?:,\s*(?:%s|%\(.+\)s)\s*)*\))" +
10
- r"(\s*(?:ON DUPLICATE.*)?);?\s*\Z",
11
- re.IGNORECASE | re.DOTALL)
8
+ r"\s*((?:INSERT|REPLACE)\b.+\bVALUES?\s*)"
9
+ + r"(\(\s*(?:%s|%\(.+\)s)\s*(?:,\s*(?:%s|%\(.+\)s)\s*)*\))"
10
+ + r"(\s*(?:ON DUPLICATE.*)?);?\s*\Z",
11
+ re.IGNORECASE | re.DOTALL,
12
+ )
12
13
 
13
14
 
14
15
  class Cursor(object):
@@ -131,13 +132,17 @@ class Cursor(object):
131
132
 
132
133
  self._cursor.execute(query)
133
134
 
134
- # Get description from Arrow schema
135
- if self._cursor._current_table is not None:
135
+ # Get description from column names and types
136
+ if hasattr(self._cursor, "_column_names") and self._cursor._column_names:
136
137
  self.description = [
137
- (field.name, field.type.to_pandas_dtype(), None, None, None, None, None)
138
- for field in self._cursor._current_table.schema
138
+ (name, type_info, None, None, None, None, None)
139
+ for name, type_info in zip(
140
+ self._cursor._column_names, self._cursor._column_types
141
+ )
139
142
  ]
140
- self.rowcount = self._cursor._current_table.num_rows
143
+ self.rowcount = (
144
+ len(self._cursor._current_table) if self._cursor._current_table else -1
145
+ )
141
146
  else:
142
147
  self.description = None
143
148
  self.rowcount = -1
@@ -164,16 +169,23 @@ class Cursor(object):
164
169
  if m:
165
170
  q_prefix = m.group(1) % ()
166
171
  q_values = m.group(2).rstrip()
167
- q_postfix = m.group(3) or ''
168
- assert q_values[0] == '(' and q_values[-1] == ')'
169
- return self._do_execute_many(q_prefix, q_values, q_postfix, args,
170
- self.max_stmt_length,
171
- self._get_db().encoding)
172
+ q_postfix = m.group(3) or ""
173
+ assert q_values[0] == "(" and q_values[-1] == ")"
174
+ return self._do_execute_many(
175
+ q_prefix,
176
+ q_values,
177
+ q_postfix,
178
+ args,
179
+ self.max_stmt_length,
180
+ self._get_db().encoding,
181
+ )
172
182
 
173
183
  self.rowcount = sum(self.execute(query, arg) for arg in args)
174
184
  return self.rowcount
175
185
 
176
- def _do_execute_many(self, prefix, values, postfix, args, max_stmt_length, encoding):
186
+ def _do_execute_many(
187
+ self, prefix, values, postfix, args, max_stmt_length, encoding
188
+ ):
177
189
  conn = self._get_db()
178
190
  escape = self._escape_args
179
191
  if isinstance(prefix, str):
@@ -184,18 +196,18 @@ class Cursor(object):
184
196
  args = iter(args)
185
197
  v = values % escape(next(args), conn)
186
198
  if isinstance(v, str):
187
- v = v.encode(encoding, 'surrogateescape')
199
+ v = v.encode(encoding, "surrogateescape")
188
200
  sql += v
189
201
  rows = 0
190
202
  for arg in args:
191
203
  v = values % escape(arg, conn)
192
204
  if isinstance(v, str):
193
- v = v.encode(encoding, 'surrogateescape')
205
+ v = v.encode(encoding, "surrogateescape")
194
206
  if len(sql) + len(v) + len(postfix) + 1 > max_stmt_length:
195
207
  rows += self.execute(sql + postfix)
196
208
  sql = prefix
197
209
  else:
198
- sql += ','.encode(encoding)
210
+ sql += ",".encode(encoding)
199
211
  sql += v
200
212
  rows += self.execute(sql + postfix)
201
213
  self.rowcount = rows
chdb/state/sqlitelike.py CHANGED
@@ -1,4 +1,3 @@
1
- import io
2
1
  from typing import Optional, Any
3
2
  from chdb import _chdb
4
3
 
@@ -11,6 +10,36 @@ except ImportError as e:
11
10
  raise ImportError("Failed to import pyarrow") from None
12
11
 
13
12
 
13
+ _arrow_format = set({"dataframe", "arrowtable"})
14
+ _process_result_format_funs = {
15
+ "dataframe": lambda x: to_df(x),
16
+ "arrowtable": lambda x: to_arrowTable(x),
17
+ }
18
+
19
+
20
+ # return pyarrow table
21
+ def to_arrowTable(res):
22
+ """convert res to arrow table"""
23
+ # try import pyarrow and pandas, if failed, raise ImportError with suggestion
24
+ try:
25
+ import pyarrow as pa # noqa
26
+ import pandas as pd # noqa
27
+ except ImportError as e:
28
+ print(f"ImportError: {e}")
29
+ print('Please install pyarrow and pandas via "pip install pyarrow pandas"')
30
+ raise ImportError("Failed to import pyarrow or pandas") from None
31
+ if len(res) == 0:
32
+ return pa.Table.from_batches([], schema=pa.schema([]))
33
+ return pa.RecordBatchFileReader(res.bytes()).read_all()
34
+
35
+
36
+ # return pandas dataframe
37
+ def to_df(r):
38
+ """convert arrow table to Dataframe"""
39
+ t = to_arrowTable(r)
40
+ return t.to_pandas(use_threads=True)
41
+
42
+
14
43
  class Connection:
15
44
  def __init__(self, connection_string: str):
16
45
  # print("Connection", connection_string)
@@ -22,7 +51,13 @@ class Connection:
22
51
  return self._cursor
23
52
 
24
53
  def query(self, query: str, format: str = "CSV") -> Any:
25
- return self._conn.query(query, format)
54
+ lower_output_format = format.lower()
55
+ result_func = _process_result_format_funs.get(lower_output_format, lambda x: x)
56
+ if lower_output_format in _arrow_format:
57
+ format = "Arrow"
58
+
59
+ result = self._conn.query(query, format)
60
+ return result_func(result)
26
61
 
27
62
  def close(self) -> None:
28
63
  # print("close")
@@ -41,17 +76,103 @@ class Cursor:
41
76
  def execute(self, query: str) -> None:
42
77
  self._cursor.execute(query)
43
78
  result_mv = self._cursor.get_memview()
44
- # print("get_result", result_mv)
45
79
  if self._cursor.has_error():
46
80
  raise Exception(self._cursor.error_message())
47
81
  if self._cursor.data_size() == 0:
48
82
  self._current_table = None
49
83
  self._current_row = 0
84
+ self._column_names = []
85
+ self._column_types = []
50
86
  return
51
- arrow_data = result_mv.tobytes()
52
- reader = pa.ipc.open_stream(io.BytesIO(arrow_data))
53
- self._current_table = reader.read_all()
54
- self._current_row = 0
87
+
88
+ # Parse JSON data
89
+ json_data = result_mv.tobytes().decode("utf-8")
90
+ import json
91
+
92
+ try:
93
+ # First line contains column names
94
+ # Second line contains column types
95
+ # Following lines contain data
96
+ lines = json_data.strip().split("\n")
97
+ if len(lines) < 2:
98
+ self._current_table = None
99
+ self._current_row = 0
100
+ self._column_names = []
101
+ self._column_types = []
102
+ return
103
+
104
+ self._column_names = json.loads(lines[0])
105
+ self._column_types = json.loads(lines[1])
106
+
107
+ # Convert data rows
108
+ rows = []
109
+ for line in lines[2:]:
110
+ if not line.strip():
111
+ continue
112
+ row_data = json.loads(line)
113
+ converted_row = []
114
+ for val, type_info in zip(row_data, self._column_types):
115
+ # Handle NULL values first
116
+ if val is None:
117
+ converted_row.append(None)
118
+ continue
119
+
120
+ # Basic type conversion
121
+ try:
122
+ if type_info.startswith("Int") or type_info.startswith("UInt"):
123
+ converted_row.append(int(val))
124
+ elif type_info.startswith("Float"):
125
+ converted_row.append(float(val))
126
+ elif type_info == "Bool":
127
+ converted_row.append(bool(val))
128
+ elif type_info == "String" or type_info == "FixedString":
129
+ converted_row.append(str(val))
130
+ elif type_info.startswith("DateTime"):
131
+ from datetime import datetime
132
+
133
+ # Check if the value is numeric (timestamp)
134
+ val_str = str(val)
135
+ if val_str.replace(".", "").isdigit():
136
+ converted_row.append(datetime.fromtimestamp(float(val)))
137
+ else:
138
+ # Handle datetime string formats
139
+ if "." in val_str: # Has microseconds
140
+ converted_row.append(
141
+ datetime.strptime(
142
+ val_str, "%Y-%m-%d %H:%M:%S.%f"
143
+ )
144
+ )
145
+ else: # No microseconds
146
+ converted_row.append(
147
+ datetime.strptime(val_str, "%Y-%m-%d %H:%M:%S")
148
+ )
149
+ elif type_info.startswith("Date"):
150
+ from datetime import date, datetime
151
+
152
+ # Check if the value is numeric (days since epoch)
153
+ val_str = str(val)
154
+ if val_str.isdigit():
155
+ converted_row.append(
156
+ date.fromtimestamp(float(val) * 86400)
157
+ )
158
+ else:
159
+ # Handle date string format
160
+ converted_row.append(
161
+ datetime.strptime(val_str, "%Y-%m-%d").date()
162
+ )
163
+ else:
164
+ # For unsupported types, keep as string
165
+ converted_row.append(str(val))
166
+ except (ValueError, TypeError):
167
+ # If conversion fails, keep original value as string
168
+ converted_row.append(str(val))
169
+ rows.append(tuple(converted_row))
170
+
171
+ self._current_table = rows
172
+ self._current_row = 0
173
+
174
+ except json.JSONDecodeError as e:
175
+ raise Exception(f"Failed to parse JSON data: {e}")
55
176
 
56
177
  def commit(self) -> None:
57
178
  self._cursor.commit()
@@ -60,12 +181,10 @@ class Cursor:
60
181
  if not self._current_table or self._current_row >= len(self._current_table):
61
182
  return None
62
183
 
63
- row_dict = {
64
- col: self._current_table.column(col)[self._current_row].as_py()
65
- for col in self._current_table.column_names
66
- }
184
+ # Now self._current_table is a list of row tuples
185
+ row = self._current_table[self._current_row]
67
186
  self._current_row += 1
68
- return tuple(row_dict.values())
187
+ return row
69
188
 
70
189
  def fetchmany(self, size: int = 1) -> tuple:
71
190
  if not self._current_table:
@@ -99,6 +218,30 @@ class Cursor:
99
218
  raise StopIteration
100
219
  return row
101
220
 
221
+ def column_names(self) -> list:
222
+ """Return a list of column names from the last executed query"""
223
+ return self._column_names if hasattr(self, "_column_names") else []
224
+
225
+ def column_types(self) -> list:
226
+ """Return a list of column types from the last executed query"""
227
+ return self._column_types if hasattr(self, "_column_types") else []
228
+
229
+ @property
230
+ def description(self) -> list:
231
+ """
232
+ Return a description of the columns as per DB-API 2.0
233
+ Returns a list of 7-item tuples, each containing:
234
+ (name, type_code, display_size, internal_size, precision, scale, null_ok)
235
+ where only name and type_code are provided
236
+ """
237
+ if not hasattr(self, "_column_names") or not self._column_names:
238
+ return []
239
+
240
+ return [
241
+ (name, type_info, None, None, None, None, None)
242
+ for name, type_info in zip(self._column_names, self._column_types)
243
+ ]
244
+
102
245
 
103
246
  def connect(connection_string: str = ":memory:") -> Connection:
104
247
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chdb
3
- Version: 3.0.1
3
+ Version: 3.1.1
4
4
  Summary: chDB is an in-process SQL OLAP Engine powered by ClickHouse
5
5
  Home-page: https://github.com/chdb-io/chdb
6
6
  Author: auxten
@@ -1,13 +1,13 @@
1
- chdb/__init__.py,sha256=6nRIwbUg4kHRY8E8EQzDv2wT-IyR0-tsr3Mu3n_Mi9Q,3762
1
+ chdb/__init__.py,sha256=dhg9VbRbDft2hubgDzR-pUHN11SER5HucmhhaGwb9Ek,3762
2
2
  chdb/__main__.py,sha256=xNNtDY38d973YM5dlxiIazcqqKhXJSpNb7JflyyrXGE,1185
3
- chdb/_chdb.cpython-39-aarch64-linux-gnu.so,sha256=Q5qdxEuZ8rPe0v4W-PudjB2jM-xBkuCu-netSBOO8EQ,533363072
3
+ chdb/_chdb.cpython-39-aarch64-linux-gnu.so,sha256=QO6tnzkcw93geUwdaxtPmZ-ZW0bdH-GnZux-CCT4yF4,533362920
4
4
  chdb/rwabc.py,sha256=tbiwCrXirfrfx46wCJxS64yvFe6pVWIPGdSuvrAL5Ys,2102
5
5
  chdb/dataframe/__init__.py,sha256=1_mrZZiJwqBTnH_P8_FCbbYXIWWY5sxnaFpe3-tDLF4,680
6
6
  chdb/dataframe/query.py,sha256=ggvE8A5vtabFg9gSTp99S7LCrnIEwbWtb-PtJVT8Ct0,12759
7
7
  chdb/dbapi/__init__.py,sha256=aaNhxXNBC1ZkFr260cbGR8msOinTp0VoNTT_j8AXGUc,2205
8
- chdb/dbapi/connections.py,sha256=4RBO0h-B149xEicE8cXSSJl9wpXa4FQMY_4SghgEvCw,2762
8
+ chdb/dbapi/connections.py,sha256=RW0EcusyKueMGp7VmSaCO-ukyzY7l2ps_ibA9-pXDvo,2754
9
9
  chdb/dbapi/converters.py,sha256=0SDqgixUTCz0LtWke_HHzgF1lFJhpsQrR_-ky3b-JRY,7447
10
- chdb/dbapi/cursors.py,sha256=OXF36raoyI3MIC5SCQ5IvnCtbOnppga4Q1IKOt2EIsk,7920
10
+ chdb/dbapi/cursors.py,sha256=3ufVB1zt3x7SzCYowVbwAOsuzkMxYPO74q9XW6ctkKo,8120
11
11
  chdb/dbapi/err.py,sha256=kUI9-A8LNqBoMoo4jh2NFsLCOLoPEwh9YIuz_qMoLoM,2017
12
12
  chdb/dbapi/times.py,sha256=_qXgDaYwsHntvpIKSKXp1rrYIgtq6Z9pLyLnO2XNoL0,360
13
13
  chdb/dbapi/constants/FIELD_TYPE.py,sha256=ytFzgAnGmb9hvdsBlnK68qdZv_a6jYFIXT6VSAb60z8,370
@@ -15,14 +15,14 @@ chdb/dbapi/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
15
15
  chdb/session/__init__.py,sha256=fCUROZ5L1-92o2lcASiWJpFu-80-kDoSrNfouLEmLg8,50
16
16
  chdb/session/state.py,sha256=nx9KlqZyPTHAflToXCJVRBUSMjJFvyh6x2akP7Gc7h0,4360
17
17
  chdb/state/__init__.py,sha256=RVUIWDqDi7gte4Os7Mz1wPXFyFpdHT_p1klJC7QtluI,55
18
- chdb/state/sqlitelike.py,sha256=-2bQRs6WDREr90pe8UtaYbQK-BWSkko1Ma8cLSzLUf4,4511
18
+ chdb/state/sqlitelike.py,sha256=6Y57vnf7LnA0KnpByKQq7PkEkEEOKK-ExaHQLb1bedQ,10498
19
19
  chdb/udf/__init__.py,sha256=qSMaPEre7w1pYz8uJ-iZtuu8wYOUNRcI_8UNuaOymGE,80
20
20
  chdb/udf/udf.py,sha256=z0A1RmyZrx55bykpvvS-LpVt1lMrQOexjvU5zxCdCSA,3935
21
21
  chdb/utils/__init__.py,sha256=tXRcwBRGW2YQNBZWV4Mitw5QlCu_qlSRCjllw15XHbs,171
22
22
  chdb/utils/trace.py,sha256=W-pvDoKlnzq6H_7FiWjr5_teN40UNE4E5--zbUrjOIc,2511
23
23
  chdb/utils/types.py,sha256=MGLFIjoDvu7Uc2Wy8EDY60jjue66HmMPxbhrujjrZxQ,7530
24
- chdb-3.0.1.dist-info/LICENSE.txt,sha256=isYVtNCO5910aj6e9bJJ6kQceivkLqsMlFSNYwzGGKI,11366
25
- chdb-3.0.1.dist-info/METADATA,sha256=k9gmduudI-PNBH5QCOle-dKL6ascC9StyUnX4vz6gFs,19490
26
- chdb-3.0.1.dist-info/WHEEL,sha256=thzyYe9iyVyKmzNo8_HASABjeeH1MtquM7FDzz9VodA,149
27
- chdb-3.0.1.dist-info/top_level.txt,sha256=se0Jj0A2-ijfMW51hIjiuNyDJPqy5xJU1G8a_IEdllI,11
28
- chdb-3.0.1.dist-info/RECORD,,
24
+ chdb-3.1.1.dist-info/LICENSE.txt,sha256=isYVtNCO5910aj6e9bJJ6kQceivkLqsMlFSNYwzGGKI,11366
25
+ chdb-3.1.1.dist-info/METADATA,sha256=zzMgz3TT8nQ8oEpbgcBTuirfhG8jZnib0UxcE-jOOZI,19490
26
+ chdb-3.1.1.dist-info/WHEEL,sha256=42cKV5DuucxYepSMOM4R5v9PAQY0oUGHpEqV-Umhiik,149
27
+ chdb-3.1.1.dist-info/top_level.txt,sha256=se0Jj0A2-ijfMW51hIjiuNyDJPqy5xJU1G8a_IEdllI,11
28
+ chdb-3.1.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (76.0.0)
3
3
  Root-Is-Purelib: false
4
4
  Tag: cp39-cp39-manylinux_2_17_aarch64
5
5
  Tag: cp39-cp39-manylinux2014_aarch64