chdb 3.0.1__cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl → 3.1.1__cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of chdb has been flagged as potentially problematic. See the package's registry advisory page for more details.

chdb/__init__.py CHANGED
@@ -19,7 +19,7 @@ _process_result_format_funs = {
19
19
  # UDF script path will be f"{g_udf_path}/{func_name}.py"
20
20
  g_udf_path = ""
21
21
 
22
- chdb_version = ('3', '0', '1')
22
+ chdb_version = ('3', '1', '1')
23
23
  if sys.version_info[:2] >= (3, 7):
24
24
  # get the path of the current file
25
25
  current_path = os.path.dirname(os.path.abspath(__file__))
Binary file
chdb/dbapi/connections.py CHANGED
@@ -57,7 +57,7 @@ class Connection(object):
57
57
  return Cursor(self)
58
58
  return Cursor(self)
59
59
 
60
- def query(self, sql, fmt="ArrowStream"):
60
+ def query(self, sql, fmt="CSV"):
61
61
  """Execute a query and return the raw result."""
62
62
  if self._closed:
63
63
  raise err.InterfaceError("Connection closed")
chdb/dbapi/cursors.py CHANGED
@@ -5,10 +5,11 @@ import re
5
5
  # executemany only supports simple bulk insert.
6
6
  # You can use it to load large dataset.
7
7
  RE_INSERT_VALUES = re.compile(
8
- r"\s*((?:INSERT|REPLACE)\b.+\bVALUES?\s*)" +
9
- r"(\(\s*(?:%s|%\(.+\)s)\s*(?:,\s*(?:%s|%\(.+\)s)\s*)*\))" +
10
- r"(\s*(?:ON DUPLICATE.*)?);?\s*\Z",
11
- re.IGNORECASE | re.DOTALL)
8
+ r"\s*((?:INSERT|REPLACE)\b.+\bVALUES?\s*)"
9
+ + r"(\(\s*(?:%s|%\(.+\)s)\s*(?:,\s*(?:%s|%\(.+\)s)\s*)*\))"
10
+ + r"(\s*(?:ON DUPLICATE.*)?);?\s*\Z",
11
+ re.IGNORECASE | re.DOTALL,
12
+ )
12
13
 
13
14
 
14
15
  class Cursor(object):
@@ -131,13 +132,17 @@ class Cursor(object):
131
132
 
132
133
  self._cursor.execute(query)
133
134
 
134
- # Get description from Arrow schema
135
- if self._cursor._current_table is not None:
135
+ # Get description from column names and types
136
+ if hasattr(self._cursor, "_column_names") and self._cursor._column_names:
136
137
  self.description = [
137
- (field.name, field.type.to_pandas_dtype(), None, None, None, None, None)
138
- for field in self._cursor._current_table.schema
138
+ (name, type_info, None, None, None, None, None)
139
+ for name, type_info in zip(
140
+ self._cursor._column_names, self._cursor._column_types
141
+ )
139
142
  ]
140
- self.rowcount = self._cursor._current_table.num_rows
143
+ self.rowcount = (
144
+ len(self._cursor._current_table) if self._cursor._current_table else -1
145
+ )
141
146
  else:
142
147
  self.description = None
143
148
  self.rowcount = -1
@@ -164,16 +169,23 @@ class Cursor(object):
164
169
  if m:
165
170
  q_prefix = m.group(1) % ()
166
171
  q_values = m.group(2).rstrip()
167
- q_postfix = m.group(3) or ''
168
- assert q_values[0] == '(' and q_values[-1] == ')'
169
- return self._do_execute_many(q_prefix, q_values, q_postfix, args,
170
- self.max_stmt_length,
171
- self._get_db().encoding)
172
+ q_postfix = m.group(3) or ""
173
+ assert q_values[0] == "(" and q_values[-1] == ")"
174
+ return self._do_execute_many(
175
+ q_prefix,
176
+ q_values,
177
+ q_postfix,
178
+ args,
179
+ self.max_stmt_length,
180
+ self._get_db().encoding,
181
+ )
172
182
 
173
183
  self.rowcount = sum(self.execute(query, arg) for arg in args)
174
184
  return self.rowcount
175
185
 
176
- def _do_execute_many(self, prefix, values, postfix, args, max_stmt_length, encoding):
186
+ def _do_execute_many(
187
+ self, prefix, values, postfix, args, max_stmt_length, encoding
188
+ ):
177
189
  conn = self._get_db()
178
190
  escape = self._escape_args
179
191
  if isinstance(prefix, str):
@@ -184,18 +196,18 @@ class Cursor(object):
184
196
  args = iter(args)
185
197
  v = values % escape(next(args), conn)
186
198
  if isinstance(v, str):
187
- v = v.encode(encoding, 'surrogateescape')
199
+ v = v.encode(encoding, "surrogateescape")
188
200
  sql += v
189
201
  rows = 0
190
202
  for arg in args:
191
203
  v = values % escape(arg, conn)
192
204
  if isinstance(v, str):
193
- v = v.encode(encoding, 'surrogateescape')
205
+ v = v.encode(encoding, "surrogateescape")
194
206
  if len(sql) + len(v) + len(postfix) + 1 > max_stmt_length:
195
207
  rows += self.execute(sql + postfix)
196
208
  sql = prefix
197
209
  else:
198
- sql += ','.encode(encoding)
210
+ sql += ",".encode(encoding)
199
211
  sql += v
200
212
  rows += self.execute(sql + postfix)
201
213
  self.rowcount = rows
chdb/state/sqlitelike.py CHANGED
@@ -1,4 +1,3 @@
1
- import io
2
1
  from typing import Optional, Any
3
2
  from chdb import _chdb
4
3
 
@@ -11,6 +10,36 @@ except ImportError as e:
11
10
  raise ImportError("Failed to import pyarrow") from None
12
11
 
13
12
 
13
+ _arrow_format = set({"dataframe", "arrowtable"})
14
+ _process_result_format_funs = {
15
+ "dataframe": lambda x: to_df(x),
16
+ "arrowtable": lambda x: to_arrowTable(x),
17
+ }
18
+
19
+
20
+ # return pyarrow table
21
+ def to_arrowTable(res):
22
+ """convert res to arrow table"""
23
+ # try import pyarrow and pandas, if failed, raise ImportError with suggestion
24
+ try:
25
+ import pyarrow as pa # noqa
26
+ import pandas as pd # noqa
27
+ except ImportError as e:
28
+ print(f"ImportError: {e}")
29
+ print('Please install pyarrow and pandas via "pip install pyarrow pandas"')
30
+ raise ImportError("Failed to import pyarrow or pandas") from None
31
+ if len(res) == 0:
32
+ return pa.Table.from_batches([], schema=pa.schema([]))
33
+ return pa.RecordBatchFileReader(res.bytes()).read_all()
34
+
35
+
36
+ # return pandas dataframe
37
+ def to_df(r):
38
+ """convert arrow table to Dataframe"""
39
+ t = to_arrowTable(r)
40
+ return t.to_pandas(use_threads=True)
41
+
42
+
14
43
  class Connection:
15
44
  def __init__(self, connection_string: str):
16
45
  # print("Connection", connection_string)
@@ -22,7 +51,13 @@ class Connection:
22
51
  return self._cursor
23
52
 
24
53
  def query(self, query: str, format: str = "CSV") -> Any:
25
- return self._conn.query(query, format)
54
+ lower_output_format = format.lower()
55
+ result_func = _process_result_format_funs.get(lower_output_format, lambda x: x)
56
+ if lower_output_format in _arrow_format:
57
+ format = "Arrow"
58
+
59
+ result = self._conn.query(query, format)
60
+ return result_func(result)
26
61
 
27
62
  def close(self) -> None:
28
63
  # print("close")
@@ -41,17 +76,103 @@ class Cursor:
41
76
  def execute(self, query: str) -> None:
42
77
  self._cursor.execute(query)
43
78
  result_mv = self._cursor.get_memview()
44
- # print("get_result", result_mv)
45
79
  if self._cursor.has_error():
46
80
  raise Exception(self._cursor.error_message())
47
81
  if self._cursor.data_size() == 0:
48
82
  self._current_table = None
49
83
  self._current_row = 0
84
+ self._column_names = []
85
+ self._column_types = []
50
86
  return
51
- arrow_data = result_mv.tobytes()
52
- reader = pa.ipc.open_stream(io.BytesIO(arrow_data))
53
- self._current_table = reader.read_all()
54
- self._current_row = 0
87
+
88
+ # Parse JSON data
89
+ json_data = result_mv.tobytes().decode("utf-8")
90
+ import json
91
+
92
+ try:
93
+ # First line contains column names
94
+ # Second line contains column types
95
+ # Following lines contain data
96
+ lines = json_data.strip().split("\n")
97
+ if len(lines) < 2:
98
+ self._current_table = None
99
+ self._current_row = 0
100
+ self._column_names = []
101
+ self._column_types = []
102
+ return
103
+
104
+ self._column_names = json.loads(lines[0])
105
+ self._column_types = json.loads(lines[1])
106
+
107
+ # Convert data rows
108
+ rows = []
109
+ for line in lines[2:]:
110
+ if not line.strip():
111
+ continue
112
+ row_data = json.loads(line)
113
+ converted_row = []
114
+ for val, type_info in zip(row_data, self._column_types):
115
+ # Handle NULL values first
116
+ if val is None:
117
+ converted_row.append(None)
118
+ continue
119
+
120
+ # Basic type conversion
121
+ try:
122
+ if type_info.startswith("Int") or type_info.startswith("UInt"):
123
+ converted_row.append(int(val))
124
+ elif type_info.startswith("Float"):
125
+ converted_row.append(float(val))
126
+ elif type_info == "Bool":
127
+ converted_row.append(bool(val))
128
+ elif type_info == "String" or type_info == "FixedString":
129
+ converted_row.append(str(val))
130
+ elif type_info.startswith("DateTime"):
131
+ from datetime import datetime
132
+
133
+ # Check if the value is numeric (timestamp)
134
+ val_str = str(val)
135
+ if val_str.replace(".", "").isdigit():
136
+ converted_row.append(datetime.fromtimestamp(float(val)))
137
+ else:
138
+ # Handle datetime string formats
139
+ if "." in val_str: # Has microseconds
140
+ converted_row.append(
141
+ datetime.strptime(
142
+ val_str, "%Y-%m-%d %H:%M:%S.%f"
143
+ )
144
+ )
145
+ else: # No microseconds
146
+ converted_row.append(
147
+ datetime.strptime(val_str, "%Y-%m-%d %H:%M:%S")
148
+ )
149
+ elif type_info.startswith("Date"):
150
+ from datetime import date, datetime
151
+
152
+ # Check if the value is numeric (days since epoch)
153
+ val_str = str(val)
154
+ if val_str.isdigit():
155
+ converted_row.append(
156
+ date.fromtimestamp(float(val) * 86400)
157
+ )
158
+ else:
159
+ # Handle date string format
160
+ converted_row.append(
161
+ datetime.strptime(val_str, "%Y-%m-%d").date()
162
+ )
163
+ else:
164
+ # For unsupported types, keep as string
165
+ converted_row.append(str(val))
166
+ except (ValueError, TypeError):
167
+ # If conversion fails, keep original value as string
168
+ converted_row.append(str(val))
169
+ rows.append(tuple(converted_row))
170
+
171
+ self._current_table = rows
172
+ self._current_row = 0
173
+
174
+ except json.JSONDecodeError as e:
175
+ raise Exception(f"Failed to parse JSON data: {e}")
55
176
 
56
177
  def commit(self) -> None:
57
178
  self._cursor.commit()
@@ -60,12 +181,10 @@ class Cursor:
60
181
  if not self._current_table or self._current_row >= len(self._current_table):
61
182
  return None
62
183
 
63
- row_dict = {
64
- col: self._current_table.column(col)[self._current_row].as_py()
65
- for col in self._current_table.column_names
66
- }
184
+ # Now self._current_table is a list of row tuples
185
+ row = self._current_table[self._current_row]
67
186
  self._current_row += 1
68
- return tuple(row_dict.values())
187
+ return row
69
188
 
70
189
  def fetchmany(self, size: int = 1) -> tuple:
71
190
  if not self._current_table:
@@ -99,6 +218,30 @@ class Cursor:
99
218
  raise StopIteration
100
219
  return row
101
220
 
221
+ def column_names(self) -> list:
222
+ """Return a list of column names from the last executed query"""
223
+ return self._column_names if hasattr(self, "_column_names") else []
224
+
225
+ def column_types(self) -> list:
226
+ """Return a list of column types from the last executed query"""
227
+ return self._column_types if hasattr(self, "_column_types") else []
228
+
229
+ @property
230
+ def description(self) -> list:
231
+ """
232
+ Return a description of the columns as per DB-API 2.0
233
+ Returns a list of 7-item tuples, each containing:
234
+ (name, type_code, display_size, internal_size, precision, scale, null_ok)
235
+ where only name and type_code are provided
236
+ """
237
+ if not hasattr(self, "_column_names") or not self._column_names:
238
+ return []
239
+
240
+ return [
241
+ (name, type_info, None, None, None, None, None)
242
+ for name, type_info in zip(self._column_names, self._column_types)
243
+ ]
244
+
102
245
 
103
246
  def connect(connection_string: str = ":memory:") -> Connection:
104
247
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chdb
3
- Version: 3.0.1
3
+ Version: 3.1.1
4
4
  Summary: chDB is an in-process SQL OLAP Engine powered by ClickHouse
5
5
  Home-page: https://github.com/chdb-io/chdb
6
6
  Author: auxten
@@ -1,13 +1,13 @@
1
- chdb/__init__.py,sha256=6nRIwbUg4kHRY8E8EQzDv2wT-IyR0-tsr3Mu3n_Mi9Q,3762
1
+ chdb/__init__.py,sha256=dhg9VbRbDft2hubgDzR-pUHN11SER5HucmhhaGwb9Ek,3762
2
2
  chdb/__main__.py,sha256=xNNtDY38d973YM5dlxiIazcqqKhXJSpNb7JflyyrXGE,1185
3
- chdb/_chdb.cpython-39-aarch64-linux-gnu.so,sha256=Q5qdxEuZ8rPe0v4W-PudjB2jM-xBkuCu-netSBOO8EQ,533363072
3
+ chdb/_chdb.cpython-39-aarch64-linux-gnu.so,sha256=QO6tnzkcw93geUwdaxtPmZ-ZW0bdH-GnZux-CCT4yF4,533362920
4
4
  chdb/rwabc.py,sha256=tbiwCrXirfrfx46wCJxS64yvFe6pVWIPGdSuvrAL5Ys,2102
5
5
  chdb/dataframe/__init__.py,sha256=1_mrZZiJwqBTnH_P8_FCbbYXIWWY5sxnaFpe3-tDLF4,680
6
6
  chdb/dataframe/query.py,sha256=ggvE8A5vtabFg9gSTp99S7LCrnIEwbWtb-PtJVT8Ct0,12759
7
7
  chdb/dbapi/__init__.py,sha256=aaNhxXNBC1ZkFr260cbGR8msOinTp0VoNTT_j8AXGUc,2205
8
- chdb/dbapi/connections.py,sha256=4RBO0h-B149xEicE8cXSSJl9wpXa4FQMY_4SghgEvCw,2762
8
+ chdb/dbapi/connections.py,sha256=RW0EcusyKueMGp7VmSaCO-ukyzY7l2ps_ibA9-pXDvo,2754
9
9
  chdb/dbapi/converters.py,sha256=0SDqgixUTCz0LtWke_HHzgF1lFJhpsQrR_-ky3b-JRY,7447
10
- chdb/dbapi/cursors.py,sha256=OXF36raoyI3MIC5SCQ5IvnCtbOnppga4Q1IKOt2EIsk,7920
10
+ chdb/dbapi/cursors.py,sha256=3ufVB1zt3x7SzCYowVbwAOsuzkMxYPO74q9XW6ctkKo,8120
11
11
  chdb/dbapi/err.py,sha256=kUI9-A8LNqBoMoo4jh2NFsLCOLoPEwh9YIuz_qMoLoM,2017
12
12
  chdb/dbapi/times.py,sha256=_qXgDaYwsHntvpIKSKXp1rrYIgtq6Z9pLyLnO2XNoL0,360
13
13
  chdb/dbapi/constants/FIELD_TYPE.py,sha256=ytFzgAnGmb9hvdsBlnK68qdZv_a6jYFIXT6VSAb60z8,370
@@ -15,14 +15,14 @@ chdb/dbapi/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
15
15
  chdb/session/__init__.py,sha256=fCUROZ5L1-92o2lcASiWJpFu-80-kDoSrNfouLEmLg8,50
16
16
  chdb/session/state.py,sha256=nx9KlqZyPTHAflToXCJVRBUSMjJFvyh6x2akP7Gc7h0,4360
17
17
  chdb/state/__init__.py,sha256=RVUIWDqDi7gte4Os7Mz1wPXFyFpdHT_p1klJC7QtluI,55
18
- chdb/state/sqlitelike.py,sha256=-2bQRs6WDREr90pe8UtaYbQK-BWSkko1Ma8cLSzLUf4,4511
18
+ chdb/state/sqlitelike.py,sha256=6Y57vnf7LnA0KnpByKQq7PkEkEEOKK-ExaHQLb1bedQ,10498
19
19
  chdb/udf/__init__.py,sha256=qSMaPEre7w1pYz8uJ-iZtuu8wYOUNRcI_8UNuaOymGE,80
20
20
  chdb/udf/udf.py,sha256=z0A1RmyZrx55bykpvvS-LpVt1lMrQOexjvU5zxCdCSA,3935
21
21
  chdb/utils/__init__.py,sha256=tXRcwBRGW2YQNBZWV4Mitw5QlCu_qlSRCjllw15XHbs,171
22
22
  chdb/utils/trace.py,sha256=W-pvDoKlnzq6H_7FiWjr5_teN40UNE4E5--zbUrjOIc,2511
23
23
  chdb/utils/types.py,sha256=MGLFIjoDvu7Uc2Wy8EDY60jjue66HmMPxbhrujjrZxQ,7530
24
- chdb-3.0.1.dist-info/LICENSE.txt,sha256=isYVtNCO5910aj6e9bJJ6kQceivkLqsMlFSNYwzGGKI,11366
25
- chdb-3.0.1.dist-info/METADATA,sha256=k9gmduudI-PNBH5QCOle-dKL6ascC9StyUnX4vz6gFs,19490
26
- chdb-3.0.1.dist-info/WHEEL,sha256=thzyYe9iyVyKmzNo8_HASABjeeH1MtquM7FDzz9VodA,149
27
- chdb-3.0.1.dist-info/top_level.txt,sha256=se0Jj0A2-ijfMW51hIjiuNyDJPqy5xJU1G8a_IEdllI,11
28
- chdb-3.0.1.dist-info/RECORD,,
24
+ chdb-3.1.1.dist-info/LICENSE.txt,sha256=isYVtNCO5910aj6e9bJJ6kQceivkLqsMlFSNYwzGGKI,11366
25
+ chdb-3.1.1.dist-info/METADATA,sha256=zzMgz3TT8nQ8oEpbgcBTuirfhG8jZnib0UxcE-jOOZI,19490
26
+ chdb-3.1.1.dist-info/WHEEL,sha256=42cKV5DuucxYepSMOM4R5v9PAQY0oUGHpEqV-Umhiik,149
27
+ chdb-3.1.1.dist-info/top_level.txt,sha256=se0Jj0A2-ijfMW51hIjiuNyDJPqy5xJU1G8a_IEdllI,11
28
+ chdb-3.1.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (76.0.0)
3
3
  Root-Is-Purelib: false
4
4
  Tag: cp39-cp39-manylinux_2_17_aarch64
5
5
  Tag: cp39-cp39-manylinux2014_aarch64