chdb 3.1.0__cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl → 3.1.2__cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of chdb might be problematic. Click here for more details.
- chdb/__init__.py +1 -1
- chdb/_chdb.cpython-313-aarch64-linux-gnu.so +0 -0
- chdb/dbapi/connections.py +1 -1
- chdb/dbapi/cursors.py +30 -18
- chdb/state/sqlitelike.py +155 -12
- {chdb-3.1.0.dist-info → chdb-3.1.2.dist-info}/METADATA +3 -2
- {chdb-3.1.0.dist-info → chdb-3.1.2.dist-info}/RECORD +10 -10
- {chdb-3.1.0.dist-info → chdb-3.1.2.dist-info}/WHEEL +1 -1
- {chdb-3.1.0.dist-info → chdb-3.1.2.dist-info/licenses}/LICENSE.txt +0 -0
- {chdb-3.1.0.dist-info → chdb-3.1.2.dist-info}/top_level.txt +0 -0
chdb/__init__.py
CHANGED
|
@@ -19,7 +19,7 @@ _process_result_format_funs = {
|
|
|
19
19
|
# UDF script path will be f"{g_udf_path}/{func_name}.py"
|
|
20
20
|
g_udf_path = ""
|
|
21
21
|
|
|
22
|
-
chdb_version = ('3', '1', '0')
|
|
22
|
+
chdb_version = ('3', '1', '2')
|
|
23
23
|
if sys.version_info[:2] >= (3, 7):
|
|
24
24
|
# get the path of the current file
|
|
25
25
|
current_path = os.path.dirname(os.path.abspath(__file__))
|
|
Binary file
|
chdb/dbapi/connections.py
CHANGED
|
@@ -57,7 +57,7 @@ class Connection(object):
|
|
|
57
57
|
return Cursor(self)
|
|
58
58
|
return Cursor(self)
|
|
59
59
|
|
|
60
|
-
def query(self, sql, fmt="
|
|
60
|
+
def query(self, sql, fmt="CSV"):
|
|
61
61
|
"""Execute a query and return the raw result."""
|
|
62
62
|
if self._closed:
|
|
63
63
|
raise err.InterfaceError("Connection closed")
|
chdb/dbapi/cursors.py
CHANGED
|
@@ -5,10 +5,11 @@ import re
|
|
|
5
5
|
# executemany only supports simple bulk insert.
|
|
6
6
|
# You can use it to load large dataset.
|
|
7
7
|
RE_INSERT_VALUES = re.compile(
|
|
8
|
-
r"\s*((?:INSERT|REPLACE)\b.+\bVALUES?\s*)"
|
|
9
|
-
r"(\(\s*(?:%s|%\(.+\)s)\s*(?:,\s*(?:%s|%\(.+\)s)\s*)*\))"
|
|
10
|
-
r"(\s*(?:ON DUPLICATE.*)?);?\s*\Z",
|
|
11
|
-
re.IGNORECASE | re.DOTALL
|
|
8
|
+
r"\s*((?:INSERT|REPLACE)\b.+\bVALUES?\s*)"
|
|
9
|
+
+ r"(\(\s*(?:%s|%\(.+\)s)\s*(?:,\s*(?:%s|%\(.+\)s)\s*)*\))"
|
|
10
|
+
+ r"(\s*(?:ON DUPLICATE.*)?);?\s*\Z",
|
|
11
|
+
re.IGNORECASE | re.DOTALL,
|
|
12
|
+
)
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
class Cursor(object):
|
|
@@ -131,13 +132,17 @@ class Cursor(object):
|
|
|
131
132
|
|
|
132
133
|
self._cursor.execute(query)
|
|
133
134
|
|
|
134
|
-
# Get description from
|
|
135
|
-
if self._cursor
|
|
135
|
+
# Get description from column names and types
|
|
136
|
+
if hasattr(self._cursor, "_column_names") and self._cursor._column_names:
|
|
136
137
|
self.description = [
|
|
137
|
-
(
|
|
138
|
-
for
|
|
138
|
+
(name, type_info, None, None, None, None, None)
|
|
139
|
+
for name, type_info in zip(
|
|
140
|
+
self._cursor._column_names, self._cursor._column_types
|
|
141
|
+
)
|
|
139
142
|
]
|
|
140
|
-
self.rowcount =
|
|
143
|
+
self.rowcount = (
|
|
144
|
+
len(self._cursor._current_table) if self._cursor._current_table else -1
|
|
145
|
+
)
|
|
141
146
|
else:
|
|
142
147
|
self.description = None
|
|
143
148
|
self.rowcount = -1
|
|
@@ -164,16 +169,23 @@ class Cursor(object):
|
|
|
164
169
|
if m:
|
|
165
170
|
q_prefix = m.group(1) % ()
|
|
166
171
|
q_values = m.group(2).rstrip()
|
|
167
|
-
q_postfix = m.group(3) or
|
|
168
|
-
assert q_values[0] ==
|
|
169
|
-
return self._do_execute_many(
|
|
170
|
-
|
|
171
|
-
|
|
172
|
+
q_postfix = m.group(3) or ""
|
|
173
|
+
assert q_values[0] == "(" and q_values[-1] == ")"
|
|
174
|
+
return self._do_execute_many(
|
|
175
|
+
q_prefix,
|
|
176
|
+
q_values,
|
|
177
|
+
q_postfix,
|
|
178
|
+
args,
|
|
179
|
+
self.max_stmt_length,
|
|
180
|
+
self._get_db().encoding,
|
|
181
|
+
)
|
|
172
182
|
|
|
173
183
|
self.rowcount = sum(self.execute(query, arg) for arg in args)
|
|
174
184
|
return self.rowcount
|
|
175
185
|
|
|
176
|
-
def _do_execute_many(
|
|
186
|
+
def _do_execute_many(
|
|
187
|
+
self, prefix, values, postfix, args, max_stmt_length, encoding
|
|
188
|
+
):
|
|
177
189
|
conn = self._get_db()
|
|
178
190
|
escape = self._escape_args
|
|
179
191
|
if isinstance(prefix, str):
|
|
@@ -184,18 +196,18 @@ class Cursor(object):
|
|
|
184
196
|
args = iter(args)
|
|
185
197
|
v = values % escape(next(args), conn)
|
|
186
198
|
if isinstance(v, str):
|
|
187
|
-
v = v.encode(encoding,
|
|
199
|
+
v = v.encode(encoding, "surrogateescape")
|
|
188
200
|
sql += v
|
|
189
201
|
rows = 0
|
|
190
202
|
for arg in args:
|
|
191
203
|
v = values % escape(arg, conn)
|
|
192
204
|
if isinstance(v, str):
|
|
193
|
-
v = v.encode(encoding,
|
|
205
|
+
v = v.encode(encoding, "surrogateescape")
|
|
194
206
|
if len(sql) + len(v) + len(postfix) + 1 > max_stmt_length:
|
|
195
207
|
rows += self.execute(sql + postfix)
|
|
196
208
|
sql = prefix
|
|
197
209
|
else:
|
|
198
|
-
sql +=
|
|
210
|
+
sql += ",".encode(encoding)
|
|
199
211
|
sql += v
|
|
200
212
|
rows += self.execute(sql + postfix)
|
|
201
213
|
self.rowcount = rows
|
chdb/state/sqlitelike.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import io
|
|
2
1
|
from typing import Optional, Any
|
|
3
2
|
from chdb import _chdb
|
|
4
3
|
|
|
@@ -11,6 +10,36 @@ except ImportError as e:
|
|
|
11
10
|
raise ImportError("Failed to import pyarrow") from None
|
|
12
11
|
|
|
13
12
|
|
|
13
|
+
_arrow_format = set({"dataframe", "arrowtable"})
|
|
14
|
+
_process_result_format_funs = {
|
|
15
|
+
"dataframe": lambda x: to_df(x),
|
|
16
|
+
"arrowtable": lambda x: to_arrowTable(x),
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# return pyarrow table
|
|
21
|
+
def to_arrowTable(res):
|
|
22
|
+
"""convert res to arrow table"""
|
|
23
|
+
# try import pyarrow and pandas, if failed, raise ImportError with suggestion
|
|
24
|
+
try:
|
|
25
|
+
import pyarrow as pa # noqa
|
|
26
|
+
import pandas as pd # noqa
|
|
27
|
+
except ImportError as e:
|
|
28
|
+
print(f"ImportError: {e}")
|
|
29
|
+
print('Please install pyarrow and pandas via "pip install pyarrow pandas"')
|
|
30
|
+
raise ImportError("Failed to import pyarrow or pandas") from None
|
|
31
|
+
if len(res) == 0:
|
|
32
|
+
return pa.Table.from_batches([], schema=pa.schema([]))
|
|
33
|
+
return pa.RecordBatchFileReader(res.bytes()).read_all()
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# return pandas dataframe
|
|
37
|
+
def to_df(r):
|
|
38
|
+
"""convert arrow table to Dataframe"""
|
|
39
|
+
t = to_arrowTable(r)
|
|
40
|
+
return t.to_pandas(use_threads=True)
|
|
41
|
+
|
|
42
|
+
|
|
14
43
|
class Connection:
|
|
15
44
|
def __init__(self, connection_string: str):
|
|
16
45
|
# print("Connection", connection_string)
|
|
@@ -22,7 +51,13 @@ class Connection:
|
|
|
22
51
|
return self._cursor
|
|
23
52
|
|
|
24
53
|
def query(self, query: str, format: str = "CSV") -> Any:
|
|
25
|
-
|
|
54
|
+
lower_output_format = format.lower()
|
|
55
|
+
result_func = _process_result_format_funs.get(lower_output_format, lambda x: x)
|
|
56
|
+
if lower_output_format in _arrow_format:
|
|
57
|
+
format = "Arrow"
|
|
58
|
+
|
|
59
|
+
result = self._conn.query(query, format)
|
|
60
|
+
return result_func(result)
|
|
26
61
|
|
|
27
62
|
def close(self) -> None:
|
|
28
63
|
# print("close")
|
|
@@ -41,17 +76,103 @@ class Cursor:
|
|
|
41
76
|
def execute(self, query: str) -> None:
|
|
42
77
|
self._cursor.execute(query)
|
|
43
78
|
result_mv = self._cursor.get_memview()
|
|
44
|
-
# print("get_result", result_mv)
|
|
45
79
|
if self._cursor.has_error():
|
|
46
80
|
raise Exception(self._cursor.error_message())
|
|
47
81
|
if self._cursor.data_size() == 0:
|
|
48
82
|
self._current_table = None
|
|
49
83
|
self._current_row = 0
|
|
84
|
+
self._column_names = []
|
|
85
|
+
self._column_types = []
|
|
50
86
|
return
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
87
|
+
|
|
88
|
+
# Parse JSON data
|
|
89
|
+
json_data = result_mv.tobytes().decode("utf-8")
|
|
90
|
+
import json
|
|
91
|
+
|
|
92
|
+
try:
|
|
93
|
+
# First line contains column names
|
|
94
|
+
# Second line contains column types
|
|
95
|
+
# Following lines contain data
|
|
96
|
+
lines = json_data.strip().split("\n")
|
|
97
|
+
if len(lines) < 2:
|
|
98
|
+
self._current_table = None
|
|
99
|
+
self._current_row = 0
|
|
100
|
+
self._column_names = []
|
|
101
|
+
self._column_types = []
|
|
102
|
+
return
|
|
103
|
+
|
|
104
|
+
self._column_names = json.loads(lines[0])
|
|
105
|
+
self._column_types = json.loads(lines[1])
|
|
106
|
+
|
|
107
|
+
# Convert data rows
|
|
108
|
+
rows = []
|
|
109
|
+
for line in lines[2:]:
|
|
110
|
+
if not line.strip():
|
|
111
|
+
continue
|
|
112
|
+
row_data = json.loads(line)
|
|
113
|
+
converted_row = []
|
|
114
|
+
for val, type_info in zip(row_data, self._column_types):
|
|
115
|
+
# Handle NULL values first
|
|
116
|
+
if val is None:
|
|
117
|
+
converted_row.append(None)
|
|
118
|
+
continue
|
|
119
|
+
|
|
120
|
+
# Basic type conversion
|
|
121
|
+
try:
|
|
122
|
+
if type_info.startswith("Int") or type_info.startswith("UInt"):
|
|
123
|
+
converted_row.append(int(val))
|
|
124
|
+
elif type_info.startswith("Float"):
|
|
125
|
+
converted_row.append(float(val))
|
|
126
|
+
elif type_info == "Bool":
|
|
127
|
+
converted_row.append(bool(val))
|
|
128
|
+
elif type_info == "String" or type_info == "FixedString":
|
|
129
|
+
converted_row.append(str(val))
|
|
130
|
+
elif type_info.startswith("DateTime"):
|
|
131
|
+
from datetime import datetime
|
|
132
|
+
|
|
133
|
+
# Check if the value is numeric (timestamp)
|
|
134
|
+
val_str = str(val)
|
|
135
|
+
if val_str.replace(".", "").isdigit():
|
|
136
|
+
converted_row.append(datetime.fromtimestamp(float(val)))
|
|
137
|
+
else:
|
|
138
|
+
# Handle datetime string formats
|
|
139
|
+
if "." in val_str: # Has microseconds
|
|
140
|
+
converted_row.append(
|
|
141
|
+
datetime.strptime(
|
|
142
|
+
val_str, "%Y-%m-%d %H:%M:%S.%f"
|
|
143
|
+
)
|
|
144
|
+
)
|
|
145
|
+
else: # No microseconds
|
|
146
|
+
converted_row.append(
|
|
147
|
+
datetime.strptime(val_str, "%Y-%m-%d %H:%M:%S")
|
|
148
|
+
)
|
|
149
|
+
elif type_info.startswith("Date"):
|
|
150
|
+
from datetime import date, datetime
|
|
151
|
+
|
|
152
|
+
# Check if the value is numeric (days since epoch)
|
|
153
|
+
val_str = str(val)
|
|
154
|
+
if val_str.isdigit():
|
|
155
|
+
converted_row.append(
|
|
156
|
+
date.fromtimestamp(float(val) * 86400)
|
|
157
|
+
)
|
|
158
|
+
else:
|
|
159
|
+
# Handle date string format
|
|
160
|
+
converted_row.append(
|
|
161
|
+
datetime.strptime(val_str, "%Y-%m-%d").date()
|
|
162
|
+
)
|
|
163
|
+
else:
|
|
164
|
+
# For unsupported types, keep as string
|
|
165
|
+
converted_row.append(str(val))
|
|
166
|
+
except (ValueError, TypeError):
|
|
167
|
+
# If conversion fails, keep original value as string
|
|
168
|
+
converted_row.append(str(val))
|
|
169
|
+
rows.append(tuple(converted_row))
|
|
170
|
+
|
|
171
|
+
self._current_table = rows
|
|
172
|
+
self._current_row = 0
|
|
173
|
+
|
|
174
|
+
except json.JSONDecodeError as e:
|
|
175
|
+
raise Exception(f"Failed to parse JSON data: {e}")
|
|
55
176
|
|
|
56
177
|
def commit(self) -> None:
|
|
57
178
|
self._cursor.commit()
|
|
@@ -60,12 +181,10 @@ class Cursor:
|
|
|
60
181
|
if not self._current_table or self._current_row >= len(self._current_table):
|
|
61
182
|
return None
|
|
62
183
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
for col in self._current_table.column_names
|
|
66
|
-
}
|
|
184
|
+
# Now self._current_table is a list of row tuples
|
|
185
|
+
row = self._current_table[self._current_row]
|
|
67
186
|
self._current_row += 1
|
|
68
|
-
return
|
|
187
|
+
return row
|
|
69
188
|
|
|
70
189
|
def fetchmany(self, size: int = 1) -> tuple:
|
|
71
190
|
if not self._current_table:
|
|
@@ -99,6 +218,30 @@ class Cursor:
|
|
|
99
218
|
raise StopIteration
|
|
100
219
|
return row
|
|
101
220
|
|
|
221
|
+
def column_names(self) -> list:
|
|
222
|
+
"""Return a list of column names from the last executed query"""
|
|
223
|
+
return self._column_names if hasattr(self, "_column_names") else []
|
|
224
|
+
|
|
225
|
+
def column_types(self) -> list:
|
|
226
|
+
"""Return a list of column types from the last executed query"""
|
|
227
|
+
return self._column_types if hasattr(self, "_column_types") else []
|
|
228
|
+
|
|
229
|
+
@property
|
|
230
|
+
def description(self) -> list:
|
|
231
|
+
"""
|
|
232
|
+
Return a description of the columns as per DB-API 2.0
|
|
233
|
+
Returns a list of 7-item tuples, each containing:
|
|
234
|
+
(name, type_code, display_size, internal_size, precision, scale, null_ok)
|
|
235
|
+
where only name and type_code are provided
|
|
236
|
+
"""
|
|
237
|
+
if not hasattr(self, "_column_names") or not self._column_names:
|
|
238
|
+
return []
|
|
239
|
+
|
|
240
|
+
return [
|
|
241
|
+
(name, type_info, None, None, None, None, None)
|
|
242
|
+
for name, type_info in zip(self._column_names, self._column_types)
|
|
243
|
+
]
|
|
244
|
+
|
|
102
245
|
|
|
103
246
|
def connect(connection_string: str = ":memory:") -> Connection:
|
|
104
247
|
"""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: chdb
|
|
3
|
-
Version: 3.1.0
|
|
3
|
+
Version: 3.1.2
|
|
4
4
|
Summary: chDB is an in-process SQL OLAP Engine powered by ClickHouse
|
|
5
5
|
Home-page: https://github.com/chdb-io/chdb
|
|
6
6
|
Author: auxten
|
|
@@ -30,6 +30,7 @@ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
|
30
30
|
License-File: LICENSE.txt
|
|
31
31
|
Requires-Dist: pyarrow>=13.0.0
|
|
32
32
|
Requires-Dist: pandas>=2.0.0
|
|
33
|
+
Dynamic: license-file
|
|
33
34
|
Dynamic: requires-dist
|
|
34
35
|
Dynamic: requires-python
|
|
35
36
|
|
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
chdb/__init__.py,sha256=
|
|
1
|
+
chdb/__init__.py,sha256=Wb4a4CPgJ0j44kDuehkwITZV9Q6QOqyUmxA5PM6BbYk,3762
|
|
2
2
|
chdb/__main__.py,sha256=xNNtDY38d973YM5dlxiIazcqqKhXJSpNb7JflyyrXGE,1185
|
|
3
|
-
chdb/_chdb.cpython-313-aarch64-linux-gnu.so,sha256=
|
|
3
|
+
chdb/_chdb.cpython-313-aarch64-linux-gnu.so,sha256=kyVo1FUWUcnAJIh_Ed_Htfh0L8uGlLvVIH4hNlmDKzA,533371480
|
|
4
4
|
chdb/rwabc.py,sha256=tbiwCrXirfrfx46wCJxS64yvFe6pVWIPGdSuvrAL5Ys,2102
|
|
5
5
|
chdb/dataframe/__init__.py,sha256=1_mrZZiJwqBTnH_P8_FCbbYXIWWY5sxnaFpe3-tDLF4,680
|
|
6
6
|
chdb/dataframe/query.py,sha256=ggvE8A5vtabFg9gSTp99S7LCrnIEwbWtb-PtJVT8Ct0,12759
|
|
7
7
|
chdb/dbapi/__init__.py,sha256=aaNhxXNBC1ZkFr260cbGR8msOinTp0VoNTT_j8AXGUc,2205
|
|
8
|
-
chdb/dbapi/connections.py,sha256=
|
|
8
|
+
chdb/dbapi/connections.py,sha256=RW0EcusyKueMGp7VmSaCO-ukyzY7l2ps_ibA9-pXDvo,2754
|
|
9
9
|
chdb/dbapi/converters.py,sha256=0SDqgixUTCz0LtWke_HHzgF1lFJhpsQrR_-ky3b-JRY,7447
|
|
10
|
-
chdb/dbapi/cursors.py,sha256=
|
|
10
|
+
chdb/dbapi/cursors.py,sha256=3ufVB1zt3x7SzCYowVbwAOsuzkMxYPO74q9XW6ctkKo,8120
|
|
11
11
|
chdb/dbapi/err.py,sha256=kUI9-A8LNqBoMoo4jh2NFsLCOLoPEwh9YIuz_qMoLoM,2017
|
|
12
12
|
chdb/dbapi/times.py,sha256=_qXgDaYwsHntvpIKSKXp1rrYIgtq6Z9pLyLnO2XNoL0,360
|
|
13
13
|
chdb/dbapi/constants/FIELD_TYPE.py,sha256=ytFzgAnGmb9hvdsBlnK68qdZv_a6jYFIXT6VSAb60z8,370
|
|
@@ -15,14 +15,14 @@ chdb/dbapi/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
|
|
|
15
15
|
chdb/session/__init__.py,sha256=fCUROZ5L1-92o2lcASiWJpFu-80-kDoSrNfouLEmLg8,50
|
|
16
16
|
chdb/session/state.py,sha256=nx9KlqZyPTHAflToXCJVRBUSMjJFvyh6x2akP7Gc7h0,4360
|
|
17
17
|
chdb/state/__init__.py,sha256=RVUIWDqDi7gte4Os7Mz1wPXFyFpdHT_p1klJC7QtluI,55
|
|
18
|
-
chdb/state/sqlitelike.py,sha256
|
|
18
|
+
chdb/state/sqlitelike.py,sha256=6Y57vnf7LnA0KnpByKQq7PkEkEEOKK-ExaHQLb1bedQ,10498
|
|
19
19
|
chdb/udf/__init__.py,sha256=qSMaPEre7w1pYz8uJ-iZtuu8wYOUNRcI_8UNuaOymGE,80
|
|
20
20
|
chdb/udf/udf.py,sha256=z0A1RmyZrx55bykpvvS-LpVt1lMrQOexjvU5zxCdCSA,3935
|
|
21
21
|
chdb/utils/__init__.py,sha256=tXRcwBRGW2YQNBZWV4Mitw5QlCu_qlSRCjllw15XHbs,171
|
|
22
22
|
chdb/utils/trace.py,sha256=W-pvDoKlnzq6H_7FiWjr5_teN40UNE4E5--zbUrjOIc,2511
|
|
23
23
|
chdb/utils/types.py,sha256=MGLFIjoDvu7Uc2Wy8EDY60jjue66HmMPxbhrujjrZxQ,7530
|
|
24
|
-
chdb-3.1.
|
|
25
|
-
chdb-3.1.
|
|
26
|
-
chdb-3.1.
|
|
27
|
-
chdb-3.1.
|
|
28
|
-
chdb-3.1.
|
|
24
|
+
chdb-3.1.2.dist-info/METADATA,sha256=cr0z9tTvWd_9_cckipHt8UBVZ0FBVOarA_6CTYwDxjA,19512
|
|
25
|
+
chdb-3.1.2.dist-info/WHEEL,sha256=78Ydk8uhU-xi59xREigtJhObz27LqK1yITicZuH3lLI,153
|
|
26
|
+
chdb-3.1.2.dist-info/top_level.txt,sha256=se0Jj0A2-ijfMW51hIjiuNyDJPqy5xJU1G8a_IEdllI,11
|
|
27
|
+
chdb-3.1.2.dist-info/RECORD,,
|
|
28
|
+
chdb-3.1.2.dist-info/licenses/LICENSE.txt,sha256=isYVtNCO5910aj6e9bJJ6kQceivkLqsMlFSNYwzGGKI,11366
|
|
File without changes
|
|
File without changes
|