chdb 3.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl → 3.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
Potentially problematic release: this version of chdb has been flagged as possibly warranting closer review.
- chdb/__init__.py +1 -1
- chdb/__main__.py +4 -1
- chdb/_chdb.cpython-38-aarch64-linux-gnu.so +0 -0
- chdb/state/sqlitelike.py +177 -8
- {chdb-3.4.0.dist-info → chdb-3.5.0.dist-info}/METADATA +28 -3
- {chdb-3.4.0.dist-info → chdb-3.5.0.dist-info}/RECORD +17 -17
- {chdb-3.4.0.dist-info → chdb-3.5.0.dist-info}/LICENSE.txt +0 -0
- {chdb-3.4.0.dist-info → chdb-3.5.0.dist-info}/WHEEL +0 -0
- {chdb-3.4.0.dist-info → chdb-3.5.0.dist-info}/top_level.txt +0 -0
chdb/__init__.py
CHANGED
@@ -19,7 +19,7 @@ _process_result_format_funs = {
 # UDF script path will be f"{g_udf_path}/{func_name}.py"
 g_udf_path = ""
 
-chdb_version = ('3', '4', '0')
+chdb_version = ('3', '5', '0')
 if sys.version_info[:2] >= (3, 7):
     # get the path of the current file
     current_path = os.path.dirname(os.path.abspath(__file__))
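Aside from the rebuilt binary, this file is a pure version bump. A quick sanity check after upgrading (illustrative; it assumes `__version__` is derived from `chdb_version` as in the package's `__init__.py`):

```python
# Illustrative check that the imported build matches the 3.5.0 wheel above.
import chdb

print(chdb.chdb_version)  # ('3', '5', '0')
# Assumed: __init__.py joins chdb_version into a dotted string such as "3.5.0".
print(chdb.__version__)
```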
chdb/__main__.py
CHANGED
@@ -4,9 +4,12 @@ from .__init__ import query
 
 def main():
     prog = 'python -m chdb'
+    custom_usage = "%(prog)s [-h] \"SELECT 1\" [format]"
     description = ('''A simple command line interface for chdb
                    to run SQL and output in specified format''')
-    parser = argparse.ArgumentParser(prog=prog,
+    parser = argparse.ArgumentParser(prog=prog,
+                                     usage=custom_usage,
+                                     description=description)
     parser.add_argument('sql', nargs=1,
                         type=str,
                         help='sql, e.g: select 1112222222,555')
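The new `usage=` argument only changes the help banner: argparse substitutes `%(prog)s` and prints the custom string instead of its auto-generated one. A self-contained sketch of the same pattern (the optional `format` argument here is an assumption mirroring the CLI's help text):

```python
# Sketch of the usage override above; argparse expands %(prog)s itself.
import argparse

prog = 'python -m chdb'
custom_usage = "%(prog)s [-h] \"SELECT 1\" [format]"
parser = argparse.ArgumentParser(prog=prog,
                                 usage=custom_usage,
                                 description='demo of the custom usage string')
parser.add_argument('sql', nargs=1, type=str)
parser.add_argument('format', nargs='?', default='CSV')  # assumed optional

print(parser.format_usage())
# usage: python -m chdb [-h] "SELECT 1" [format]
```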
chdb/_chdb.cpython-38-aarch64-linux-gnu.so
CHANGED
Binary file
chdb/state/sqlitelike.py
CHANGED
@@ -41,11 +41,12 @@ def to_df(r):
 
 
 class StreamingResult:
-    def __init__(self, c_result, conn, result_func):
+    def __init__(self, c_result, conn, result_func, supports_record_batch):
         self._result = c_result
         self._result_func = result_func
         self._conn = conn
         self._exhausted = False
+        self._supports_record_batch = supports_record_batch
 
     def fetch(self):
         """Fetch next chunk of streaming results"""
@@ -80,15 +81,182 @@ class StreamingResult:
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
-
+        self.cancel()
+
+    def close(self):
+        self.cancel()
 
     def cancel(self):
-        self._exhausted
+        if not self._exhausted:
+            self._exhausted = True
+            try:
+                self._conn.streaming_cancel_query(self._result)
+            except Exception as e:
+                raise RuntimeError(f"Failed to cancel streaming query: {str(e)}") from e
 
-
-
-
-
+    def record_batch(self, rows_per_batch: int = 1000000) -> pa.RecordBatchReader:
+        """
+        Create a PyArrow RecordBatchReader from this StreamingResult.
+
+        This method requires that the StreamingResult was created with arrow format.
+        It wraps the streaming result with ChdbRecordBatchReader to provide efficient
+        batching with configurable batch sizes.
+
+        Args:
+            rows_per_batch (int): Number of rows per batch. Defaults to 1000000.
+
+        Returns:
+            pa.RecordBatchReader: PyArrow RecordBatchReader for efficient streaming
+
+        Raises:
+            ValueError: If the StreamingResult was not created with arrow format
+        """
+        if not self._supports_record_batch:
+            raise ValueError(
+                "record_batch() can only be used with arrow format. "
+                "Please use format='Arrow' when calling send_query."
+            )
+
+        chdb_reader = ChdbRecordBatchReader(self, rows_per_batch)
+        return pa.RecordBatchReader.from_batches(chdb_reader.schema(), chdb_reader)
+
+
+class ChdbRecordBatchReader:
+    """
+    A PyArrow RecordBatchReader wrapper for chdb StreamingResult.
+
+    This class provides an efficient way to read large result sets as PyArrow RecordBatches
+    with configurable batch sizes to optimize memory usage and performance.
+    """
+
+    def __init__(self, chdb_stream_result, batch_size_rows):
+        self._stream_result = chdb_stream_result
+        self._schema = None
+        self._closed = False
+        self._pending_batches = []
+        self._accumulator = []
+        self._batch_size_rows = batch_size_rows
+        self._current_rows = 0
+        self._first_batch = None
+        self._first_batch_consumed = True
+        self._schema = self.schema()
+
+    def schema(self):
+        if self._schema is None:
+            # Get the first chunk to determine schema
+            chunk = self._stream_result.fetch()
+            if chunk is not None:
+                arrow_bytes = chunk.bytes()
+                reader = pa.RecordBatchFileReader(arrow_bytes)
+                self._schema = reader.schema
+
+                table = reader.read_all()
+                if table.num_rows > 0:
+                    batches = table.to_batches()
+                    self._first_batch = batches[0]
+                    if len(batches) > 1:
+                        self._pending_batches = batches[1:]
+                    self._first_batch_consumed = False
+                else:
+                    self._first_batch = None
+                    self._first_batch_consumed = True
+            else:
+                self._schema = pa.schema([])
+                self._first_batch = None
+                self._first_batch_consumed = True
+                self._closed = True
+        return self._schema
+
+    def read_next_batch(self):
+        if self._accumulator:
+            result = self._accumulator.pop(0)
+            return result
+
+        if self._closed:
+            raise StopIteration
+
+        while True:
+            batch = None
+
+            # 1. Return the first batch if not consumed yet
+            if not self._first_batch_consumed:
+                self._first_batch_consumed = True
+                batch = self._first_batch
+
+            # 2. Check pending batches from current chunk
+            elif self._pending_batches:
+                batch = self._pending_batches.pop(0)
+
+            # 3. Fetch new chunk from chdb stream
+            else:
+                chunk = self._stream_result.fetch()
+                if chunk is None:
+                    # No more data - return accumulated batches if any
+                    break
+
+                arrow_bytes = chunk.bytes()
+                if not arrow_bytes:
+                    continue
+
+                reader = pa.RecordBatchFileReader(arrow_bytes)
+                table = reader.read_all()
+
+                if table.num_rows > 0:
+                    batches = table.to_batches()
+                    batch = batches[0]
+                    if len(batches) > 1:
+                        self._pending_batches = batches[1:]
+                else:
+                    continue
+
+            # Process the batch if we got one
+            if batch is not None:
+                self._accumulator.append(batch)
+                self._current_rows += batch.num_rows
+
+                # If accumulated enough rows, return combined batch
+                if self._current_rows >= self._batch_size_rows:
+                    if len(self._accumulator) == 1:
+                        result = self._accumulator.pop(0)
+                    else:
+                        if hasattr(pa, 'concat_batches'):
+                            result = pa.concat_batches(self._accumulator)
+                            self._accumulator = []
+                        else:
+                            result = self._accumulator.pop(0)
+
+                    self._current_rows = 0
+                    return result
+
+        # End of stream - return any accumulated batches
+        if self._accumulator:
+            if len(self._accumulator) == 1:
+                result = self._accumulator.pop(0)
+            else:
+                if hasattr(pa, 'concat_batches'):
+                    result = pa.concat_batches(self._accumulator)
+                    self._accumulator = []
+                else:
+                    result = self._accumulator.pop(0)
+
+            self._current_rows = 0
+            self._closed = True
+            return result
+
+        # No more data
+        self._closed = True
+        raise StopIteration
+
+    def close(self):
+        if not self._closed:
+            self._stream_result.close()
+            self._closed = True
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        return self.read_next_batch()
 
 
 class Connection:
@@ -112,12 +280,13 @@ class Connection:
 
     def send_query(self, query: str, format: str = "CSV") -> StreamingResult:
         lower_output_format = format.lower()
+        supports_record_batch = lower_output_format == "arrow"
         result_func = _process_result_format_funs.get(lower_output_format, lambda x: x)
         if lower_output_format in _arrow_format:
             format = "Arrow"
 
         c_stream_result = self._conn.send_query(query, format)
-        return StreamingResult(c_stream_result, self._conn, result_func)
+        return StreamingResult(c_stream_result, self._conn, result_func, supports_record_batch)
 
     def close(self) -> None:
         # print("close")
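In combination, `send_query(..., "Arrow")` and the new `record_batch()` turn a streaming result into a pull-based `pa.RecordBatchReader`: `ChdbRecordBatchReader` re-slices the engine's Arrow chunks so each emitted batch carries roughly `rows_per_batch` rows, coalescing with `pa.concat_batches` where the installed PyArrow provides it. A minimal usage sketch (the `chdb.connect()` entry point and the query are illustrative):

```python
# Minimal sketch of the streaming Arrow API added above (chdb >= 3.5.0).
import chdb

conn = chdb.connect(":memory:")
stream = conn.send_query("SELECT number FROM numbers(1000000)", "Arrow")

# record_batch() returns a pa.RecordBatchReader; batches are coalesced to
# roughly rows_per_batch rows each before being handed to the caller.
reader = stream.record_batch(rows_per_batch=100_000)
for batch in reader:
    print(batch.num_rows)

stream.close()
conn.close()
```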
{chdb-3.4.0.dist-info → chdb-3.5.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: chdb
-Version: 3.4.0
+Version: 3.5.0
 Summary: chDB is an in-process SQL OLAP Engine powered by ClickHouse
 Home-page: https://github.com/chdb-io/chdb
 Author: auxten
@@ -295,16 +295,37 @@ while True:
     if chunk is None:
         break
     if rows_cnt > 0:
-        stream_result.cancel()
+        stream_result.close()
         break
     rows_cnt += chunk.rows_read()
 
 print(rows_cnt) # 65409
 
+# Example 4: Using PyArrow RecordBatchReader for batch export and integration with other libraries
+import pyarrow as pa
+from deltalake import write_deltalake
+
+# Get streaming result in arrow format
+stream_result = sess.send_query("SELECT * FROM numbers(100000)", "Arrow")
+
+# Create RecordBatchReader with custom batch size (default rows_per_batch=1000000)
+batch_reader = stream_result.record_batch(rows_per_batch=10000)
+
+# Use RecordBatchReader with external libraries like Delta Lake
+write_deltalake(
+    table_or_uri="./my_delta_table",
+    data=batch_reader,
+    mode="overwrite"
+)
+
+stream_result.close()
+
 sess.close()
 ```
 
-
+**Important Note**: When using streaming queries, if the `StreamingResult` is not fully consumed (due to errors or early termination), you must explicitly call `stream_result.close()` to release resources, or use the `with` statement for automatic cleanup. Failure to do so may block subsequent queries.
+
+For more details, see [test_streaming_query.py](tests/test_streaming_query.py) and [test_arrow_record_reader_deltalake.py](tests/test_arrow_record_reader_deltalake.py).
 </details>
 
 
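The note above is the main behavioral caveat of this release: an unconsumed `StreamingResult` keeps the query open and can block later queries. Because `__exit__` now delegates to `cancel()`, the `with` form also covers early exits; a minimal sketch reusing the README's `sess` object:

```python
# Sketch of the `with`-based cleanup recommended in the note above;
# `sess` is assumed to be the session object from the README examples.
rows_cnt = 0
with sess.send_query("SELECT * FROM numbers(100000)", "CSV") as stream_result:
    while True:
        chunk = stream_result.fetch()
        if chunk is None:
            break
        rows_cnt += chunk.rows_read()
        if rows_cnt > 50000:
            break  # early exit is safe: __exit__ calls cancel() on the way out
```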
@@ -507,6 +528,10 @@ There are something you can help:
 
 We welcome bindings for other languages, please refer to [bindings](bindings.md) for more details.
 
+## Version Guide
+
+Please refer to [VERSION-GUIDE.md](VERSION-GUIDE.md) for more details.
+
 ## Paper
 
 - [ClickHouse - Lightning Fast Analytics for Everyone](https://www.vldb.org/pvldb/vol17/p3731-schulze.pdf)
{chdb-3.4.0.dist-info → chdb-3.5.0.dist-info}/RECORD
CHANGED
@@ -1,28 +1,28 @@
-chdb/__main__.py,sha256=xNNtDY38d973YM5dlxiIazcqqKhXJSpNb7JflyyrXGE,1185
-chdb/__init__.py,sha256=pNYyLRqm2s5hG1rPhVLECZXvJQ60EGPt_OC88W90j0w,3762
-chdb/_chdb.cpython-38-aarch64-linux-gnu.so,sha256=jWd7xJeE74xK21ep7dDeYMtYrEW-VVzxYN4HgQf8PiY,568828616
 chdb/rwabc.py,sha256=tbiwCrXirfrfx46wCJxS64yvFe6pVWIPGdSuvrAL5Ys,2102
+chdb/__main__.py,sha256=vl-gorTYCT9Uh_h4jbQ8O-a5_pokCJPFbF_yplIgKYc,1336
+chdb/_chdb.cpython-38-aarch64-linux-gnu.so,sha256=ljNW7N1IUXRFGwIJdiBZCfG_Fi7n25f2VOMWfb4Qsgw,568916304
+chdb/__init__.py,sha256=7GGNYb_0PsxWRaU5QubVCgH1gHXa5ZPAqPuKir1Rx-I,3762
 chdb/session/__init__.py,sha256=fCUROZ5L1-92o2lcASiWJpFu-80-kDoSrNfouLEmLg8,50
 chdb/session/state.py,sha256=m7K9zZtoMQTlh-pfmSyJV38pAe6eHNTPtOvlHYrImhA,4436
-chdb/dataframe/__init__.py,sha256=1_mrZZiJwqBTnH_P8_FCbbYXIWWY5sxnaFpe3-tDLF4,680
-chdb/dataframe/query.py,sha256=ggvE8A5vtabFg9gSTp99S7LCrnIEwbWtb-PtJVT8Ct0,12759
 chdb/utils/__init__.py,sha256=tXRcwBRGW2YQNBZWV4Mitw5QlCu_qlSRCjllw15XHbs,171
-chdb/utils/types.py,sha256=MGLFIjoDvu7Uc2Wy8EDY60jjue66HmMPxbhrujjrZxQ,7530
 chdb/utils/trace.py,sha256=W-pvDoKlnzq6H_7FiWjr5_teN40UNE4E5--zbUrjOIc,2511
-chdb/
+chdb/utils/types.py,sha256=MGLFIjoDvu7Uc2Wy8EDY60jjue66HmMPxbhrujjrZxQ,7530
+chdb/state/__init__.py,sha256=RVUIWDqDi7gte4Os7Mz1wPXFyFpdHT_p1klJC7QtluI,55
+chdb/state/sqlitelike.py,sha256=PHdIJVfbSUvJWU6doMnrg0jVpovtHUG12I_-acZHOko,18338
 chdb/udf/udf.py,sha256=z0A1RmyZrx55bykpvvS-LpVt1lMrQOexjvU5zxCdCSA,3935
-chdb/
+chdb/udf/__init__.py,sha256=qSMaPEre7w1pYz8uJ-iZtuu8wYOUNRcI_8UNuaOymGE,80
+chdb/dataframe/__init__.py,sha256=1_mrZZiJwqBTnH_P8_FCbbYXIWWY5sxnaFpe3-tDLF4,680
+chdb/dataframe/query.py,sha256=ggvE8A5vtabFg9gSTp99S7LCrnIEwbWtb-PtJVT8Ct0,12759
 chdb/dbapi/times.py,sha256=_qXgDaYwsHntvpIKSKXp1rrYIgtq6Z9pLyLnO2XNoL0,360
-chdb/dbapi/__init__.py,sha256=aaNhxXNBC1ZkFr260cbGR8msOinTp0VoNTT_j8AXGUc,2205
 chdb/dbapi/converters.py,sha256=0SDqgixUTCz0LtWke_HHzgF1lFJhpsQrR_-ky3b-JRY,7447
 chdb/dbapi/cursors.py,sha256=3ufVB1zt3x7SzCYowVbwAOsuzkMxYPO74q9XW6ctkKo,8120
+chdb/dbapi/__init__.py,sha256=aaNhxXNBC1ZkFr260cbGR8msOinTp0VoNTT_j8AXGUc,2205
+chdb/dbapi/connections.py,sha256=RW0EcusyKueMGp7VmSaCO-ukyzY7l2ps_ibA9-pXDvo,2754
 chdb/dbapi/err.py,sha256=kUI9-A8LNqBoMoo4jh2NFsLCOLoPEwh9YIuz_qMoLoM,2017
-chdb/dbapi/constants/FIELD_TYPE.py,sha256=ytFzgAnGmb9hvdsBlnK68qdZv_a6jYFIXT6VSAb60z8,370
 chdb/dbapi/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-chdb/
-chdb/
-chdb-3.
-chdb-3.
-chdb-3.
-chdb-3.
-chdb-3.4.0.dist-info/top_level.txt,sha256=se0Jj0A2-ijfMW51hIjiuNyDJPqy5xJU1G8a_IEdllI,11
+chdb/dbapi/constants/FIELD_TYPE.py,sha256=ytFzgAnGmb9hvdsBlnK68qdZv_a6jYFIXT6VSAb60z8,370
+chdb-3.5.0.dist-info/WHEEL,sha256=kTTaziUGh4IgOXqp-kkEc0J_ej7kOw0CdWocYU-1ZR0,149
+chdb-3.5.0.dist-info/METADATA,sha256=BV7QlaT95FZsDo26kyyq8eN9z6hVkIyI58ZogC45s2s,25714
+chdb-3.5.0.dist-info/top_level.txt,sha256=se0Jj0A2-ijfMW51hIjiuNyDJPqy5xJU1G8a_IEdllI,11
+chdb-3.5.0.dist-info/LICENSE.txt,sha256=isYVtNCO5910aj6e9bJJ6kQceivkLqsMlFSNYwzGGKI,11366
+chdb-3.5.0.dist-info/RECORD,,
{chdb-3.4.0.dist-info → chdb-3.5.0.dist-info}/LICENSE.txt
File without changes
{chdb-3.4.0.dist-info → chdb-3.5.0.dist-info}/WHEEL
File without changes
{chdb-3.4.0.dist-info → chdb-3.5.0.dist-info}/top_level.txt
File without changes