chdb 3.4.1 → 3.5.0 (cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl)
Potentially problematic release. This version of chdb might be problematic.
- chdb/__init__.py +1 -1
- chdb/__main__.py +4 -1
- chdb/_chdb.cpython-311-aarch64-linux-gnu.so +0 -0
- chdb/state/sqlitelike.py +177 -8
- {chdb-3.4.1.dist-info → chdb-3.5.0.dist-info}/METADATA +28 -3
- {chdb-3.4.1.dist-info → chdb-3.5.0.dist-info}/RECORD +9 -9
- {chdb-3.4.1.dist-info → chdb-3.5.0.dist-info}/WHEEL +0 -0
- {chdb-3.4.1.dist-info → chdb-3.5.0.dist-info}/licenses/LICENSE.txt +0 -0
- {chdb-3.4.1.dist-info → chdb-3.5.0.dist-info}/top_level.txt +0 -0
chdb/__init__.py
CHANGED

@@ -19,7 +19,7 @@ _process_result_format_funs = {
 # UDF script path will be f"{g_udf_path}/{func_name}.py"
 g_udf_path = ""
 
-chdb_version = ('3', '4', '1')
+chdb_version = ('3', '5', '0')
 if sys.version_info[:2] >= (3, 7):
     # get the path of the current file
     current_path = os.path.dirname(os.path.abspath(__file__))
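
The tuple above is importable at runtime, so a quick post-upgrade check is easy. A minimal sketch, assuming chdb 3.5.0 is installed locally:

import chdb

# chdb_version is the module-level tuple bumped in this diff
assert chdb.chdb_version == ('3', '5', '0')
print(".".join(chdb.chdb_version))  # 3.5.0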
chdb/__main__.py
CHANGED

@@ -4,9 +4,12 @@ from .__init__ import query
 
 def main():
     prog = 'python -m chdb'
+    custom_usage = "%(prog)s [-h] \"SELECT 1\" [format]"
     description = ('''A simple command line interface for chdb
                    to run SQL and output in specified format''')
-    parser = argparse.ArgumentParser(prog=prog, description=description)
+    parser = argparse.ArgumentParser(prog=prog,
+                                     usage=custom_usage,
+                                     description=description)
     parser.add_argument('sql', nargs=1,
                         type=str,
                         help='sql, e.g: select 1112222222,555')
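
The change only adds a usage banner; invocation itself is unchanged. As an illustration, a hedged sketch of driving the CLI from Python (the query and the Pretty output format are arbitrary examples, not part of this diff):

import subprocess
import sys

# Run the module CLI the way the new usage string advertises:
#   python -m chdb "SELECT 1" [format]
subprocess.run(
    [sys.executable, "-m", "chdb", "SELECT 1", "Pretty"],
    check=True,
)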
chdb/_chdb.cpython-311-aarch64-linux-gnu.so
CHANGED
Binary file (no textual diff shown)
chdb/state/sqlitelike.py
CHANGED

@@ -41,11 +41,12 @@ def to_df(r):
 
 
 class StreamingResult:
-    def __init__(self, c_result, conn, result_func):
+    def __init__(self, c_result, conn, result_func, supports_record_batch):
         self._result = c_result
         self._result_func = result_func
         self._conn = conn
         self._exhausted = False
+        self._supports_record_batch = supports_record_batch
 
     def fetch(self):
         """Fetch next chunk of streaming results"""
(The source view truncates the removed lines in this hunk, so the old __exit__ and cancel bodies appear only partially below.)

@@ -80,15 +81,182 @@ class StreamingResult:
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
-
+        self.cancel()
+
+    def close(self):
+        self.cancel()
 
     def cancel(self):
-        self._exhausted
+        if not self._exhausted:
+            self._exhausted = True
+            try:
+                self._conn.streaming_cancel_query(self._result)
+            except Exception as e:
+                raise RuntimeError(f"Failed to cancel streaming query: {str(e)}") from e
 
-
-
-
-
+    def record_batch(self, rows_per_batch: int = 1000000) -> pa.RecordBatchReader:
+        """
+        Create a PyArrow RecordBatchReader from this StreamingResult.
+
+        This method requires that the StreamingResult was created with arrow format.
+        It wraps the streaming result with ChdbRecordBatchReader to provide efficient
+        batching with configurable batch sizes.
+
+        Args:
+            rows_per_batch (int): Number of rows per batch. Defaults to 1000000.
+
+        Returns:
+            pa.RecordBatchReader: PyArrow RecordBatchReader for efficient streaming
+
+        Raises:
+            ValueError: If the StreamingResult was not created with arrow format
+        """
+        if not self._supports_record_batch:
+            raise ValueError(
+                "record_batch() can only be used with arrow format. "
+                "Please use format='Arrow' when calling send_query."
+            )
+
+        chdb_reader = ChdbRecordBatchReader(self, rows_per_batch)
+        return pa.RecordBatchReader.from_batches(chdb_reader.schema(), chdb_reader)
+
+
+class ChdbRecordBatchReader:
+    """
+    A PyArrow RecordBatchReader wrapper for chdb StreamingResult.
+
+    This class provides an efficient way to read large result sets as PyArrow RecordBatches
+    with configurable batch sizes to optimize memory usage and performance.
+    """
+
+    def __init__(self, chdb_stream_result, batch_size_rows):
+        self._stream_result = chdb_stream_result
+        self._schema = None
+        self._closed = False
+        self._pending_batches = []
+        self._accumulator = []
+        self._batch_size_rows = batch_size_rows
+        self._current_rows = 0
+        self._first_batch = None
+        self._first_batch_consumed = True
+        self._schema = self.schema()
+
+    def schema(self):
+        if self._schema is None:
+            # Get the first chunk to determine schema
+            chunk = self._stream_result.fetch()
+            if chunk is not None:
+                arrow_bytes = chunk.bytes()
+                reader = pa.RecordBatchFileReader(arrow_bytes)
+                self._schema = reader.schema
+
+                table = reader.read_all()
+                if table.num_rows > 0:
+                    batches = table.to_batches()
+                    self._first_batch = batches[0]
+                    if len(batches) > 1:
+                        self._pending_batches = batches[1:]
+                    self._first_batch_consumed = False
+                else:
+                    self._first_batch = None
+                    self._first_batch_consumed = True
+            else:
+                self._schema = pa.schema([])
+                self._first_batch = None
+                self._first_batch_consumed = True
+                self._closed = True
+        return self._schema
+
+    def read_next_batch(self):
+        if self._accumulator:
+            result = self._accumulator.pop(0)
+            return result
+
+        if self._closed:
+            raise StopIteration
+
+        while True:
+            batch = None
+
+            # 1. Return the first batch if not consumed yet
+            if not self._first_batch_consumed:
+                self._first_batch_consumed = True
+                batch = self._first_batch
+
+            # 2. Check pending batches from current chunk
+            elif self._pending_batches:
+                batch = self._pending_batches.pop(0)
+
+            # 3. Fetch new chunk from chdb stream
+            else:
+                chunk = self._stream_result.fetch()
+                if chunk is None:
+                    # No more data - return accumulated batches if any
+                    break
+
+                arrow_bytes = chunk.bytes()
+                if not arrow_bytes:
+                    continue
+
+                reader = pa.RecordBatchFileReader(arrow_bytes)
+                table = reader.read_all()
+
+                if table.num_rows > 0:
+                    batches = table.to_batches()
+                    batch = batches[0]
+                    if len(batches) > 1:
+                        self._pending_batches = batches[1:]
+                else:
+                    continue
+
+            # Process the batch if we got one
+            if batch is not None:
+                self._accumulator.append(batch)
+                self._current_rows += batch.num_rows
+
+                # If accumulated enough rows, return combined batch
+                if self._current_rows >= self._batch_size_rows:
+                    if len(self._accumulator) == 1:
+                        result = self._accumulator.pop(0)
+                    else:
+                        if hasattr(pa, 'concat_batches'):
+                            result = pa.concat_batches(self._accumulator)
+                            self._accumulator = []
+                        else:
+                            result = self._accumulator.pop(0)
+
+                    self._current_rows = 0
+                    return result
+
+        # End of stream - return any accumulated batches
+        if self._accumulator:
+            if len(self._accumulator) == 1:
+                result = self._accumulator.pop(0)
+            else:
+                if hasattr(pa, 'concat_batches'):
+                    result = pa.concat_batches(self._accumulator)
+                    self._accumulator = []
+                else:
+                    result = self._accumulator.pop(0)
+
+            self._current_rows = 0
+            self._closed = True
+            return result
+
+        # No more data
+        self._closed = True
+        raise StopIteration
+
+    def close(self):
+        if not self._closed:
+            self._stream_result.close()
+            self._closed = True
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        return self.read_next_batch()
 
 
 class Connection:
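
Taken together, record_batch() turns a streaming query into a standard pyarrow.RecordBatchReader. A minimal sketch of the call pattern (assuming a local install; chdb.connect() and the built-in numbers() table function exist in chdb, while the query and batch size here are arbitrary):

import chdb

conn = chdb.connect(":memory:")

# The stream must be created with the Arrow format, otherwise record_batch() raises ValueError
stream = conn.send_query("SELECT number FROM numbers(1000000)", "Arrow")
reader = stream.record_batch(rows_per_batch=100000)  # pyarrow.RecordBatchReader

total = 0
for batch in reader:        # batches hold at least rows_per_batch rows, except the last
    total += batch.num_rows
print(total)                # 1000000

stream.close()
conn.close()

Note the batching contract implied by the accumulator logic above: incoming chunks are buffered until at least rows_per_batch rows are collected (combined with pa.concat_batches where available), so consumers see fewer, larger batches instead of chdb's raw chunk sizes.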
@@ -112,12 +280,13 @@ class Connection:
 
     def send_query(self, query: str, format: str = "CSV") -> StreamingResult:
         lower_output_format = format.lower()
+        supports_record_batch = lower_output_format == "arrow"
         result_func = _process_result_format_funs.get(lower_output_format, lambda x: x)
         if lower_output_format in _arrow_format:
             format = "Arrow"
 
         c_stream_result = self._conn.send_query(query, format)
-        return StreamingResult(c_stream_result, self._conn, result_func)
+        return StreamingResult(c_stream_result, self._conn, result_func, supports_record_batch)
 
     def close(self) -> None:
         # print("close")
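
The supports_record_batch flag computed here is exactly what record_batch() checks, so only Arrow-format streams can be promoted to a RecordBatchReader. A short sketch of both paths, reusing the conn object from the earlier sketch:

# Arrow format: record_batch() is allowed
ok = conn.send_query("SELECT 1", "Arrow")
print(ok.record_batch().read_all())
ok.close()

# Any other format: record_batch() raises the ValueError shown above
bad = conn.send_query("SELECT 1", "CSV")
try:
    bad.record_batch()
except ValueError as err:
    print(err)
finally:
    bad.close()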
{chdb-3.4.1.dist-info → chdb-3.5.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chdb
-Version: 3.4.1
+Version: 3.5.0
 Summary: chDB is an in-process SQL OLAP Engine powered by ClickHouse
 Home-page: https://github.com/chdb-io/chdb
 Author: auxten
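
To confirm which wheel is actually installed, the standard library is enough; nothing here is chdb-specific:

from importlib.metadata import version

print(version("chdb"))  # expected: 3.5.0 after this upgrade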
@@ -298,16 +298,37 @@ while True:
     if chunk is None:
         break
     if rows_cnt > 0:
-        stream_result.cancel()
+        stream_result.close()
         break
     rows_cnt += chunk.rows_read()
 
 print(rows_cnt) # 65409
 
+# Example 4: Using PyArrow RecordBatchReader for batch export and integration with other libraries
+import pyarrow as pa
+from deltalake import write_deltalake
+
+# Get streaming result in arrow format
+stream_result = sess.send_query("SELECT * FROM numbers(100000)", "Arrow")
+
+# Create RecordBatchReader with custom batch size (default rows_per_batch=1000000)
+batch_reader = stream_result.record_batch(rows_per_batch=10000)
+
+# Use RecordBatchReader with external libraries like Delta Lake
+write_deltalake(
+    table_or_uri="./my_delta_table",
+    data=batch_reader,
+    mode="overwrite"
+)
+
+stream_result.close()
+
 sess.close()
 ```
 
-
+**Important Note**: When using streaming queries, if the `StreamingResult` is not fully consumed (due to errors or early termination), you must explicitly call `stream_result.close()` to release resources, or use the `with` statement for automatic cleanup. Failure to do so may block subsequent queries.
+
+For more details, see [test_streaming_query.py](tests/test_streaming_query.py) and [test_arrow_record_reader_deltalake.py](tests/test_arrow_record_reader_deltalake.py).
 </details>
 
 
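
The note above pairs with the new __exit__ and close() methods in sqlitelike.py, both of which funnel into cancel(). A hedged sketch of the with-statement form (sess is the session object from the surrounding README examples):

# __exit__ cancels the stream even on early exit or error,
# so an unconsumed StreamingResult cannot block later queries
with sess.send_query("SELECT * FROM numbers(100000)", "CSV") as stream_result:
    first_chunk = stream_result.fetch()
    # stop early; no explicit close() is needed here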
@@ -510,6 +531,10 @@ There are something you can help:
 
 We welcome bindings for other languages, please refer to [bindings](bindings.md) for more details.
 
+## Version Guide
+
+Please refer to [VERSION-GUIDE.md](VERSION-GUIDE.md) for more details.
+
 ## Paper
 
 - [ClickHouse - Lightning Fast Analytics for Everyone](https://www.vldb.org/pvldb/vol17/p3731-schulze.pdf)
{chdb-3.4.1.dist-info → chdb-3.5.0.dist-info}/RECORD
CHANGED

@@ -1,6 +1,6 @@
-chdb/__init__.py,sha256=…
-chdb/__main__.py,sha256=…
-chdb/_chdb.cpython-311-aarch64-linux-gnu.so,sha256=…
+chdb/__init__.py,sha256=7GGNYb_0PsxWRaU5QubVCgH1gHXa5ZPAqPuKir1Rx-I,3762
+chdb/__main__.py,sha256=vl-gorTYCT9Uh_h4jbQ8O-a5_pokCJPFbF_yplIgKYc,1336
+chdb/_chdb.cpython-311-aarch64-linux-gnu.so,sha256=fPfRuECMnjTZZLqse_aRuF3Q-w5g_jo0Lv7CKS0BQt8,568920736
 chdb/rwabc.py,sha256=tbiwCrXirfrfx46wCJxS64yvFe6pVWIPGdSuvrAL5Ys,2102
 chdb/dataframe/__init__.py,sha256=1_mrZZiJwqBTnH_P8_FCbbYXIWWY5sxnaFpe3-tDLF4,680
 chdb/dataframe/query.py,sha256=ggvE8A5vtabFg9gSTp99S7LCrnIEwbWtb-PtJVT8Ct0,12759
@@ -15,14 +15,14 @@ chdb/dbapi/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
 chdb/session/__init__.py,sha256=fCUROZ5L1-92o2lcASiWJpFu-80-kDoSrNfouLEmLg8,50
 chdb/session/state.py,sha256=m7K9zZtoMQTlh-pfmSyJV38pAe6eHNTPtOvlHYrImhA,4436
 chdb/state/__init__.py,sha256=RVUIWDqDi7gte4Os7Mz1wPXFyFpdHT_p1klJC7QtluI,55
-chdb/state/sqlitelike.py,sha256=…
+chdb/state/sqlitelike.py,sha256=PHdIJVfbSUvJWU6doMnrg0jVpovtHUG12I_-acZHOko,18338
 chdb/udf/__init__.py,sha256=qSMaPEre7w1pYz8uJ-iZtuu8wYOUNRcI_8UNuaOymGE,80
 chdb/udf/udf.py,sha256=z0A1RmyZrx55bykpvvS-LpVt1lMrQOexjvU5zxCdCSA,3935
 chdb/utils/__init__.py,sha256=tXRcwBRGW2YQNBZWV4Mitw5QlCu_qlSRCjllw15XHbs,171
 chdb/utils/trace.py,sha256=W-pvDoKlnzq6H_7FiWjr5_teN40UNE4E5--zbUrjOIc,2511
 chdb/utils/types.py,sha256=MGLFIjoDvu7Uc2Wy8EDY60jjue66HmMPxbhrujjrZxQ,7530
-chdb-3.4.1.dist-info/METADATA,sha256=…
-chdb-3.4.1.dist-info/WHEEL,sha256=…
-chdb-3.4.1.dist-info/top_level.txt,sha256=…
-chdb-3.4.1.dist-info/RECORD,,
-chdb-3.4.1.dist-info/licenses/LICENSE.txt,sha256=…
+chdb-3.5.0.dist-info/METADATA,sha256=tW6qWsjBl4ZTCfUwIzN2kXvQdbWSM_YWAzSxjIRmk_I,25782
+chdb-3.5.0.dist-info/WHEEL,sha256=QJg38rE8f0PT7_ZWlFpvwOoUFGenUbSJhXM-6SbDiao,153
+chdb-3.5.0.dist-info/top_level.txt,sha256=se0Jj0A2-ijfMW51hIjiuNyDJPqy5xJU1G8a_IEdllI,11
+chdb-3.5.0.dist-info/RECORD,,
+chdb-3.5.0.dist-info/licenses/LICENSE.txt,sha256=isYVtNCO5910aj6e9bJJ6kQceivkLqsMlFSNYwzGGKI,11366
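
For reference, RECORD hashes follow the wheel spec: the urlsafe base64 encoding of the raw SHA-256 digest, with '=' padding stripped. A small verification sketch (the path is illustrative; point it at a file from the installed wheel):

import base64
import hashlib

def record_hash(path):
    # Matches the "sha256=..." values in RECORD (PEP 376 / PEP 427 style)
    with open(path, "rb") as f:
        digest = hashlib.sha256(f.read()).digest()
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")

print(record_hash("chdb/__init__.py"))  # expected: sha256=7GGNYb_0... for 3.5.0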
{chdb-3.4.1.dist-info → chdb-3.5.0.dist-info}/WHEEL
File without changes

{chdb-3.4.1.dist-info → chdb-3.5.0.dist-info}/licenses/LICENSE.txt
File without changes

{chdb-3.4.1.dist-info → chdb-3.5.0.dist-info}/top_level.txt
File without changes