chdb 3.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl → 3.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of chdb might be problematic.

chdb/__init__.py CHANGED
@@ -19,7 +19,7 @@ _process_result_format_funs = {
 # UDF script path will be f"{g_udf_path}/{func_name}.py"
 g_udf_path = ""
 
-chdb_version = ('3', '4', '1')
+chdb_version = ('3', '5', '0')
 if sys.version_info[:2] >= (3, 7):
     # get the path of the current file
     current_path = os.path.dirname(os.path.abspath(__file__))
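
The bumped version tuple is a module-level variable, so the upgrade can be verified at runtime. A minimal sketch, assuming the 3.5.0 wheel is installed:

```python
# Check the module-level version tuple that the diff above bumps.
import chdb

print(chdb.chdb_version)  # ('3', '5', '0')
```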
chdb/__main__.py CHANGED
@@ -4,9 +4,12 @@ from .__init__ import query
 
 def main():
     prog = 'python -m chdb'
+    custom_usage = "%(prog)s [-h] \"SELECT 1\" [format]"
     description = ('''A simple command line interface for chdb
                    to run SQL and output in specified format''')
-    parser = argparse.ArgumentParser(prog=prog, description=description)
+    parser = argparse.ArgumentParser(prog=prog,
+                                     usage=custom_usage,
+                                     description=description)
     parser.add_argument('sql', nargs=1,
                         type=str,
                         help='sql, e.g: select 1112222222,555')
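
The custom usage string documents the CLI shape: one required SQL string and an optional output format. A minimal sketch of an invocation matching that usage line, assuming the wheel is installed in the current interpreter (the `subprocess` wrapper is illustrative, not part of chdb):

```python
# Invoke the CLI the way the new usage string describes:
#   python -m chdb [-h] "SELECT 1" [format]
import subprocess
import sys

result = subprocess.run(
    [sys.executable, "-m", "chdb", "SELECT 1, 'hello'", "CSV"],
    capture_output=True,
    text=True,
    check=True,
)
print(result.stdout, end="")  # CSV-formatted query result
```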
chdb/_chdb.cpython-38-aarch64-linux-gnu.so CHANGED (binary file, contents not shown)
chdb/state/sqlitelike.py CHANGED
@@ -41,11 +41,12 @@ def to_df(r):
 
 
 class StreamingResult:
-    def __init__(self, c_result, conn, result_func):
+    def __init__(self, c_result, conn, result_func, supports_record_batch):
         self._result = c_result
         self._result_func = result_func
         self._conn = conn
         self._exhausted = False
+        self._supports_record_batch = supports_record_batch
 
     def fetch(self):
         """Fetch next chunk of streaming results"""
@@ -80,15 +81,182 @@ class StreamingResult:
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
-        pass
+        self.cancel()
+
+    def close(self):
+        self.cancel()
 
     def cancel(self):
-        self._exhausted = True
+        if not self._exhausted:
+            self._exhausted = True
+            try:
+                self._conn.streaming_cancel_query(self._result)
+            except Exception as e:
+                raise RuntimeError(f"Failed to cancel streaming query: {str(e)}") from e
 
-        try:
-            self._conn.streaming_cancel_query(self._result)
-        except Exception as e:
-            raise RuntimeError(f"Failed to cancel streaming query: {str(e)}") from e
+    def record_batch(self, rows_per_batch: int = 1000000) -> pa.RecordBatchReader:
+        """
+        Create a PyArrow RecordBatchReader from this StreamingResult.
+
+        This method requires that the StreamingResult was created with arrow format.
+        It wraps the streaming result with ChdbRecordBatchReader to provide efficient
+        batching with configurable batch sizes.
+
+        Args:
+            rows_per_batch (int): Number of rows per batch. Defaults to 1000000.
+
+        Returns:
+            pa.RecordBatchReader: PyArrow RecordBatchReader for efficient streaming
+
+        Raises:
+            ValueError: If the StreamingResult was not created with arrow format
+        """
+        if not self._supports_record_batch:
+            raise ValueError(
+                "record_batch() can only be used with arrow format. "
+                "Please use format='Arrow' when calling send_query."
+            )
+
+        chdb_reader = ChdbRecordBatchReader(self, rows_per_batch)
+        return pa.RecordBatchReader.from_batches(chdb_reader.schema(), chdb_reader)
+
+
+class ChdbRecordBatchReader:
+    """
+    A PyArrow RecordBatchReader wrapper for chdb StreamingResult.
+
+    This class provides an efficient way to read large result sets as PyArrow RecordBatches
+    with configurable batch sizes to optimize memory usage and performance.
+    """
+
+    def __init__(self, chdb_stream_result, batch_size_rows):
+        self._stream_result = chdb_stream_result
+        self._schema = None
+        self._closed = False
+        self._pending_batches = []
+        self._accumulator = []
+        self._batch_size_rows = batch_size_rows
+        self._current_rows = 0
+        self._first_batch = None
+        self._first_batch_consumed = True
+        self._schema = self.schema()
+
+    def schema(self):
+        if self._schema is None:
+            # Get the first chunk to determine schema
+            chunk = self._stream_result.fetch()
+            if chunk is not None:
+                arrow_bytes = chunk.bytes()
+                reader = pa.RecordBatchFileReader(arrow_bytes)
+                self._schema = reader.schema
+
+                table = reader.read_all()
+                if table.num_rows > 0:
+                    batches = table.to_batches()
+                    self._first_batch = batches[0]
+                    if len(batches) > 1:
+                        self._pending_batches = batches[1:]
+                    self._first_batch_consumed = False
+                else:
+                    self._first_batch = None
+                    self._first_batch_consumed = True
+            else:
+                self._schema = pa.schema([])
+                self._first_batch = None
+                self._first_batch_consumed = True
+                self._closed = True
+        return self._schema
+
+    def read_next_batch(self):
+        if self._accumulator:
+            result = self._accumulator.pop(0)
+            return result
+
+        if self._closed:
+            raise StopIteration
+
+        while True:
+            batch = None
+
+            # 1. Return the first batch if not consumed yet
+            if not self._first_batch_consumed:
+                self._first_batch_consumed = True
+                batch = self._first_batch
+
+            # 2. Check pending batches from current chunk
+            elif self._pending_batches:
+                batch = self._pending_batches.pop(0)
+
+            # 3. Fetch new chunk from chdb stream
+            else:
+                chunk = self._stream_result.fetch()
+                if chunk is None:
+                    # No more data - return accumulated batches if any
+                    break
+
+                arrow_bytes = chunk.bytes()
+                if not arrow_bytes:
+                    continue
+
+                reader = pa.RecordBatchFileReader(arrow_bytes)
+                table = reader.read_all()
+
+                if table.num_rows > 0:
+                    batches = table.to_batches()
+                    batch = batches[0]
+                    if len(batches) > 1:
+                        self._pending_batches = batches[1:]
+                else:
+                    continue
+
+            # Process the batch if we got one
+            if batch is not None:
+                self._accumulator.append(batch)
+                self._current_rows += batch.num_rows
+
+                # If accumulated enough rows, return combined batch
+                if self._current_rows >= self._batch_size_rows:
+                    if len(self._accumulator) == 1:
+                        result = self._accumulator.pop(0)
+                    else:
+                        if hasattr(pa, 'concat_batches'):
+                            result = pa.concat_batches(self._accumulator)
+                            self._accumulator = []
+                        else:
+                            result = self._accumulator.pop(0)
+
+                    self._current_rows = 0
+                    return result
+
+        # End of stream - return any accumulated batches
+        if self._accumulator:
+            if len(self._accumulator) == 1:
+                result = self._accumulator.pop(0)
+            else:
+                if hasattr(pa, 'concat_batches'):
+                    result = pa.concat_batches(self._accumulator)
+                    self._accumulator = []
+                else:
+                    result = self._accumulator.pop(0)

+            self._current_rows = 0
+            self._closed = True
+            return result
+
+        # No more data
+        self._closed = True
+        raise StopIteration
+
+    def close(self):
+        if not self._closed:
+            self._stream_result.close()
+            self._closed = True
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        return self.read_next_batch()
 
 
 class Connection:
@@ -112,12 +280,13 @@ class Connection:
 
     def send_query(self, query: str, format: str = "CSV") -> StreamingResult:
         lower_output_format = format.lower()
+        supports_record_batch = lower_output_format == "arrow"
         result_func = _process_result_format_funs.get(lower_output_format, lambda x: x)
         if lower_output_format in _arrow_format:
             format = "Arrow"
 
         c_stream_result = self._conn.send_query(query, format)
-        return StreamingResult(c_stream_result, self._conn, result_func)
+        return StreamingResult(c_stream_result, self._conn, result_func, supports_record_batch)
 
     def close(self) -> None:
         # print("close")
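
Taken together, these changes let an Arrow-format streaming result be drained through a `pyarrow.RecordBatchReader`, and make the `with` statement (whose `__exit__` now calls `cancel()`) clean up an unfinished stream. A minimal sketch, assuming chdb 3.5.0 and pyarrow are installed; `Session` and `send_query` are as shown in the package README:

```python
# Stream an Arrow-format query as PyArrow record batches (sketch, chdb >= 3.5.0).
from chdb import session as chs

sess = chs.Session()
# Only format="Arrow" marks the StreamingResult as record_batch-capable.
with sess.send_query("SELECT number FROM numbers(300000)", "Arrow") as stream:
    reader = stream.record_batch(rows_per_batch=100000)  # pa.RecordBatchReader
    total = sum(batch.num_rows for batch in reader)
print(total)  # 300000
sess.close()
```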
chdb-3.5.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: chdb
-Version: 3.4.1
+Version: 3.5.0
 Summary: chDB is an in-process SQL OLAP Engine powered by ClickHouse
 Home-page: https://github.com/chdb-io/chdb
 Author: auxten
@@ -295,16 +295,37 @@ while True:
     if chunk is None:
         break
     if rows_cnt > 0:
-        stream_result.cancel()
+        stream_result.close()
         break
     rows_cnt += chunk.rows_read()
 
 print(rows_cnt) # 65409
 
+# Example 4: Using PyArrow RecordBatchReader for batch export and integration with other libraries
+import pyarrow as pa
+from deltalake import write_deltalake
+
+# Get streaming result in arrow format
+stream_result = sess.send_query("SELECT * FROM numbers(100000)", "Arrow")
+
+# Create RecordBatchReader with custom batch size (default rows_per_batch=1000000)
+batch_reader = stream_result.record_batch(rows_per_batch=10000)
+
+# Use RecordBatchReader with external libraries like Delta Lake
+write_deltalake(
+    table_or_uri="./my_delta_table",
+    data=batch_reader,
+    mode="overwrite"
+)
+
+stream_result.close()
+
 sess.close()
 ```
 
-For more details, see [test_streaming_query.py](tests/test_streaming_query.py).
+**Important Note**: When using streaming queries, if the `StreamingResult` is not fully consumed (due to errors or early termination), you must explicitly call `stream_result.close()` to release resources, or use the `with` statement for automatic cleanup. Failure to do so may block subsequent queries.
+
+For more details, see [test_streaming_query.py](tests/test_streaming_query.py) and [test_arrow_record_reader_deltalake.py](tests/test_arrow_record_reader_deltalake.py).
 </details>
 
 
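The note added above is the behavioral heart of this release: an abandoned `StreamingResult` used to leave the query running and could block the session. A minimal sketch of both cleanup options, assuming chdb 3.5.0 and using only calls shown in the README (`fetch()`, `rows_read()`, `close()`):

```python
# Two ways to release an unfinished stream (sketch, chdb >= 3.5.0).
from chdb import session as chs

sess = chs.Session()

# Option 1: explicit close() after early termination.
stream = sess.send_query("SELECT * FROM numbers(200000)", "CSV")
chunk = stream.fetch()  # consume only the first chunk
print(chunk.rows_read())
stream.close()          # cancels the query so later queries don't block

# Option 2: `with` statement; __exit__ now calls cancel() automatically.
with sess.send_query("SELECT * FROM numbers(200000)", "CSV") as stream:
    chunk = stream.fetch()

sess.close()
```
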
@@ -507,6 +528,10 @@ There are something you can help:
 
 We welcome bindings for other languages, please refer to [bindings](bindings.md) for more details.
 
+## Version Guide
+
+Please refer to [VERSION-GUIDE.md](VERSION-GUIDE.md) for more details.
+
 ## Paper
 
 - [ClickHouse - Lightning Fast Analytics for Everyone](https://www.vldb.org/pvldb/vol17/p3731-schulze.pdf)
chdb-3.5.0.dist-info/RECORD CHANGED
@@ -1,28 +1,28 @@
-chdb/__main__.py,sha256=xNNtDY38d973YM5dlxiIazcqqKhXJSpNb7JflyyrXGE,1185
-chdb/__init__.py,sha256=KjR7cb7QFtjCqvasu81WQvX-2LeHjx-rSB3preiRefI,3762
-chdb/_chdb.cpython-38-aarch64-linux-gnu.so,sha256=9hdZgNc0h05058RlHQCkoGF8u-oECZPvgTggNkBeLkA,568828904
 chdb/rwabc.py,sha256=tbiwCrXirfrfx46wCJxS64yvFe6pVWIPGdSuvrAL5Ys,2102
+chdb/__main__.py,sha256=vl-gorTYCT9Uh_h4jbQ8O-a5_pokCJPFbF_yplIgKYc,1336
+chdb/_chdb.cpython-38-aarch64-linux-gnu.so,sha256=ljNW7N1IUXRFGwIJdiBZCfG_Fi7n25f2VOMWfb4Qsgw,568916304
+chdb/__init__.py,sha256=7GGNYb_0PsxWRaU5QubVCgH1gHXa5ZPAqPuKir1Rx-I,3762
 chdb/session/__init__.py,sha256=fCUROZ5L1-92o2lcASiWJpFu-80-kDoSrNfouLEmLg8,50
 chdb/session/state.py,sha256=m7K9zZtoMQTlh-pfmSyJV38pAe6eHNTPtOvlHYrImhA,4436
-chdb/dataframe/__init__.py,sha256=1_mrZZiJwqBTnH_P8_FCbbYXIWWY5sxnaFpe3-tDLF4,680
-chdb/dataframe/query.py,sha256=ggvE8A5vtabFg9gSTp99S7LCrnIEwbWtb-PtJVT8Ct0,12759
 chdb/utils/__init__.py,sha256=tXRcwBRGW2YQNBZWV4Mitw5QlCu_qlSRCjllw15XHbs,171
-chdb/utils/types.py,sha256=MGLFIjoDvu7Uc2Wy8EDY60jjue66HmMPxbhrujjrZxQ,7530
 chdb/utils/trace.py,sha256=W-pvDoKlnzq6H_7FiWjr5_teN40UNE4E5--zbUrjOIc,2511
-chdb/udf/__init__.py,sha256=qSMaPEre7w1pYz8uJ-iZtuu8wYOUNRcI_8UNuaOymGE,80
+chdb/utils/types.py,sha256=MGLFIjoDvu7Uc2Wy8EDY60jjue66HmMPxbhrujjrZxQ,7530
+chdb/state/__init__.py,sha256=RVUIWDqDi7gte4Os7Mz1wPXFyFpdHT_p1klJC7QtluI,55
+chdb/state/sqlitelike.py,sha256=PHdIJVfbSUvJWU6doMnrg0jVpovtHUG12I_-acZHOko,18338
 chdb/udf/udf.py,sha256=z0A1RmyZrx55bykpvvS-LpVt1lMrQOexjvU5zxCdCSA,3935
-chdb/dbapi/connections.py,sha256=RW0EcusyKueMGp7VmSaCO-ukyzY7l2ps_ibA9-pXDvo,2754
+chdb/udf/__init__.py,sha256=qSMaPEre7w1pYz8uJ-iZtuu8wYOUNRcI_8UNuaOymGE,80
+chdb/dataframe/__init__.py,sha256=1_mrZZiJwqBTnH_P8_FCbbYXIWWY5sxnaFpe3-tDLF4,680
+chdb/dataframe/query.py,sha256=ggvE8A5vtabFg9gSTp99S7LCrnIEwbWtb-PtJVT8Ct0,12759
 chdb/dbapi/times.py,sha256=_qXgDaYwsHntvpIKSKXp1rrYIgtq6Z9pLyLnO2XNoL0,360
-chdb/dbapi/__init__.py,sha256=aaNhxXNBC1ZkFr260cbGR8msOinTp0VoNTT_j8AXGUc,2205
 chdb/dbapi/converters.py,sha256=0SDqgixUTCz0LtWke_HHzgF1lFJhpsQrR_-ky3b-JRY,7447
 chdb/dbapi/cursors.py,sha256=3ufVB1zt3x7SzCYowVbwAOsuzkMxYPO74q9XW6ctkKo,8120
+chdb/dbapi/__init__.py,sha256=aaNhxXNBC1ZkFr260cbGR8msOinTp0VoNTT_j8AXGUc,2205
+chdb/dbapi/connections.py,sha256=RW0EcusyKueMGp7VmSaCO-ukyzY7l2ps_ibA9-pXDvo,2754
 chdb/dbapi/err.py,sha256=kUI9-A8LNqBoMoo4jh2NFsLCOLoPEwh9YIuz_qMoLoM,2017
-chdb/dbapi/constants/FIELD_TYPE.py,sha256=ytFzgAnGmb9hvdsBlnK68qdZv_a6jYFIXT6VSAb60z8,370
 chdb/dbapi/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-chdb/state/__init__.py,sha256=RVUIWDqDi7gte4Os7Mz1wPXFyFpdHT_p1klJC7QtluI,55
-chdb/state/sqlitelike.py,sha256=v0xh9jWirHzhDVq26C2213LxfaDbRulSAhSHaTiZ24c,12283
-chdb-3.4.1.dist-info/RECORD,,
-chdb-3.4.1.dist-info/LICENSE.txt,sha256=isYVtNCO5910aj6e9bJJ6kQceivkLqsMlFSNYwzGGKI,11366
-chdb-3.4.1.dist-info/WHEEL,sha256=kTTaziUGh4IgOXqp-kkEc0J_ej7kOw0CdWocYU-1ZR0,149
-chdb-3.4.1.dist-info/METADATA,sha256=5nvTBNyYcoHPpDBLIxbCz5ZMWbFueTWEOOTwwFHq1c0,24622
-chdb-3.4.1.dist-info/top_level.txt,sha256=se0Jj0A2-ijfMW51hIjiuNyDJPqy5xJU1G8a_IEdllI,11
+chdb/dbapi/constants/FIELD_TYPE.py,sha256=ytFzgAnGmb9hvdsBlnK68qdZv_a6jYFIXT6VSAb60z8,370
+chdb-3.5.0.dist-info/WHEEL,sha256=kTTaziUGh4IgOXqp-kkEc0J_ej7kOw0CdWocYU-1ZR0,149
+chdb-3.5.0.dist-info/METADATA,sha256=BV7QlaT95FZsDo26kyyq8eN9z6hVkIyI58ZogC45s2s,25714
+chdb-3.5.0.dist-info/top_level.txt,sha256=se0Jj0A2-ijfMW51hIjiuNyDJPqy5xJU1G8a_IEdllI,11
+chdb-3.5.0.dist-info/LICENSE.txt,sha256=isYVtNCO5910aj6e9bJJ6kQceivkLqsMlFSNYwzGGKI,11366
+chdb-3.5.0.dist-info/RECORD,,