chdb-3.4.0-cp313-cp313-macosx_11_0_arm64.whl → chdb-3.5.0-cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of chdb has been flagged for review.

chdb/__init__.py CHANGED
@@ -19,7 +19,7 @@ _process_result_format_funs = {
 # UDF script path will be f"{g_udf_path}/{func_name}.py"
 g_udf_path = ""
 
-chdb_version = ('3', '4', '0')
+chdb_version = ('3', '5', '0')
 if sys.version_info[:2] >= (3, 7):
     # get the path of the current file
     current_path = os.path.dirname(os.path.abspath(__file__))
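
The only change here is the version-tuple bump. Downstream code that wants to gate on the new 3.5.0 streaming APIs can compare against this tuple; a minimal sketch, assuming `chdb_version` remains importable from the package root as the file above suggests:

```python
import chdb

# chdb_version is a tuple of strings, e.g. ('3', '5', '0');
# convert to ints before comparing so ('3', '10', '0') would sort correctly.
if tuple(map(int, chdb.chdb_version)) >= (3, 5, 0):
    print("record_batch() streaming is available")
```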
chdb/__main__.py CHANGED
@@ -4,9 +4,12 @@ from .__init__ import query
 
 def main():
     prog = 'python -m chdb'
+    custom_usage = "%(prog)s [-h] \"SELECT 1\" [format]"
     description = ('''A simple command line interface for chdb
                    to run SQL and output in specified format''')
-    parser = argparse.ArgumentParser(prog=prog, description=description)
+    parser = argparse.ArgumentParser(prog=prog,
+                                     usage=custom_usage,
+                                     description=description)
     parser.add_argument('sql', nargs=1,
                         type=str,
                         help='sql, e.g: select 1112222222,555')
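
For readers unfamiliar with argparse's `usage` parameter: `%(prog)s` is expanded to the `prog` value at render time, so this change replaces the auto-generated usage line with an example-style one. A minimal standalone sketch of the behavior (the `format` argument the real CLI also accepts is omitted):

```python
import argparse

# Mirrors the parser construction in chdb/__main__.py after this change.
parser = argparse.ArgumentParser(
    prog='python -m chdb',
    usage='%(prog)s [-h] "SELECT 1" [format]',
    description='A simple command line interface for chdb '
                'to run SQL and output in specified format')
parser.add_argument('sql', nargs=1, type=str,
                    help='sql, e.g: select 1112222222,555')

parser.print_usage()
# usage: python -m chdb [-h] "SELECT 1" [format]
```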
chdb/_chdb.cpython-313-darwin.so CHANGED (binary file, diff not shown; see the updated hash and size in RECORD below)
chdb/state/sqlitelike.py CHANGED
@@ -41,11 +41,12 @@ def to_df(r):
 
 
 class StreamingResult:
-    def __init__(self, c_result, conn, result_func):
+    def __init__(self, c_result, conn, result_func, supports_record_batch):
         self._result = c_result
         self._result_func = result_func
         self._conn = conn
         self._exhausted = False
+        self._supports_record_batch = supports_record_batch
 
     def fetch(self):
         """Fetch next chunk of streaming results"""
@@ -80,15 +81,182 @@ class StreamingResult:
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
-        pass
+        self.cancel()
+
+    def close(self):
+        self.cancel()
 
     def cancel(self):
-        self._exhausted = True
+        if not self._exhausted:
+            self._exhausted = True
+            try:
+                self._conn.streaming_cancel_query(self._result)
+            except Exception as e:
+                raise RuntimeError(f"Failed to cancel streaming query: {str(e)}") from e
 
-        try:
-            self._conn.streaming_cancel_query(self._result)
-        except Exception as e:
-            raise RuntimeError(f"Failed to cancel streaming query: {str(e)}") from e
+    def record_batch(self, rows_per_batch: int = 1000000) -> pa.RecordBatchReader:
+        """
+        Create a PyArrow RecordBatchReader from this StreamingResult.
+
+        This method requires that the StreamingResult was created with arrow format.
+        It wraps the streaming result with ChdbRecordBatchReader to provide efficient
+        batching with configurable batch sizes.
+
+        Args:
+            rows_per_batch (int): Number of rows per batch. Defaults to 1000000.
+
+        Returns:
+            pa.RecordBatchReader: PyArrow RecordBatchReader for efficient streaming
+
+        Raises:
+            ValueError: If the StreamingResult was not created with arrow format
+        """
+        if not self._supports_record_batch:
+            raise ValueError(
+                "record_batch() can only be used with arrow format. "
+                "Please use format='Arrow' when calling send_query."
+            )
+
+        chdb_reader = ChdbRecordBatchReader(self, rows_per_batch)
+        return pa.RecordBatchReader.from_batches(chdb_reader.schema(), chdb_reader)
+
+
+class ChdbRecordBatchReader:
+    """
+    A PyArrow RecordBatchReader wrapper for chdb StreamingResult.
+
+    This class provides an efficient way to read large result sets as PyArrow RecordBatches
+    with configurable batch sizes to optimize memory usage and performance.
+    """
+
+    def __init__(self, chdb_stream_result, batch_size_rows):
+        self._stream_result = chdb_stream_result
+        self._schema = None
+        self._closed = False
+        self._pending_batches = []
+        self._accumulator = []
+        self._batch_size_rows = batch_size_rows
+        self._current_rows = 0
+        self._first_batch = None
+        self._first_batch_consumed = True
+        self._schema = self.schema()
+
+    def schema(self):
+        if self._schema is None:
+            # Get the first chunk to determine schema
+            chunk = self._stream_result.fetch()
+            if chunk is not None:
+                arrow_bytes = chunk.bytes()
+                reader = pa.RecordBatchFileReader(arrow_bytes)
+                self._schema = reader.schema
+
+                table = reader.read_all()
+                if table.num_rows > 0:
+                    batches = table.to_batches()
+                    self._first_batch = batches[0]
+                    if len(batches) > 1:
+                        self._pending_batches = batches[1:]
+                    self._first_batch_consumed = False
+                else:
+                    self._first_batch = None
+                    self._first_batch_consumed = True
+            else:
+                self._schema = pa.schema([])
+                self._first_batch = None
+                self._first_batch_consumed = True
+                self._closed = True
+        return self._schema
+
+    def read_next_batch(self):
+        if self._accumulator:
+            result = self._accumulator.pop(0)
+            return result
+
+        if self._closed:
+            raise StopIteration
+
+        while True:
+            batch = None
+
+            # 1. Return the first batch if not consumed yet
+            if not self._first_batch_consumed:
+                self._first_batch_consumed = True
+                batch = self._first_batch
+
+            # 2. Check pending batches from current chunk
+            elif self._pending_batches:
+                batch = self._pending_batches.pop(0)
+
+            # 3. Fetch new chunk from chdb stream
+            else:
+                chunk = self._stream_result.fetch()
+                if chunk is None:
+                    # No more data - return accumulated batches if any
+                    break
+
+                arrow_bytes = chunk.bytes()
+                if not arrow_bytes:
+                    continue
+
+                reader = pa.RecordBatchFileReader(arrow_bytes)
+                table = reader.read_all()
+
+                if table.num_rows > 0:
+                    batches = table.to_batches()
+                    batch = batches[0]
+                    if len(batches) > 1:
+                        self._pending_batches = batches[1:]
+                else:
+                    continue
+
+            # Process the batch if we got one
+            if batch is not None:
+                self._accumulator.append(batch)
+                self._current_rows += batch.num_rows
+
+                # If accumulated enough rows, return combined batch
+                if self._current_rows >= self._batch_size_rows:
+                    if len(self._accumulator) == 1:
+                        result = self._accumulator.pop(0)
+                    else:
+                        if hasattr(pa, 'concat_batches'):
+                            result = pa.concat_batches(self._accumulator)
+                            self._accumulator = []
+                        else:
+                            result = self._accumulator.pop(0)
+
+                    self._current_rows = 0
+                    return result
+
+        # End of stream - return any accumulated batches
+        if self._accumulator:
+            if len(self._accumulator) == 1:
+                result = self._accumulator.pop(0)
+            else:
+                if hasattr(pa, 'concat_batches'):
+                    result = pa.concat_batches(self._accumulator)
+                    self._accumulator = []
+                else:
+                    result = self._accumulator.pop(0)
+
+            self._current_rows = 0
+            self._closed = True
+            return result
+
+        # No more data
+        self._closed = True
+        raise StopIteration
+
+    def close(self):
+        if not self._closed:
+            self._stream_result.close()
+            self._closed = True
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        return self.read_next_batch()
 
 
 class Connection:
@@ -112,12 +280,13 @@ class Connection:
 
     def send_query(self, query: str, format: str = "CSV") -> StreamingResult:
         lower_output_format = format.lower()
+        supports_record_batch = lower_output_format == "arrow"
         result_func = _process_result_format_funs.get(lower_output_format, lambda x: x)
        if lower_output_format in _arrow_format:
             format = "Arrow"
 
         c_stream_result = self._conn.send_query(query, format)
-        return StreamingResult(c_stream_result, self._conn, result_func)
+        return StreamingResult(c_stream_result, self._conn, result_func, supports_record_batch)
 
     def close(self) -> None:
         # print("close")
chdb-3.4.0.dist-info/METADATA → chdb-3.5.0.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chdb
-Version: 3.4.0
+Version: 3.5.0
 Summary: chDB is an in-process SQL OLAP Engine powered by ClickHouse
 Home-page: https://github.com/chdb-io/chdb
 Author: auxten
@@ -298,16 +298,37 @@ while True:
     if chunk is None:
         break
     if rows_cnt > 0:
-        stream_result.cancel()
+        stream_result.close()
         break
     rows_cnt += chunk.rows_read()
 
 print(rows_cnt) # 65409
 
+# Example 4: Using PyArrow RecordBatchReader for batch export and integration with other libraries
+import pyarrow as pa
+from deltalake import write_deltalake
+
+# Get streaming result in arrow format
+stream_result = sess.send_query("SELECT * FROM numbers(100000)", "Arrow")
+
+# Create RecordBatchReader with custom batch size (default rows_per_batch=1000000)
+batch_reader = stream_result.record_batch(rows_per_batch=10000)
+
+# Use RecordBatchReader with external libraries like Delta Lake
+write_deltalake(
+    table_or_uri="./my_delta_table",
+    data=batch_reader,
+    mode="overwrite"
+)
+
+stream_result.close()
+
 sess.close()
 ```
 
-For more details, see [test_streaming_query.py](tests/test_streaming_query.py).
+**Important Note**: When using streaming queries, if the `StreamingResult` is not fully consumed (due to errors or early termination), you must explicitly call `stream_result.close()` to release resources, or use the `with` statement for automatic cleanup. Failure to do so may block subsequent queries.
+
+For more details, see [test_streaming_query.py](tests/test_streaming_query.py) and [test_arrow_record_reader_deltalake.py](tests/test_arrow_record_reader_deltalake.py).
 </details>
 
 
@@ -510,6 +531,10 @@ There are something you can help:
 
 We welcome bindings for other languages, please refer to [bindings](bindings.md) for more details.
 
+## Version Guide
+
+Please refer to [VERSION-GUIDE.md](VERSION-GUIDE.md) for more details.
+
 ## Paper
 
 - [ClickHouse - Lightning Fast Analytics for Everyone](https://www.vldb.org/pvldb/vol17/p3731-schulze.pdf)
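
Beyond the Delta Lake export in the README's new Example 4 above, the reader returned by `record_batch()` can also be consumed directly, since `pa.RecordBatchReader` is iterable. A minimal sketch in the style of those examples; the query and `rows_per_batch` value are illustrative:

```python
from chdb import session

sess = session.Session()
stream_result = sess.send_query("SELECT * FROM numbers(250000)", "Arrow")

# rows_per_batch is a coalescing target: smaller chunks from the engine
# are accumulated and concatenated into batches of roughly this size.
reader = stream_result.record_batch(rows_per_batch=100000)

total_rows = 0
for batch in reader:  # pa.RecordBatchReader yields pa.RecordBatch objects
    total_rows += batch.num_rows
print(total_rows)  # 250000

stream_result.close()
sess.close()
```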
chdb-3.4.0.dist-info/RECORD → chdb-3.5.0.dist-info/RECORD RENAMED
@@ -1,6 +1,6 @@
-chdb/__init__.py,sha256=pNYyLRqm2s5hG1rPhVLECZXvJQ60EGPt_OC88W90j0w,3762
-chdb/__main__.py,sha256=xNNtDY38d973YM5dlxiIazcqqKhXJSpNb7JflyyrXGE,1185
-chdb/_chdb.cpython-313-darwin.so,sha256=zvlFUFEwpV8MTZU3qAKyoR0iEdNUAoTf3vHwuYuitGQ,383559216
+chdb/__init__.py,sha256=7GGNYb_0PsxWRaU5QubVCgH1gHXa5ZPAqPuKir1Rx-I,3762
+chdb/__main__.py,sha256=vl-gorTYCT9Uh_h4jbQ8O-a5_pokCJPFbF_yplIgKYc,1336
+chdb/_chdb.cpython-313-darwin.so,sha256=v9RnBJpctzPG7ktMWo7olI0M4_d2efy6ilsleedL9HQ,383643856
 chdb/rwabc.py,sha256=tbiwCrXirfrfx46wCJxS64yvFe6pVWIPGdSuvrAL5Ys,2102
 chdb/dataframe/__init__.py,sha256=1_mrZZiJwqBTnH_P8_FCbbYXIWWY5sxnaFpe3-tDLF4,680
 chdb/dataframe/query.py,sha256=ggvE8A5vtabFg9gSTp99S7LCrnIEwbWtb-PtJVT8Ct0,12759
@@ -15,14 +15,14 @@ chdb/dbapi/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
 chdb/session/__init__.py,sha256=fCUROZ5L1-92o2lcASiWJpFu-80-kDoSrNfouLEmLg8,50
 chdb/session/state.py,sha256=m7K9zZtoMQTlh-pfmSyJV38pAe6eHNTPtOvlHYrImhA,4436
 chdb/state/__init__.py,sha256=RVUIWDqDi7gte4Os7Mz1wPXFyFpdHT_p1klJC7QtluI,55
-chdb/state/sqlitelike.py,sha256=v0xh9jWirHzhDVq26C2213LxfaDbRulSAhSHaTiZ24c,12283
+chdb/state/sqlitelike.py,sha256=PHdIJVfbSUvJWU6doMnrg0jVpovtHUG12I_-acZHOko,18338
 chdb/udf/__init__.py,sha256=qSMaPEre7w1pYz8uJ-iZtuu8wYOUNRcI_8UNuaOymGE,80
 chdb/udf/udf.py,sha256=z0A1RmyZrx55bykpvvS-LpVt1lMrQOexjvU5zxCdCSA,3935
 chdb/utils/__init__.py,sha256=tXRcwBRGW2YQNBZWV4Mitw5QlCu_qlSRCjllw15XHbs,171
 chdb/utils/trace.py,sha256=W-pvDoKlnzq6H_7FiWjr5_teN40UNE4E5--zbUrjOIc,2511
 chdb/utils/types.py,sha256=MGLFIjoDvu7Uc2Wy8EDY60jjue66HmMPxbhrujjrZxQ,7530
-chdb-3.4.0.dist-info/licenses/LICENSE.txt,sha256=isYVtNCO5910aj6e9bJJ6kQceivkLqsMlFSNYwzGGKI,11366
-chdb-3.4.0.dist-info/METADATA,sha256=pwAsyrEzxFDBifcZoNLzRQHB7bCXd2B8y-31HX3APPE,24690
-chdb-3.4.0.dist-info/WHEEL,sha256=KreXLeNnYSLDPpk7qnNyKd0DQEhtY-je-mdlEpkBMmo,109
-chdb-3.4.0.dist-info/top_level.txt,sha256=se0Jj0A2-ijfMW51hIjiuNyDJPqy5xJU1G8a_IEdllI,11
-chdb-3.4.0.dist-info/RECORD,,
+chdb-3.5.0.dist-info/licenses/LICENSE.txt,sha256=isYVtNCO5910aj6e9bJJ6kQceivkLqsMlFSNYwzGGKI,11366
+chdb-3.5.0.dist-info/METADATA,sha256=tW6qWsjBl4ZTCfUwIzN2kXvQdbWSM_YWAzSxjIRmk_I,25782
+chdb-3.5.0.dist-info/WHEEL,sha256=KreXLeNnYSLDPpk7qnNyKd0DQEhtY-je-mdlEpkBMmo,109
+chdb-3.5.0.dist-info/top_level.txt,sha256=se0Jj0A2-ijfMW51hIjiuNyDJPqy5xJU1G8a_IEdllI,11
+chdb-3.5.0.dist-info/RECORD,,