chdb 3.6.0__cp38-abi3-macosx_11_0_arm64.whl → 3.7.0__cp38-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of chdb might be problematic. Click here for more details.
- chdb/.flake8 +5 -0
- chdb/__init__.py +132 -11
- chdb/_chdb.abi3.so +0 -0
- chdb/build-musl.sh +166 -0
- chdb/build.sh +370 -0
- chdb/build_linux_arm64.sh +63 -0
- chdb/build_mac_arm64.sh +121 -0
- chdb/build_pybind11.sh +131 -0
- chdb/dataframe/__init__.py +7 -2
- chdb/dataframe/query.py +211 -23
- chdb/dbapi/__init__.py +57 -2
- chdb/dbapi/connections.py +169 -12
- chdb/dbapi/converters.py +352 -34
- chdb/dbapi/cursors.py +264 -70
- chdb/dbapi/err.py +269 -30
- chdb/dbapi/times.py +171 -0
- chdb/libpybind11nonlimitedapi_chdb_3.10.dylib +0 -0
- chdb/libpybind11nonlimitedapi_chdb_3.11.dylib +0 -0
- chdb/libpybind11nonlimitedapi_chdb_3.12.dylib +0 -0
- chdb/libpybind11nonlimitedapi_chdb_3.13.dylib +0 -0
- chdb/libpybind11nonlimitedapi_chdb_3.8.dylib +0 -0
- chdb/libpybind11nonlimitedapi_chdb_3.9.dylib +0 -0
- chdb/libpybind11nonlimitedapi_stubs.dylib +0 -0
- chdb/session/state.py +167 -4
- chdb/state/sqlitelike.py +608 -34
- chdb/test_smoke.sh +32 -0
- chdb/udf/__init__.py +7 -0
- chdb/udf/udf.py +41 -25
- chdb/utils/__init__.py +6 -0
- chdb/utils/trace.py +31 -0
- chdb/utils/types.py +62 -64
- chdb/vars.sh +48 -0
- {chdb-3.6.0.dist-info → chdb-3.7.0.dist-info}/METADATA +29 -18
- chdb-3.7.0.dist-info/RECORD +43 -0
- chdb-3.6.0.dist-info/RECORD +0 -35
- {chdb-3.6.0.dist-info → chdb-3.7.0.dist-info}/LICENSE.txt +0 -0
- {chdb-3.6.0.dist-info → chdb-3.7.0.dist-info}/WHEEL +0 -0
- {chdb-3.6.0.dist-info → chdb-3.7.0.dist-info}/top_level.txt +0 -0
chdb/state/sqlitelike.py
CHANGED
|
@@ -19,7 +19,39 @@ _process_result_format_funs = {
|
|
|
19
19
|
|
|
20
20
|
# return pyarrow table
|
|
21
21
|
def to_arrowTable(res):
|
|
22
|
-
"""
|
|
22
|
+
"""Convert query result to PyArrow Table.
|
|
23
|
+
|
|
24
|
+
This function converts chdb query results to a PyArrow Table format,
|
|
25
|
+
which provides efficient columnar data access and interoperability
|
|
26
|
+
with other data processing libraries.
|
|
27
|
+
|
|
28
|
+
Args:
|
|
29
|
+
res: Query result object from chdb containing Arrow format data
|
|
30
|
+
|
|
31
|
+
Returns:
|
|
32
|
+
pyarrow.Table: PyArrow Table containing the query results
|
|
33
|
+
|
|
34
|
+
Raises:
|
|
35
|
+
ImportError: If pyarrow or pandas packages are not installed
|
|
36
|
+
|
|
37
|
+
.. note::
|
|
38
|
+
This function requires both pyarrow and pandas to be installed.
|
|
39
|
+
Install them with: ``pip install pyarrow pandas``
|
|
40
|
+
|
|
41
|
+
.. warning::
|
|
42
|
+
Empty results return an empty PyArrow Table with an empty (zero-column) schema.
|
|
43
|
+
|
|
44
|
+
Examples:
|
|
45
|
+
>>> import chdb
|
|
46
|
+
>>> result = chdb.query("SELECT 1 as num, 'hello' as text", "Arrow")
|
|
47
|
+
>>> table = to_arrowTable(result)
|
|
48
|
+
>>> print(table.schema)
|
|
49
|
+
num: int64
|
|
50
|
+
text: string
|
|
51
|
+
>>> print(table.to_pandas())
|
|
52
|
+
num text
|
|
53
|
+
0 1 hello
|
|
54
|
+
"""
|
|
23
55
|
# try import pyarrow and pandas, if failed, raise ImportError with suggestion
|
|
24
56
|
try:
|
|
25
57
|
import pyarrow as pa # noqa
|
|
@@ -30,12 +62,48 @@ def to_arrowTable(res):
|
|
|
30
62
|
raise ImportError("Failed to import pyarrow or pandas") from None
|
|
31
63
|
if len(res) == 0:
|
|
32
64
|
return pa.Table.from_batches([], schema=pa.schema([]))
|
|
33
|
-
|
|
65
|
+
|
|
66
|
+
memview = res.get_memview()
|
|
67
|
+
return pa.RecordBatchFileReader(memview.view()).read_all()
|
|
34
68
|
|
|
35
69
|
|
|
36
70
|
# return pandas dataframe
|
|
37
71
|
def to_df(r):
|
|
38
|
-
"""
|
|
72
|
+
"""Convert query result to Pandas DataFrame.
|
|
73
|
+
|
|
74
|
+
This function converts chdb query results to a Pandas DataFrame format
|
|
75
|
+
by first converting to PyArrow Table and then to DataFrame. This provides
|
|
76
|
+
convenient data analysis capabilities with Pandas API.
|
|
77
|
+
|
|
78
|
+
Args:
|
|
79
|
+
r: Query result object from chdb containing Arrow format data
|
|
80
|
+
|
|
81
|
+
Returns:
|
|
82
|
+
pandas.DataFrame: DataFrame containing the query results with
|
|
83
|
+
appropriate column names and data types
|
|
84
|
+
|
|
85
|
+
Raises:
|
|
86
|
+
ImportError: If pyarrow or pandas packages are not installed
|
|
87
|
+
|
|
88
|
+
.. note::
|
|
89
|
+
This function uses multi-threading for the Arrow to Pandas conversion
|
|
90
|
+
to improve performance on large datasets.
|
|
91
|
+
|
|
92
|
+
.. seealso::
|
|
93
|
+
:func:`to_arrowTable` - For PyArrow Table format conversion
|
|
94
|
+
|
|
95
|
+
Examples:
|
|
96
|
+
>>> import chdb
|
|
97
|
+
>>> result = chdb.query("SELECT 1 as num, 'hello' as text", "Arrow")
|
|
98
|
+
>>> df = to_df(result)
|
|
99
|
+
>>> print(df)
|
|
100
|
+
num text
|
|
101
|
+
0 1 hello
|
|
102
|
+
>>> print(df.dtypes)
|
|
103
|
+
num int64
|
|
104
|
+
text object
|
|
105
|
+
dtype: object
|
|
106
|
+
"""
|
|
39
107
|
t = to_arrowTable(r)
|
|
40
108
|
return t.to_pandas(use_threads=True)
|
|
41
109
|
|
|
@@ -49,7 +117,35 @@ class StreamingResult:
|
|
|
49
117
|
self._supports_record_batch = supports_record_batch
|
|
50
118
|
|
|
51
119
|
def fetch(self):
|
|
52
|
-
"""Fetch next chunk of streaming results
|
|
120
|
+
"""Fetch the next chunk of streaming results.
|
|
121
|
+
|
|
122
|
+
This method retrieves the next available chunk of data from the streaming
|
|
123
|
+
query result. It automatically handles exhaustion detection and applies
|
|
124
|
+
the configured result transformation function.
|
|
125
|
+
|
|
126
|
+
Returns:
|
|
127
|
+
The next chunk of results in the format specified during query execution,
|
|
128
|
+
or None if no more data is available
|
|
129
|
+
|
|
130
|
+
Raises:
|
|
131
|
+
RuntimeError: If the streaming query encounters an error
|
|
132
|
+
|
|
133
|
+
.. note::
|
|
134
|
+
Once the stream is exhausted (returns None), subsequent calls will
|
|
135
|
+
continue to return None.
|
|
136
|
+
|
|
137
|
+
.. warning::
|
|
138
|
+
This method should be called sequentially. Concurrent calls may
|
|
139
|
+
result in undefined behavior.
|
|
140
|
+
|
|
141
|
+
Examples:
|
|
142
|
+
>>> conn = Connection(":memory:")
|
|
143
|
+
>>> stream = conn.send_query("SELECT number FROM numbers(100)")
|
|
144
|
+
>>> chunk = stream.fetch()
|
|
145
|
+
>>> while chunk is not None:
|
|
146
|
+
... print(f"Got chunk with {len(chunk)} bytes")
|
|
147
|
+
... chunk = stream.fetch()
|
|
148
|
+
"""
|
|
53
149
|
if self._exhausted:
|
|
54
150
|
return None
|
|
55
151
|
|
|
@@ -84,9 +180,51 @@ class StreamingResult:
|
|
|
84
180
|
self.cancel()
|
|
85
181
|
|
|
86
182
|
def close(self):
|
|
183
|
+
"""Close the streaming result and cleanup resources.
|
|
184
|
+
|
|
185
|
+
This method is an alias for :meth:`cancel` and provides a more
|
|
186
|
+
intuitive interface for resource cleanup. It cancels the streaming
|
|
187
|
+
query and marks the result as exhausted.
|
|
188
|
+
|
|
189
|
+
.. seealso::
|
|
190
|
+
:meth:`cancel` - The underlying cancellation method
|
|
191
|
+
|
|
192
|
+
Examples:
|
|
193
|
+
>>> stream = conn.send_query("SELECT * FROM large_table")
|
|
194
|
+
>>> # Process some data
|
|
195
|
+
>>> chunk = stream.fetch()
|
|
196
|
+
>>> # Close when done
|
|
197
|
+
>>> stream.close()
|
|
198
|
+
"""
|
|
87
199
|
self.cancel()
|
|
88
200
|
|
|
89
201
|
def cancel(self):
|
|
202
|
+
"""Cancel the streaming query and cleanup resources.
|
|
203
|
+
|
|
204
|
+
This method cancels the streaming query on the server side and marks
|
|
205
|
+
the StreamingResult as exhausted. After calling this method, no more
|
|
206
|
+
data can be fetched from this result.
|
|
207
|
+
|
|
208
|
+
Raises:
|
|
209
|
+
RuntimeError: If cancellation fails on the server side
|
|
210
|
+
|
|
211
|
+
.. note::
|
|
212
|
+
This method is idempotent - calling it multiple times is safe
|
|
213
|
+
and will not cause errors.
|
|
214
|
+
|
|
215
|
+
.. warning::
|
|
216
|
+
Once cancelled, the streaming result cannot be resumed or reset.
|
|
217
|
+
You must create a new query to get fresh results.
|
|
218
|
+
|
|
219
|
+
Examples:
|
|
220
|
+
>>> stream = conn.send_query("SELECT * FROM huge_table")
|
|
221
|
+
>>> # Process first few chunks
|
|
222
|
+
>>> for i, chunk in enumerate(stream):
|
|
223
|
+
... if i >= 5: # Stop after 5 chunks
|
|
224
|
+
... stream.cancel()
|
|
225
|
+
... break
|
|
226
|
+
... process_chunk(chunk)
|
|
227
|
+
"""
|
|
90
228
|
if not self._exhausted:
|
|
91
229
|
self._exhausted = True
|
|
92
230
|
try:
|
|
@@ -266,10 +404,92 @@ class Connection:
|
|
|
266
404
|
self._conn = _chdb.connect(connection_string)
|
|
267
405
|
|
|
268
406
|
def cursor(self) -> "Cursor":
|
|
407
|
+
"""Create a cursor object for executing queries.
|
|
408
|
+
|
|
409
|
+
This method creates a database cursor that provides the standard
|
|
410
|
+
DB-API 2.0 interface for executing queries and fetching results.
|
|
411
|
+
The cursor allows for fine-grained control over query execution
|
|
412
|
+
and result retrieval.
|
|
413
|
+
|
|
414
|
+
Returns:
|
|
415
|
+
Cursor: A cursor object for database operations
|
|
416
|
+
|
|
417
|
+
.. note::
|
|
418
|
+
Creating a new cursor will replace any existing cursor associated
|
|
419
|
+
with this connection. Only one cursor per connection is supported.
|
|
420
|
+
|
|
421
|
+
Examples:
|
|
422
|
+
>>> conn = connect(":memory:")
|
|
423
|
+
>>> cursor = conn.cursor()
|
|
424
|
+
>>> cursor.execute("CREATE TABLE test (id INT, name String)")
|
|
425
|
+
>>> cursor.execute("INSERT INTO test VALUES (1, 'Alice')")
|
|
426
|
+
>>> cursor.execute("SELECT * FROM test")
|
|
427
|
+
>>> rows = cursor.fetchall()
|
|
428
|
+
>>> print(rows)
|
|
429
|
+
((1, 'Alice'),)
|
|
430
|
+
|
|
431
|
+
.. seealso::
|
|
432
|
+
:class:`Cursor` - Database cursor implementation
|
|
433
|
+
"""
|
|
269
434
|
self._cursor = Cursor(self._conn)
|
|
270
435
|
return self._cursor
|
|
271
436
|
|
|
272
437
|
def query(self, query: str, format: str = "CSV") -> Any:
|
|
438
|
+
"""Execute a SQL query and return the complete results.
|
|
439
|
+
|
|
440
|
+
This method executes a SQL query synchronously and returns the complete
|
|
441
|
+
result set. It supports various output formats and automatically applies
|
|
442
|
+
format-specific post-processing.
|
|
443
|
+
|
|
444
|
+
Args:
|
|
445
|
+
query (str): SQL query string to execute
|
|
446
|
+
format (str, optional): Output format for results. Defaults to "CSV".
|
|
447
|
+
Supported formats:
|
|
448
|
+
|
|
449
|
+
- "CSV" - Comma-separated values (string)
|
|
450
|
+
- "JSON" - JSON format (string)
|
|
451
|
+
- "Arrow" - Apache Arrow format (bytes)
|
|
452
|
+
- "Dataframe" - Pandas DataFrame (requires pandas)
|
|
453
|
+
- "Arrowtable" - PyArrow Table (requires pyarrow)
|
|
454
|
+
|
|
455
|
+
Returns:
|
|
456
|
+
Query results in the specified format. Type depends on format:
|
|
457
|
+
|
|
458
|
+
- String formats return str
|
|
459
|
+
- Arrow format returns bytes
|
|
460
|
+
- dataframe format returns pandas.DataFrame
|
|
461
|
+
- arrowtable format returns pyarrow.Table
|
|
462
|
+
|
|
463
|
+
Raises:
|
|
464
|
+
RuntimeError: If query execution fails
|
|
465
|
+
ImportError: If required packages for format are not installed
|
|
466
|
+
|
|
467
|
+
.. warning::
|
|
468
|
+
This method loads the entire result set into memory. For large
|
|
469
|
+
results, consider using :meth:`send_query` for streaming.
|
|
470
|
+
|
|
471
|
+
Examples:
|
|
472
|
+
>>> conn = connect(":memory:")
|
|
473
|
+
>>>
|
|
474
|
+
>>> # Basic CSV query
|
|
475
|
+
>>> result = conn.query("SELECT 1 as num, 'hello' as text")
|
|
476
|
+
>>> print(result)
|
|
477
|
+
num,text
|
|
478
|
+
1,hello
|
|
479
|
+
|
|
480
|
+
>>> # DataFrame format
|
|
481
|
+
>>> df = conn.query("SELECT number FROM numbers(5)", "dataframe")
|
|
482
|
+
>>> print(df)
|
|
483
|
+
number
|
|
484
|
+
0 0
|
|
485
|
+
1 1
|
|
486
|
+
2 2
|
|
487
|
+
3 3
|
|
488
|
+
4 4
|
|
489
|
+
|
|
490
|
+
.. seealso::
|
|
491
|
+
:meth:`send_query` - For streaming query execution
|
|
492
|
+
"""
|
|
273
493
|
lower_output_format = format.lower()
|
|
274
494
|
result_func = _process_result_format_funs.get(lower_output_format, lambda x: x)
|
|
275
495
|
if lower_output_format in _arrow_format:
|
|
@@ -279,6 +499,64 @@ class Connection:
|
|
|
279
499
|
return result_func(result)
|
|
280
500
|
|
|
281
501
|
def send_query(self, query: str, format: str = "CSV") -> StreamingResult:
|
|
502
|
+
"""Execute a SQL query and return a streaming result iterator.
|
|
503
|
+
|
|
504
|
+
This method executes a SQL query and returns a StreamingResult object
|
|
505
|
+
that allows you to iterate over the results without loading everything
|
|
506
|
+
into memory at once. This is ideal for processing large result sets.
|
|
507
|
+
|
|
508
|
+
Args:
|
|
509
|
+
query (str): SQL query string to execute
|
|
510
|
+
format (str, optional): Output format for results. Defaults to "CSV".
|
|
511
|
+
Supported formats:
|
|
512
|
+
|
|
513
|
+
- "CSV" - Comma-separated values
|
|
514
|
+
- "JSON" - JSON format
|
|
515
|
+
- "Arrow" - Apache Arrow format (enables record_batch() method)
|
|
516
|
+
- "dataframe" - Pandas DataFrame chunks
|
|
517
|
+
- "arrowtable" - PyArrow Table chunks
|
|
518
|
+
|
|
519
|
+
Returns:
|
|
520
|
+
StreamingResult: A streaming iterator for query results that supports:
|
|
521
|
+
|
|
522
|
+
- Iterator protocol (for loops)
|
|
523
|
+
- Context manager protocol (with statements)
|
|
524
|
+
- Manual fetching with fetch() method
|
|
525
|
+
- PyArrow RecordBatch streaming (Arrow format only)
|
|
526
|
+
|
|
527
|
+
Raises:
|
|
528
|
+
RuntimeError: If query execution fails
|
|
529
|
+
ImportError: If required packages for format are not installed
|
|
530
|
+
|
|
531
|
+
.. note::
|
|
532
|
+
Only the "Arrow" format supports the record_batch() method on the
|
|
533
|
+
returned StreamingResult.
|
|
534
|
+
|
|
535
|
+
Examples:
|
|
536
|
+
>>> conn = connect(":memory:")
|
|
537
|
+
>>>
|
|
538
|
+
>>> # Basic streaming
|
|
539
|
+
>>> stream = conn.send_query("SELECT number FROM numbers(1000)")
|
|
540
|
+
>>> for chunk in stream:
|
|
541
|
+
... print(f"Processing chunk: {len(chunk)} bytes")
|
|
542
|
+
|
|
543
|
+
>>> # Using context manager for cleanup
|
|
544
|
+
>>> with conn.send_query("SELECT * FROM large_table") as stream:
|
|
545
|
+
... chunk = stream.fetch()
|
|
546
|
+
... while chunk:
|
|
547
|
+
... process_data(chunk)
|
|
548
|
+
... chunk = stream.fetch()
|
|
549
|
+
|
|
550
|
+
>>> # Arrow format with RecordBatch streaming
|
|
551
|
+
>>> stream = conn.send_query("SELECT * FROM data", "Arrow")
|
|
552
|
+
>>> reader = stream.record_batch(rows_per_batch=10000)
|
|
553
|
+
>>> for batch in reader:
|
|
554
|
+
... print(f"Batch shape: {batch.num_rows} x {batch.num_columns}")
|
|
555
|
+
|
|
556
|
+
.. seealso::
|
|
557
|
+
:meth:`query` - For non-streaming query execution
|
|
558
|
+
:class:`StreamingResult` - Streaming result iterator
|
|
559
|
+
"""
|
|
282
560
|
lower_output_format = format.lower()
|
|
283
561
|
supports_record_batch = lower_output_format == "arrow"
|
|
284
562
|
result_func = _process_result_format_funs.get(lower_output_format, lambda x: x)
|
|
@@ -289,6 +567,31 @@ class Connection:
|
|
|
289
567
|
return StreamingResult(c_stream_result, self._conn, result_func, supports_record_batch)
|
|
290
568
|
|
|
291
569
|
def close(self) -> None:
|
|
570
|
+
"""Close the connection and cleanup resources.
|
|
571
|
+
|
|
572
|
+
This method closes the database connection and cleans up any associated
|
|
573
|
+
resources including active cursors. After calling this method, the
|
|
574
|
+
connection becomes invalid and cannot be used for further operations.
|
|
575
|
+
|
|
576
|
+
.. note::
|
|
577
|
+
This method is idempotent - calling it multiple times is safe.
|
|
578
|
+
|
|
579
|
+
.. warning::
|
|
580
|
+
Any ongoing streaming queries will be cancelled when the connection
|
|
581
|
+
is closed. Ensure all important data is processed before closing.
|
|
582
|
+
|
|
583
|
+
Examples:
|
|
584
|
+
>>> conn = connect("test.db")
|
|
585
|
+
>>> # Use connection for queries
|
|
586
|
+
>>> conn.query("CREATE TABLE test (id INT)")
|
|
587
|
+
>>> # Close when done
|
|
588
|
+
>>> conn.close()
|
|
589
|
+
|
|
590
|
+
>>> # Using with context manager (automatic cleanup)
|
|
591
|
+
>>> with connect("test.db") as conn:
|
|
592
|
+
... conn.query("SELECT 1")
|
|
593
|
+
... # Connection automatically closed
|
|
594
|
+
"""
|
|
292
595
|
# print("close")
|
|
293
596
|
if self._cursor:
|
|
294
597
|
self._cursor.close()
|
|
@@ -303,6 +606,54 @@ class Cursor:
|
|
|
303
606
|
self._current_row: int = 0
|
|
304
607
|
|
|
305
608
|
def execute(self, query: str) -> None:
|
|
609
|
+
"""Execute a SQL query and prepare results for fetching.
|
|
610
|
+
|
|
611
|
+
This method executes a SQL query and prepares the results for retrieval
|
|
612
|
+
using the fetch methods. It handles the parsing of result data and
|
|
613
|
+
automatic type conversion for ClickHouse data types.
|
|
614
|
+
|
|
615
|
+
Args:
|
|
616
|
+
query (str): SQL query string to execute
|
|
617
|
+
|
|
618
|
+
Raises:
|
|
619
|
+
Exception: If query execution fails or result parsing fails
|
|
620
|
+
|
|
621
|
+
.. note::
|
|
622
|
+
This method follows DB-API 2.0 specifications for cursor.execute().
|
|
623
|
+
After execution, use fetchone(), fetchmany(), or fetchall() to
|
|
624
|
+
retrieve results.
|
|
625
|
+
|
|
626
|
+
.. note::
|
|
627
|
+
The method automatically converts ClickHouse data types to appropriate
|
|
628
|
+
Python types:
|
|
629
|
+
|
|
630
|
+
- Int/UInt types → int
|
|
631
|
+
- Float types → float
|
|
632
|
+
- String/FixedString → str
|
|
633
|
+
- DateTime → datetime.datetime
|
|
634
|
+
- Date → datetime.date
|
|
635
|
+
- Bool → bool
|
|
636
|
+
|
|
637
|
+
Examples:
|
|
638
|
+
>>> cursor = conn.cursor()
|
|
639
|
+
>>>
|
|
640
|
+
>>> # Execute DDL
|
|
641
|
+
>>> cursor.execute("CREATE TABLE test (id INT, name String)")
|
|
642
|
+
>>>
|
|
643
|
+
>>> # Execute DML
|
|
644
|
+
>>> cursor.execute("INSERT INTO test VALUES (1, 'Alice')")
|
|
645
|
+
>>>
|
|
646
|
+
>>> # Execute SELECT and fetch results
|
|
647
|
+
>>> cursor.execute("SELECT * FROM test")
|
|
648
|
+
>>> rows = cursor.fetchall()
|
|
649
|
+
>>> print(rows)
|
|
650
|
+
((1, 'Alice'),)
|
|
651
|
+
|
|
652
|
+
.. seealso::
|
|
653
|
+
:meth:`fetchone` - Fetch single row
|
|
654
|
+
:meth:`fetchmany` - Fetch multiple rows
|
|
655
|
+
:meth:`fetchall` - Fetch all remaining rows
|
|
656
|
+
"""
|
|
306
657
|
self._cursor.execute(query)
|
|
307
658
|
result_mv = self._cursor.get_memview()
|
|
308
659
|
if self._cursor.has_error():
|
|
@@ -404,9 +755,53 @@ class Cursor:
|
|
|
404
755
|
raise Exception(f"Failed to parse JSON data: {e}")
|
|
405
756
|
|
|
406
757
|
def commit(self) -> None:
|
|
758
|
+
"""Commit any pending transaction.
|
|
759
|
+
|
|
760
|
+
This method commits any pending database transaction. In ClickHouse,
|
|
761
|
+
most operations are auto-committed, but this method is provided for
|
|
762
|
+
DB-API 2.0 compatibility.
|
|
763
|
+
|
|
764
|
+
.. note::
|
|
765
|
+
ClickHouse typically auto-commits operations, so explicit commits
|
|
766
|
+
are usually not necessary. This method is provided for compatibility
|
|
767
|
+
with standard DB-API 2.0 workflow.
|
|
768
|
+
|
|
769
|
+
Examples:
|
|
770
|
+
>>> cursor = conn.cursor()
|
|
771
|
+
>>> cursor.execute("INSERT INTO test VALUES (1, 'data')")
|
|
772
|
+
>>> cursor.commit()
|
|
773
|
+
"""
|
|
407
774
|
self._cursor.commit()
|
|
408
775
|
|
|
409
776
|
def fetchone(self) -> Optional[tuple]:
|
|
777
|
+
"""Fetch the next row from the query result.
|
|
778
|
+
|
|
779
|
+
This method retrieves the next available row from the current query
|
|
780
|
+
result set. It returns a tuple containing the column values with
|
|
781
|
+
appropriate Python type conversion applied.
|
|
782
|
+
|
|
783
|
+
Returns:
|
|
784
|
+
Optional[tuple]: Next row as a tuple of column values, or None
|
|
785
|
+
if no more rows are available
|
|
786
|
+
|
|
787
|
+
.. note::
|
|
788
|
+
This method follows DB-API 2.0 specifications. Column values are
|
|
789
|
+
automatically converted to appropriate Python types based on
|
|
790
|
+
ClickHouse column types.
|
|
791
|
+
|
|
792
|
+
Examples:
|
|
793
|
+
>>> cursor = conn.cursor()
|
|
794
|
+
>>> cursor.execute("SELECT id, name FROM users")
|
|
795
|
+
>>> row = cursor.fetchone()
|
|
796
|
+
>>> while row is not None:
|
|
797
|
+
... user_id, user_name = row
|
|
798
|
+
... print(f"User {user_id}: {user_name}")
|
|
799
|
+
... row = cursor.fetchone()
|
|
800
|
+
|
|
801
|
+
.. seealso::
|
|
802
|
+
:meth:`fetchmany` - Fetch multiple rows
|
|
803
|
+
:meth:`fetchall` - Fetch all remaining rows
|
|
804
|
+
"""
|
|
410
805
|
if not self._current_table or self._current_row >= len(self._current_table):
|
|
411
806
|
return None
|
|
412
807
|
|
|
@@ -416,6 +811,38 @@ class Cursor:
|
|
|
416
811
|
return row
|
|
417
812
|
|
|
418
813
|
def fetchmany(self, size: int = 1) -> tuple:
|
|
814
|
+
"""Fetch multiple rows from the query result.
|
|
815
|
+
|
|
816
|
+
This method retrieves up to 'size' rows from the current query result
|
|
817
|
+
set. It returns a tuple of row tuples, with each row containing column
|
|
818
|
+
values with appropriate Python type conversion.
|
|
819
|
+
|
|
820
|
+
Args:
|
|
821
|
+
size (int, optional): Maximum number of rows to fetch. Defaults to 1.
|
|
822
|
+
|
|
823
|
+
Returns:
|
|
824
|
+
tuple: Tuple containing up to 'size' row tuples. May contain fewer
|
|
825
|
+
rows if the result set is exhausted.
|
|
826
|
+
|
|
827
|
+
.. note::
|
|
828
|
+
This method follows DB-API 2.0 specifications. It will return fewer
|
|
829
|
+
than 'size' rows if the result set is exhausted.
|
|
830
|
+
|
|
831
|
+
Examples:
|
|
832
|
+
>>> cursor = conn.cursor()
|
|
833
|
+
>>> cursor.execute("SELECT * FROM large_table")
|
|
834
|
+
>>>
|
|
835
|
+
>>> # Process results in batches
|
|
836
|
+
>>> while True:
|
|
837
|
+
... batch = cursor.fetchmany(100) # Fetch 100 rows at a time
|
|
838
|
+
... if not batch:
|
|
839
|
+
... break
|
|
840
|
+
... process_batch(batch)
|
|
841
|
+
|
|
842
|
+
.. seealso::
|
|
843
|
+
:meth:`fetchone` - Fetch single row
|
|
844
|
+
:meth:`fetchall` - Fetch all remaining rows
|
|
845
|
+
"""
|
|
419
846
|
if not self._current_table:
|
|
420
847
|
return tuple()
|
|
421
848
|
|
|
@@ -427,6 +854,32 @@ class Cursor:
|
|
|
427
854
|
return tuple(rows)
|
|
428
855
|
|
|
429
856
|
def fetchall(self) -> tuple:
|
|
857
|
+
"""Fetch all remaining rows from the query result.
|
|
858
|
+
|
|
859
|
+
This method retrieves all remaining rows from the current query result
|
|
860
|
+
set starting from the current cursor position. It returns a tuple of
|
|
861
|
+
row tuples with appropriate Python type conversion applied.
|
|
862
|
+
|
|
863
|
+
Returns:
|
|
864
|
+
tuple: Tuple containing all remaining row tuples from the result set.
|
|
865
|
+
Returns empty tuple if no rows are available.
|
|
866
|
+
|
|
867
|
+
.. warning::
|
|
868
|
+
This method loads all remaining rows into memory at once. For large
|
|
869
|
+
result sets, consider using :meth:`fetchmany` to process results
|
|
870
|
+
in batches.
|
|
871
|
+
|
|
872
|
+
Examples:
|
|
873
|
+
>>> cursor = conn.cursor()
|
|
874
|
+
>>> cursor.execute("SELECT id, name FROM users")
|
|
875
|
+
>>> all_users = cursor.fetchall()
|
|
876
|
+
>>> for user_id, user_name in all_users:
|
|
877
|
+
... print(f"User {user_id}: {user_name}")
|
|
878
|
+
|
|
879
|
+
.. seealso::
|
|
880
|
+
:meth:`fetchone` - Fetch single row
|
|
881
|
+
:meth:`fetchmany` - Fetch multiple rows in batches
|
|
882
|
+
"""
|
|
430
883
|
if not self._current_table:
|
|
431
884
|
return tuple()
|
|
432
885
|
|
|
@@ -436,6 +889,22 @@ class Cursor:
|
|
|
436
889
|
return tuple(remaining_rows)
|
|
437
890
|
|
|
438
891
|
def close(self) -> None:
|
|
892
|
+
"""Close the cursor and cleanup resources.
|
|
893
|
+
|
|
894
|
+
This method closes the cursor and cleans up any associated resources.
|
|
895
|
+
After calling this method, the cursor becomes invalid and cannot be
|
|
896
|
+
used for further operations.
|
|
897
|
+
|
|
898
|
+
.. note::
|
|
899
|
+
This method is idempotent - calling it multiple times is safe.
|
|
900
|
+
The cursor is also automatically closed when the connection is closed.
|
|
901
|
+
|
|
902
|
+
Examples:
|
|
903
|
+
>>> cursor = conn.cursor()
|
|
904
|
+
>>> cursor.execute("SELECT 1")
|
|
905
|
+
>>> result = cursor.fetchone()
|
|
906
|
+
>>> cursor.close() # Cleanup cursor resources
|
|
907
|
+
"""
|
|
439
908
|
self._cursor.close()
|
|
440
909
|
|
|
441
910
|
def __iter__(self):
|
|
@@ -448,20 +917,81 @@ class Cursor:
|
|
|
448
917
|
return row
|
|
449
918
|
|
|
450
919
|
def column_names(self) -> list:
|
|
451
|
-
"""Return a list of column names from the last executed query
|
|
920
|
+
"""Return a list of column names from the last executed query.
|
|
921
|
+
|
|
922
|
+
This method returns the column names from the most recently executed
|
|
923
|
+
SELECT query. The names are returned in the same order as they appear
|
|
924
|
+
in the result set.
|
|
925
|
+
|
|
926
|
+
Returns:
|
|
927
|
+
list: List of column name strings, or empty list if no query
|
|
928
|
+
has been executed or the query returned no columns
|
|
929
|
+
|
|
930
|
+
Examples:
|
|
931
|
+
>>> cursor = conn.cursor()
|
|
932
|
+
>>> cursor.execute("SELECT id, name, email FROM users LIMIT 1")
|
|
933
|
+
>>> print(cursor.column_names())
|
|
934
|
+
['id', 'name', 'email']
|
|
935
|
+
|
|
936
|
+
.. seealso::
|
|
937
|
+
:meth:`column_types` - Get column type information
|
|
938
|
+
:attr:`description` - DB-API 2.0 column description
|
|
939
|
+
"""
|
|
452
940
|
return self._column_names if hasattr(self, "_column_names") else []
|
|
453
941
|
|
|
454
942
|
def column_types(self) -> list:
|
|
455
|
-
"""Return a list of column types from the last executed query
|
|
943
|
+
"""Return a list of column types from the last executed query.
|
|
944
|
+
|
|
945
|
+
This method returns the ClickHouse column type names from the most
|
|
946
|
+
recently executed SELECT query. The types are returned in the same
|
|
947
|
+
order as they appear in the result set.
|
|
948
|
+
|
|
949
|
+
Returns:
|
|
950
|
+
list: List of ClickHouse type name strings, or empty list if no
|
|
951
|
+
query has been executed or the query returned no columns
|
|
952
|
+
|
|
953
|
+
Examples:
|
|
954
|
+
>>> cursor = conn.cursor()
|
|
955
|
+
>>> cursor.execute("SELECT toInt32(1), toString('hello')")
|
|
956
|
+
>>> print(cursor.column_types())
|
|
957
|
+
['Int32', 'String']
|
|
958
|
+
|
|
959
|
+
.. seealso::
|
|
960
|
+
:meth:`column_names` - Get column name information
|
|
961
|
+
:attr:`description` - DB-API 2.0 column description
|
|
962
|
+
"""
|
|
456
963
|
return self._column_types if hasattr(self, "_column_types") else []
|
|
457
964
|
|
|
458
965
|
@property
|
|
459
966
|
def description(self) -> list:
|
|
460
|
-
"""
|
|
461
|
-
|
|
462
|
-
|
|
967
|
+
"""Return column description as per DB-API 2.0 specification.
|
|
968
|
+
|
|
969
|
+
This property returns a list of 7-item tuples describing each column
|
|
970
|
+
in the result set of the last executed SELECT query. Each tuple contains:
|
|
463
971
|
(name, type_code, display_size, internal_size, precision, scale, null_ok)
|
|
464
|
-
|
|
972
|
+
|
|
973
|
+
Currently, only name and type_code are provided, with other fields set to None.
|
|
974
|
+
|
|
975
|
+
Returns:
|
|
976
|
+
list: List of 7-tuples describing each column, or empty list if no
|
|
977
|
+
SELECT query has been executed
|
|
978
|
+
|
|
979
|
+
.. note::
|
|
980
|
+
This follows the DB-API 2.0 specification for cursor.description.
|
|
981
|
+
Only the first two elements (name and type_code) contain meaningful
|
|
982
|
+
data in this implementation.
|
|
983
|
+
|
|
984
|
+
Examples:
|
|
985
|
+
>>> cursor = conn.cursor()
|
|
986
|
+
>>> cursor.execute("SELECT id, name FROM users LIMIT 1")
|
|
987
|
+
>>> for desc in cursor.description:
|
|
988
|
+
... print(f"Column: {desc[0]}, Type: {desc[1]}")
|
|
989
|
+
Column: id, Type: Int32
|
|
990
|
+
Column: name, Type: String
|
|
991
|
+
|
|
992
|
+
.. seealso::
|
|
993
|
+
:meth:`column_names` - Get just column names
|
|
994
|
+
:meth:`column_types` - Get just column types
|
|
465
995
|
"""
|
|
466
996
|
if not hasattr(self, "_column_names") or not self._column_names:
|
|
467
997
|
return []
|
|
@@ -473,33 +1003,77 @@ class Cursor:
|
|
|
473
1003
|
|
|
474
1004
|
|
|
475
1005
|
def connect(connection_string: str = ":memory:") -> Connection:
|
|
476
|
-
"""
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
1006
|
+
"""Create a connection to chDB background server.
|
|
1007
|
+
|
|
1008
|
+
This function establishes a connection to the chDB (ClickHouse) database engine.
|
|
1009
|
+
Only one open connection is allowed per process. Multiple calls with the same
|
|
1010
|
+
connection string will return the same connection object.
|
|
481
1011
|
|
|
482
1012
|
Args:
|
|
483
|
-
connection_string (str, optional):
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
1013
|
+
connection_string (str, optional): Database connection string. Defaults to ":memory:".
|
|
1014
|
+
Supported connection string formats:
|
|
1015
|
+
|
|
1016
|
+
**Basic formats:**
|
|
1017
|
+
|
|
1018
|
+
- ":memory:" - In-memory database (default)
|
|
1019
|
+
- "test.db" - Relative path database file
|
|
1020
|
+
- "file:test.db" - Same as relative path
|
|
1021
|
+
- "/path/to/test.db" - Absolute path database file
|
|
1022
|
+
- "file:/path/to/test.db" - Same as absolute path
|
|
1023
|
+
|
|
1024
|
+
**With query parameters:**
|
|
1025
|
+
|
|
1026
|
+
- "file:test.db?param1=value1¶m2=value2" - Relative path with params
|
|
1027
|
+
- "file::memory:?verbose&log-level=test" - In-memory with params
|
|
1028
|
+
- "///path/to/test.db?param1=value1¶m2=value2" - Absolute path with params
|
|
1029
|
+
|
|
1030
|
+
**Query parameter handling:**
|
|
1031
|
+
|
|
1032
|
+
Query parameters are passed to ClickHouse engine as startup arguments.
|
|
1033
|
+
Special parameter handling:
|
|
1034
|
+
|
|
1035
|
+
- "mode=ro" becomes "--readonly=1" (read-only mode)
|
|
1036
|
+
- "verbose" enables verbose logging
|
|
1037
|
+
- "log-level=test" sets logging level
|
|
1038
|
+
|
|
1039
|
+
For complete parameter list, see ``clickhouse local --help --verbose``
|
|
501
1040
|
|
|
502
1041
|
Returns:
|
|
503
|
-
Connection:
|
|
1042
|
+
Connection: Database connection object that supports:
|
|
1043
|
+
|
|
1044
|
+
- Creating cursors with :meth:`Connection.cursor`
|
|
1045
|
+
- Direct queries with :meth:`Connection.query`
|
|
1046
|
+
- Streaming queries with :meth:`Connection.send_query`
|
|
1047
|
+
- Context manager protocol for automatic cleanup
|
|
1048
|
+
|
|
1049
|
+
Raises:
|
|
1050
|
+
RuntimeError: If connection to database fails
|
|
1051
|
+
|
|
1052
|
+
.. warning::
|
|
1053
|
+
Only one connection per process is supported. Creating a new connection
|
|
1054
|
+
will close any existing connection.
|
|
1055
|
+
|
|
1056
|
+
Examples:
|
|
1057
|
+
>>> # In-memory database
|
|
1058
|
+
>>> conn = connect()
|
|
1059
|
+
>>> conn = connect(":memory:")
|
|
1060
|
+
>>>
|
|
1061
|
+
>>> # File-based database
|
|
1062
|
+
>>> conn = connect("my_data.db")
|
|
1063
|
+
>>> conn = connect("/path/to/data.db")
|
|
1064
|
+
>>>
|
|
1065
|
+
>>> # With parameters
|
|
1066
|
+
>>> conn = connect("data.db?mode=ro") # Read-only mode
|
|
1067
|
+
>>> conn = connect(":memory:?verbose&log-level=debug") # Debug logging
|
|
1068
|
+
>>>
|
|
1069
|
+
>>> # Using context manager for automatic cleanup
|
|
1070
|
+
>>> with connect("data.db") as conn:
|
|
1071
|
+
... result = conn.query("SELECT 1")
|
|
1072
|
+
... print(result)
|
|
1073
|
+
>>> # Connection automatically closed
|
|
1074
|
+
|
|
1075
|
+
.. seealso::
|
|
1076
|
+
:class:`Connection` - Database connection class
|
|
1077
|
+
:class:`Cursor` - Database cursor for DB-API 2.0 operations
|
|
504
1078
|
"""
|
|
505
1079
|
return Connection(connection_string)
|