chdb 3.6.0__cp38-abi3-macosx_11_0_arm64.whl → 3.7.0__cp38-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of chdb might be problematic. Click here for more details.
- chdb/.flake8 +5 -0
- chdb/__init__.py +132 -11
- chdb/_chdb.abi3.so +0 -0
- chdb/build-musl.sh +166 -0
- chdb/build.sh +370 -0
- chdb/build_linux_arm64.sh +63 -0
- chdb/build_mac_arm64.sh +121 -0
- chdb/build_pybind11.sh +131 -0
- chdb/dataframe/__init__.py +7 -2
- chdb/dataframe/query.py +211 -23
- chdb/dbapi/__init__.py +57 -2
- chdb/dbapi/connections.py +169 -12
- chdb/dbapi/converters.py +352 -34
- chdb/dbapi/cursors.py +264 -70
- chdb/dbapi/err.py +269 -30
- chdb/dbapi/times.py +171 -0
- chdb/libpybind11nonlimitedapi_chdb_3.10.dylib +0 -0
- chdb/libpybind11nonlimitedapi_chdb_3.11.dylib +0 -0
- chdb/libpybind11nonlimitedapi_chdb_3.12.dylib +0 -0
- chdb/libpybind11nonlimitedapi_chdb_3.13.dylib +0 -0
- chdb/libpybind11nonlimitedapi_chdb_3.8.dylib +0 -0
- chdb/libpybind11nonlimitedapi_chdb_3.9.dylib +0 -0
- chdb/libpybind11nonlimitedapi_stubs.dylib +0 -0
- chdb/session/state.py +167 -4
- chdb/state/sqlitelike.py +608 -34
- chdb/test_smoke.sh +32 -0
- chdb/udf/__init__.py +7 -0
- chdb/udf/udf.py +41 -25
- chdb/utils/__init__.py +6 -0
- chdb/utils/trace.py +31 -0
- chdb/utils/types.py +62 -64
- chdb/vars.sh +48 -0
- {chdb-3.6.0.dist-info → chdb-3.7.0.dist-info}/METADATA +29 -18
- chdb-3.7.0.dist-info/RECORD +43 -0
- chdb-3.6.0.dist-info/RECORD +0 -35
- {chdb-3.6.0.dist-info → chdb-3.7.0.dist-info}/LICENSE.txt +0 -0
- {chdb-3.6.0.dist-info → chdb-3.7.0.dist-info}/WHEEL +0 -0
- {chdb-3.6.0.dist-info → chdb-3.7.0.dist-info}/top_level.txt +0 -0
chdb/dataframe/__init__.py
CHANGED
|
@@ -8,8 +8,13 @@ except ImportError as e:
|
|
|
8
8
|
raise ImportError('Failed to import pyarrow or pandas') from None
|
|
9
9
|
|
|
10
10
|
# check if pandas version >= 2.0.0
|
|
11
|
-
|
|
12
|
-
|
|
11
|
+
try:
|
|
12
|
+
version_parts = pd.__version__.split('.')
|
|
13
|
+
major_version = int(version_parts[0])
|
|
14
|
+
if major_version < 2:
|
|
15
|
+
print('Please upgrade pandas to version 2.0.0 or higher to have better performance')
|
|
16
|
+
except (ValueError, IndexError, AttributeError):
|
|
17
|
+
pass
|
|
13
18
|
|
|
14
19
|
from .query import Table, pandas_read_parquet # noqa: C0413
|
|
15
20
|
|
chdb/dataframe/query.py
CHANGED
|
@@ -8,11 +8,38 @@ from chdb import query as chdb_query
|
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class Table:
|
|
11
|
-
"""
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
11
|
+
"""Wrapper for multiple data formats enabling SQL queries on DataFrames, Parquet files, and Arrow tables.
|
|
12
|
+
|
|
13
|
+
The Table class provides a unified interface for querying different data formats using SQL.
|
|
14
|
+
It supports pandas DataFrames, Parquet files (both on disk and in memory), and PyArrow Tables.
|
|
15
|
+
All data is internally converted to Parquet format for efficient querying with chDB.
|
|
16
|
+
|
|
17
|
+
Args:
|
|
18
|
+
parquet_path (str, optional): Path to an existing Parquet file
|
|
19
|
+
temp_parquet_path (str, optional): Path to a temporary Parquet file
|
|
20
|
+
parquet_memoryview (memoryview, optional): Parquet data in memory as memoryview
|
|
21
|
+
dataframe (pd.DataFrame, optional): pandas DataFrame to wrap
|
|
22
|
+
arrow_table (pa.Table, optional): PyArrow Table to wrap
|
|
23
|
+
use_memfd (bool, optional): Use memfd_create for temporary files (Linux only). Defaults to False.
|
|
24
|
+
|
|
25
|
+
Examples:
|
|
26
|
+
>>> # Create from pandas DataFrame
|
|
27
|
+
>>> import pandas as pd
|
|
28
|
+
>>> df = pd.DataFrame({'id': [1, 2], 'name': ['Alice', 'Bob']})
|
|
29
|
+
>>> table = Table(dataframe=df)
|
|
30
|
+
>>> result = table.query("SELECT * FROM __table__ WHERE id > 1")
|
|
31
|
+
|
|
32
|
+
>>> # Create from Parquet file
|
|
33
|
+
>>> table = Table(parquet_path="data.parquet")
|
|
34
|
+
>>> result = table.query("SELECT COUNT(*) FROM __table__")
|
|
35
|
+
|
|
36
|
+
>>> # Multi-table queries
|
|
37
|
+
>>> table1 = Table(dataframe=df1)
|
|
38
|
+
>>> table2 = Table(dataframe=df2)
|
|
39
|
+
>>> result = Table.queryStatic(
|
|
40
|
+
... "SELECT * FROM __table1__ JOIN __table2__ ON __table1__.id = __table2__.id",
|
|
41
|
+
... table1=table1, table2=table2
|
|
42
|
+
... )
|
|
16
43
|
"""
|
|
17
44
|
|
|
18
45
|
def __init__(
|
|
@@ -24,9 +51,18 @@ class Table:
|
|
|
24
51
|
arrow_table: pa.Table = None,
|
|
25
52
|
use_memfd: bool = False,
|
|
26
53
|
):
|
|
27
|
-
"""
|
|
28
|
-
|
|
29
|
-
|
|
54
|
+
"""Initialize a Table object with one of the supported data formats.
|
|
55
|
+
|
|
56
|
+
Only one data source should be provided. The Table will wrap the provided data
|
|
57
|
+
and enable SQL querying capabilities.
|
|
58
|
+
|
|
59
|
+
Args:
|
|
60
|
+
parquet_path (str, optional): Path to existing Parquet file
|
|
61
|
+
temp_parquet_path (str, optional): Path to temporary Parquet file
|
|
62
|
+
parquet_memoryview (memoryview, optional): Parquet data in memory
|
|
63
|
+
dataframe (pd.DataFrame, optional): pandas DataFrame to wrap
|
|
64
|
+
arrow_table (pa.Table, optional): PyArrow Table to wrap
|
|
65
|
+
use_memfd (bool, optional): Use memory-based file descriptors on Linux
|
|
30
66
|
"""
|
|
31
67
|
self._parquet_path = parquet_path
|
|
32
68
|
self._temp_parquet_path = temp_parquet_path
|
|
@@ -46,15 +82,47 @@ class Table:
|
|
|
46
82
|
pass
|
|
47
83
|
|
|
48
84
|
def rows_read(self):
|
|
85
|
+
"""Get the number of rows read from the last query operation.
|
|
86
|
+
|
|
87
|
+
Returns:
|
|
88
|
+
int: Number of rows processed in the last query
|
|
89
|
+
"""
|
|
49
90
|
return self._rows_read
|
|
50
91
|
|
|
51
92
|
def bytes_read(self):
|
|
93
|
+
"""Get the number of bytes read from the last query operation.
|
|
94
|
+
|
|
95
|
+
Returns:
|
|
96
|
+
int: Number of bytes processed in the last query
|
|
97
|
+
"""
|
|
52
98
|
return self._bytes_read
|
|
53
99
|
|
|
54
100
|
def elapsed(self):
|
|
101
|
+
"""Get the elapsed time for the last query operation.
|
|
102
|
+
|
|
103
|
+
Returns:
|
|
104
|
+
float: Query execution time
|
|
105
|
+
"""
|
|
55
106
|
return self._elapsed
|
|
56
107
|
|
|
57
108
|
def to_pandas(self) -> pd.DataFrame:
|
|
109
|
+
"""Convert the Table data to a pandas DataFrame.
|
|
110
|
+
|
|
111
|
+
This method handles conversion from various internal formats (Parquet files,
|
|
112
|
+
memory buffers, Arrow tables) to a unified pandas DataFrame representation.
|
|
113
|
+
|
|
114
|
+
Returns:
|
|
115
|
+
pd.DataFrame: The table data as a pandas DataFrame
|
|
116
|
+
|
|
117
|
+
Raises:
|
|
118
|
+
ValueError: If no data source is available in the Table object
|
|
119
|
+
|
|
120
|
+
Example:
|
|
121
|
+
>>> table = Table(dataframe=df)
|
|
122
|
+
>>> result_table = table.query("SELECT * FROM __table__ LIMIT 5")
|
|
123
|
+
>>> df_result = result_table.to_pandas()
|
|
124
|
+
>>> print(df_result)
|
|
125
|
+
"""
|
|
58
126
|
if self._dataframe is None:
|
|
59
127
|
if self._arrow_table is not None:
|
|
60
128
|
return self._arrow_table.to_pandas()
|
|
@@ -71,8 +139,20 @@ class Table:
|
|
|
71
139
|
return self._dataframe
|
|
72
140
|
|
|
73
141
|
def flush_to_disk(self):
|
|
74
|
-
"""
|
|
75
|
-
|
|
142
|
+
"""Flush in-memory data to disk as a temporary Parquet file.
|
|
143
|
+
|
|
144
|
+
This method converts in-memory data (DataFrame, Arrow table, or memory buffer)
|
|
145
|
+
to a temporary Parquet file on disk. This can be useful for memory management
|
|
146
|
+
or when working with large datasets.
|
|
147
|
+
|
|
148
|
+
The method does nothing if data is already stored on disk.
|
|
149
|
+
|
|
150
|
+
Raises:
|
|
151
|
+
ValueError: If the Table object contains no data to flush
|
|
152
|
+
|
|
153
|
+
Example:
|
|
154
|
+
>>> table = Table(dataframe=large_df)
|
|
155
|
+
>>> table.flush_to_disk() # Frees memory, keeps data accessible
|
|
76
156
|
"""
|
|
77
157
|
if self._parquet_path is not None or self._temp_parquet_path is not None:
|
|
78
158
|
return
|
|
@@ -112,10 +192,33 @@ class Table:
|
|
|
112
192
|
return str(self.to_pandas())
|
|
113
193
|
|
|
114
194
|
def query(self, sql: str, **kwargs) -> "Table":
|
|
115
|
-
"""
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
195
|
+
"""Execute SQL query on the current Table and return a new Table with results.
|
|
196
|
+
|
|
197
|
+
This method allows you to run SQL queries on the table data using chDB.
|
|
198
|
+
The table is referenced as '__table__' in the SQL statement.
|
|
199
|
+
|
|
200
|
+
Args:
|
|
201
|
+
sql (str): SQL query string. Must reference the table as '__table__'
|
|
202
|
+
**kwargs: Additional arguments passed to the chDB query engine
|
|
203
|
+
|
|
204
|
+
Returns:
|
|
205
|
+
Table: New Table object containing the query results
|
|
206
|
+
|
|
207
|
+
Raises:
|
|
208
|
+
ValueError: If SQL doesn't contain '__table__' reference or if Table is not initialized
|
|
209
|
+
|
|
210
|
+
Examples:
|
|
211
|
+
>>> table = Table(dataframe=df)
|
|
212
|
+
>>> # Filter rows
|
|
213
|
+
>>> result = table.query("SELECT * FROM __table__ WHERE age > 25")
|
|
214
|
+
>>>
|
|
215
|
+
>>> # Aggregate data
|
|
216
|
+
>>> summary = table.query("SELECT COUNT(*), AVG(salary) FROM __table__")
|
|
217
|
+
>>>
|
|
218
|
+
>>> # Complex operations
|
|
219
|
+
>>> processed = table.query(
|
|
220
|
+
... "SELECT name, age * 2 as double_age FROM __table__ ORDER BY age DESC"
|
|
221
|
+
... )
|
|
119
222
|
"""
|
|
120
223
|
self._validate_sql(sql)
|
|
121
224
|
|
|
@@ -138,6 +241,18 @@ class Table:
|
|
|
138
241
|
sql = query
|
|
139
242
|
|
|
140
243
|
def show(self):
|
|
244
|
+
"""Display the Table data by printing the pandas DataFrame representation.
|
|
245
|
+
|
|
246
|
+
This is a convenience method for quickly viewing the table contents.
|
|
247
|
+
Equivalent to print(table.to_pandas()).
|
|
248
|
+
|
|
249
|
+
Example:
|
|
250
|
+
>>> table = Table(dataframe=df)
|
|
251
|
+
>>> table.show()
|
|
252
|
+
id name
|
|
253
|
+
0 1 Alice
|
|
254
|
+
1 2 Bob
|
|
255
|
+
"""
|
|
141
256
|
print(self.to_pandas())
|
|
142
257
|
|
|
143
258
|
def _query_on_path(self, path, sql, **kwargs):
|
|
@@ -220,12 +335,51 @@ class Table:
|
|
|
220
335
|
|
|
221
336
|
@staticmethod
|
|
222
337
|
def queryStatic(sql: str, **kwargs) -> "Table":
|
|
223
|
-
"""
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
338
|
+
"""Execute SQL query across multiple Table objects.
|
|
339
|
+
|
|
340
|
+
This static method enables complex queries involving multiple tables by referencing
|
|
341
|
+
them as '__tablename__' in the SQL and passing them as keyword arguments.
|
|
342
|
+
|
|
343
|
+
Args:
|
|
344
|
+
sql (str): SQL query with table references as '__name__' patterns
|
|
345
|
+
**kwargs: Table objects referenced in the SQL, where key matches the table name
|
|
346
|
+
Can also include pandas DataFrames, which will be auto-converted to Tables
|
|
347
|
+
|
|
348
|
+
Returns:
|
|
349
|
+
Table: New Table object containing the query results
|
|
350
|
+
|
|
351
|
+
Raises:
|
|
352
|
+
ValueError: If referenced table names are missing from kwargs or have invalid types
|
|
353
|
+
|
|
354
|
+
Examples:
|
|
355
|
+
>>> users = Table(dataframe=users_df)
|
|
356
|
+
>>> orders = Table(dataframe=orders_df)
|
|
357
|
+
>>>
|
|
358
|
+
>>> # Join two tables
|
|
359
|
+
>>> result = Table.queryStatic(
|
|
360
|
+
... "SELECT u.name, COUNT(o.id) as order_count "
|
|
361
|
+
... "FROM __users__ u LEFT JOIN __orders__ o ON u.id = o.user_id "
|
|
362
|
+
... "GROUP BY u.name",
|
|
363
|
+
... users=users, orders=orders
|
|
364
|
+
... )
|
|
365
|
+
>>>
|
|
366
|
+
>>> # Works with pandas DataFrames directly
|
|
367
|
+
>>> result = Table.queryStatic(
|
|
368
|
+
... "SELECT * FROM __df1__ UNION ALL SELECT * FROM __df2__",
|
|
369
|
+
... df1=dataframe1, df2=dataframe2
|
|
370
|
+
... )
|
|
371
|
+
>>>
|
|
372
|
+
>>> # Complex multi-table operations
|
|
373
|
+
>>> analytics = Table.queryStatic(
|
|
374
|
+
... "SELECT p.category, AVG(o.amount) as avg_order "
|
|
375
|
+
... "FROM __products__ p "
|
|
376
|
+
... "JOIN __order_items__ oi ON p.id = oi.product_id "
|
|
377
|
+
... "JOIN __orders__ o ON oi.order_id = o.id "
|
|
378
|
+
... "GROUP BY p.category ORDER BY avg_order DESC",
|
|
379
|
+
... products=products_table,
|
|
380
|
+
... order_items=order_items_table,
|
|
381
|
+
... orders=orders_table
|
|
382
|
+
... )
|
|
229
383
|
"""
|
|
230
384
|
ansiTablePattern = re.compile(r"__([a-zA-Z][a-zA-Z0-9_]*)__")
|
|
231
385
|
temp_paths = []
|
|
@@ -322,13 +476,47 @@ class Table:
|
|
|
322
476
|
|
|
323
477
|
|
|
324
478
|
def pandas_read_parquet(path) -> pd.DataFrame:
|
|
479
|
+
"""Read a Parquet file into a pandas DataFrame.
|
|
480
|
+
|
|
481
|
+
This is a convenience wrapper around pandas.read_parquet() for consistency
|
|
482
|
+
with the chdb.dataframe module interface.
|
|
483
|
+
|
|
484
|
+
Args:
|
|
485
|
+
path: File path or file-like object to read from
|
|
486
|
+
|
|
487
|
+
Returns:
|
|
488
|
+
pd.DataFrame: The loaded DataFrame
|
|
489
|
+
"""
|
|
325
490
|
return pd.read_parquet(path)
|
|
326
491
|
|
|
327
492
|
|
|
328
493
|
def memfd_create(name: str = None) -> int:
|
|
329
|
-
"""
|
|
330
|
-
|
|
331
|
-
|
|
494
|
+
"""Create an in-memory file descriptor using memfd_create system call.
|
|
495
|
+
|
|
496
|
+
This function attempts to use the Linux-specific memfd_create(2) system call
|
|
497
|
+
to create a file descriptor that refers to an anonymous memory-backed file.
|
|
498
|
+
This provides better performance for temporary data operations.
|
|
499
|
+
|
|
500
|
+
Args:
|
|
501
|
+
name (str, optional): Name for the memory file (for debugging). Defaults to None.
|
|
502
|
+
|
|
503
|
+
Returns:
|
|
504
|
+
int: File descriptor on success, -1 on failure or if not supported
|
|
505
|
+
|
|
506
|
+
Note:
|
|
507
|
+
This function only works on Linux 3.17 or newer with glibc 2.27 or newer.
|
|
508
|
+
On other systems or if the call fails, it returns -1 and callers should
|
|
509
|
+
fall back to regular temporary files.
|
|
510
|
+
|
|
511
|
+
Example:
|
|
512
|
+
>>> fd = memfd_create("temp_data")
|
|
513
|
+
>>> if fd != -1:
|
|
514
|
+
... # Use memory-based file descriptor
|
|
515
|
+
... with os.fdopen(fd, 'wb') as f:
|
|
516
|
+
... f.write(data)
|
|
517
|
+
... else:
|
|
518
|
+
... # Fall back to regular temp file
|
|
519
|
+
... fd, path = tempfile.mkstemp()
|
|
332
520
|
"""
|
|
333
521
|
if hasattr(os, "memfd_create"):
|
|
334
522
|
try:
|
chdb/dbapi/__init__.py
CHANGED
|
@@ -13,20 +13,58 @@ paramstyle = "format"
|
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class DBAPISet(frozenset):
|
|
16
|
+
"""Extended frozenset for DB-API 2.0 type comparison.
|
|
17
|
+
|
|
18
|
+
This class extends frozenset to support DB-API 2.0 type comparison semantics.
|
|
19
|
+
It allows for flexible type checking where individual items can be compared
|
|
20
|
+
against the set using both equality and inequality operators.
|
|
21
|
+
|
|
22
|
+
This is used for type constants like STRING, BINARY, NUMBER, etc. to enable
|
|
23
|
+
comparisons like "field_type == STRING" where field_type is a single type value.
|
|
24
|
+
|
|
25
|
+
Examples:
|
|
26
|
+
>>> string_types = DBAPISet([FIELD_TYPE.STRING, FIELD_TYPE.VAR_STRING])
|
|
27
|
+
>>> FIELD_TYPE.STRING == string_types # Returns True
|
|
28
|
+
>>> FIELD_TYPE.INT != string_types # Returns True
|
|
29
|
+
>>> FIELD_TYPE.BLOB in string_types # Returns False
|
|
30
|
+
"""
|
|
16
31
|
|
|
17
32
|
def __ne__(self, other):
|
|
33
|
+
"""Check inequality with flexible type comparison.
|
|
34
|
+
|
|
35
|
+
Args:
|
|
36
|
+
other: Value to compare against this set
|
|
37
|
+
|
|
38
|
+
Returns:
|
|
39
|
+
bool: True if other is not in this set (for non-set types) or
|
|
40
|
+
True if sets are not equal (for set types)
|
|
41
|
+
"""
|
|
18
42
|
if isinstance(other, set):
|
|
19
43
|
return frozenset.__ne__(self, other)
|
|
20
44
|
else:
|
|
21
45
|
return other not in self
|
|
22
46
|
|
|
23
47
|
def __eq__(self, other):
|
|
48
|
+
"""Check equality with flexible type comparison.
|
|
49
|
+
|
|
50
|
+
Args:
|
|
51
|
+
other: Value to compare against this set
|
|
52
|
+
|
|
53
|
+
Returns:
|
|
54
|
+
bool: True if other is in this set (for non-set types) or
|
|
55
|
+
True if sets are equal (for set types)
|
|
56
|
+
"""
|
|
24
57
|
if isinstance(other, frozenset):
|
|
25
58
|
return frozenset.__eq__(self, other)
|
|
26
59
|
else:
|
|
27
60
|
return other in self
|
|
28
61
|
|
|
29
62
|
def __hash__(self):
|
|
63
|
+
"""Return hash value for the set.
|
|
64
|
+
|
|
65
|
+
Returns:
|
|
66
|
+
int: Hash value of the underlying frozenset
|
|
67
|
+
"""
|
|
30
68
|
return frozenset.__hash__(self)
|
|
31
69
|
|
|
32
70
|
|
|
@@ -47,7 +85,17 @@ ROWID = DBAPISet()
|
|
|
47
85
|
|
|
48
86
|
|
|
49
87
|
def Binary(x):
|
|
50
|
-
"""Return x as a binary type.
|
|
88
|
+
"""Return x as a binary type.
|
|
89
|
+
|
|
90
|
+
This function converts the input to bytes type for use with binary
|
|
91
|
+
database fields, following the DB-API 2.0 specification.
|
|
92
|
+
|
|
93
|
+
Args:
|
|
94
|
+
x: Input data to convert to binary
|
|
95
|
+
|
|
96
|
+
Returns:
|
|
97
|
+
bytes: The input converted to bytes
|
|
98
|
+
"""
|
|
51
99
|
return bytes(x)
|
|
52
100
|
|
|
53
101
|
|
|
@@ -65,7 +113,14 @@ if _orig_conn.Connection.__init__.__doc__ is not None:
|
|
|
65
113
|
del _orig_conn
|
|
66
114
|
|
|
67
115
|
|
|
68
|
-
def get_client_info():
|
|
116
|
+
def get_client_info():
|
|
117
|
+
"""Get client version information.
|
|
118
|
+
|
|
119
|
+
Returns the chDB client version as a string for MySQLdb compatibility.
|
|
120
|
+
|
|
121
|
+
Returns:
|
|
122
|
+
str: Version string in format 'major.minor.patch'
|
|
123
|
+
"""
|
|
69
124
|
version = chdb_version
|
|
70
125
|
if len(chdb_version) > 3 and chdb_version[3] is None:
|
|
71
126
|
version = chdb_version[:3]
|
chdb/dbapi/connections.py
CHANGED
|
@@ -8,11 +8,56 @@ VERBOSE = False
|
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class Connection(object):
|
|
11
|
-
"""
|
|
12
|
-
|
|
11
|
+
"""DB-API 2.0 compliant connection to chDB database.
|
|
12
|
+
|
|
13
|
+
This class provides a standard DB-API interface for connecting to and interacting
|
|
14
|
+
with chDB databases. It supports both in-memory and file-based databases.
|
|
15
|
+
|
|
16
|
+
The connection manages the underlying chDB engine and provides methods for
|
|
17
|
+
executing queries, managing transactions (no-op for ClickHouse), and creating cursors.
|
|
18
|
+
|
|
19
|
+
Args:
|
|
20
|
+
path (str, optional): Database file path. If None, uses in-memory database.
|
|
21
|
+
Can be a file path like 'database.db' or None for ':memory:'
|
|
22
|
+
|
|
23
|
+
Attributes:
|
|
24
|
+
encoding (str): Character encoding for queries, defaults to 'utf8'
|
|
25
|
+
open (bool): True if connection is open, False if closed
|
|
26
|
+
|
|
27
|
+
Examples:
|
|
28
|
+
>>> # In-memory database
|
|
29
|
+
>>> conn = Connection()
|
|
30
|
+
>>> cursor = conn.cursor()
|
|
31
|
+
>>> cursor.execute("SELECT 1")
|
|
32
|
+
>>> result = cursor.fetchall()
|
|
33
|
+
>>> conn.close()
|
|
34
|
+
|
|
35
|
+
>>> # File-based database
|
|
36
|
+
>>> conn = Connection('mydata.db')
|
|
37
|
+
>>> with conn.cursor() as cur:
|
|
38
|
+
... cur.execute("CREATE TABLE users (id INT, name STRING)")
|
|
39
|
+
... cur.execute("INSERT INTO users VALUES (1, 'Alice')")
|
|
40
|
+
>>> conn.close()
|
|
41
|
+
|
|
42
|
+
>>> # Context manager usage
|
|
43
|
+
>>> with Connection() as cur:
|
|
44
|
+
... cur.execute("SELECT version()")
|
|
45
|
+
... version = cur.fetchone()
|
|
46
|
+
|
|
47
|
+
Note:
|
|
48
|
+
ClickHouse does not support traditional transactions, so commit() and rollback()
|
|
49
|
+
operations are no-ops but provided for DB-API compliance.
|
|
13
50
|
"""
|
|
14
51
|
|
|
15
52
|
def __init__(self, path=None):
|
|
53
|
+
"""Initialize a new database connection.
|
|
54
|
+
|
|
55
|
+
Args:
|
|
56
|
+
path (str, optional): Database file path. None for in-memory database.
|
|
57
|
+
|
|
58
|
+
Raises:
|
|
59
|
+
err.Error: If connection cannot be established
|
|
60
|
+
"""
|
|
16
61
|
self._closed = False
|
|
17
62
|
self.encoding = "utf8"
|
|
18
63
|
self._affected_rows = 0
|
|
@@ -28,7 +73,14 @@ class Connection(object):
|
|
|
28
73
|
cursor.close()
|
|
29
74
|
|
|
30
75
|
def close(self):
|
|
31
|
-
"""
|
|
76
|
+
"""Close the database connection.
|
|
77
|
+
|
|
78
|
+
Closes the underlying chDB connection and marks this connection as closed.
|
|
79
|
+
Subsequent operations on this connection will raise an Error.
|
|
80
|
+
|
|
81
|
+
Raises:
|
|
82
|
+
err.Error: If connection is already closed
|
|
83
|
+
"""
|
|
32
84
|
if self._closed:
|
|
33
85
|
raise err.Error("Already closed")
|
|
34
86
|
self._closed = True
|
|
@@ -36,21 +88,51 @@ class Connection(object):
|
|
|
36
88
|
|
|
37
89
|
@property
|
|
38
90
|
def open(self):
|
|
39
|
-
"""
|
|
91
|
+
"""Check if the connection is open.
|
|
92
|
+
|
|
93
|
+
Returns:
|
|
94
|
+
bool: True if connection is open, False if closed
|
|
95
|
+
"""
|
|
40
96
|
return not self._closed
|
|
41
97
|
|
|
42
98
|
def commit(self):
|
|
43
|
-
"""Commit
|
|
99
|
+
"""Commit the current transaction.
|
|
100
|
+
|
|
101
|
+
Note:
|
|
102
|
+
This is a no-op for chDB/ClickHouse as it doesn't support traditional
|
|
103
|
+
transactions. Provided for DB-API 2.0 compliance.
|
|
104
|
+
"""
|
|
44
105
|
# No-op for ClickHouse
|
|
45
106
|
pass
|
|
46
107
|
|
|
47
108
|
def rollback(self):
|
|
48
|
-
"""Roll back the current transaction.
|
|
109
|
+
"""Roll back the current transaction.
|
|
110
|
+
|
|
111
|
+
Note:
|
|
112
|
+
This is a no-op for chDB/ClickHouse as it doesn't support traditional
|
|
113
|
+
transactions. Provided for DB-API 2.0 compliance.
|
|
114
|
+
"""
|
|
49
115
|
# No-op for ClickHouse
|
|
50
116
|
pass
|
|
51
117
|
|
|
52
118
|
def cursor(self, cursor=None):
|
|
53
|
-
"""Create a new cursor
|
|
119
|
+
"""Create a new cursor for executing queries.
|
|
120
|
+
|
|
121
|
+
Args:
|
|
122
|
+
cursor: Ignored, provided for compatibility
|
|
123
|
+
|
|
124
|
+
Returns:
|
|
125
|
+
Cursor: New cursor object for this connection
|
|
126
|
+
|
|
127
|
+
Raises:
|
|
128
|
+
err.Error: If connection is closed
|
|
129
|
+
|
|
130
|
+
Example:
|
|
131
|
+
>>> conn = Connection()
|
|
132
|
+
>>> cur = conn.cursor()
|
|
133
|
+
>>> cur.execute("SELECT 1")
|
|
134
|
+
>>> result = cur.fetchone()
|
|
135
|
+
"""
|
|
54
136
|
if self._closed:
|
|
55
137
|
raise err.Error("Connection closed")
|
|
56
138
|
if cursor:
|
|
@@ -58,7 +140,28 @@ class Connection(object):
|
|
|
58
140
|
return Cursor(self)
|
|
59
141
|
|
|
60
142
|
def query(self, sql, fmt="CSV"):
|
|
61
|
-
"""Execute a query and return
|
|
143
|
+
"""Execute a SQL query directly and return raw results.
|
|
144
|
+
|
|
145
|
+
This method bypasses the cursor interface and executes queries directly.
|
|
146
|
+
For standard DB-API usage, prefer using cursor() method.
|
|
147
|
+
|
|
148
|
+
Args:
|
|
149
|
+
sql (str or bytes): SQL query to execute
|
|
150
|
+
fmt (str, optional): Output format. Defaults to "CSV".
|
|
151
|
+
Supported formats include "CSV", "JSON", "Arrow", "Parquet", etc.
|
|
152
|
+
|
|
153
|
+
Returns:
|
|
154
|
+
Query result in the specified format
|
|
155
|
+
|
|
156
|
+
Raises:
|
|
157
|
+
err.InterfaceError: If connection is closed or query fails
|
|
158
|
+
|
|
159
|
+
Example:
|
|
160
|
+
>>> conn = Connection()
|
|
161
|
+
>>> result = conn.query("SELECT 1, 'hello'", "CSV")
|
|
162
|
+
>>> print(result)
|
|
163
|
+
"1,hello\\n"
|
|
164
|
+
"""
|
|
62
165
|
if self._closed:
|
|
63
166
|
raise err.InterfaceError("Connection closed")
|
|
64
167
|
|
|
@@ -73,21 +176,67 @@ class Connection(object):
|
|
|
73
176
|
raise err.InterfaceError(f"Query error: {error}")
|
|
74
177
|
|
|
75
178
|
def escape(self, obj, mapping=None):
|
|
76
|
-
"""Escape
|
|
179
|
+
"""Escape a value for safe inclusion in SQL queries.
|
|
180
|
+
|
|
181
|
+
Args:
|
|
182
|
+
obj: Value to escape (string, bytes, number, etc.)
|
|
183
|
+
mapping: Optional character mapping for escaping
|
|
184
|
+
|
|
185
|
+
Returns:
|
|
186
|
+
Escaped version of the input suitable for SQL queries
|
|
187
|
+
|
|
188
|
+
Example:
|
|
189
|
+
>>> conn = Connection()
|
|
190
|
+
>>> safe_value = conn.escape("O'Reilly")
|
|
191
|
+
>>> query = f"SELECT * FROM users WHERE name = {safe_value}"
|
|
192
|
+
"""
|
|
77
193
|
return converters.escape_item(obj, mapping)
|
|
78
194
|
|
|
79
195
|
def escape_string(self, s):
|
|
196
|
+
"""Escape a string value for SQL queries.
|
|
197
|
+
|
|
198
|
+
Args:
|
|
199
|
+
s (str): String to escape
|
|
200
|
+
|
|
201
|
+
Returns:
|
|
202
|
+
str: Escaped string safe for SQL inclusion
|
|
203
|
+
"""
|
|
80
204
|
return converters.escape_string(s)
|
|
81
205
|
|
|
82
206
|
def _quote_bytes(self, s):
|
|
207
|
+
"""Quote and escape bytes data for SQL queries.
|
|
208
|
+
|
|
209
|
+
Args:
|
|
210
|
+
s (bytes): Bytes data to quote
|
|
211
|
+
|
|
212
|
+
Returns:
|
|
213
|
+
str: Quoted and escaped bytes representation
|
|
214
|
+
"""
|
|
83
215
|
return converters.escape_bytes(s)
|
|
84
216
|
|
|
85
217
|
def __enter__(self):
|
|
86
|
-
"""
|
|
218
|
+
"""Enter context manager and return a cursor.
|
|
219
|
+
|
|
220
|
+
Returns:
|
|
221
|
+
Cursor: New cursor for this connection
|
|
222
|
+
|
|
223
|
+
Example:
|
|
224
|
+
>>> with Connection() as cur:
|
|
225
|
+
... cur.execute("SELECT 1")
|
|
226
|
+
... result = cur.fetchone()
|
|
227
|
+
"""
|
|
87
228
|
return self.cursor()
|
|
88
229
|
|
|
89
230
|
def __exit__(self, exc, value, traceback):
|
|
90
|
-
"""
|
|
231
|
+
"""Exit context manager with proper cleanup.
|
|
232
|
+
|
|
233
|
+
Commits on successful exit, rolls back on exception, and always closes connection.
|
|
234
|
+
|
|
235
|
+
Args:
|
|
236
|
+
exc: Exception type (if any)
|
|
237
|
+
value: Exception value (if any)
|
|
238
|
+
traceback: Exception traceback (if any)
|
|
239
|
+
"""
|
|
91
240
|
if exc:
|
|
92
241
|
self.rollback()
|
|
93
242
|
else:
|
|
@@ -96,5 +245,13 @@ class Connection(object):
|
|
|
96
245
|
|
|
97
246
|
@property
|
|
98
247
|
def resp(self):
|
|
99
|
-
"""
|
|
248
|
+
"""Get the last query response.
|
|
249
|
+
|
|
250
|
+
Returns:
|
|
251
|
+
The raw response from the last query() call
|
|
252
|
+
|
|
253
|
+
Note:
|
|
254
|
+
This property is updated each time query() is called directly.
|
|
255
|
+
It does not reflect queries executed through cursors.
|
|
256
|
+
"""
|
|
100
257
|
return self._resp
|