chdb 3.7.1__cp38-abi3-musllinux_1_2_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of chdb might be problematic. Click here for more details.
- chdb/__init__.py +255 -0
- chdb/__main__.py +38 -0
- chdb/_chdb.abi3.so +0 -0
- chdb/dataframe/__init__.py +24 -0
- chdb/dataframe/query.py +544 -0
- chdb/dbapi/__init__.py +134 -0
- chdb/dbapi/connections.py +257 -0
- chdb/dbapi/constants/FIELD_TYPE.py +31 -0
- chdb/dbapi/constants/__init__.py +0 -0
- chdb/dbapi/converters.py +611 -0
- chdb/dbapi/cursors.py +545 -0
- chdb/dbapi/err.py +300 -0
- chdb/dbapi/times.py +191 -0
- chdb/libpybind11nonlimitedapi_chdb_3.10.so +0 -0
- chdb/libpybind11nonlimitedapi_chdb_3.11.so +0 -0
- chdb/libpybind11nonlimitedapi_chdb_3.12.so +0 -0
- chdb/libpybind11nonlimitedapi_chdb_3.13.so +0 -0
- chdb/libpybind11nonlimitedapi_chdb_3.14.so +0 -0
- chdb/libpybind11nonlimitedapi_chdb_3.8.so +0 -0
- chdb/libpybind11nonlimitedapi_chdb_3.9.so +0 -0
- chdb/rwabc.py +65 -0
- chdb/session/__init__.py +3 -0
- chdb/session/state.py +287 -0
- chdb/state/__init__.py +3 -0
- chdb/state/sqlitelike.py +1101 -0
- chdb/udf/__init__.py +10 -0
- chdb/udf/udf.py +122 -0
- chdb/utils/__init__.py +15 -0
- chdb/utils/trace.py +105 -0
- chdb/utils/types.py +232 -0
- chdb-3.7.1.dist-info/LICENSE.txt +203 -0
- chdb-3.7.1.dist-info/METADATA +566 -0
- chdb-3.7.1.dist-info/RECORD +35 -0
- chdb-3.7.1.dist-info/WHEEL +5 -0
- chdb-3.7.1.dist-info/top_level.txt +2 -0
chdb/session/state.py
ADDED
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
|
|
3
|
+
import chdb
|
|
4
|
+
from ..state import sqlitelike as chdb_stateful
|
|
5
|
+
from ..state.sqlitelike import StreamingResult
|
|
6
|
+
|
|
7
|
+
# Module-level bookkeeping for the single active Session.
# `g_session_path` records the path the active session was opened with (it is
# only used in the "already an active session" warning); `g_session` holds the
# Session instance itself. Both are None while no session is registered.
g_session_path = None
g_session = None
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Session:
    """
    Session will keep the state of query.

    If path is None, the session uses ":memory:" as its path (an in-memory database).
    You can also pass in a path to create a database at that path, where your data will be kept.

    You can also use a connection string to pass in the path and other parameters.
    Examples:
        - ":memory:" (for in-memory database)
        - "test.db" (for relative path)
        - "file:test.db" (same as above)
        - "/path/to/test.db" (for absolute path)
        - "file:/path/to/test.db" (same as above)
        - "file:test.db?param1=value1&param2=value2" (for relative path with query params)
        - "file::memory:?verbose&log-level=test" (for in-memory database with query params)
        - "///path/to/test.db?param1=value1&param2=value2" (for absolute path)

    Connection string args handling:
        Connection string can contain query params like "file:test.db?param1=value1&param2=value2"
        "param1=value1" will be passed to ClickHouse engine as start up args.

        For more details, see `clickhouse local --help --verbose`
        Some special args handling:
        - "mode=ro" would be "--readonly=1" for clickhouse (read-only mode)

    Important:
        - There can be only one session at a time. If you want to create a new session, you need to close the existing one.
        - Creating a new session will close the existing one.
    """

    def __init__(self, path=None):
        """Open a session, closing any session that is currently registered.

        Args:
            path (str, optional): Database path or connection string. When None,
                ":memory:" is used.

        If ``chdb.g_udf_path`` is a non-empty string it is appended to the
        connection string as a ``udf_path`` query parameter.
        """
        global g_session, g_session_path
        # Set first so close()/__del__ are safe even if construction fails below.
        self._conn = None
        if g_session is not None:
            warnings.warn(
                "There is already an active session. Creating a new session will close the existing one. "
                "It is recommended to close the existing session before creating a new one. "
                f"Closing the existing session {g_session_path}"
            )
            g_session.close()
            g_session = None
            g_session_path = None

        self._path = ":memory:" if path is None else path

        if chdb.g_udf_path != "":
            self._udf_path = chdb.g_udf_path
            # add udf_path to conn_str here.
            # - the `user_scripts_path` will be the value of `udf_path`
            # - the `user_defined_executable_functions_config` will be `user_scripts_path/*.xml`
            # Both of them will be added to the conn_str in the Connection class
            separator = "&" if "?" in self._path else "?"
            self._conn_str = f"{self._path}{separator}udf_path={self._udf_path}"
        else:
            self._udf_path = ""
            self._conn_str = f"{self._path}"

        self._conn = chdb_stateful.Connection(self._conn_str)
        # Register only after the connection was created successfully.
        g_session = self
        g_session_path = self._path

    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Close the session and cleanup resources.

        Closes the underlying connection (if any) and, when this session is the
        one registered in the module-level globals, clears that registration.
        Closing a session that is not the registered one leaves the registered
        session untouched. Calling close() more than once is a no-op.

        .. note::
            This method is automatically called when the session is used as a context manager
            or when the session object is destroyed.

        .. warning::
            Using :meth:`query` or :meth:`send_query` after close() raises RuntimeError.

        Examples:
            >>> session = Session("test.db")
            >>> session.query("SELECT 1")
            >>> session.close()  # Explicitly close the session
        """
        global g_session, g_session_path
        # getattr: tolerate instances whose __init__ never ran.
        conn = getattr(self, "_conn", None)
        # Drop the reference before closing so a failing close is not retried.
        self._conn = None
        try:
            if conn is not None:
                conn.close()
        finally:
            # Only unregister ourselves; a stale session being closed or
            # garbage-collected must not unregister the active one.
            if g_session is self:
                g_session = None
                g_session_path = None

    def cleanup(self):
        """Cleanup session resources with exception handling.

        Calls :meth:`close` and suppresses any ``Exception`` it raises, which makes
        it suitable for ``finally`` blocks. ``KeyboardInterrupt`` and ``SystemExit``
        are not suppressed.

        .. seealso::
            :meth:`close` - For explicit session closing with error propagation

        Examples:
            >>> session = Session("test.db")
            >>> try:
            ...     session.query("INVALID SQL")
            ... finally:
            ...     session.cleanup()  # Safe cleanup regardless of errors
        """
        try:
            self.close()
        except Exception:  # deliberate best-effort cleanup
            pass

    def query(self, sql, fmt="CSV", udf_path=""):
        """Execute a SQL query and return the results.

        The query is forwarded to the session's connection together with the
        requested output format.

        Args:
            sql (str): SQL query string to execute
            fmt (str, optional): Output format for results. Defaults to "CSV".
                Examples: "CSV", "JSON", "TabSeparated", "Pretty", "JSONCompact",
                "Arrow", "Parquet".
            udf_path (str, optional): Accepted for backward compatibility; this
                method does not use it. The UDF path is taken from
                ``chdb.g_udf_path`` when the session is created.

        Returns:
            Whatever the underlying connection's ``query`` returns for the given
            format (presumably text for string formats and bytes for binary ones;
            see the connection class to confirm).

        Raises:
            RuntimeError: If the session is closed.
            Any error raised by the underlying connection is propagated.

        .. note::
            The "Debug" format is not supported and will be automatically converted
            to "CSV" with a warning. For debugging, use connection string parameters
            instead.

        .. warning::
            For large result sets, consider using :meth:`send_query` for
            streaming results.

        Examples:
            >>> session = Session("test.db")
            >>> result = session.query("SELECT 1 as number")
            >>> result = session.query("SELECT 1 as number", fmt="JSON")
            >>> session.query("CREATE TABLE test (id INT, name String)")
            >>> session.query("INSERT INTO test VALUES (1, 'Alice'), (2, 'Bob')")
            >>> result = session.query("SELECT * FROM test ORDER BY id")

        .. seealso::
            :meth:`send_query` - For streaming query execution
            :attr:`sql` - Alias for this method
        """
        if self._conn is None:
            raise RuntimeError("Session is closed")
        if fmt == "Debug":
            warnings.warn(
                """Debug format is not supported in Session.query
Please try use parameters in connection string instead:
Eg: conn = connect(f"db_path?verbose&log-level=test")"""
            )
            fmt = "CSV"
        return self._conn.query(sql, fmt)

    # alias sql = query
    sql = query

    def send_query(self, sql, fmt="CSV") -> "StreamingResult":
        """Execute a SQL query and return a streaming result iterator.

        The query is forwarded to the session's connection ``send_query`` together
        with the requested output format; the connection's result object is
        returned unchanged.

        Args:
            sql (str): SQL query string to execute
            fmt (str, optional): Output format for results. Defaults to "CSV".
                Examples: "CSV", "JSON", "TabSeparated", "JSONCompact", "Arrow",
                "Parquet".

        Returns:
            StreamingResult: The streaming result produced by the connection.

        Raises:
            RuntimeError: If the session is closed.
            Any error raised by the underlying connection is propagated.

        .. note::
            The "Debug" format is not supported and will be automatically converted
            to "CSV" with a warning. For debugging, use connection string parameters
            instead.

        Examples:
            >>> session = Session("test.db")
            >>> session.query("CREATE TABLE big_table (id INT, data String)")
            >>> for i in range(1000):
            ...     session.query(f"INSERT INTO big_table VALUES ({i}, 'data_{i}')")
            >>> streaming_result = session.send_query("SELECT * FROM big_table ORDER BY id")
            >>> for chunk in streaming_result:
            ...     print(f"Processing chunk: {len(chunk)} bytes")
            >>> with session.send_query("SELECT COUNT(*) FROM big_table") as stream:
            ...     for result in stream:
            ...         print(f"Count result: {result}")

        .. seealso::
            :meth:`query` - For non-streaming query execution
            :class:`chdb.state.sqlitelike.StreamingResult` - Streaming result iterator
        """
        if self._conn is None:
            raise RuntimeError("Session is closed")
        if fmt == "Debug":
            warnings.warn(
                """Debug format is not supported in Session.query
Please try use parameters in connection string instead:
Eg: conn = connect(f"db_path?verbose&log-level=test")"""
            )
            fmt = "CSV"
        return self._conn.send_query(sql, fmt)
|
chdb/state/__init__.py
ADDED