chdb 3.7.1__cp38-abi3-musllinux_1_2_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of chdb might be problematic. Click here for more details.

chdb/session/state.py ADDED
@@ -0,0 +1,287 @@
1
+ import warnings
2
+
3
+ import chdb
4
+ from ..state import sqlitelike as chdb_stateful
5
+ from ..state.sqlitelike import StreamingResult
6
+
7
# Module-level registry of the single active session. Session.__init__ and
# Session.close() keep these two names in sync.
g_session = None  # the currently active Session instance, or None
g_session_path = None  # the path that session was created with, or None


class Session:
    """Stateful query session backed by a single chdb connection.

    A session keeps state between queries. If ``path`` is None the session
    is opened with ``":memory:"``; otherwise ``path`` is used as given, so
    you can pass a location where your data will be kept.

    ``path`` may also be a connection string carrying extra parameters. The
    string is handed to ``chdb.state.sqlitelike.Connection``, which is
    responsible for interpreting it. Examples:
        - ":memory:" (for in-memory database)
        - "test.db" (for relative path)
        - "file:test.db" (same as above)
        - "/path/to/test.db" (for absolute path)
        - "file:/path/to/test.db" (same as above)
        - "file:test.db?param1=value1&param2=value2" (for relative path with query params)
        - "file::memory:?verbose&log-level=test" (for in-memory database with query params)
        - "///path/to/test.db?param1=value1&param2=value2" (for absolute path)

    Connection string args handling:
        Connection string can contain query params like
        "file:test.db?param1=value1&param2=value2".
        "param1=value1" will be passed to ClickHouse engine as start up args.

        For more details, see `clickhouse local --help --verbose`
        Some special args handling:
        - "mode=ro" would be "--readonly=1" for clickhouse (read-only mode)

    If ``chdb.g_udf_path`` is non-empty when the session is created, it is
    appended to the connection string as a ``udf_path`` query parameter.

    Important:
        - There can be only one session at a time. If you want to create a
          new session, you need to close the existing one.
        - Creating a new session will close the existing one (with a warning).
    """

    def __init__(self, path=None):
        """Open a session, closing any session that is currently active.

        Args:
            path: Database path or connection string. ``None`` selects
                ``":memory:"``.
        """
        global g_session, g_session_path
        # Assigned first so close() and __del__ stay safe even if the
        # constructor fails further down.
        self._conn = None
        if g_session is not None:
            warnings.warn(
                "There is already an active session. Creating a new session will close the existing one. "
                "It is recommended to close the existing session before creating a new one. "
                f"Closing the existing session {g_session_path}"
            )
            g_session.close()
            g_session = None
            g_session_path = None

        self._path = ":memory:" if path is None else path

        if chdb.g_udf_path != "":
            self._udf_path = chdb.g_udf_path
            # add udf_path to conn_str here.
            # - the `user_scripts_path` will be the value of `udf_path`
            # - the `user_defined_executable_functions_config` will be `user_scripts_path/*.xml`
            # Both of them will be added to the conn_str in the Connection class
            separator = "&" if "?" in self._path else "?"
            self._conn_str = f"{self._path}{separator}udf_path={self._udf_path}"
        else:
            self._udf_path = ""
            self._conn_str = f"{self._path}"

        self._conn = chdb_stateful.Connection(self._conn_str)
        # Register only after the connection was created successfully.
        g_session = self
        g_session_path = self._path

    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Close the session and release its connection.

        Closes the underlying connection (if still open). The module-level
        active-session registry is cleared only when this session is the one
        registered there, so closing or garbage-collecting a session that was
        already replaced cannot make a newer, still active session untracked.

        Calling ``close()`` on an already closed session is a no-op.

        .. note::
            This method is called automatically when the session is used as
            a context manager or when the session object is destroyed.

        .. warning::
            Using the session for queries after ``close()`` raises
            ``RuntimeError``.

        Examples:
            >>> session = Session("test.db")
            >>> session.query("SELECT 1")
            >>> session.close()  # Explicitly close the session
        """
        global g_session, g_session_path
        if self._conn is not None:
            self._conn.close()
            self._conn = None
        # Ownership check: a stale session must not reset state that now
        # belongs to a different session.
        if g_session is self:
            g_session = None
            g_session_path = None

    def cleanup(self):
        """Close the session, suppressing errors raised while closing.

        Useful in error-handling paths and ``finally`` blocks where a failure
        during close must not mask the original problem.

        Only ``Exception`` subclasses are suppressed; ``KeyboardInterrupt``
        and ``SystemExit`` still propagate so that interrupting or exiting
        the program is never swallowed.

        .. seealso::
            :meth:`close` - For explicit session closing with error propagation

        Examples:
            >>> session = Session("test.db")
            >>> try:
            ...     session.query("INVALID SQL")
            ... finally:
            ...     session.cleanup()  # Safe cleanup regardless of errors
        """
        try:
            self.close()
        except Exception:  # deliberate best-effort cleanup
            pass

    def _require_connection(self):
        """Return the open connection or raise RuntimeError if closed."""
        if self._conn is None:
            raise RuntimeError(
                "Session is closed; create a new Session to run queries"
            )
        return self._conn

    @staticmethod
    def _normalize_format(fmt, method_name):
        """Map the unsupported "Debug" format to "CSV", warning the caller.

        Any other format is returned unchanged. ``method_name`` is the public
        method named in the warning text.
        """
        if fmt != "Debug":
            return fmt
        warnings.warn(
            f"Debug format is not supported in Session.{method_name}\n"
            "Please try use parameters in connection string instead:\n"
            'Eg: conn = connect(f"db_path?verbose&log-level=test")'
        )
        return "CSV"

    def query(self, sql, fmt="CSV", udf_path=""):
        """Execute a SQL query and return the results.

        The query is run on the session's connection, so state created by
        earlier queries (tables, inserted rows, ...) is visible.

        Args:
            sql (str): SQL query string to execute
            fmt (str, optional): Output format for results. Defaults to "CSV".
                Formats named by the project documentation include:

                - "CSV" - Comma-separated values
                - "JSON" - JSON format
                - "TabSeparated" - Tab-separated values
                - "Pretty" - Pretty-printed table format
                - "JSONCompact" - Compact JSON format
                - "Arrow" - Apache Arrow format
                - "Parquet" - Parquet format

            udf_path (str, optional): Accepted for interface compatibility
                and currently ignored by this method. The UDF path in effect
                is the one added to the connection string when the session
                was created.

        Returns:
            Whatever the underlying connection's ``query()`` returns for the
            requested format.

        Raises:
            RuntimeError: If the session has been closed.

            Errors raised by the underlying connection while executing the
            query propagate unchanged.

        .. note::
            The "Debug" format is not supported and will be automatically
            converted to "CSV" with a warning. For debugging, use connection
            string parameters instead.

        .. warning::
            For large result sets, consider :meth:`send_query`, which returns
            a streaming result instead of a single result object.

        Examples:
            >>> session = Session("test.db")
            >>> session.query("CREATE TABLE test (id INT, name String)")
            >>> session.query("INSERT INTO test VALUES (1, 'Alice'), (2, 'Bob')")
            >>> result = session.query("SELECT * FROM test ORDER BY id")
            >>> print(result)
            >>> result = session.query("SELECT 1 as number", fmt="JSON")

        .. seealso::
            :meth:`send_query` - For streaming query execution
            :attr:`sql` - Alias for this method
        """
        conn = self._require_connection()
        return conn.query(sql, self._normalize_format(fmt, "query"))

    # alias sql = query
    sql = query

    def send_query(self, sql, fmt="CSV") -> "StreamingResult":
        """Execute a SQL query and return a streaming result.

        The query is sent through the session's connection and the object
        returned by the connection's ``send_query()`` is handed back to the
        caller, who iterates over it to consume the results.

        Args:
            sql (str): SQL query string to execute
            fmt (str, optional): Output format for results. Defaults to "CSV".
                Formats named by the project documentation include:

                - "CSV" - Comma-separated values
                - "JSON" - JSON format
                - "TabSeparated" - Tab-separated values
                - "JSONCompact" - Compact JSON format
                - "Arrow" - Apache Arrow format
                - "Parquet" - Parquet format

        Returns:
            StreamingResult: The streaming result produced by the underlying
            connection.

        Raises:
            RuntimeError: If the session has been closed.

            Errors raised by the underlying connection propagate unchanged.

        .. note::
            The "Debug" format is not supported and will be automatically
            converted to "CSV" with a warning. For debugging, use connection
            string parameters instead.

        .. warning::
            The returned StreamingResult object should be consumed promptly
            or stored appropriately, as it is tied to this session's
            connection.

        Examples:
            >>> session = Session("test.db")
            >>> session.query("CREATE TABLE big_table (id INT, data String)")
            >>> streaming_result = session.send_query("SELECT * FROM big_table ORDER BY id")
            >>> for chunk in streaming_result:
            ...     print(f"Processing chunk: {len(chunk)} bytes")

            >>> # Using with context manager
            >>> with session.send_query("SELECT COUNT(*) FROM big_table") as stream:
            ...     for result in stream:
            ...         print(f"Count result: {result}")

        .. seealso::
            :meth:`query` - For non-streaming query execution
            :class:`chdb.state.sqlitelike.StreamingResult` - Streaming result iterator
        """
        conn = self._require_connection()
        return conn.send_query(sql, self._normalize_format(fmt, "send_query"))
chdb/state/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .sqlitelike import connect
2
+
3
+ __all__ = ["connect"]  # explicit public API: `from chdb.state import *` exports only `connect`