chdb 3.6.0__cp38-abi3-macosx_10_15_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of chdb might be problematic. Click here for more details.

chdb/dbapi/cursors.py ADDED
@@ -0,0 +1,351 @@
1
+ from . import err
2
+ import re
3
+
4
# Pattern consulted by :meth:`Cursor.executemany` to recognise a simple bulk
# ``INSERT``/``REPLACE ... VALUES (...)`` statement, which is the only shape
# executemany batches into multi-row statements (useful for loading large
# datasets).  Capture groups:
#   1. everything up to and including ``VALUES`` (plus trailing whitespace)
#   2. the parenthesised tuple of ``%s`` / ``%(name)s`` / ``?`` placeholders
#   3. an optional ``ON DUPLICATE ...`` tail
RE_INSERT_VALUES = re.compile(
    r"\s*((?:INSERT|REPLACE)\b.+\bVALUES?\s*)"
    r"(\(\s*(?:%s|%\(.+\)s|\?)\s*(?:,\s*(?:%s|%\(.+\)s|\?)\s*)*\))"
    r"(\s*(?:ON DUPLICATE.*)?);?\s*\Z",
    flags=re.IGNORECASE | re.DOTALL,
)
13
+
14
+
15
class Cursor(object):
    """
    This is the object you use to interact with the database.

    Do not create an instance of a Cursor yourself. Call
    connections.Connection.cursor().

    See `Cursor <https://www.python.org/dev/peps/pep-0249/#cursor-objects>`_ in
    the specification.
    """

    #: Max statement size which :meth:`executemany` generates.
    #:
    #: Default value is 1024000.
    max_stmt_length = 1024000

    def __init__(self, connection):
        """Wrap a new low-level cursor obtained from ``connection._conn``."""
        self.connection = connection
        self._cursor = connection._conn.cursor()
        self.description = None
        self.rowcount = -1
        self.arraysize = 1
        self.lastrowid = None
        self._executed = None

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        """Close the cursor on leaving the ``with`` block; exceptions propagate."""
        del exc_info
        self.close()

    def __iter__(self):
        """Iterate over rows by calling :meth:`fetchone` until it returns None."""
        return iter(self.fetchone, None)

    def callproc(self, procname, args=()):
        """Placeholder for the DB-API ``callproc`` method.

        No procedure is invoked: ``procname`` is ignored and nothing is sent
        to the underlying cursor.

        :param procname: name of the procedure (unused).
        :param args: sequence of parameters for the procedure.
        :return: ``args``, unchanged.
        """

        return args

    def close(self):
        """Close the underlying low-level cursor."""
        self._cursor.close()

    def _get_db(self):
        """Return the owning connection.

        :raise err.ProgrammingError: if ``self.connection`` is falsy.
        """
        if not self.connection:
            raise err.ProgrammingError("Cursor closed")
        return self.connection

    def _escape_args(self, args, conn):
        """Escape every parameter with ``conn.escape``.

        Sequences come back as a tuple, mappings as a dict with the same
        keys, and any other object as the single escaped value.
        """
        if isinstance(args, (tuple, list)):
            return tuple(conn.escape(arg) for arg in args)
        elif isinstance(args, dict):
            return {key: conn.escape(val) for (key, val) in args.items()}
        else:
            # If it's not a dictionary let's try escaping it anyway.
            # Worst case it will throw a Value error
            return conn.escape(args)

    @staticmethod
    def _iter_placeholders(query):
        """Yield ``(start, length, name)`` for each placeholder in ``query``.

        ``name`` is None for the positional forms ``?`` and ``%s`` and the
        key text for the named form ``%(name)s``.  Text inside single- or
        double-quoted literals is skipped; inside a literal a backslash
        escapes the following character, so ``'a\\\\'`` is recognised as a
        complete literal.
        """
        i = 0
        query_len = len(query)
        quote_char = None

        while i < query_len:
            char = query[i]
            if quote_char is not None:
                if char == '\\':
                    # Skip the escaped character so an escaped quote (or an
                    # escaped backslash) cannot be mistaken for the closing quote.
                    i += 2
                    continue
                if char == quote_char:
                    quote_char = None
            elif char in ("'", '"'):
                quote_char = char
            elif char == '?':
                yield i, 1, None
            elif char == '%':
                if query.startswith('s', i + 1):
                    yield i, 2, None
                    i += 2
                    continue
                if query.startswith('(', i + 1):
                    close = query.find(')', i + 2)
                    # Require a non-empty key immediately followed by "s".
                    if close > i + 2 and query.startswith('s', close + 1):
                        yield i, close + 2 - i, query[i + 2:close]
                        i = close + 2
                        continue
            i += 1

    def _format_query(self, query, args, conn):
        """Format query with arguments supporting ?, %s and %(name)s placeholders.

        * ``args`` is None, or the query contains neither ``?`` nor ``%``:
          the query is returned untouched.
        * ``args`` is a dict: each ``%(name)s`` whose name is a key of
          ``args`` is replaced by the escaped value; positional placeholders
          and unknown names are left as they are.
        * otherwise: ``?`` and ``%s`` are replaced left to right by the
          escaped arguments; placeholders beyond the number of arguments are
          left as they are.

        Placeholders inside quoted literals are never replaced.
        """
        if args is None or ('?' not in query and '%' not in query):
            return query

        escaped_args = self._escape_args(args, conn)
        named = isinstance(escaped_args, dict)
        if not named and not isinstance(escaped_args, (tuple, list)):
            escaped_args = (escaped_args,)

        pieces = []
        last_pos = 0
        arg_index = 0
        for start, length, name in self._iter_placeholders(query):
            if named:
                if name is None or name not in escaped_args:
                    continue
                value = escaped_args[name]
            else:
                if name is not None or arg_index >= len(escaped_args):
                    continue
                value = escaped_args[arg_index]
                arg_index += 1
            pieces.append(query[last_pos:start])
            pieces.append(str(value))
            last_pos = start + length
        pieces.append(query[last_pos:])

        return ''.join(pieces)

    def mogrify(self, query, args=None):
        """
        Returns the exact string that is sent to the database by calling the
        execute() method.

        This method follows the extension to the DB API 2.0 followed by Psycopg.
        """
        conn = self._get_db()
        return self._format_query(query, args, conn)

    def execute(self, query, args=None):
        """Execute a query

        :param str query: Query to execute.

        :param args: parameters used with query. (optional)
        :type args: tuple, list or dict

        :return: Number of rows in the result, or -1 when the underlying
            cursor exposes no result columns.
        :rtype: int

        If args is a list or tuple, ? can be used as a placeholder in the query.
        If args is a dict, %(name)s can be used as a placeholder in the query.
        Also supports %s placeholder for backward compatibility.
        """
        query = self._format_query(query, args, self.connection)
        self._cursor.execute(query)

        # Get description from column names and types
        raw = self._cursor
        if hasattr(raw, "_column_names") and raw._column_names:
            self.description = [
                (name, type_info, None, None, None, None, None)
                for name, type_info in zip(raw._column_names, raw._column_types)
            ]
            self.rowcount = len(raw._current_table) if raw._current_table else -1
        else:
            self.description = None
            self.rowcount = -1

        self._executed = query
        return self.rowcount

    def executemany(self, query, args):
        # type: (str, list) -> int
        """Run several data against one query

        :param query: query to execute on server
        :param args: Sequence of sequences or mappings. It is used as parameter.
        :return: Sum of the values returned by the underlying execute() calls
            (0 when ``args`` is empty).

        This method improves performance on multiple-row INSERT and
        REPLACE. Otherwise, it is equivalent to looping over args with
        execute().
        """
        if not args:
            return 0

        m = RE_INSERT_VALUES.match(query)
        if m:
            # "% ()" collapses any doubled "%%" in the statement prefix.
            q_prefix = m.group(1) % ()
            q_values = m.group(2).rstrip()
            q_postfix = m.group(3) or ""
            assert q_values[0] == "(" and q_values[-1] == ")"
            return self._do_execute_many(
                q_prefix,
                q_values,
                q_postfix,
                args,
                self.max_stmt_length,
                self._get_db().encoding,
            )

        self.rowcount = sum(self.execute(query, arg) for arg in args)
        return self.rowcount

    def _find_placeholder_positions(self, query):
        """Return ``[(position, length), ...]`` for each ``?`` / ``%s``
        placeholder found outside quoted literals, in order of appearance."""
        return [
            (start, length)
            for start, length, name in self._iter_placeholders(query)
            if name is None
        ]

    def _do_execute_many(
        self, prefix, values, postfix, args, max_stmt_length, encoding
    ):
        """Send ``args`` as batched multi-row statements.

        Each row is rendered from the ``values`` template and rows are joined
        with commas between ``prefix`` and ``postfix``.  A new statement is
        started whenever adding the next row would exceed ``max_stmt_length``
        bytes.  Dict rows fill ``%(name)s`` placeholders; other rows fill
        ``?`` / ``%s`` positionally.

        :return: sum of the values returned by :meth:`execute` for each batch.
        """
        conn = self._get_db()
        if isinstance(prefix, str):
            prefix = prefix.encode(encoding)
        if isinstance(postfix, str):
            postfix = postfix.encode(encoding)

        # Split the template once around its positional placeholders so each
        # row only has to interleave its escaped values.
        placeholder_positions = self._find_placeholder_positions(values)
        template_parts = []
        last_pos = 0
        for pos, length in placeholder_positions:
            template_parts.append(values[last_pos:pos])
            last_pos = pos + length
        template_parts.append(values[last_pos:])

        def render_row(arg):
            if isinstance(arg, dict):
                row = self._format_query(values, arg, conn)
            else:
                escaped = self._escape_args(arg, conn)
                if not isinstance(escaped, (tuple, list)):
                    escaped = (escaped,)
                if len(escaped) != len(placeholder_positions):
                    # Argument count does not match the template: the
                    # template is emitted as-is (pre-existing behaviour).
                    row = values
                else:
                    chunks = [template_parts[0]]
                    for val, tail in zip(escaped, template_parts[1:]):
                        chunks.append(str(val))
                        chunks.append(tail)
                    row = "".join(chunks)
            if isinstance(row, str):
                row = row.encode(encoding, "surrogateescape")
            return row

        separator = ",".encode(encoding)
        rows = 0
        sql = None
        for arg in args:
            v = render_row(arg)
            if sql is None:
                sql = prefix + v
            elif len(sql) + len(v) + len(postfix) + 2 > max_stmt_length:  # +2 for comma
                rows += self.execute(sql + postfix)
                sql = prefix + v
            else:
                sql += separator
                sql += v
        if sql is not None:
            rows += self.execute(sql + postfix)
        self.rowcount = rows
        return rows

    def _check_executed(self):
        """Raise ``err.ProgrammingError`` unless a statement has been executed."""
        if not self._executed:
            raise err.ProgrammingError("execute() first")

    def fetchone(self):
        """Fetch the next row"""
        self._check_executed()
        return self._cursor.fetchone()

    def fetchmany(self, size=None):
        """Fetch several rows

        :param size: number of rows to request; defaults to ``self.arraysize``.
        """
        self._check_executed()
        if size is None:
            size = self.arraysize
        return self._cursor.fetchmany(size)

    def fetchall(self):
        """Fetch all the rows"""
        self._check_executed()
        return self._cursor.fetchall()

    def nextset(self):
        """Get the next query set"""
        # Not support for now
        return None

    def setinputsizes(self, *args):
        """Does nothing, required by DB API."""

    def setoutputsizes(self, *args):
        """Does nothing, required by DB API."""
chdb/dbapi/err.py ADDED
@@ -0,0 +1,61 @@
1
class StandardError(Exception):
    """Root of every exception raised for chdb operations."""


class Warning(StandardError):
    """Raised for important warnings, for example data truncated
    during an insert."""


class Error(StandardError):
    """Common base class of all error exceptions (everything except
    Warning)."""


class InterfaceError(Error):
    """Raised when the problem lies in the database interface rather
    than in the database itself."""


class DatabaseError(Error):
    """Raised when the problem is related to the database."""


class DataError(DatabaseError):
    """Raised for problems with the processed data, such as division
    by zero or a numeric value out of range."""


class OperationalError(DatabaseError):
    """Raised for errors in the database's operation that are not
    necessarily under the programmer's control: an unexpected
    disconnect, a data source name that is not found, a transaction
    that could not be processed, a memory allocation error during
    processing, and so on."""


class IntegrityError(DatabaseError):
    """Raised when relational integrity is affected, for example a
    failed foreign key check or a duplicate key."""


class InternalError(DatabaseError):
    """Raised when the database hits an internal error, for example
    the cursor is no longer valid or the transaction is out of
    sync."""


class ProgrammingError(DatabaseError):
    """Raised for programming errors: table not found or already
    existing, a syntax error in the SQL statement, a wrong number of
    parameters, and so on."""


class NotSupportedError(DatabaseError):
    """Raised when a method or database API is used that the database
    does not support, for example requesting .rollback() on a
    connection that does not support transactions or has them turned
    off."""
chdb/dbapi/times.py ADDED
@@ -0,0 +1,20 @@
1
+ from time import localtime
2
+ from datetime import date, datetime, time, timedelta
3
+
4
+
5
# Type-object names bound directly to the matching ``datetime`` classes.
Date, Time, TimeDelta, Timestamp = date, time, timedelta, datetime
9
+
10
+
11
def DateFromTicks(ticks):
    """Build a ``date`` from ``ticks`` (seconds since the epoch) using the
    year, month and day of the corresponding local time."""
    local = localtime(ticks)
    return date(local.tm_year, local.tm_mon, local.tm_mday)
13
+
14
+
15
def TimeFromTicks(ticks):
    """Build a ``time`` from ``ticks`` (seconds since the epoch) using the
    hour, minute and second of the corresponding local time."""
    local = localtime(ticks)
    return time(local.tm_hour, local.tm_min, local.tm_sec)
17
+
18
+
19
def TimestampFromTicks(ticks):
    """Build a ``datetime`` from ``ticks`` (seconds since the epoch) using the
    first six fields (year through second) of the corresponding local time."""
    local = localtime(ticks)
    return datetime(
        local.tm_year,
        local.tm_mon,
        local.tm_mday,
        local.tm_hour,
        local.tm_min,
        local.tm_sec,
    )
chdb/rwabc.py ADDED
@@ -0,0 +1,65 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import List, Any
3
+
4
+
5
class PyReader(ABC):
    """Abstract base for readers: stores the source object and requires
    subclasses to implement :meth:`read`."""

    def __init__(self, data: Any):
        """
        Store the object the reader will pull rows from.

        Args:
            data (Any): Source data; its format and type are not strictly defined.
        """
        self.data = data

    @abstractmethod
    def read(self, col_names: List[str], count: int) -> List[Any]:
        """
        Read up to ``count`` rows from the requested columns.

        Args:
            col_names (List[str]): Names of the columns to read.
            count (int): Maximum number of rows to read.

        Returns:
            List[Any]: One sequence of values per requested column.
        """
29
+
30
+
31
class PyWriter(ABC):
    """Abstract base for writers: keeps the column metadata, the initial
    data and a list of blocks; subclasses implement :meth:`write` and
    :meth:`finalize`."""

    def __init__(self, col_names: List[str], types: List[type], data: Any):
        """
        Record the column names, their types and the initial data, and start
        with an empty list of blocks.

        Args:
            col_names (List[str]): Names of the columns.
            types (List[type]): Type of each column, in the same order.
            data (Any): Initial data for the writer; its format and type are not strictly defined.
        """
        self.blocks = []
        self.data = data
        self.types = types
        self.col_names = col_names

    @abstractmethod
    def write(self, col_names: List[str], columns: List[List[Any]]) -> None:
        """
        Store columns of data into blocks. Subclasses must implement this.

        Args:
            col_names (List[str]): Names of the columns being written.
            columns (List[List[Any]]): Column data, one list per column.
        """

    @abstractmethod
    def finalize(self) -> bytes:
        """
        Assemble the blocks into the final output. Subclasses must implement this.

        Returns:
            bytes: The final serialized data.
        """
@@ -0,0 +1,3 @@
1
+ from .state import Session
2
+
3
+ __all__ = ["Session"]
chdb/session/state.py ADDED
@@ -0,0 +1,124 @@
1
+ import warnings
2
+
3
+ import chdb
4
+ from ..state import sqlitelike as chdb_stateful
5
+ from ..state.sqlitelike import StreamingResult
6
+
7
# Module-wide record of the currently active Session and the path it was
# opened with; both are None while no session is open.
g_session = g_session_path = None
9
+
10
+
11
class Session:
    """
    Session will keep the state of query.
    If path is None, an in-memory database (":memory:") is used.
    You can also pass in a path to create a database at that path where will keep your data.

    You can also use a connection string to pass in the path and other parameters.
    Examples:
        - ":memory:" (for in-memory database)
        - "test.db" (for relative path)
        - "file:test.db" (same as above)
        - "/path/to/test.db" (for absolute path)
        - "file:/path/to/test.db" (same as above)
        - "file:test.db?param1=value1&param2=value2" (for relative path with query params)
        - "file::memory:?verbose&log-level=test" (for in-memory database with query params)
        - "///path/to/test.db?param1=value1&param2=value2" (for absolute path)

    Connection string args handling:
        Connection string can contain query params like "file:test.db?param1=value1&param2=value2"
        "param1=value1" will be passed to ClickHouse engine as start up args.

        For more details, see `clickhouse local --help --verbose`
        Some special args handling:
        - "mode=ro" would be "--readonly=1" for clickhouse (read-only mode)

    Important:
        - There can be only one session at a time. If you want to create a new session, you need to close the existing one.
        - Creating a new session will close the existing one.
    """

    def __init__(self, path=None):
        """Open a session on ``path``, closing (with a warning) any session
        that is still registered as active in this module."""
        # Set first so close()/__del__ are safe even if construction fails.
        self._conn = None
        global g_session, g_session_path
        if g_session is not None:
            warnings.warn(
                "There is already an active session. Creating a new session will close the existing one. "
                "It is recommended to close the existing session before creating a new one. "
                f"Closing the existing session {g_session_path}"
            )
            g_session.close()
            g_session_path = None
        if path is None:
            self._path = ":memory:"
        else:
            self._path = path
        if chdb.g_udf_path != "":
            self._udf_path = chdb.g_udf_path
            # add udf_path to conn_str here.
            # - the `user_scripts_path` will be the value of `udf_path`
            # - the `user_defined_executable_functions_config` will be `user_scripts_path/*.xml`
            # Both of them will be added to the conn_str in the Connection class
            if "?" in self._path:
                self._conn_str = f"{self._path}&udf_path={self._udf_path}"
            else:
                self._conn_str = f"{self._path}?udf_path={self._udf_path}"
        else:
            self._udf_path = ""
            self._conn_str = f"{self._path}"
        self._conn = chdb_stateful.Connection(self._conn_str)
        g_session = self
        g_session_path = self._path

    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Close the connection (if still open) and, when this session is
        the one registered as active, clear the module-level registration.

        Calling it more than once is harmless.
        """
        global g_session, g_session_path
        # getattr: __del__ may run on an instance whose __init__ never ran.
        conn = getattr(self, "_conn", None)
        if conn is not None:
            conn.close()
            self._conn = None
        # Only unregister ourselves: a stale session being closed or garbage
        # collected must not erase the record of a newer active session.
        if g_session is self:
            g_session = None
            g_session_path = None

    def cleanup(self):
        """Best-effort :meth:`close`: errors raised while closing are ignored."""
        try:
            self.close()
        except Exception:  # deliberately ignored; cleanup must not raise
            pass

    def query(self, sql, fmt="CSV", udf_path=""):
        """
        Execute a query.

        ``fmt="Debug"`` is not supported here: a warning is issued and "CSV"
        is used instead.  ``udf_path`` is accepted but not used by this method.
        """
        if fmt == "Debug":
            warnings.warn(
                """Debug format is not supported in Session.query
Please try use parameters in connection string instead:
Eg: conn = connect(f"db_path?verbose&log-level=test")"""
            )
            fmt = "CSV"
        return self._conn.query(sql, fmt)

    # alias sql = query
    sql = query

    def send_query(self, sql, fmt="CSV") -> StreamingResult:
        """
        Execute a streaming query.

        ``fmt="Debug"`` is not supported here: a warning is issued and "CSV"
        is used instead.
        """
        if fmt == "Debug":
            warnings.warn(
                """Debug format is not supported in Session.send_query
Please try use parameters in connection string instead:
Eg: conn = connect(f"db_path?verbose&log-level=test")"""
            )
            fmt = "CSV"
        return self._conn.send_query(sql, fmt)
chdb/state/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .sqlitelike import connect
2
+
3
+ __all__ = ["connect"]