chdb 3.3.0__cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of chdb might be problematic. Click here for more details.

@@ -0,0 +1,100 @@
1
+ from . import err
2
+ from .cursors import Cursor
3
+ from . import converters
4
+ from ..state import sqlitelike as chdb_stateful
5
+
6
# Module-level switches. Nothing in the visible connection code reads them.
DEBUG = VERBOSE = False
8
+
9
+
10
class Connection(object):
    """
    Representation of a connection with chdb.

    Wraps a ``chdb_stateful.Connection``. ``path=None`` opens the
    ``":memory:"`` connection string; any other value is passed to the
    engine as ``file:{path}``.
    """

    def __init__(self, path=None):
        """Open the underlying connection and verify it with ``SELECT 1``.

        :param path: storage path, or ``None`` for an in-memory session.
        :raises Exception: whatever the engine raises if the probe query
            fails; the underlying connection is closed before re-raising.
        """
        self._closed = False
        self.encoding = "utf8"
        self._affected_rows = 0
        self._resp = None

        # Initialize sqlitelike connection
        connection_string = ":memory:" if path is None else f"file:{path}"
        self._conn = chdb_stateful.Connection(connection_string)

        # Test connection with a simple query. The probe cursor is always
        # closed, and a failed probe releases the connection instead of
        # leaking it behind a half-constructed object.
        try:
            cursor = self._conn.cursor()
            try:
                cursor.execute("SELECT 1")
            finally:
                cursor.close()
        except Exception:
            self._closed = True
            self._conn.close()
            raise

    def close(self):
        """Close the underlying chdb connection.

        :raises err.Error: if the connection was already closed.
        """
        if self._closed:
            raise err.Error("Already closed")
        self._closed = True
        self._conn.close()

    @property
    def open(self):
        """Return True if the connection is open"""
        return not self._closed

    def commit(self):
        """Commit changes to stable storage."""
        # No-op for ClickHouse

    def rollback(self):
        """Roll back the current transaction."""
        # No-op for ClickHouse

    def cursor(self, cursor=None):
        """Create a new cursor to execute queries with.

        :param cursor: optional cursor class (or factory) called with this
            connection; defaults to :class:`Cursor`.
        :raises err.Error: if the connection is closed.
        """
        if self._closed:
            raise err.Error("Connection closed")
        # Honor the requested cursor class; previously it was ignored.
        cursor_class = cursor if cursor else Cursor
        return cursor_class(self)

    def query(self, sql, fmt="CSV"):
        """Execute a query and return the raw result.

        :param sql: query text as ``str``, or bytes in ``self.encoding``.
        :param fmt: output format name handed to the engine.
        :raises err.InterfaceError: if the connection is closed or the
            engine (or decoding the bytes) fails; the original exception is
            chained as ``__cause__``.
        """
        if self._closed:
            raise err.InterfaceError("Connection closed")

        try:
            # The engine is given text; only bytes input needs decoding.
            if isinstance(sql, (bytes, bytearray)):
                sql = sql.decode(self.encoding)
            result = self._conn.query(sql, fmt)
        except Exception as error:
            raise err.InterfaceError(f"Query error: {error}") from error

        self._resp = result
        return result

    def escape(self, obj, mapping=None):
        """Escape whatever value you pass to it."""
        return converters.escape_item(obj, mapping)

    def escape_string(self, s):
        """Escape *s* via ``converters.escape_string`` (no quotes added)."""
        return converters.escape_string(s)

    def _quote_bytes(self, s):
        """Return *s* as a quoted literal via ``converters.escape_bytes``."""
        return converters.escape_bytes(s)

    def __enter__(self):
        """Context manager that returns a Cursor"""
        return self.cursor()

    def __exit__(self, exc, value, traceback):
        """On successful exit, commit. On exception, rollback.

        The connection is then closed, unless the body already closed it
        (closing twice would raise and mask the body's own exception).
        """
        if exc:
            self.rollback()
        else:
            self.commit()
        if not self._closed:
            self.close()

    @property
    def resp(self):
        """Return the last query response"""
        return self._resp
@@ -0,0 +1,31 @@
1
# Integer codes for column field types.
# NOTE(review): the names and values look like the MySQL protocol field-type
# codes -- confirm against the module that consumes them.

# Codes 0-16
DECIMAL, TINY, SHORT, LONG = 0, 1, 2, 3
FLOAT, DOUBLE, NULL, TIMESTAMP = 4, 5, 6, 7
LONGLONG, INT24, DATE, TIME = 8, 9, 10, 11
DATETIME, YEAR, NEWDATE, VARCHAR = 12, 13, 14, 15
BIT = 16

# Codes 245-255
JSON, NEWDECIMAL, ENUM, SET = 245, 246, 247, 248
TINY_BLOB, MEDIUM_BLOB, LONG_BLOB, BLOB = 249, 250, 251, 252
VAR_STRING, STRING, GEOMETRY = 253, 254, 255

# Aliases
CHAR = TINY
INTERVAL = ENUM
File without changes
@@ -0,0 +1,293 @@
1
+ import datetime
2
+ from decimal import Decimal
3
+ from .err import DataError
4
+ import re
5
+ import time
6
+
7
+
8
def escape_item(val, mapping=None):
    """Encode *val* with the encoder registered for its exact type.

    :param val: value to encode.
    :param mapping: ``{type: encoder}`` table; the module-level ``encoders``
        table is used when omitted.
    :return: whatever the selected encoder returns.
    :raises TypeError: if no encoder matches and the table has no ``str``
        entry to fall back on.
    """
    table = encoders if mapping is None else mapping
    handler = table.get(type(val))

    if not handler:
        # No encoder for this exact type: fall back to the ``str`` encoder.
        try:
            handler = table[str]
        except KeyError:
            raise TypeError("no default type converter defined")

    return handler(val, table)
22
+
23
+
24
def escape_dict(val, mapping=None):
    """Return a new dict with the same keys and every value escaped.

    Keys are copied unchanged; each value goes through ``escape_item``.
    """
    return {key: escape_item(item, mapping) for key, item in val.items()}
30
+
31
+
32
def escape_sequence(val, mapping=None):
    """Escape each element of *val* and return them as ``(a,b,c)``."""
    escaped_items = [escape_item(element, mapping) for element in val]
    return "(%s)" % ",".join(escaped_items)
38
+
39
+
40
def escape_set(val, mapping=None):
    """Escape each element of *val* and join them with commas (no brackets)."""
    escaped_items = []
    for element in val:
        escaped_items.append(escape_item(element, mapping))
    return ','.join(escaped_items)
42
+
43
+
44
def escape_bool(value, mapping=None):
    """Render a boolean as the integer text ``"1"`` or ``"0"``."""
    as_int = int(value)
    return str(as_int)
46
+
47
+
48
def escape_object(value, mapping=None):
    """Return ``str(value)`` unquoted; *mapping* is accepted but unused."""
    text = str(value)
    return text
50
+
51
+
52
def escape_int(value, mapping=None):
    """Return the decimal text of *value*; *mapping* is accepted but unused."""
    digits = str(value)
    return digits
54
+
55
+
56
def escape_float(value, mapping=None):
    """Render a float as literal text that round-trips exactly.

    ``repr`` of a Python float is the shortest string that parses back to
    the same value. The previous ``'%.15g'`` formatting kept only 15
    significant digits and silently changed values such as ``0.1 + 0.2``.

    :param value: number to render; coerced with ``float()`` first.
    :param mapping: accepted for encoder-signature compatibility; unused.
    :return: text such as ``"1.5"`` or ``"1e+300"``.
    """
    return repr(float(value))
58
+
59
+
60
# Table for ``str.translate``: code points 0-127 map to themselves, except the
# two characters that are doubled inside a quoted literal. Code points >= 128
# fall outside the table and are left unchanged by ``translate``.
_escape_table = list(map(chr, range(128)))
_escape_table[ord("'")] = "''"
_escape_table[ord("\\")] = "\\\\"


def _escape_unicode(value, mapping=None):
    """Return *value* with single quotes and backslashes doubled.

    No surrounding quotes are added; callers wrap the result themselves.
    Value should be unicode (``str``).
    """
    escaped = value.translate(_escape_table)
    return escaped


escape_string = _escape_unicode
74
+
75
# On Python ~3.5, bytes.decode('ascii', 'surrogateescape') is slow.
# (fixed in Python 3.6, http://bugs.python.org/issue24870)
# Workaround: decode as latin1, then translate 0x80-0xFF into the lone
# surrogates U+DC80-U+DCFF. Extending the escape table this way escapes the
# special characters and applies surrogateescape in a single pass.
_escape_bytes_table = _escape_table + list(map(chr, range(0xdc80, 0xdd00)))


def escape_bytes(value, mapping=None):
    """Return *value* (bytes) as a single-quoted, escaped text literal."""
    text = value.decode('latin1')
    return "'" + text.translate(_escape_bytes_table) + "'"
84
+
85
+
86
def escape_unicode(value, mapping=None):
    """Return *value* escaped and wrapped in single quotes."""
    inner = _escape_unicode(value)
    return f"'{inner}'"
88
+
89
+
90
def escape_str(value, mapping=None):
    """Convert *value* with ``str()``, escape it, and wrap it in single quotes."""
    inner = escape_string(str(value), mapping)
    return f"'{inner}'"
92
+
93
+
94
def escape_None(value, mapping=None):
    """Return the literal ``NULL``; both arguments are ignored."""
    null_literal = 'NULL'
    return null_literal
96
+
97
+
98
def escape_timedelta(obj, mapping=None):
    """Render a ``datetime.timedelta`` as a quoted ``[-]HH:MM:SS[.ffffff]``.

    Hours are not wrapped at 24, so a 25-hour delta renders as ``'25:06:17'``.
    The fractional part appears only when microseconds are non-zero.

    Negative deltas are rendered from their absolute value with a leading
    ``-``. ``timedelta`` stores negatives as negative days plus a positive
    remainder, so formatting the raw fields (as before) turned ``-25:06:17``
    into ``'-26:53:43'``.

    :param obj: the timedelta to render.
    :param mapping: accepted for encoder-signature compatibility; unused.
    """
    # Integer arithmetic keeps this exact and avoids negating the timedelta.
    total_us = (obj.days * 86400 + obj.seconds) * 1000000 + obj.microseconds
    sign = "-" if total_us < 0 else ""
    total_seconds, microseconds = divmod(abs(total_us), 1000000)
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)

    if microseconds:
        return "'{}{:02d}:{:02d}:{:02d}.{:06d}'".format(
            sign, hours, minutes, seconds, microseconds
        )
    return "'{}{:02d}:{:02d}:{:02d}'".format(sign, hours, minutes, seconds)
107
+
108
+
109
def escape_time(obj, mapping=None):
    """Quote a ``datetime.time`` in ISO format, always with microseconds."""
    iso_text = obj.isoformat(timespec='microseconds')
    return f"'{iso_text}'"
111
+
112
+
113
def escape_datetime(obj, mapping=None):
    """Quote a ``datetime.datetime`` as ``'YYYY-MM-DD HH:MM:SS.ffffff'``.

    A space separates date and time, and microseconds are always present.
    """
    iso_text = obj.isoformat(sep=' ', timespec='microseconds')
    return f"'{iso_text}'"
120
+
121
+
122
def escape_date(obj, mapping=None):
    """Quote the ISO-format text of *obj* (e.g. ``'2020-01-02'`` for a date)."""
    iso_text = obj.isoformat()
    return f"'{iso_text}'"
124
+
125
+
126
def escape_struct_time(obj, mapping=None):
    """Escape a ``time.struct_time`` via ``escape_datetime``.

    Only the first six fields (year through second) are used.
    """
    leading_fields = obj[:6]
    as_datetime = datetime.datetime(*leading_fields)
    return escape_datetime(as_datetime)
128
+
129
+
130
def _convert_second_fraction(s):
    """Turn a fractional-seconds digit string into integer microseconds.

    Empty or ``None`` input yields 0. Otherwise the digits are right-padded
    with zeros to six places and truncated to six, so ``"5"`` gives 500000.
    """
    if s:
        padded = s.ljust(6, '0')
        return int(padded[:6])
    return 0
136
+
137
+
138
def convert_datetime(obj):
    """Returns a DATETIME or TIMESTAMP column value as a datetime object:

      >>> convert_datetime('2007-02-25 23:06:20')
      datetime.datetime(2007, 2, 25, 23, 6, 20)
      >>> convert_datetime('2007-02-25 23:06:20.5')
      datetime.datetime(2007, 2, 25, 23, 6, 20, 500000)

    Bytes input is decoded as ASCII first. An optional fractional-seconds
    part (1-6 digits) is accepted; previously such values were rejected.

    :raises DataError: if the value matches neither format. The message
        reports the failure for the plain ``HH:MM:SS`` format, and the
        original exception is chained as ``__cause__``.
    """
    if isinstance(obj, (bytes, bytearray)):
        obj = obj.decode('ascii')

    first_error = None
    for fmt in ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M:%S.%f'):
        try:
            return datetime.datetime.strptime(obj, fmt)
        except Exception as error:
            # Keep the first failure so the message stays what it used to be.
            if first_error is None:
                first_error = error

    raise DataError("Not valid datetime struct: %s" % first_error) from first_error
155
+
156
+
157
# [-]H{1,3}:M{1,2}:S{1,2} with an optional ".ffffff" fraction. The dot is
# escaped so that only a literal "." introduces the fraction; unescaped, any
# character (e.g. "1:2:3x5") was accepted as the separator.
TIMEDELTA_RE = re.compile(r"(-)?(\d{1,3}):(\d{1,2}):(\d{1,2})(?:\.(\d{1,6}))?")
158
+
159
+
160
def convert_timedelta(obj):
    """Returns a TIME column as a timedelta object:

      >>> convert_timedelta('25:06:17')
      datetime.timedelta(days=1, seconds=3977)
      >>> convert_timedelta('-25:06:17')
      datetime.timedelta(days=-2, seconds=82423)

    Bytes input is decoded as ASCII first. Values that do not match
    ``TIMEDELTA_RE`` are returned unchanged (not ``None``):

      >>> convert_timedelta('random crap')
      'random crap'

    Note that MySQL always returns TIME columns as (+|-)HH:MM:SS, but
    can accept values as (+|-)DD HH:MM:SS. The latter format will not
    be parsed correctly by this function.

    :raises DataError: if the matched fields cannot be converted; the
        original ``ValueError`` is chained as ``__cause__``.
    """
    if isinstance(obj, (bytes, bytearray)):
        obj = obj.decode('ascii')

    m = TIMEDELTA_RE.match(obj)
    if not m:
        return obj

    try:
        sign, hours, minutes, seconds, fraction = m.groups()
        negate = -1 if sign else 1
        tdelta = datetime.timedelta(
            hours=int(hours),
            minutes=int(minutes),
            seconds=int(seconds),
            microseconds=_convert_second_fraction(fraction),
        ) * negate
        return tdelta
    except ValueError as err:
        raise DataError("Not valid time or timedelta struct: %s" % err) from err
199
+
200
+
201
def convert_time(obj):
    """Returns a TIME column as a time object:

      >>> convert_time('15:06:17')
      datetime.time(15, 6, 17)

    Bytes input is decoded as ASCII first. Values that do not parse as
    ``HH:MM:SS`` are handed to ``convert_timedelta`` and its result is
    returned instead.
    """
    if isinstance(obj, (bytes, bytearray)):
        obj = obj.decode('ascii')

    try:
        parsed = datetime.datetime.strptime(obj, '%H:%M:%S')
    except Exception:
        return convert_timedelta(obj)
    return parsed.time()
218
+
219
+
220
def convert_date(obj):
    """Returns a DATE column as a date object:

      >>> convert_date('2007-02-26')
      datetime.date(2007, 2, 26)

    Bytes input is decoded as ASCII first.

    :raises DataError: for values that are not a valid ``YYYY-MM-DD`` date,
        e.g. ``'2007-02-31'`` or ``'0000-00-00'`` (they are not returned as
        ``None``); the original exception is chained as ``__cause__``.
    """
    if isinstance(obj, (bytes, bytearray)):
        obj = obj.decode('ascii')
    try:
        time_obj = datetime.datetime.strptime(obj, '%Y-%m-%d')
        return time_obj.date()
    except Exception as err:
        raise DataError("Not valid date struct: %s" % err) from err
241
+
242
+
243
def convert_set(s):
    """Split a comma-separated value into a set of its parts.

    Bytes-like input is split on ``b","``; anything else on ``","``.
    """
    separator = b"," if isinstance(s, (bytes, bytearray)) else ","
    return set(s.split(separator))
247
+
248
+
249
def convert_characters(connection, data):
    """Decode *data* as UTF-8 when ``connection.use_unicode`` is truthy.

    Otherwise *data* is returned untouched.
    """
    if not connection.use_unicode:
        return data
    return data.decode("utf8")
253
+
254
+
255
def convert_column_data(column_type, column_data):
    """Convert a raw column value according to its declared type name.

    ``None`` values and non-string type names pass through unchanged. The
    type name is lower-cased and stripped; ``time``, ``date`` and
    ``datetime`` go to their converters, and any other name returns the
    value as-is.
    """
    if column_data is None or not isinstance(column_type, str):
        return column_data

    kind = column_type.lower().strip()
    if kind == 'time':
        return convert_time(column_data)
    if kind == 'date':
        return convert_date(column_data)
    if kind == 'datetime':
        return convert_datetime(column_data)
    return column_data
274
+
275
+
276
# Default encoder table used by escape_item(), keyed by exact Python type.
encoders = {
    # Scalars
    bool: escape_bool,
    int: escape_int,
    float: escape_float,
    str: escape_unicode,
    # Containers: every sequence-like type shares escape_sequence
    tuple: escape_sequence,
    list: escape_sequence,
    set: escape_sequence,
    frozenset: escape_sequence,
    dict: escape_dict,
    # Null
    type(None): escape_None,
    # Date and time values
    datetime.date: escape_date,
    datetime.datetime: escape_datetime,
    datetime.timedelta: escape_timedelta,
    datetime.time: escape_time,
    time.struct_time: escape_struct_time,
    # Decimal is emitted through str() without quoting
    Decimal: escape_object,
}
chdb/dbapi/cursors.py ADDED
@@ -0,0 +1,247 @@
1
+ from . import err
2
+ import re
3
+
4
# Pattern used by :meth:`Cursor.executemany` to recognise a simple bulk
# INSERT/REPLACE statement, which is the only form it batches.
#   group 1: statement prefix up to and including VALUES
#   group 2: the parenthesised placeholder list
#   group 3: optional "ON DUPLICATE ..." suffix
_INSERT_VALUES_PATTERN = (
    r"\s*((?:INSERT|REPLACE)\b.+\bVALUES?\s*)"
    r"(\(\s*(?:%s|%\(.+\)s)\s*(?:,\s*(?:%s|%\(.+\)s)\s*)*\))"
    r"(\s*(?:ON DUPLICATE.*)?);?\s*\Z"
)
RE_INSERT_VALUES = re.compile(_INSERT_VALUES_PATTERN, re.IGNORECASE | re.DOTALL)
13
+
14
+
15
class Cursor(object):
    """
    This is the object you use to interact with the database.

    Do not create an instance of a Cursor yourself. Call
    connections.Connection.cursor().

    See `Cursor <https://www.python.org/dev/peps/pep-0249/#cursor-objects>`_ in
    the specification.
    """

    #: Max statement size which :meth:`executemany` generates.
    #:
    #: Default value is 1024000.
    max_stmt_length = 1024000

    def __init__(self, connection):
        """Create a cursor on *connection*'s underlying engine connection."""
        self.connection = connection
        self._cursor = connection._conn.cursor()
        self.description = None
        self.rowcount = -1
        self.arraysize = 1
        self.lastrowid = None
        self._executed = None

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        del exc_info
        self.close()

    def __iter__(self):
        # Yields rows until fetchone() returns None.
        return iter(self.fetchone, None)

    def callproc(self, procname, args=()):
        """Accept a stored-procedure call for DB-API compatibility.

        Nothing is executed: *procname* is ignored and *args* is returned
        unchanged.
        """
        return args

    def close(self):
        """Close the underlying engine cursor."""
        self._cursor.close()

    def _get_db(self):
        """Return the owning connection.

        :raises err.ProgrammingError: if the cursor has no connection.
        """
        if not self.connection:
            raise err.ProgrammingError("Cursor closed")
        return self.connection

    def _escape_args(self, args, conn):
        """Escape *args* with ``conn.escape``, preserving its container shape.

        Lists and tuples become a tuple of escaped values, dicts a dict of
        escaped values, and anything else is escaped as a single value.
        """
        if isinstance(args, (tuple, list)):
            return tuple(conn.escape(arg) for arg in args)
        if isinstance(args, dict):
            return {key: conn.escape(val) for (key, val) in args.items()}
        # If it's not a dictionary let's try escaping it anyway.
        # Worst case it will throw a Value error
        return conn.escape(args)

    def mogrify(self, query, args=None):
        """
        Returns the exact string that is sent to the database by calling the
        execute() method.

        This method follows the extension to the DB API 2.0 followed by Psycopg.
        """
        conn = self._get_db()

        if args is not None:
            query = query % self._escape_args(args, conn)

        return query

    def execute(self, query, args=None):
        """Execute a query

        :param str query: Query to execute.

        :param args: parameters used with query. (optional)
        :type args: tuple, list or dict

        :return: Number of rows in the result, or -1 when the statement
            produced no columns or no rows.
        :rtype: int

        If args is a list or tuple, %s can be used as a placeholder in the query.
        If args is a dict, %(name)s can be used as a placeholder in the query.
        """
        # Share the interpolation path (and the connection check) with mogrify().
        query = self.mogrify(query, args)

        self._cursor.execute(query)

        # Get description from column names and types
        if hasattr(self._cursor, "_column_names") and self._cursor._column_names:
            self.description = [
                (name, type_info, None, None, None, None, None)
                for name, type_info in zip(
                    self._cursor._column_names, self._cursor._column_types
                )
            ]
            self.rowcount = (
                len(self._cursor._current_table) if self._cursor._current_table else -1
            )
        else:
            self.description = None
            self.rowcount = -1

        self._executed = query
        return self.rowcount

    def executemany(self, query, args):
        # type: (str, list) -> int
        """Run several data against one query

        :param query: query to execute on server
        :param args: Sequence of sequences or mappings. It is used as parameter.
        :return: Total of the row counts reported by the individual
            statements; statements reporting -1 (unknown) contribute 0.

        This method improves performance on multiple-row INSERT and
        REPLACE. Otherwise, it is equivalent to looping over args with
        execute().
        """
        if not args:
            return 0

        m = RE_INSERT_VALUES.match(query)
        if m:
            q_prefix = m.group(1) % ()
            q_values = m.group(2).rstrip()
            q_postfix = m.group(3) or ""
            assert q_values[0] == "(" and q_values[-1] == ")"
            return self._do_execute_many(
                q_prefix,
                q_values,
                q_postfix,
                args,
                self.max_stmt_length,
                self._get_db().encoding,
            )

        # execute() returns -1 for "unknown"; summing that sentinel used to
        # produce negative totals, so only real counts are added.
        total = 0
        for arg in args:
            total += max(self.execute(query, arg), 0)
        self.rowcount = total
        return total

    def _do_execute_many(
        self, prefix, values, postfix, args, max_stmt_length, encoding
    ):
        """Batch rows into multi-row statements of at most *max_stmt_length*.

        Each element of *args* is escaped into the *values* template and
        appended to the current statement. When the next row would push the
        statement past the limit, the statement is executed and a new one is
        started. The statement is assembled as bytes in *encoding*.
        NOTE(review): the assembled bytes are passed to execute() as-is --
        confirm the engine cursor accepts bytes.
        """
        conn = self._get_db()
        escape = self._escape_args
        if isinstance(prefix, str):
            prefix = prefix.encode(encoding)
        if isinstance(postfix, str):
            postfix = postfix.encode(encoding)
        comma = ",".encode(encoding)

        sql = prefix
        args = iter(args)
        v = values % escape(next(args), conn)
        if isinstance(v, str):
            v = v.encode(encoding, "surrogateescape")
        sql += v
        rows = 0
        for arg in args:
            v = values % escape(arg, conn)
            if isinstance(v, str):
                v = v.encode(encoding, "surrogateescape")
            if len(sql) + len(v) + len(postfix) + 1 > max_stmt_length:
                # Flush the full statement; -1 ("unknown") counts as 0.
                rows += max(self.execute(sql + postfix), 0)
                sql = prefix
            else:
                sql += comma
            sql += v
        rows += max(self.execute(sql + postfix), 0)
        self.rowcount = rows
        return rows

    def _check_executed(self):
        """Raise ``err.ProgrammingError`` unless a statement has been executed."""
        if not self._executed:
            raise err.ProgrammingError("execute() first")

    def fetchone(self):
        """Fetch the next row"""
        self._check_executed()
        return self._cursor.fetchone()

    def fetchmany(self, size=None):
        """Fetch several rows

        :param size: number of rows to fetch; defaults to ``self.arraysize``
            (initially 1), as PEP 249 specifies.
        """
        self._check_executed()
        if size is None:
            size = self.arraysize
        return self._cursor.fetchmany(size)

    def fetchall(self):
        """Fetch all the rows"""
        self._check_executed()
        return self._cursor.fetchall()

    def nextset(self):
        """Get the next query set"""
        # Not support for now
        return None

    def setinputsizes(self, *args):
        """Does nothing, required by DB API."""

    def setoutputsizes(self, *args):
        """Does nothing, required by DB API."""