chdb 3.7.1__cp38-abi3-musllinux_1_2_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of chdb might be problematic. Click here for more details.

chdb/dbapi/cursors.py ADDED
@@ -0,0 +1,545 @@
1
+ from . import err
2
+ import re
3
+
4
# Pattern used by :meth:`Cursor.executemany` to recognise a simple bulk insert.
#
# Capture groups:
#   1. the statement head, from INSERT/REPLACE through VALUES (trailing
#      whitespace included);
#   2. one parenthesised tuple made only of placeholders
#      (``%s``, ``%(name)s`` or ``?``);
#   3. an optional ``ON DUPLICATE ...`` tail.
#
# Matching is case-insensitive and ``.`` also matches newlines.
RE_INSERT_VALUES = re.compile(
    r"\s*((?:INSERT|REPLACE)\b.+\bVALUES?\s*)"
    r"(\(\s*(?:%s|%\(.+\)s|\?)\s*(?:,\s*(?:%s|%\(.+\)s|\?)\s*)*\))"
    r"(\s*(?:ON DUPLICATE.*)?);?\s*\Z",
    flags=re.DOTALL | re.IGNORECASE,
)
13
+
14
+
15
class Cursor:
    """DB-API 2.0 style cursor layered over the connection's engine cursor.

    The cursor formats SQL text (binding ``?``, ``%s`` and ``%(name)s``
    placeholders with values escaped by the connection), hands the final
    statement to the underlying cursor obtained from ``connection._conn``,
    and exposes the usual fetch methods.

    Do not create Cursor instances directly. Use Connection.cursor() instead.

    Attributes:
        connection: The connection this cursor was created from.
        description (list or None): One 7-item tuple
            ``(name, type, None, None, None, None, None)`` per result column
            of the last statement, or None when the underlying cursor
            reported no columns.
        rowcount (int): Row count of the last statement (-1 if unknown).
        arraysize (int): Default batch size for :meth:`fetchmany` (default: 1).
        lastrowid: Initialised to None; this class never updates it.
        max_stmt_length (int): Maximum statement size which
            :meth:`executemany` generates (default: 1024000).

    Note:
        See `DB-API 2.0 Cursor Objects <https://www.python.org/dev/peps/pep-0249/#cursor-objects>`_
        for the specification this class follows.
    """

    #: Max statement size which :meth:`executemany` generates.
    #:
    #: Default value is 1024000.
    max_stmt_length = 1024000

    def __init__(self, connection):
        """Initialize cursor for the given connection.

        Args:
            connection (Connection): Database connection to use. Its
                ``_conn.cursor()`` supplies the underlying engine cursor.
        """
        self.connection = connection
        self._cursor = connection._conn.cursor()
        self.description = None
        self.rowcount = -1
        self.arraysize = 1
        self.lastrowid = None
        self._executed = None

    def __enter__(self):
        """Enter context manager and return self.

        Returns:
            Cursor: This cursor instance
        """
        return self

    def __exit__(self, *exc_info):
        """Exit context manager and close cursor.

        Args:
            *exc_info: Exception information (ignored)
        """
        del exc_info
        self.close()

    def __iter__(self):
        """Make cursor iterable over result rows.

        Returns:
            iterator: Calls :meth:`fetchone` repeatedly and stops at the
            first ``None`` it returns.
        """
        return iter(self.fetchone, None)

    def callproc(self, procname, args=()):
        """Placeholder for stored-procedure calls; performs no operation.

        Args:
            procname (str): Name of stored procedure (ignored)
            args (sequence): Parameters for the procedure

        Returns:
            sequence: The original args parameter (unmodified)

        Note:
            Provided for DB-API 2.0 compliance only. Use execute() for all
            SQL operations.
        """
        return args

    def close(self):
        """Close the underlying engine cursor.

        Note:
            Only the underlying cursor is closed; the ``connection``
            attribute is left in place.
        """
        self._cursor.close()

    def _get_db(self):
        """Internal method to get the database connection.

        Returns:
            Connection: The database connection

        Raises:
            ProgrammingError: If ``self.connection`` is unset/falsy
        """
        if not self.connection:
            raise err.ProgrammingError("Cursor closed")
        return self.connection

    def _escape_args(self, args, conn):
        """Internal method to escape query arguments.

        Args:
            args (tuple/list/dict/scalar): Arguments to escape
            conn (Connection): Connection whose ``escape`` is applied

        Returns:
            A tuple for tuple/list input, a dict for dict input, otherwise
            whatever ``conn.escape(args)`` returns.
        """
        if isinstance(args, (tuple, list)):
            return tuple(conn.escape(arg) for arg in args)
        if isinstance(args, dict):
            return {key: conn.escape(val) for (key, val) in args.items()}
        # Not a container: escape the bare value and let conn.escape raise
        # if it cannot handle it.
        return conn.escape(args)

    @staticmethod
    def _scan_placeholders(query):
        """Locate parameter placeholders that sit outside quoted text.

        Text between single or double quotes is skipped. Inside quotes a
        backslash escapes the following character, so ``'it\\'s'`` and a
        literal ending in an escaped backslash are both delimited correctly.

        Args:
            query (str): SQL text to scan

        Returns:
            list: ``(start, end, name)`` tuples in order of appearance.
            ``name`` is None for ``?`` and ``%s`` and the parameter name
            for ``%(name)s``.
        """
        found = []
        quote = None
        i = 0
        n = len(query)
        while i < n:
            ch = query[i]
            if quote is not None:
                if ch == "\\":
                    # Skip the escaped character, whatever it is.
                    i += 2
                    continue
                if ch == quote:
                    quote = None
            elif ch in ("'", '"'):
                quote = ch
            elif ch == "?":
                found.append((i, i + 1, None))
            elif ch == "%" and i + 1 < n:
                following = query[i + 1]
                if following == "s":
                    found.append((i, i + 2, None))
                    i += 2
                    continue
                if following == "(":
                    close = query.find(")", i + 2)
                    # Only "%(<non-empty name>)s" counts as a placeholder.
                    if close > i + 2 and query[close + 1:close + 2] == "s":
                        found.append((i, close + 2, query[i + 2:close]))
                        i = close + 2
                        continue
            i += 1
        return found

    @staticmethod
    def _substitute(query, placeholders, positional, named):
        """Splice escaped values into ``query`` at the given placeholders.

        Positional placeholders consume ``positional`` left to right; named
        placeholders look themselves up in ``named``. A placeholder with no
        value available is left in the text unchanged.

        Args:
            query (str): SQL text the placeholders were found in
            placeholders (list): Output of :meth:`_scan_placeholders`
            positional (sequence): Escaped values for ``?`` / ``%s``
            named (dict): Escaped values for ``%(name)s``

        Returns:
            str: The text with available values substituted
        """
        pieces = []
        copied_to = 0
        used = 0
        for start, end, name in placeholders:
            if name is None:
                if used >= len(positional):
                    continue
                replacement = positional[used]
                used += 1
            elif name in named:
                replacement = named[name]
            else:
                continue
            pieces.append(query[copied_to:start])
            pieces.append(str(replacement))
            copied_to = end
        pieces.append(query[copied_to:])
        return "".join(pieces)

    @staticmethod
    def _combine_rowcounts(counts):
        """Add up per-statement row counts, ignoring "unknown" results.

        Negative counts are the DB-API "unknown" marker, not quantities, so
        they are excluded from the total.

        Args:
            counts (iterable): Values returned by :meth:`execute`

        Returns:
            int: Sum of the non-negative counts, or -1 if there were none
        """
        total = -1
        for count in counts:
            if count is not None and count >= 0:
                total = count if total < 0 else total + count
        return total

    def _format_query(self, query, args, conn):
        """Format SQL query by substituting parameter placeholders.

        Values are escaped with ``conn.escape`` (via :meth:`_escape_args`)
        and converted with ``str`` before being inserted. Placeholders
        inside quoted text are never touched.

        * tuple/list/scalar ``args`` fill ``?`` and ``%s`` in order;
        * dict ``args`` fill ``%(name)s`` by key.

        Placeholders for which no value is available are left as written,
        and surplus positional values are ignored.

        Args:
            query (str): SQL query with parameter placeholders
            args (tuple/list/dict/scalar or None): Parameter values
            conn (Connection): Database connection for escaping values

        Returns:
            The query unchanged when ``args`` is None or the text contains
            neither ``?`` nor ``%``; otherwise the substituted string.

        Note:
            This is an internal method. Use execute() or mogrify() instead.
        """
        if args is None or ("?" not in query and "%" not in query):
            return query

        escaped = self._escape_args(args, conn)
        if isinstance(escaped, dict):
            positional, named = (), escaped
        elif isinstance(escaped, (tuple, list)):
            positional, named = escaped, {}
        else:
            positional, named = (escaped,), {}

        return self._substitute(
            query, self._scan_placeholders(query), positional, named
        )

    def mogrify(self, query, args=None):
        """Return the query string after parameter substitution.

        Useful for debugging and logging: it is the text :meth:`execute`
        would hand to the underlying cursor for the same arguments.

        Args:
            query (str): SQL query with parameter placeholders
            args (tuple/list/dict, optional): Parameters to substitute

        Returns:
            str: The final SQL query string with parameters substituted

        Raises:
            ProgrammingError: If the cursor has no connection

        Note:
            This method follows the extension to DB-API 2.0 used by Psycopg.
        """
        conn = self._get_db()
        return self._format_query(query, args, conn)

    def execute(self, query, args=None):
        """Execute a SQL query with optional parameter binding.

        Args:
            query (str): SQL query to execute
            args (tuple/list/dict, optional): Parameters to bind to placeholders

        Returns:
            int: ``rowcount`` after execution (-1 if unknown)

        Parameter Styles:
            - Question mark style: "SELECT * FROM users WHERE id = ?"
            - Named style: "SELECT * FROM users WHERE name = %(name)s"
            - Format style: "SELECT * FROM users WHERE age = %s" (legacy)

        Raises:
            ProgrammingError: If the cursor has no connection
        """
        query = self._format_query(query, args, self._get_db())
        raw = self._cursor
        raw.execute(query)

        # Build the description from the column names/types the underlying
        # cursor exposes; no column names means no description is available.
        column_names = getattr(raw, "_column_names", None)
        if column_names:
            self.description = [
                (name, type_info, None, None, None, None, None)
                for name, type_info in zip(column_names, raw._column_types)
            ]
            table = raw._current_table
            self.rowcount = len(table) if table else -1
        else:
            self.description = None
            self.rowcount = -1

        self._executed = query
        return self.rowcount

    def executemany(self, query, args):
        """Execute a query once per parameter set.

        When ``query`` matches ``RE_INSERT_VALUES`` (a simple
        ``INSERT/REPLACE ... VALUES (<placeholders>)``), the rows are
        rendered and concatenated into multi-row statements of at most
        ``max_stmt_length`` bytes. Any other query is executed once per
        element of ``args`` through :meth:`execute`.

        Args:
            query (str): SQL query to execute multiple times
            args (sequence): Sequence of parameter tuples/dicts/lists for each execution

        Returns:
            int: 0 when ``args`` is empty; otherwise the sum of the known
            row counts of the executed statements, or -1 if none was known.

        Examples:
            >>> cur.executemany("INSERT INTO users VALUES (?, ?)",
            ...                 [(1, 'Alice'), (2, 'Bob')])
            >>> cur.executemany("INSERT INTO users VALUES (%(id)s, %(name)s)",
            ...                 [{'id': 1, 'name': 'Alice'}])
        """
        if not args:
            return 0

        m = RE_INSERT_VALUES.match(query)
        if m:
            # "% ()" collapses "%%" to "%" in the statement head and raises
            # if the head contains any other %-conversion.
            q_prefix = m.group(1) % ()
            q_values = m.group(2).rstrip()
            q_postfix = m.group(3) or ""
            return self._do_execute_many(
                q_prefix,
                q_values,
                q_postfix,
                args,
                self.max_stmt_length,
                self._get_db().encoding,
            )

        self.rowcount = self._combine_rowcounts(
            self.execute(query, arg) for arg in args
        )
        return self.rowcount

    def _find_placeholder_positions(self, query):
        """Return ``(position, length)`` for every ``?`` / ``%s`` placeholder.

        Placeholders inside quoted text are ignored; ``%(name)s``
        placeholders are not reported by this method.

        Args:
            query (str): SQL text to scan

        Returns:
            list: ``(position, length)`` tuples in order of appearance
        """
        return [
            (start, end - start)
            for start, end, name in self._scan_placeholders(query)
            if name is None
        ]

    def _do_execute_many(
        self, prefix, values, postfix, args, max_stmt_length, encoding
    ):
        """Run a bulk insert as a few multi-row statements.

        Each element of ``args`` is rendered through the ``values``
        template and the rendered rows are joined with commas between
        ``prefix`` and ``postfix``. A statement is flushed to
        :meth:`execute` as soon as adding the next row would exceed
        ``max_stmt_length``.

        Args:
            prefix (str/bytes): Statement head up to and including VALUES
            values (str/bytes): Row template such as ``(?, ?)`` or
                ``(%(id)s, %(name)s)``
            postfix (str/bytes): Text appended after the rows
            args (iterable): One tuple/list/dict/scalar per row
            max_stmt_length (int): Size limit, in encoded bytes, per statement
            encoding (str): Encoding used to turn the statement into bytes

        Returns:
            int: 0 if ``args`` yields nothing; otherwise the sum of the
            known row counts of the executed statements, or -1 if none was
            known.
        """
        conn = self._get_db()
        if isinstance(prefix, str):
            prefix = prefix.encode(encoding)
        if isinstance(postfix, str):
            postfix = postfix.encode(encoding)
        if isinstance(values, bytes):
            values = values.decode(encoding, "surrogateescape")

        # The template is scanned once; every row reuses the positions.
        placeholders = self._scan_placeholders(values)
        positional_count = sum(
            1 for _start, _end, name in placeholders if name is None
        )

        def render(arg):
            escaped = self._escape_args(arg, conn)
            if isinstance(escaped, dict):
                row = self._substitute(values, placeholders, (), escaped)
            else:
                if not isinstance(escaped, (tuple, list)):
                    escaped = (escaped,)
                if len(escaped) == positional_count:
                    row = self._substitute(values, placeholders, escaped, {})
                else:
                    # Historical behaviour kept: a row whose value count
                    # does not match the template is emitted unsubstituted.
                    # NOTE(review): presumably the engine then rejects the
                    # statement; consider raising ProgrammingError here.
                    row = values
            return row.encode(encoding, "surrogateescape")

        pending = iter(args)
        try:
            first = next(pending)
        except StopIteration:
            # Empty iterator: nothing to send.
            return 0

        separator = ",".encode(encoding)
        sql = bytearray(prefix)
        sql += render(first)
        counts = []

        for arg in pending:
            row = render(arg)
            if len(sql) + len(row) + len(postfix) + 2 > max_stmt_length:
                counts.append(self.execute(bytes(sql) + postfix))
                sql = bytearray(prefix)
            else:
                sql += separator
            sql += row
        counts.append(self.execute(bytes(sql) + postfix))

        self.rowcount = self._combine_rowcounts(counts)
        return self.rowcount

    def _check_executed(self):
        """Internal method to verify that execute() has been called.

        Raises:
            ProgrammingError: If no query has been executed yet
        """
        if not self._executed:
            raise err.ProgrammingError("execute() first")

    def fetchone(self):
        """Fetch the next row from the query result.

        Returns:
            Whatever the underlying cursor's ``fetchone()`` returns;
            iteration over this cursor stops when that is None.

        Raises:
            ProgrammingError: If execute() has not been called first
        """
        self._check_executed()
        return self._cursor.fetchone()

    def fetchmany(self, size=None):
        """Fetch multiple rows from the query result.

        Args:
            size (int, optional): Number of rows to fetch. When omitted,
                ``self.arraysize`` is used.

        Returns:
            Whatever the underlying cursor's ``fetchmany(size)`` returns

        Raises:
            ProgrammingError: If execute() has not been called first
        """
        self._check_executed()
        if size is None:
            size = self.arraysize
        return self._cursor.fetchmany(size)

    def fetchall(self):
        """Fetch all remaining rows from the query result.

        Returns:
            Whatever the underlying cursor's ``fetchall()`` returns

        Raises:
            ProgrammingError: If execute() has not been called first
        """
        self._check_executed()
        return self._cursor.fetchall()

    def nextset(self):
        """Move to the next result set (not supported).

        Returns:
            None: Always, as multiple result sets are not supported here
        """
        return None

    def setinputsizes(self, *args):
        """Set input sizes for parameters (no-op implementation).

        Args:
            *args: Parameter size specifications (ignored)

        Note:
            This method does nothing but is required by DB-API 2.0 specification.
        """

    def setoutputsizes(self, *args):
        """Set output column sizes (no-op implementation).

        Args:
            *args: Column size specifications (ignored)

        Note:
            This method does nothing but is required by DB-API 2.0 specification.
        """