chdb 3.7.1__cp38-abi3-musllinux_1_2_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of chdb might be problematic. Click here for more details.

@@ -0,0 +1,611 @@
1
+ """Type conversion and escaping functions for chDB database operations.
2
+
3
+ This module provides comprehensive type conversion between Python types and SQL values,
4
+ including proper escaping for SQL injection prevention and bidirectional conversion
5
+ between database column values and Python objects.
6
+
7
+ The module handles:
8
+ - Escaping Python values for safe SQL inclusion
9
+ - Converting database column data to appropriate Python types
10
+ - Time/date/datetime conversions with proper formatting
11
+ - Collection types (lists, tuples, sets, dicts) handling
12
+ - Special value handling (None, boolean, numeric types)
13
+ """
14
+
15
+ import datetime
16
+ from decimal import Decimal
17
+ from .err import DataError
18
+ import re
19
+ import time
20
+
21
+
22
def escape_item(val, mapping=None):
    """Render one Python value as SQL text using a type-keyed encoder.

    The encoder is looked up by the exact type of ``val`` (subclasses are not
    matched). When no encoder is registered for that type, the encoder
    registered for ``str`` is used instead.

    Args:
        val: Python value to render.
        mapping (dict, optional): Type -> encoder table. The module-level
            ``encoders`` table is used when this is None.

    Returns:
        Whatever the selected encoder returns (a SQL string for the default
        encoders).

    Raises:
        TypeError: If neither ``type(val)`` nor ``str`` has an encoder in
            the mapping.

    Examples:
        >>> escape_item("O'Reilly")
        "'O''Reilly'"
        >>> escape_item(None)
        'NULL'
    """
    lookup = encoders if mapping is None else mapping
    encode = lookup.get(type(val))

    if not encode:
        # Unknown type: fall back to the string encoder, if there is one.
        try:
            encode = lookup[str]
        except KeyError:
            raise TypeError("no default type converter defined")

    return encode(val, lookup)
61
+
62
+
63
def escape_dict(val, mapping=None):
    """Return a new dict with every value passed through ``escape_item``.

    Keys are copied unchanged; only the values are escaped.

    Args:
        val (dict): Dictionary whose values should be escaped.
        mapping (dict, optional): Encoder table forwarded to ``escape_item``.

    Returns:
        dict: Same keys as ``val``, each mapped to its escaped value.

    Example:
        >>> escape_dict({'name': "O'Reilly", 'age': 30})
        {'name': "'O''Reilly'", 'age': '30'}
    """
    return {key: escape_item(item, mapping) for key, item in val.items()}
82
+
83
+
84
def escape_sequence(val, mapping=None):
    """Render an iterable as a parenthesised, comma-separated SQL list.

    Each element is escaped with ``escape_item``; the results are joined
    with a bare comma (no space) and wrapped in parentheses.

    Args:
        val (iterable): Values to escape.
        mapping (dict, optional): Encoder table forwarded to ``escape_item``.

    Returns:
        str: Text of the form ``(v1,v2,v3)``; ``()`` for an empty iterable.

    Example:
        >>> escape_sequence([1, "hello", None])
        "(1,'hello',NULL)"
    """
    return "(%s)" % ",".join(escape_item(element, mapping) for element in val)
103
+
104
+
105
def escape_set(val, mapping=None):
    """Render a set as comma-separated escaped values without parentheses.

    Args:
        val (set): Values to escape.
        mapping (dict, optional): Encoder table forwarded to ``escape_item``.

    Returns:
        str: Escaped members joined by commas, in the set's iteration order.

    Example:
        >>> escape_set({1})
        '1'
    """
    return ','.join(escape_item(member, mapping) for member in val)
120
+
121
+
122
def escape_bool(value, mapping=None):
    """Render a boolean as the SQL integer literal it corresponds to.

    Args:
        value (bool): Boolean to render.
        mapping: Unused; accepted so all encoders share one signature.

    Returns:
        str: ``"1"`` for True, ``"0"`` for False.

    Example:
        >>> escape_bool(True)
        '1'
        >>> escape_bool(False)
        '0'
    """
    as_integer = int(value)
    return str(as_integer)
139
+
140
+
141
def escape_object(value, mapping=None):
    """Render an arbitrary object via ``str()``, with no quoting or escaping.

    Args:
        value: Object to render.
        mapping: Unused; accepted so all encoders share one signature.

    Returns:
        str: ``str(value)``.
    """
    text = str(value)
    return text
152
+
153
+
154
def escape_int(value, mapping=None):
    """Render an integer as its decimal SQL literal.

    Args:
        value (int): Integer to render.
        mapping: Unused; accepted so all encoders share one signature.

    Returns:
        str: ``str(value)``.
    """
    literal = str(value)
    return literal
165
+
166
+
167
def escape_float(value, mapping=None):
    """Render a float as a SQL numeric literal with 15 significant digits.

    Args:
        value (float): Float to render.
        mapping: Unused; accepted so all encoders share one signature.

    Returns:
        str: ``value`` formatted with the ``%.15g`` conversion, so digits
        beyond the fifteenth significant one are rounded away.
    """
    literal = '%.15g' % value
    return literal
178
+
179
+
180
# Translation table for str.translate(): index = ASCII code point, entry =
# replacement text. Every entry is the character itself except the single
# quote (doubled) and the backslash (doubled).
_escape_table = list(map(chr, range(128)))
_escape_table[ord("\\")] = "\\\\"
_escape_table[ord("'")] = "''"
183
+
184
+
185
def _escape_unicode(value, mapping=None):
    """Escape the special characters of a string, without adding quotes.

    The string is run through ``_escape_table``, which doubles single quotes
    and backslashes and leaves every other character as it is.

    Args:
        value (str): String to escape.
        mapping: Unused; accepted so all encoders share one signature.

    Returns:
        str: Escaped text with no surrounding quotes. Use ``escape_unicode``
        to get a complete quoted SQL string literal.
    """
    escaped = value.translate(_escape_table)
    return escaped


# Alias used by escape_str(); same function under a second name.
escape_string = _escape_unicode
206
+
207
# On Python ~3.5, bytes.decode('ascii', 'surrogateescape') is slow
# (fixed in Python 3.6, http://bugs.python.org/issue24870).
# The workaround is to decode as latin1, which yields one code point per
# byte, and then translate code points 0x80-0xff to U+DC80-U+DCFF.
# Extending the string escape table with those 128 entries lets a single
# translate() call do the special-character escaping and the surrogate
# mapping at once.
_escape_bytes_table = _escape_table + list(map(chr, range(0xdc80, 0xdd00)))
212
+
213
+
214
def escape_bytes(value, mapping=None):
    """Render a bytes value as a quoted SQL string literal.

    The bytes are decoded as latin1 and translated through
    ``_escape_bytes_table``, then wrapped in single quotes.

    Args:
        value (bytes): Bytes to render.
        mapping: Unused; accepted so all encoders share one signature.

    Returns:
        str: Quoted, escaped text.

    Example:
        >>> escape_bytes(b"hello'world")
        "'hello''world'"
    """
    decoded = value.decode('latin1')
    return "'%s'" % decoded.translate(_escape_bytes_table)
229
+
230
+
231
def escape_unicode(value, mapping=None):
    """Render a string as a quoted SQL string literal.

    Args:
        value (str): String to render.
        mapping: Unused; accepted so all encoders share one signature.

    Returns:
        str: The text escaped by ``_escape_unicode`` and wrapped in single
        quotes.

    Example:
        >>> escape_unicode("O'Reilly")
        "'O''Reilly'"
    """
    body = _escape_unicode(value)
    return u"'%s'" % body
246
+
247
+
248
def escape_str(value, mapping=None):
    """Convert a value with ``str()`` and render it as a quoted SQL string.

    Args:
        value: Value to stringify and escape.
        mapping (dict, optional): Forwarded to ``escape_string``.

    Returns:
        str: Escaped text wrapped in single quotes.
    """
    text = str(value)
    return "'%s'" % escape_string(text, mapping)
259
+
260
+
261
def escape_None(value, mapping=None):
    """Render None as the SQL NULL literal.

    Args:
        value: Ignored.
        mapping: Unused; accepted so all encoders share one signature.

    Returns:
        str: Always ``'NULL'``.
    """
    null_literal = 'NULL'
    return null_literal
272
+
273
+
274
def escape_timedelta(obj, mapping=None):
    """Render a timedelta as a quoted SQL TIME-style literal.

    The duration is formatted as ``[-]HH:MM:SS`` with hours allowed to
    exceed 23 (days are folded into hours). A six-digit fractional part is
    appended only when the duration has non-zero microseconds.

    Negative durations are rendered from their absolute value with a single
    leading minus sign. Python stores a negative timedelta as a negative
    ``days`` plus non-negative ``seconds``/``microseconds``; formatting those
    fields separately would turn ``timedelta(minutes=-30)`` into
    ``'-1:30:00'`` (i.e. minus 1h30) instead of ``'-00:30:00'``.

    Args:
        obj (datetime.timedelta): Duration to render.
        mapping: Unused; accepted so all encoders share one signature.

    Returns:
        str: Quoted text such as ``'02:30:45'`` or ``'-00:30:00.500000'``.

    Example:
        >>> td = datetime.timedelta(hours=2, minutes=30, seconds=45, microseconds=123456)
        >>> escape_timedelta(td)
        "'02:30:45.123456'"
        >>> escape_timedelta(datetime.timedelta(minutes=-30))
        "'-00:30:00'"
    """
    # Work on one signed integer of microseconds so the sign applies to the
    # whole duration rather than to the days component alone.
    total_microseconds = (
        (obj.days * 86400 + obj.seconds) * 1000000 + obj.microseconds
    )
    sign = "-" if total_microseconds < 0 else ""
    total_seconds, microseconds = divmod(abs(total_microseconds), 1000000)
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)

    if microseconds:
        return "'{0}{1:02d}:{2:02d}:{3:02d}.{4:06d}'".format(
            sign, hours, minutes, seconds, microseconds
        )
    return "'{0}{1:02d}:{2:02d}:{3:02d}'".format(sign, hours, minutes, seconds)
297
+
298
+
299
def escape_time(obj, mapping=None):
    """Render a time object as a quoted SQL literal.

    Args:
        obj (datetime.time): Time to render.
        mapping: Unused; accepted so all encoders share one signature.

    Returns:
        str: The ISO representation of ``obj``, always including the
        six-digit microsecond part, wrapped in single quotes.

    Example:
        >>> escape_time(datetime.time(14, 30, 45, 123456))
        "'14:30:45.123456'"
    """
    iso_text = obj.isoformat(timespec='microseconds')
    return "'" + iso_text + "'"
315
+
316
+
317
def escape_datetime(obj, mapping=None):
    """Render a datetime object as a quoted SQL literal.

    Args:
        obj (datetime.datetime): Datetime to render.
        mapping: Unused; accepted so all encoders share one signature.

    Returns:
        str: The ISO representation of ``obj`` with a space between date and
        time and a six-digit microsecond part, wrapped in single quotes.

    Example:
        >>> escape_datetime(datetime.datetime(2023, 12, 25, 14, 30, 45, 123456))
        "'2023-12-25 14:30:45.123456'"
    """
    iso_text = obj.isoformat(sep=' ', timespec='microseconds')
    return "'" + iso_text + "'"
333
+
334
+
335
def escape_date(obj, mapping=None):
    """Render a date object as a quoted SQL literal.

    Args:
        obj (datetime.date): Date to render.
        mapping: Unused; accepted so all encoders share one signature.

    Returns:
        str: The ISO representation of ``obj`` wrapped in single quotes.

    Example:
        >>> escape_date(datetime.date(2023, 12, 25))
        "'2023-12-25'"
    """
    iso_text = obj.isoformat()
    return "'" + iso_text + "'"
351
+
352
+
353
def escape_struct_time(obj, mapping=None):
    """Render a ``time.struct_time`` as a quoted SQL datetime literal.

    Only the first six fields (year, month, day, hour, minute, second) are
    used; they build a ``datetime.datetime`` that is then passed to
    ``escape_datetime``.

    Args:
        obj (time.struct_time): Struct time to render.
        mapping: Unused; accepted so all encoders share one signature.

    Returns:
        str: Result of ``escape_datetime`` for the equivalent datetime.
    """
    year, month, day, hour, minute, second = obj[:6]
    as_datetime = datetime.datetime(year, month, day, hour, minute, second)
    return escape_datetime(as_datetime)
364
+
365
+
366
def _convert_second_fraction(s):
    """Convert the digits after the decimal point of a seconds value to microseconds.

    Args:
        s (str or None): Fraction digits, e.g. ``"5"`` or ``"123456"``.

    Returns:
        int: 0 when ``s`` is empty or None; otherwise the digits right-padded
        with zeros to six places (and cut to six) as an integer.
    """
    # "5" means 0.5 s, so pad on the right: "5" -> "500000" microseconds.
    return int(s.ljust(6, '0')[:6]) if s else 0
372
+
373
+
374
def convert_datetime(obj):
    """Convert a SQL DATETIME/TIMESTAMP string to a datetime object.

    Accepts ``YYYY-MM-DD HH:MM:SS`` with an optional fractional-seconds part
    of one to six digits (``YYYY-MM-DD HH:MM:SS.ffffff``). The fractional
    form is what ``escape_datetime`` in this module produces, so escaped
    values can be parsed back.

    Args:
        obj (str or bytes): Datetime text; bytes are decoded as ASCII first.

    Returns:
        datetime.datetime: Parsed datetime object.

    Raises:
        DataError: If the text does not match either accepted format.

    Examples:
        >>> convert_datetime('2007-02-25 23:06:20')
        datetime.datetime(2007, 2, 25, 23, 6, 20)
        >>> convert_datetime(b'2023-12-25 14:30:45.5')
        datetime.datetime(2023, 12, 25, 14, 30, 45, 500000)
    """
    if isinstance(obj, (bytes, bytearray)):
        obj = obj.decode('ascii')

    try:
        # The two formats differ only in the trailing ".%f"; a dot appears
        # nowhere else in a valid value, so its presence picks the format.
        if '.' in obj:
            fmt = '%Y-%m-%d %H:%M:%S.%f'
        else:
            fmt = '%Y-%m-%d %H:%M:%S'
        return datetime.datetime.strptime(obj, fmt)
    except Exception as err:
        raise DataError("Not valid datetime struct: %s" % err) from err
403
+
404
+
405
# Matches '[-]H:M:S[.fraction]' at the start of a string. Groups: sign, hours
# (1-3 digits), minutes, seconds, fraction (1-6 digits, optional).
# The dot before the fraction is escaped so that only a literal '.' introduces
# fractional seconds; unescaped, it matched any character (e.g. '12:30:45x5').
TIMEDELTA_RE = re.compile(r"(-)?(\d{1,3}):(\d{1,2}):(\d{1,2})(?:\.(\d{1,6}))?")
406
+
407
+
408
def convert_timedelta(obj):
    """Convert a SQL TIME string to a timedelta object.

    The text is matched against ``TIMEDELTA_RE`` from its start: an optional
    leading minus sign, then ``H:M:S`` and an optional fractional part.
    Hours may exceed 23, so the value can represent an interval.

    Args:
        obj (str or bytes): TIME text such as ``'25:06:17'`` or
            ``'-12:30:45.5'``; bytes are decoded as ASCII first.

    Returns:
        datetime.timedelta: Parsed duration, negated when the text starts
        with a minus sign.
        str: The input text unchanged when it does not match the pattern.

    Raises:
        DataError: If building the timedelta raises ValueError.

    Examples:
        >>> convert_timedelta('25:06:17')
        datetime.timedelta(days=1, seconds=3977)
        >>> convert_timedelta('-25:06:17')
        datetime.timedelta(days=-2, seconds=82423)
        >>> convert_timedelta('12:30:45.123456')
        datetime.timedelta(seconds=45045, microseconds=123456)
    """
    if isinstance(obj, (bytes, bytearray)):
        obj = obj.decode('ascii')

    matched = TIMEDELTA_RE.match(obj)
    if matched is None:
        # Not a TIME value: hand the text back untouched.
        return obj

    try:
        sign, hours, minutes, seconds, fraction = matched.groups()
        microseconds = _convert_second_fraction(fraction)
        factor = -1 if sign else 1
        duration = datetime.timedelta(
            hours=int(hours),
            minutes=int(minutes),
            seconds=int(seconds),
            microseconds=microseconds,
        )
        return duration * factor
    except ValueError as err:
        raise DataError("Not valid time or timedelta struct: %s" % err)
458
+
459
+
460
def convert_time(obj):
    """Convert a SQL TIME string to a time object, or to a timedelta.

    The text is first parsed as a clock time in ``HH:MM:SS`` form. If that
    fails for any reason, the value is passed to ``convert_timedelta`` and
    its result is returned instead.

    Args:
        obj (str or bytes): TIME text; bytes are decoded as ASCII first.

    Returns:
        datetime.time: When the text is a valid ``HH:MM:SS`` clock time.
        Otherwise whatever ``convert_timedelta`` returns for the text.

    Examples:
        >>> convert_time('15:06:17')
        datetime.time(15, 6, 17)
    """
    if isinstance(obj, (bytes, bytearray)):
        obj = obj.decode('ascii')

    try:
        parsed = datetime.datetime.strptime(obj, '%H:%M:%S')
    except Exception:
        # Not a plain clock time (e.g. hours above 23): treat as an interval.
        return convert_timedelta(obj)
    return parsed.time()
487
+
488
+
489
def convert_date(obj):
    """Convert a SQL DATE string to a date object.

    Args:
        obj (str or bytes): Date text in ``YYYY-MM-DD`` form; bytes are
            decoded as ASCII first.

    Returns:
        datetime.date: Parsed date object.

    Raises:
        DataError: If the text cannot be parsed as ``YYYY-MM-DD``.

    Examples:
        >>> convert_date('2007-02-26')
        datetime.date(2007, 2, 26)
        >>> convert_date(b'2023-12-25')
        datetime.date(2023, 12, 25)
    """
    if isinstance(obj, (bytes, bytearray)):
        obj = obj.decode('ascii')

    try:
        parsed = datetime.datetime.strptime(obj, '%Y-%m-%d')
    except Exception as err:
        raise DataError("Not valid date struct: %s" % err)
    return parsed.date()
516
+
517
+
518
def convert_set(s):
    """Convert a comma-separated string to a Python set.

    Args:
        s (str, bytes or bytearray): Comma-separated values.

    Returns:
        set: The comma-separated pieces. Pieces are ``str`` for ``str``
        input and ``bytes`` for ``bytes`` or ``bytearray`` input.

    Example:
        >>> convert_set("apple,banana,cherry") == {'apple', 'banana', 'cherry'}
        True
        >>> convert_set(b"1,2,3") == {b'1', b'2', b'3'}
        True
    """
    if isinstance(s, (bytes, bytearray)):
        # bytearray.split() yields bytearray pieces, which are unhashable and
        # cannot be set members; convert to immutable bytes before splitting.
        return set(bytes(s).split(b","))
    return set(s.split(","))
536
+
537
+
538
def convert_characters(connection, data):
    """Decode character data when the connection asks for unicode.

    Args:
        connection: Object exposing a ``use_unicode`` attribute.
        data (bytes): Raw character data.

    Returns:
        str or bytes: ``data`` decoded as UTF-8 when
        ``connection.use_unicode`` is truthy; otherwise ``data`` unchanged.
    """
    if not connection.use_unicode:
        return data
    return data.decode("utf8")
551
+
552
+
553
def convert_column_data(column_type, column_data):
    """Convert a raw column value according to its SQL type name.

    Only the type names ``time``, ``date`` and ``datetime`` (compared after
    lower-casing and stripping surrounding whitespace) trigger a conversion;
    every other value is returned as it came in.

    Args:
        column_type (str): SQL column type name. Non-string values disable
            conversion.
        column_data: Raw column value; None is returned unchanged.

    Returns:
        The result of ``convert_time``, ``convert_date`` or
        ``convert_datetime`` for the matching type names, otherwise
        ``column_data`` itself.

    Example:
        >>> convert_column_data('date', '2023-12-25')
        datetime.date(2023, 12, 25)
        >>> convert_column_data('varchar', 'hello')
        'hello'
    """
    # NULL values and unknown type descriptors pass straight through.
    if column_data is None or not isinstance(column_type, str):
        return column_data

    type_name = column_type.lower().strip()
    if type_name == 'time':
        return convert_time(column_data)
    if type_name == 'date':
        return convert_date(column_data)
    if type_name == 'datetime':
        return convert_datetime(column_data)
    return column_data
592
+
593
+
594
# Default dispatch table used by escape_item(): maps the exact Python type of
# a value to the function that renders it as SQL text.
encoders = {
    bool: escape_bool,
    int: escape_int,
    float: escape_float,
    str: escape_unicode,
    # bytes previously had no entry, so escape_item() fell back to the str
    # encoder, whose translate table bytes.translate() rejects with TypeError.
    bytes: escape_bytes,
    tuple: escape_sequence,
    list: escape_sequence,
    # Sets are rendered like sequences, i.e. parenthesised.
    set: escape_sequence,
    frozenset: escape_sequence,
    dict: escape_dict,
    type(None): escape_None,
    datetime.date: escape_date,
    datetime.datetime: escape_datetime,
    datetime.timedelta: escape_timedelta,
    datetime.time: escape_time,
    time.struct_time: escape_struct_time,
    Decimal: escape_object,
}