chdb 3.6.0__cp38-abi3-macosx_11_0_arm64.whl → 3.7.0__cp38-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of chdb might be problematic. Click here for more details.

chdb/dbapi/converters.py CHANGED
@@ -1,3 +1,17 @@
1
+ """Type conversion and escaping functions for chDB database operations.
2
+
3
+ This module provides comprehensive type conversion between Python types and SQL values,
4
+ including proper escaping for SQL injection prevention and bidirectional conversion
5
+ between database column values and Python objects.
6
+
7
+ The module handles:
8
+ - Escaping Python values for safe SQL inclusion
9
+ - Converting database column data to appropriate Python types
10
+ - Time/date/datetime conversions with proper formatting
11
+ - Collection types (lists, tuples, sets, dicts) handling
12
+ - Special value handling (None, boolean, numeric types)
13
+ """
14
+
1
15
  import datetime
2
16
  from decimal import Decimal
3
17
  from .err import DataError
@@ -6,6 +20,31 @@ import time
6
20
 
7
21
 
8
22
  def escape_item(val, mapping=None):
23
+ """Escape a single value for safe SQL inclusion.
24
+
25
+ This function takes a Python value and converts it to a properly escaped
26
+ SQL representation using the appropriate encoder for the value's type.
27
+
28
+ Args:
29
+ val: Python value to escape (any supported type)
30
+ mapping (dict, optional): Custom encoder mapping. Uses default encoders if None.
31
+
32
+ Returns:
33
+ str: SQL-safe string representation of the value
34
+
35
+ Raises:
36
+ TypeError: If no encoder is found for the value type
37
+
38
+ Examples:
39
+ >>> escape_item("O'Reilly")
40
+ "'O''Reilly'"
41
+ >>> escape_item(42)
42
+ "42"
43
+ >>> escape_item(None)
44
+ "NULL"
45
+ >>> escape_item(True)
46
+ "1"
47
+ """
9
48
  if mapping is None:
10
49
  mapping = encoders
11
50
  encoder = mapping.get(type(val))
@@ -22,6 +61,19 @@ def escape_item(val, mapping=None):
22
61
 
23
62
 
24
63
  def escape_dict(val, mapping=None):
64
+ """Escape all values in a dictionary.
65
+
66
+ Args:
67
+ val (dict): Dictionary with values to escape
68
+ mapping (dict, optional): Custom encoder mapping
69
+
70
+ Returns:
71
+ dict: Dictionary with all values properly escaped for SQL
72
+
73
+ Example:
74
+ >>> escape_dict({'name': "O'Reilly", 'age': 30})
75
+ {'name': "'O''Reilly'", 'age': '30'}
76
+ """
25
77
  n = {}
26
78
  for k, v in val.items():
27
79
  quoted = escape_item(v, mapping)
@@ -30,6 +82,19 @@ def escape_dict(val, mapping=None):
30
82
 
31
83
 
32
84
  def escape_sequence(val, mapping=None):
85
+ """Escape a sequence (list, tuple, etc.) for SQL VALUES clause.
86
+
87
+ Args:
88
+ val (sequence): Sequence of values to escape
89
+ mapping (dict, optional): Custom encoder mapping
90
+
91
+ Returns:
92
+ str: SQL VALUES clause representation like '(val1, val2, val3)'
93
+
94
+ Example:
95
+ >>> escape_sequence([1, "hello", None])
96
+ "(1, 'hello', NULL)"
97
+ """
33
98
  n = []
34
99
  for item in val:
35
100
  quoted = escape_item(item, mapping)
@@ -38,22 +103,77 @@ def escape_sequence(val, mapping=None):
38
103
 
39
104
 
40
105
  def escape_set(val, mapping=None):
106
+ """Escape a set for SQL representation.
107
+
108
+ Args:
109
+ val (set): Set of values to escape
110
+ mapping (dict, optional): Custom encoder mapping
111
+
112
+ Returns:
113
+ str: Comma-separated escaped values
114
+
115
+ Example:
116
+ >>> escape_set({1, 2, 3})
117
+ "1,2,3"
118
+ """
41
119
  return ','.join([escape_item(x, mapping) for x in val])
42
120
 
43
121
 
44
122
  def escape_bool(value, mapping=None):
123
+ """Escape boolean value for SQL.
124
+
125
+ Args:
126
+ value (bool): Boolean value to escape
127
+ mapping: Unused, for interface compatibility
128
+
129
+ Returns:
130
+ str: "1" for True, "0" for False
131
+
132
+ Example:
133
+ >>> escape_bool(True)
134
+ "1"
135
+ >>> escape_bool(False)
136
+ "0"
137
+ """
45
138
  return str(int(value))
46
139
 
47
140
 
48
141
  def escape_object(value, mapping=None):
142
+ """Generic object escaper using string conversion.
143
+
144
+ Args:
145
+ value: Object to escape
146
+ mapping: Unused, for interface compatibility
147
+
148
+ Returns:
149
+ str: String representation of the object
150
+ """
49
151
  return str(value)
50
152
 
51
153
 
52
154
  def escape_int(value, mapping=None):
155
+ """Escape integer value for SQL.
156
+
157
+ Args:
158
+ value (int): Integer to escape
159
+ mapping: Unused, for interface compatibility
160
+
161
+ Returns:
162
+ str: String representation of the integer
163
+ """
53
164
  return str(value)
54
165
 
55
166
 
56
167
  def escape_float(value, mapping=None):
168
+ """Escape float value for SQL with precision control.
169
+
170
+ Args:
171
+ value (float): Float to escape
172
+ mapping: Unused, for interface compatibility
173
+
174
+ Returns:
175
+ str: String representation with up to 15 significant digits
176
+ """
57
177
  return '%.15g' % value
58
178
 
59
179
 
@@ -63,9 +183,21 @@ _escape_table[ord("\\")] = "\\\\"
63
183
 
64
184
 
65
185
  def _escape_unicode(value, mapping=None):
66
- """escapes *value* with adding single quote.
186
+ """Escape Unicode string by replacing special characters.
187
+
188
+ This function escapes single quotes and backslashes in Unicode strings
189
+ to prevent SQL injection attacks.
190
+
191
+ Args:
192
+ value (str): Unicode string to escape
193
+ mapping: Unused, for interface compatibility
194
+
195
+ Returns:
196
+ str: Escaped Unicode string (without surrounding quotes)
67
197
 
68
- Value should be unicode
198
+ Note:
199
+ This function does not add surrounding quotes. Use escape_unicode()
200
+ for complete string escaping with quotes.
69
201
  """
70
202
  return value.translate(_escape_table)
71
203
 
@@ -80,22 +212,80 @@ _escape_bytes_table = _escape_table + [chr(i) for i in range(0xdc80, 0xdd00)]
80
212
 
81
213
 
82
214
  def escape_bytes(value, mapping=None):
215
+ """Escape bytes value for SQL with proper encoding handling.
216
+
217
+ Args:
218
+ value (bytes): Bytes to escape
219
+ mapping: Unused, for interface compatibility
220
+
221
+ Returns:
222
+ str: Escaped bytes as quoted SQL string
223
+
224
+ Example:
225
+ >>> escape_bytes(b"hello'world")
226
+ "'hello''world'"
227
+ """
83
228
  return "'%s'" % value.decode('latin1').translate(_escape_bytes_table)
84
229
 
85
230
 
86
231
  def escape_unicode(value, mapping=None):
232
+ """Escape Unicode string for SQL with surrounding quotes.
233
+
234
+ Args:
235
+ value (str): Unicode string to escape
236
+ mapping: Unused, for interface compatibility
237
+
238
+ Returns:
239
+ str: Properly escaped and quoted SQL string
240
+
241
+ Example:
242
+ >>> escape_unicode("O'Reilly")
243
+ "'O''Reilly'"
244
+ """
87
245
  return u"'%s'" % _escape_unicode(value)
88
246
 
89
247
 
90
248
  def escape_str(value, mapping=None):
249
+ """Escape string value for SQL.
250
+
251
+ Args:
252
+ value: Value to convert to string and escape
253
+ mapping (dict, optional): Custom encoder mapping
254
+
255
+ Returns:
256
+ str: Escaped and quoted SQL string
257
+ """
91
258
  return "'%s'" % escape_string(str(value), mapping)
92
259
 
93
260
 
94
261
  def escape_None(value, mapping=None):
262
+ """Escape None value for SQL.
263
+
264
+ Args:
265
+ value: None value (ignored)
266
+ mapping: Unused, for interface compatibility
267
+
268
+ Returns:
269
+ str: SQL NULL literal
270
+ """
95
271
  return 'NULL'
96
272
 
97
273
 
98
274
  def escape_timedelta(obj, mapping=None):
275
+ """Escape timedelta object for SQL TIME format.
276
+
277
+ Args:
278
+ obj (datetime.timedelta): Timedelta to escape
279
+ mapping: Unused, for interface compatibility
280
+
281
+ Returns:
282
+ str: SQL TIME format string like 'HH:MM:SS' or 'HH:MM:SS.microseconds'
283
+
284
+ Example:
285
+ >>> td = datetime.timedelta(hours=2, minutes=30, seconds=45, microseconds=123456)
286
+ >>> escape_timedelta(td)
287
+ "'02:30:45.123456'"
288
+ """
99
289
  seconds = int(obj.seconds) % 60
100
290
  minutes = int(obj.seconds // 60) % 60
101
291
  hours = int(obj.seconds // 3600) % 24 + int(obj.days) * 24
@@ -107,23 +297,69 @@ def escape_timedelta(obj, mapping=None):
107
297
 
108
298
 
109
299
  def escape_time(obj, mapping=None):
300
+ """Escape time object for SQL.
301
+
302
+ Args:
303
+ obj (datetime.time): Time to escape
304
+ mapping: Unused, for interface compatibility
305
+
306
+ Returns:
307
+ str: SQL time string in ISO format with microseconds
308
+
309
+ Example:
310
+ >>> t = datetime.time(14, 30, 45, 123456)
311
+ >>> escape_time(t)
312
+ "'14:30:45.123456'"
313
+ """
110
314
  return "'{}'".format(obj.isoformat(timespec='microseconds'))
111
315
 
112
316
 
113
317
  def escape_datetime(obj, mapping=None):
318
+ """Escape datetime object for SQL DATETIME format.
319
+
320
+ Args:
321
+ obj (datetime.datetime): Datetime to escape
322
+ mapping: Unused, for interface compatibility
323
+
324
+ Returns:
325
+ str: SQL datetime string in ISO format with space separator and microseconds
326
+
327
+ Example:
328
+ >>> dt = datetime.datetime(2023, 12, 25, 14, 30, 45, 123456)
329
+ >>> escape_datetime(dt)
330
+ "'2023-12-25 14:30:45.123456'"
331
+ """
114
332
  return "'{}'".format(obj.isoformat(sep=' ', timespec='microseconds'))
115
- # if obj.microsecond:
116
- # fmt = "'{0.year:04}-{0.month:02}-{0.day:02} {0.hour:02}:{0.minute:02}:{0.second:02}.{0.microsecond:06}'"
117
- # else:
118
- # fmt = "'{0.year:04}-{0.month:02}-{0.day:02} {0.hour:02}:{0.minute:02}:{0.second:02}'"
119
- # return fmt.format(obj)
120
333
 
121
334
 
122
335
  def escape_date(obj, mapping=None):
336
+ """Escape date object for SQL DATE format.
337
+
338
+ Args:
339
+ obj (datetime.date): Date to escape
340
+ mapping: Unused, for interface compatibility
341
+
342
+ Returns:
343
+ str: SQL date string in ISO format
344
+
345
+ Example:
346
+ >>> d = datetime.date(2023, 12, 25)
347
+ >>> escape_date(d)
348
+ "'2023-12-25'"
349
+ """
123
350
  return "'{}'".format(obj.isoformat())
124
351
 
125
352
 
126
353
  def escape_struct_time(obj, mapping=None):
354
+ """Escape struct_time object for SQL by converting to datetime.
355
+
356
+ Args:
357
+ obj (time.struct_time): Struct time to escape
358
+ mapping: Unused, for interface compatibility
359
+
360
+ Returns:
361
+ str: SQL datetime string converted from struct_time
362
+ """
127
363
  return escape_datetime(datetime.datetime(*obj[:6]))
128
364
 
129
365
 
@@ -136,13 +372,25 @@ def _convert_second_fraction(s):
136
372
 
137
373
 
138
374
  def convert_datetime(obj):
139
- """Returns a DATETIME or TIMESTAMP column value as a datetime object:
375
+ """Convert SQL DATETIME or TIMESTAMP string to datetime object.
376
+
377
+ Parses a SQL datetime string and returns a corresponding Python datetime object.
378
+ Handles both string and bytes input.
140
379
 
141
- >>> datetime_or_None('2007-02-25 23:06:20')
142
- datetime.datetime(2007, 2, 25, 23, 6, 20)
380
+ Args:
381
+ obj (str or bytes): SQL datetime string in format 'YYYY-MM-DD HH:MM:SS'
143
382
 
144
- Illegal values are raise DataError
383
+ Returns:
384
+ datetime.datetime: Parsed datetime object
145
385
 
386
+ Raises:
387
+ DataError: If the datetime string format is invalid
388
+
389
+ Examples:
390
+ >>> convert_datetime('2007-02-25 23:06:20')
391
+ datetime.datetime(2007, 2, 25, 23, 6, 20)
392
+ >>> convert_datetime(b'2023-12-25 14:30:45')
393
+ datetime.datetime(2023, 12, 25, 14, 30, 45)
146
394
  """
147
395
  if isinstance(obj, (bytes, bytearray)):
148
396
  obj = obj.decode('ascii')
@@ -158,21 +406,32 @@ TIMEDELTA_RE = re.compile(r"(-)?(\d{1,3}):(\d{1,2}):(\d{1,2})(?:.(\d{1,6}))?")
158
406
 
159
407
 
160
408
  def convert_timedelta(obj):
161
- """Returns a TIME column as a timedelta object:
409
+ """Convert SQL TIME string to timedelta object.
410
+
411
+ Parses a SQL TIME string (which can represent time intervals) and returns
412
+ a corresponding Python timedelta object. Supports negative intervals.
162
413
 
163
- >>> timedelta_or_None('25:06:17')
164
- datetime.timedelta(1, 3977)
165
- >>> timedelta_or_None('-25:06:17')
166
- datetime.timedelta(-2, 83177)
414
+ Args:
415
+ obj (str or bytes): SQL TIME string in format '[-]HH:MM:SS[.microseconds]' (hours may have one to three digits; a leading '+' is not part of the pattern)
167
416
 
168
- Illegal values are returned as None:
417
+ Returns:
418
+ datetime.timedelta: Parsed timedelta object
419
+ str: Original string if parsing fails (for compatibility)
169
420
 
170
- >>> timedelta_or_None('random crap') is None
171
- True
421
+ Raises:
422
+ DataError: If the time string format is invalid
172
423
 
173
- Note that MySQL always returns TIME columns as (+|-)HH:MM:SS, but
174
- can accept values as (+|-)DD HH:MM:SS. The latter format will not
175
- be parsed correctly by this function.
424
+ Examples:
425
+ >>> convert_timedelta('25:06:17')
426
+ datetime.timedelta(days=1, seconds=3977)
427
+ >>> convert_timedelta('-25:06:17')
428
+ datetime.timedelta(days=-2, seconds=82423)
429
+ >>> convert_timedelta('12:30:45.123456')
430
+ datetime.timedelta(seconds=45045, microseconds=123456)
431
+
432
+ Note:
433
+ This function expects TIME format as HH:MM:SS, not DD HH:MM:SS.
434
+ Negative times are supported with leading minus sign.
176
435
  """
177
436
  if isinstance(obj, (bytes, bytearray)):
178
437
  obj = obj.decode('ascii')
@@ -199,13 +458,23 @@ def convert_timedelta(obj):
199
458
 
200
459
 
201
460
  def convert_time(obj):
202
- """Returns a TIME column as a time object:
461
+ """Convert SQL TIME string to time object.
462
+
463
+ Parses a SQL TIME string and returns a corresponding Python time object.
464
+ Falls back to timedelta conversion for time intervals.
203
465
 
204
- >>> time_or_None('15:06:17')
205
- datetime.time(15, 6, 17)
466
+ Args:
467
+ obj (str or bytes): SQL TIME string in format 'HH:MM:SS'
206
468
 
207
- Illegal values are returned DataError:
469
+ Returns:
470
+ datetime.time: Parsed time object for regular times
471
+ datetime.timedelta: Parsed timedelta for time intervals
208
472
 
473
+ Examples:
474
+ >>> convert_time('15:06:17')
475
+ datetime.time(15, 6, 17)
476
+ >>> convert_time('25:06:17') # Falls back to timedelta
477
+ datetime.timedelta(days=1, seconds=3977)
209
478
  """
210
479
  if isinstance(obj, (bytes, bytearray)):
211
480
  obj = obj.decode('ascii')
@@ -218,18 +487,24 @@ def convert_time(obj):
218
487
 
219
488
 
220
489
  def convert_date(obj):
221
- """Returns a DATE column as a date object:
490
+ """Convert SQL DATE string to date object.
491
+
492
+ Parses a SQL DATE string and returns a corresponding Python date object.
222
493
 
223
- >>> date_or_None('2007-02-26')
224
- datetime.date(2007, 2, 26)
494
+ Args:
495
+ obj (str or bytes): SQL DATE string in format 'YYYY-MM-DD'
225
496
 
226
- Illegal values are returned as None:
497
+ Returns:
498
+ datetime.date: Parsed date object
227
499
 
228
- >>> date_or_None('2007-02-31') is None
229
- True
230
- >>> date_or_None('0000-00-00') is None
231
- True
500
+ Raises:
501
+ DataError: If the date string format is invalid
232
502
 
503
+ Examples:
504
+ >>> convert_date('2007-02-26')
505
+ datetime.date(2007, 2, 26)
506
+ >>> convert_date(b'2023-12-25')
507
+ datetime.date(2023, 12, 25)
233
508
  """
234
509
  if isinstance(obj, (bytes, bytearray)):
235
510
  obj = obj.decode('ascii')
@@ -241,18 +516,61 @@ def convert_date(obj):
241
516
 
242
517
 
243
518
  def convert_set(s):
519
+ """Convert comma-separated string to Python set.
520
+
521
+ Args:
522
+ s (str or bytes): Comma-separated values
523
+
524
+ Returns:
525
+ set: Set of values split on commas (bytes elements for bytes input, otherwise str)
526
+
527
+ Example:
528
+ >>> convert_set("apple,banana,cherry")
529
+ {'apple', 'banana', 'cherry'}
530
+ >>> convert_set(b"1,2,3")
531
+ {b'1', b'2', b'3'}
532
+ """
244
533
  if isinstance(s, (bytes, bytearray)):
245
534
  return set(s.split(b","))
246
535
  return set(s.split(","))
247
536
 
248
537
 
249
538
  def convert_characters(connection, data):
539
+ """Convert character data based on connection encoding settings.
540
+
541
+ Args:
542
+ connection: Database connection object
543
+ data (bytes): Raw character data from database
544
+
545
+ Returns:
546
+ str or bytes: Decoded string if unicode enabled, otherwise raw bytes
547
+ """
250
548
  if connection.use_unicode:
251
549
  data = data.decode("utf8")
252
550
  return data
253
551
 
254
552
 
255
553
  def convert_column_data(column_type, column_data):
554
+ """Convert database column data to appropriate Python type.
555
+
556
+ This function automatically converts database column values to the most
557
+ appropriate Python type based on the column's SQL type.
558
+
559
+ Args:
560
+ column_type (str): SQL column type name (e.g., 'time', 'date', 'datetime')
561
+ column_data: Raw column value from database
562
+
563
+ Returns:
564
+ Converted Python object appropriate for the column type, or original data if no conversion needed
565
+
566
+ Example:
567
+ >>> convert_column_data('date', '2023-12-25')
568
+ datetime.date(2023, 12, 25)
569
+ >>> convert_column_data('time', '14:30:45')
570
+ datetime.time(14, 30, 45)
571
+ >>> convert_column_data('varchar', 'hello')
572
+ 'hello'
573
+ """
256
574
  data = column_data
257
575
 
258
576
  # Null