clickhouse-driver 0.2.1__cp39-cp39-win_amd64.whl → 0.2.8__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. clickhouse_driver/__init__.py +9 -9
  2. clickhouse_driver/block.py +227 -195
  3. clickhouse_driver/blockstreamprofileinfo.py +22 -22
  4. clickhouse_driver/bufferedreader.cp39-win_amd64.pyd +0 -0
  5. clickhouse_driver/bufferedwriter.cp39-win_amd64.pyd +0 -0
  6. clickhouse_driver/client.py +896 -666
  7. clickhouse_driver/clientinfo.py +119 -80
  8. clickhouse_driver/columns/arraycolumn.py +161 -150
  9. clickhouse_driver/columns/base.py +221 -147
  10. clickhouse_driver/columns/boolcolumn.py +7 -0
  11. clickhouse_driver/columns/datecolumn.py +108 -49
  12. clickhouse_driver/columns/datetimecolumn.py +202 -207
  13. clickhouse_driver/columns/decimalcolumn.py +116 -118
  14. clickhouse_driver/columns/enumcolumn.py +119 -119
  15. clickhouse_driver/columns/exceptions.py +12 -12
  16. clickhouse_driver/columns/floatcolumn.py +34 -34
  17. clickhouse_driver/columns/intcolumn.py +157 -157
  18. clickhouse_driver/columns/intervalcolumn.py +33 -33
  19. clickhouse_driver/columns/ipcolumn.py +118 -118
  20. clickhouse_driver/columns/jsoncolumn.py +37 -0
  21. clickhouse_driver/columns/largeint.cp39-win_amd64.pyd +0 -0
  22. clickhouse_driver/columns/lowcardinalitycolumn.py +142 -123
  23. clickhouse_driver/columns/mapcolumn.py +73 -58
  24. clickhouse_driver/columns/nestedcolumn.py +10 -0
  25. clickhouse_driver/columns/nothingcolumn.py +13 -13
  26. clickhouse_driver/columns/nullablecolumn.py +7 -7
  27. clickhouse_driver/columns/nullcolumn.py +15 -15
  28. clickhouse_driver/columns/numpy/base.py +47 -14
  29. clickhouse_driver/columns/numpy/boolcolumn.py +8 -0
  30. clickhouse_driver/columns/numpy/datecolumn.py +19 -12
  31. clickhouse_driver/columns/numpy/datetimecolumn.py +143 -145
  32. clickhouse_driver/columns/numpy/floatcolumn.py +24 -13
  33. clickhouse_driver/columns/numpy/intcolumn.py +43 -43
  34. clickhouse_driver/columns/numpy/lowcardinalitycolumn.py +96 -83
  35. clickhouse_driver/columns/numpy/service.py +58 -80
  36. clickhouse_driver/columns/numpy/stringcolumn.py +78 -76
  37. clickhouse_driver/columns/numpy/tuplecolumn.py +37 -0
  38. clickhouse_driver/columns/service.py +185 -131
  39. clickhouse_driver/columns/simpleaggregatefunctioncolumn.py +7 -7
  40. clickhouse_driver/columns/stringcolumn.py +73 -73
  41. clickhouse_driver/columns/tuplecolumn.py +63 -65
  42. clickhouse_driver/columns/util.py +60 -0
  43. clickhouse_driver/columns/uuidcolumn.py +64 -64
  44. clickhouse_driver/compression/__init__.py +28 -28
  45. clickhouse_driver/compression/base.py +87 -52
  46. clickhouse_driver/compression/lz4.py +21 -55
  47. clickhouse_driver/compression/lz4hc.py +9 -9
  48. clickhouse_driver/compression/zstd.py +20 -51
  49. clickhouse_driver/connection.py +784 -632
  50. clickhouse_driver/context.py +36 -36
  51. clickhouse_driver/dbapi/__init__.py +62 -62
  52. clickhouse_driver/dbapi/connection.py +99 -96
  53. clickhouse_driver/dbapi/cursor.py +370 -368
  54. clickhouse_driver/dbapi/errors.py +40 -40
  55. clickhouse_driver/dbapi/extras.py +73 -0
  56. clickhouse_driver/defines.py +55 -42
  57. clickhouse_driver/errors.py +453 -446
  58. clickhouse_driver/log.py +48 -44
  59. clickhouse_driver/numpy/block.py +8 -8
  60. clickhouse_driver/numpy/helpers.py +25 -25
  61. clickhouse_driver/numpy/result.py +123 -123
  62. clickhouse_driver/opentelemetry.py +43 -0
  63. clickhouse_driver/progress.py +38 -32
  64. clickhouse_driver/protocol.py +114 -105
  65. clickhouse_driver/queryprocessingstage.py +8 -8
  66. clickhouse_driver/reader.py +69 -69
  67. clickhouse_driver/readhelpers.py +26 -26
  68. clickhouse_driver/result.py +144 -144
  69. clickhouse_driver/settings/available.py +405 -405
  70. clickhouse_driver/settings/types.py +50 -50
  71. clickhouse_driver/settings/writer.py +34 -29
  72. clickhouse_driver/streams/compressed.py +88 -88
  73. clickhouse_driver/streams/native.py +102 -90
  74. clickhouse_driver/util/compat.py +39 -0
  75. clickhouse_driver/util/escape.py +94 -55
  76. clickhouse_driver/util/helpers.py +57 -57
  77. clickhouse_driver/varint.cp39-win_amd64.pyd +0 -0
  78. clickhouse_driver/writer.py +67 -67
  79. {clickhouse_driver-0.2.1.dist-info → clickhouse_driver-0.2.8.dist-info}/LICENSE +21 -21
  80. clickhouse_driver-0.2.8.dist-info/METADATA +201 -0
  81. clickhouse_driver-0.2.8.dist-info/RECORD +89 -0
  82. {clickhouse_driver-0.2.1.dist-info → clickhouse_driver-0.2.8.dist-info}/WHEEL +1 -1
  83. clickhouse_driver-0.2.1.dist-info/METADATA +0 -24
  84. clickhouse_driver-0.2.1.dist-info/RECORD +0 -80
  85. {clickhouse_driver-0.2.1.dist-info → clickhouse_driver-0.2.8.dist-info}/top_level.txt +0 -0
@@ -1,147 +1,221 @@
1
- from struct import Struct, error as struct_error
2
-
3
- from . import exceptions
4
-
5
-
6
- class Column(object):
7
- ch_type = None
8
- py_types = None
9
-
10
- check_item = None
11
- after_read_items = None
12
- before_write_items = None
13
-
14
- types_check_enabled = False
15
-
16
- null_value = 0
17
-
18
- def __init__(self, types_check=False, **kwargs):
19
- self.nullable = False
20
- self.types_check_enabled = types_check
21
- super(Column, self).__init__()
22
-
23
- def make_null_struct(self, n_items):
24
- return Struct('<{}B'.format(n_items))
25
-
26
- def _read_nulls_map(self, n_items, buf):
27
- s = self.make_null_struct(n_items)
28
- return s.unpack(buf.read(s.size))
29
-
30
- def _write_nulls_map(self, items, buf):
31
- s = self.make_null_struct(len(items))
32
- items = [x is None for x in items]
33
- buf.write(s.pack(*items))
34
-
35
- def check_item_type(self, value):
36
- if not isinstance(value, self.py_types):
37
- raise exceptions.ColumnTypeMismatchException(value)
38
-
39
- def prepare_items(self, items):
40
- nullable = self.nullable
41
- null_value = self.null_value
42
-
43
- check_item = self.check_item
44
- if self.types_check_enabled:
45
- check_item_type = self.check_item_type
46
- else:
47
- check_item_type = False
48
-
49
- if (not self.nullable and not check_item_type and
50
- not check_item and not self.before_write_items):
51
- return items
52
-
53
- nulls_map = [False] * len(items) if self.nullable else None
54
- for i, x in enumerate(items):
55
- if x is None and nullable:
56
- nulls_map[i] = True
57
- x = null_value
58
-
59
- else:
60
- if check_item_type:
61
- check_item_type(x)
62
-
63
- if check_item:
64
- check_item(x)
65
-
66
- items[i] = x
67
-
68
- if self.before_write_items:
69
- self.before_write_items(items, nulls_map=nulls_map)
70
-
71
- return items
72
-
73
- def write_data(self, items, buf):
74
- if self.nullable:
75
- self._write_nulls_map(items, buf)
76
-
77
- self._write_data(items, buf)
78
-
79
- def _write_data(self, items, buf):
80
- prepared = self.prepare_items(items)
81
- self.write_items(prepared, buf)
82
-
83
- def write_items(self, items, buf):
84
- raise NotImplementedError
85
-
86
- def read_data(self, n_items, buf):
87
- if self.nullable:
88
- nulls_map = self._read_nulls_map(n_items, buf)
89
- else:
90
- nulls_map = None
91
-
92
- return self._read_data(n_items, buf, nulls_map=nulls_map)
93
-
94
- def _read_data(self, n_items, buf, nulls_map=None):
95
- items = self.read_items(n_items, buf)
96
-
97
- if self.after_read_items:
98
- return self.after_read_items(items, nulls_map)
99
- elif nulls_map is not None:
100
- return tuple(
101
- (None if is_null else items[i])
102
- for i, is_null in enumerate(nulls_map)
103
- )
104
- return items
105
-
106
- def read_items(self, n_items, buf):
107
- raise NotImplementedError
108
-
109
- def read_state_prefix(self, buf):
110
- pass
111
-
112
- def write_state_prefix(self, buf):
113
- pass
114
-
115
-
116
- class FormatColumn(Column):
117
- """
118
- Uses struct.pack for bulk items writing.
119
- """
120
-
121
- format = None
122
-
123
- def make_struct(self, n_items):
124
- return Struct('<{}{}'.format(n_items, self.format))
125
-
126
- def write_items(self, items, buf):
127
- s = self.make_struct(len(items))
128
- try:
129
- buf.write(s.pack(*items))
130
-
131
- except struct_error as e:
132
- raise exceptions.StructPackException(e)
133
-
134
- def read_items(self, n_items, buf):
135
- s = self.make_struct(n_items)
136
- return s.unpack(buf.read(s.size))
137
-
138
-
139
- # How to write new column?
140
- # - Check ClickHouse documentation for column
141
- # - Wireshark and tcpdump are your friends.
142
- # - Use `clickhouse-client --compression 0` to see what's going on data
143
- # transmission.
144
- # - Check for similar existing columns and tests.
145
- # - Use `FormatColumn` for columns that use "simple" types under the hood.
146
- # - Some columns have before_write and after_read hooks.
147
- # Use them to convert items in column into "simple" types.
1
+ from struct import Struct, error as struct_error
2
+
3
+ from . import exceptions
4
+ from ..varint import read_varint
5
+
6
+
7
+ class CommonSerialization(object):
8
+ def __init__(self, column):
9
+ self.column = column
10
+ super(CommonSerialization, self).__init__()
11
+
12
+ def read_sparse(self, n_items, buf):
13
+ return n_items
14
+
15
+ def apply_sparse(self, items):
16
+ return items
17
+
18
+
19
+ class SparseSerialization(CommonSerialization):
20
+
21
+ def __init__(self, *args, **kwargs):
22
+ self.sparse_indexes = []
23
+ self.items_total = None
24
+ super(SparseSerialization, self).__init__(*args, **kwargs)
25
+
26
+ def read_sparse(self, n_items, buf):
27
+ sparse_indexes = []
28
+ items_total = 0
29
+ non_default_items = 0
30
+
31
+ END_OF_GRANULE_FLAG = 1 << 62
32
+ end_of_granule = False
33
+
34
+ while not end_of_granule:
35
+ group_size = read_varint(buf)
36
+ end_of_granule = group_size & END_OF_GRANULE_FLAG
37
+ group_size &= ~END_OF_GRANULE_FLAG
38
+
39
+ items_total += group_size + 1
40
+ if not end_of_granule:
41
+ non_default_items += 1
42
+ sparse_indexes.append(items_total)
43
+
44
+ self.sparse_indexes = sparse_indexes
45
+ self.items_total = items_total
46
+
47
+ return non_default_items
48
+
49
+ def apply_sparse(self, items):
50
+ default = self.column.null_value
51
+ if self.column.after_read_items:
52
+ default = self.column.after_read_items([default])[0]
53
+
54
+ rv = [default] * (self.items_total - 1)
55
+ for item_number, i in enumerate(self.sparse_indexes):
56
+ rv[i - 1] = items[item_number]
57
+
58
+ return rv
59
+
60
+
61
+ class Column(object):
62
+ ch_type = None
63
+ py_types = None
64
+
65
+ check_item = None
66
+ after_read_items = None
67
+ before_write_items = None
68
+
69
+ types_check_enabled = False
70
+
71
+ null_value = 0
72
+
73
+ def __init__(self, types_check=False, has_custom_serialization=False,
74
+ **kwargs):
75
+ self.nullable = False
76
+ self.types_check_enabled = types_check
77
+ self.has_custom_serialization = has_custom_serialization
78
+ self.serialization = CommonSerialization(self)
79
+ self.input_null_as_default = False
80
+
81
+ self.context = kwargs['context']
82
+ self.input_null_as_default = self.context.client_settings \
83
+ .get('input_format_null_as_default', False)
84
+
85
+ super(Column, self).__init__()
86
+
87
+ def make_null_struct(self, n_items):
88
+ return Struct('<{}B'.format(n_items))
89
+
90
+ def _read_nulls_map(self, n_items, buf):
91
+ s = self.make_null_struct(n_items)
92
+ return s.unpack(buf.read(s.size))
93
+
94
+ def _write_nulls_map(self, items, buf):
95
+ s = self.make_null_struct(len(items))
96
+ items = [x is None for x in items]
97
+ buf.write(s.pack(*items))
98
+
99
+ def check_item_type(self, value):
100
+ if not isinstance(value, self.py_types):
101
+ raise exceptions.ColumnTypeMismatchException(value)
102
+
103
+ def prepare_items(self, items):
104
+ nullable = self.nullable
105
+ null_value = self.null_value
106
+ null_as_default = self.input_null_as_default
107
+
108
+ check_item = self.check_item
109
+ if self.types_check_enabled:
110
+ check_item_type = self.check_item_type
111
+ else:
112
+ check_item_type = False
113
+
114
+ if (not (self.nullable or null_as_default) and not check_item_type and
115
+ not check_item and not self.before_write_items):
116
+ return items
117
+
118
+ nulls_map = [False] * len(items) if self.nullable else None
119
+ for i, x in enumerate(items):
120
+ if x is None:
121
+ if nullable:
122
+ nulls_map[i] = True
123
+ x = null_value
124
+ elif null_as_default:
125
+ x = null_value
126
+
127
+ else:
128
+ if check_item_type:
129
+ check_item_type(x)
130
+
131
+ if check_item:
132
+ check_item(x)
133
+
134
+ items[i] = x
135
+
136
+ if self.before_write_items:
137
+ self.before_write_items(items, nulls_map=nulls_map)
138
+
139
+ return items
140
+
141
+ def write_data(self, items, buf):
142
+ if self.nullable:
143
+ self._write_nulls_map(items, buf)
144
+
145
+ self._write_data(items, buf)
146
+
147
+ def _write_data(self, items, buf):
148
+ prepared = self.prepare_items(items)
149
+ self.write_items(prepared, buf)
150
+
151
+ def write_items(self, items, buf):
152
+ raise NotImplementedError
153
+
154
+ def read_data(self, n_items, buf):
155
+ n_items = self.serialization.read_sparse(n_items, buf)
156
+
157
+ if self.nullable:
158
+ nulls_map = self._read_nulls_map(n_items, buf)
159
+ else:
160
+ nulls_map = None
161
+
162
+ items = self._read_data(n_items, buf, nulls_map=nulls_map)
163
+ return self.serialization.apply_sparse(items)
164
+
165
+ def _read_data(self, n_items, buf, nulls_map=None):
166
+ items = self.read_items(n_items, buf)
167
+
168
+ if self.after_read_items:
169
+ return self.after_read_items(items, nulls_map)
170
+ elif nulls_map is not None:
171
+ return tuple(
172
+ (None if is_null else items[i])
173
+ for i, is_null in enumerate(nulls_map)
174
+ )
175
+ return items
176
+
177
+ def read_items(self, n_items, buf):
178
+ raise NotImplementedError
179
+
180
+ def read_state_prefix(self, buf):
181
+ if self.has_custom_serialization:
182
+ use_custom_serialization = read_varint(buf)
183
+ if use_custom_serialization:
184
+ self.serialization = SparseSerialization(self)
185
+
186
+ def write_state_prefix(self, buf):
187
+ pass
188
+
189
+
190
+ class FormatColumn(Column):
191
+ """
192
+ Uses struct.pack for bulk items writing.
193
+ """
194
+
195
+ format = None
196
+
197
+ def make_struct(self, n_items):
198
+ return Struct('<{}{}'.format(n_items, self.format))
199
+
200
+ def write_items(self, items, buf):
201
+ s = self.make_struct(len(items))
202
+ try:
203
+ buf.write(s.pack(*items))
204
+
205
+ except struct_error as e:
206
+ raise exceptions.StructPackException(e)
207
+
208
+ def read_items(self, n_items, buf):
209
+ s = self.make_struct(n_items)
210
+ return s.unpack(buf.read(s.size))
211
+
212
+
213
+ # How to write new column?
214
+ # - Check ClickHouse documentation for column
215
+ # - Wireshark and tcpdump are your friends.
216
+ # - Use `clickhouse-client --compression 0` to see what's going on data
217
+ # transmission.
218
+ # - Check for similar existing columns and tests.
219
+ # - Use `FormatColumn` for columns that use "simple" types under the hood.
220
+ # - Some columns have before_write and after_read hooks.
221
+ # Use them to convert items in column into "simple" types.
@@ -0,0 +1,7 @@
1
+ from .base import FormatColumn
2
+
3
+
4
+ class BoolColumn(FormatColumn):
5
+ ch_type = 'Bool'
6
+ py_types = (bool, )
7
+ format = '?'
@@ -1,49 +1,108 @@
1
- from datetime import date, timedelta
2
-
3
- from .base import FormatColumn
4
-
5
-
6
- epoch_start = date(1970, 1, 1)
7
-
8
-
9
- class DateColumn(FormatColumn):
10
- ch_type = 'Date'
11
- py_types = (date, )
12
- format = 'H'
13
-
14
- epoch_start = epoch_start
15
- epoch_end = date(2105, 12, 31)
16
-
17
- date_lut = {x: epoch_start + timedelta(x) for x in range(65535)}
18
- date_lut_reverse = {value: key for key, value in date_lut.items()}
19
-
20
- def before_write_items(self, items, nulls_map=None):
21
- null_value = self.null_value
22
-
23
- date_lut_reverse = self.date_lut_reverse
24
- epoch_start = self.epoch_start
25
- epoch_end = self.epoch_end
26
-
27
- for i, item in enumerate(items):
28
- if nulls_map and nulls_map[i]:
29
- items[i] = null_value
30
- continue
31
-
32
- if type(item) != date:
33
- item = date(item.year, item.month, item.day)
34
-
35
- if item > epoch_end or item < epoch_start:
36
- items[i] = 0
37
- else:
38
- items[i] = date_lut_reverse[item]
39
-
40
- def after_read_items(self, items, nulls_map=None):
41
- date_lut = self.date_lut
42
-
43
- if nulls_map is None:
44
- return tuple(date_lut[item] for item in items)
45
- else:
46
- return tuple(
47
- (None if is_null else date_lut[items[i]])
48
- for i, is_null in enumerate(nulls_map)
49
- )
1
+ from os import getenv
2
+ from datetime import date, timedelta
3
+
4
+ from .base import FormatColumn
5
+
6
+
7
+ epoch_start = date(1970, 1, 1)
8
+ epoch_end = date(2149, 6, 6)
9
+
10
+ epoch_start_date32 = date(1900, 1, 1)
11
+ epoch_end_date32 = date(2299, 12, 31)
12
+
13
+
14
+ class LazyLUT(dict):
15
+ def __init__(self, *args, _factory, **kwargs):
16
+ super().__init__(*args, **kwargs)
17
+ self._default_factory = _factory
18
+
19
+ def __missing__(self, key):
20
+ return self.setdefault(key, self._default_factory(key))
21
+
22
+
23
+ def make_date_lut_range(date_start, date_end):
24
+ return range(
25
+ (date_start - epoch_start).days,
26
+ (date_end - epoch_start).days + 1,
27
+ )
28
+
29
+
30
+ enable_lazy_date_lut = getenv('CLICKHOUSE_DRIVER_LASY_DATE_LUT', False)
31
+ if enable_lazy_date_lut:
32
+ try:
33
+ start, end = enable_lazy_date_lut.split(':')
34
+ start_date = date.fromisoformat(start)
35
+ end_date = date.fromisoformat(end)
36
+
37
+ date_range = make_date_lut_range(start_date, end_date)
38
+ except ValueError:
39
+ date_range = ()
40
+
41
+ # Since we initialize lazy lut with some initially warmed values,
42
+ # we use iterator and not dict comprehension for memory & time optimization
43
+ _date_lut = LazyLUT(
44
+ ((x, epoch_start + timedelta(days=x)) for x in date_range),
45
+ _factory=lambda x: epoch_start + timedelta(days=x),
46
+ )
47
+ _date_lut_reverse = LazyLUT(
48
+ ((value, key) for key, value in _date_lut.items()),
49
+ _factory=lambda x: (x - epoch_start).days,
50
+ )
51
+ else:
52
+ # If lazy lut is not enabled, we fallback to static dict initialization
53
+ # In both cases, we use same lut for both data types,
54
+ # since one encompasses the other and we can avoid duplicating overlap
55
+ date_range = make_date_lut_range(epoch_start_date32, epoch_end_date32)
56
+ _date_lut = {x: epoch_start + timedelta(days=x) for x in date_range}
57
+ _date_lut_reverse = {value: key for key, value in _date_lut.items()}
58
+
59
+
60
+ class DateColumn(FormatColumn):
61
+ ch_type = 'Date'
62
+ py_types = (date, )
63
+ format = 'H'
64
+
65
+ min_value = epoch_start
66
+ max_value = epoch_end
67
+
68
+ date_lut = _date_lut
69
+ date_lut_reverse = _date_lut_reverse
70
+
71
+ def before_write_items(self, items, nulls_map=None):
72
+ null_value = self.null_value
73
+
74
+ date_lut_reverse = self.date_lut_reverse
75
+ min_value = self.min_value
76
+ max_value = self.max_value
77
+
78
+ for i, item in enumerate(items):
79
+ if nulls_map and nulls_map[i]:
80
+ items[i] = null_value
81
+ continue
82
+
83
+ if type(item) != date:
84
+ item = date(item.year, item.month, item.day)
85
+
86
+ if min_value <= item <= max_value:
87
+ items[i] = date_lut_reverse[item]
88
+ else:
89
+ items[i] = 0
90
+
91
+ def after_read_items(self, items, nulls_map=None):
92
+ date_lut = self.date_lut
93
+
94
+ if nulls_map is None:
95
+ return tuple(date_lut[item] for item in items)
96
+ else:
97
+ return tuple(
98
+ (None if is_null else date_lut[items[i]])
99
+ for i, is_null in enumerate(nulls_map)
100
+ )
101
+
102
+
103
+ class Date32Column(DateColumn):
104
+ ch_type = 'Date32'
105
+ format = 'i'
106
+
107
+ min_value = epoch_start_date32
108
+ max_value = epoch_end_date32