clickhouse-driver 0.2.8 (cp37-cp37m-macosx_10_9_x86_64.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. clickhouse_driver/__init__.py +9 -0
  2. clickhouse_driver/block.py +227 -0
  3. clickhouse_driver/blockstreamprofileinfo.py +22 -0
  4. clickhouse_driver/bufferedreader.cpython-37m-darwin.so +0 -0
  5. clickhouse_driver/bufferedwriter.cpython-37m-darwin.so +0 -0
  6. clickhouse_driver/client.py +896 -0
  7. clickhouse_driver/clientinfo.py +119 -0
  8. clickhouse_driver/columns/__init__.py +0 -0
  9. clickhouse_driver/columns/arraycolumn.py +161 -0
  10. clickhouse_driver/columns/base.py +221 -0
  11. clickhouse_driver/columns/boolcolumn.py +7 -0
  12. clickhouse_driver/columns/datecolumn.py +108 -0
  13. clickhouse_driver/columns/datetimecolumn.py +202 -0
  14. clickhouse_driver/columns/decimalcolumn.py +116 -0
  15. clickhouse_driver/columns/enumcolumn.py +119 -0
  16. clickhouse_driver/columns/exceptions.py +12 -0
  17. clickhouse_driver/columns/floatcolumn.py +34 -0
  18. clickhouse_driver/columns/intcolumn.py +157 -0
  19. clickhouse_driver/columns/intervalcolumn.py +33 -0
  20. clickhouse_driver/columns/ipcolumn.py +118 -0
  21. clickhouse_driver/columns/jsoncolumn.py +37 -0
  22. clickhouse_driver/columns/largeint.cpython-37m-darwin.so +0 -0
  23. clickhouse_driver/columns/lowcardinalitycolumn.py +142 -0
  24. clickhouse_driver/columns/mapcolumn.py +73 -0
  25. clickhouse_driver/columns/nestedcolumn.py +10 -0
  26. clickhouse_driver/columns/nothingcolumn.py +13 -0
  27. clickhouse_driver/columns/nullablecolumn.py +7 -0
  28. clickhouse_driver/columns/nullcolumn.py +15 -0
  29. clickhouse_driver/columns/numpy/__init__.py +0 -0
  30. clickhouse_driver/columns/numpy/base.py +47 -0
  31. clickhouse_driver/columns/numpy/boolcolumn.py +8 -0
  32. clickhouse_driver/columns/numpy/datecolumn.py +19 -0
  33. clickhouse_driver/columns/numpy/datetimecolumn.py +143 -0
  34. clickhouse_driver/columns/numpy/floatcolumn.py +24 -0
  35. clickhouse_driver/columns/numpy/intcolumn.py +43 -0
  36. clickhouse_driver/columns/numpy/lowcardinalitycolumn.py +96 -0
  37. clickhouse_driver/columns/numpy/service.py +58 -0
  38. clickhouse_driver/columns/numpy/stringcolumn.py +78 -0
  39. clickhouse_driver/columns/numpy/tuplecolumn.py +37 -0
  40. clickhouse_driver/columns/service.py +185 -0
  41. clickhouse_driver/columns/simpleaggregatefunctioncolumn.py +7 -0
  42. clickhouse_driver/columns/stringcolumn.py +73 -0
  43. clickhouse_driver/columns/tuplecolumn.py +63 -0
  44. clickhouse_driver/columns/util.py +60 -0
  45. clickhouse_driver/columns/uuidcolumn.py +64 -0
  46. clickhouse_driver/compression/__init__.py +28 -0
  47. clickhouse_driver/compression/base.py +87 -0
  48. clickhouse_driver/compression/lz4.py +21 -0
  49. clickhouse_driver/compression/lz4hc.py +9 -0
  50. clickhouse_driver/compression/zstd.py +20 -0
  51. clickhouse_driver/connection.py +784 -0
  52. clickhouse_driver/context.py +36 -0
  53. clickhouse_driver/dbapi/__init__.py +62 -0
  54. clickhouse_driver/dbapi/connection.py +99 -0
  55. clickhouse_driver/dbapi/cursor.py +370 -0
  56. clickhouse_driver/dbapi/errors.py +40 -0
  57. clickhouse_driver/dbapi/extras.py +73 -0
  58. clickhouse_driver/defines.py +55 -0
  59. clickhouse_driver/errors.py +453 -0
  60. clickhouse_driver/log.py +48 -0
  61. clickhouse_driver/numpy/__init__.py +0 -0
  62. clickhouse_driver/numpy/block.py +8 -0
  63. clickhouse_driver/numpy/helpers.py +25 -0
  64. clickhouse_driver/numpy/result.py +123 -0
  65. clickhouse_driver/opentelemetry.py +43 -0
  66. clickhouse_driver/progress.py +38 -0
  67. clickhouse_driver/protocol.py +114 -0
  68. clickhouse_driver/queryprocessingstage.py +8 -0
  69. clickhouse_driver/reader.py +69 -0
  70. clickhouse_driver/readhelpers.py +26 -0
  71. clickhouse_driver/result.py +144 -0
  72. clickhouse_driver/settings/__init__.py +0 -0
  73. clickhouse_driver/settings/available.py +405 -0
  74. clickhouse_driver/settings/types.py +50 -0
  75. clickhouse_driver/settings/writer.py +34 -0
  76. clickhouse_driver/streams/__init__.py +0 -0
  77. clickhouse_driver/streams/compressed.py +88 -0
  78. clickhouse_driver/streams/native.py +102 -0
  79. clickhouse_driver/util/__init__.py +0 -0
  80. clickhouse_driver/util/compat.py +39 -0
  81. clickhouse_driver/util/escape.py +94 -0
  82. clickhouse_driver/util/helpers.py +57 -0
  83. clickhouse_driver/varint.cpython-37m-darwin.so +0 -0
  84. clickhouse_driver/writer.py +67 -0
  85. clickhouse_driver-0.2.8.dist-info/LICENSE +21 -0
  86. clickhouse_driver-0.2.8.dist-info/METADATA +201 -0
  87. clickhouse_driver-0.2.8.dist-info/RECORD +89 -0
  88. clickhouse_driver-0.2.8.dist-info/WHEEL +5 -0
  89. clickhouse_driver-0.2.8.dist-info/top_level.txt +1 -0
clickhouse_driver/clientinfo.py
@@ -0,0 +1,119 @@
+import socket
+import getpass
+from time import time
+
+from . import defines
+from . import errors
+from .opentelemetry import OpenTelemetryTraceContext
+from .varint import write_varint
+from .writer import write_binary_str, write_binary_uint8, \
+    write_binary_uint64, write_binary_uint128
+
+
+class ClientInfo(object):
+    class Interface(object):
+        TCP = 1
+        HTTP = 2
+
+    class QueryKind(object):
+        # Uninitialized object.
+        NO_QUERY = 0
+
+        INITIAL_QUERY = 1
+
+        # Query that was initiated by another query for distributed query
+        # execution.
+        SECONDARY_QUERY = 2
+
+    client_version_major = defines.CLIENT_VERSION_MAJOR
+    client_version_minor = defines.CLIENT_VERSION_MINOR
+    client_version_patch = defines.CLIENT_VERSION_PATCH
+    interface = Interface.TCP
+
+    initial_user = ''
+    initial_query_id = ''
+    initial_address = '0.0.0.0:0'
+
+    def __init__(self, client_name, context, client_revision):
+        self.query_kind = ClientInfo.QueryKind.NO_QUERY
+
+        try:
+            self.os_user = getpass.getuser()
+        except KeyError:
+            self.os_user = ''
+        self.client_hostname = socket.gethostname()
+        self.client_name = client_name
+        self.client_revision = client_revision
+
+        self.client_trace_context = OpenTelemetryTraceContext(
+            context.client_settings['opentelemetry_traceparent'],
+            context.client_settings['opentelemetry_tracestate']
+        )
+
+        self.quota_key = context.client_settings['quota_key']
+        self.distributed_depth = 0
+        self.initial_query_start_time_microseconds = int(time() * 1000000)
+
+        super(ClientInfo, self).__init__()
+
+    @property
+    def empty(self):
+        return self.query_kind == ClientInfo.QueryKind.NO_QUERY
+
+    def write(self, server_revision, fout):
+        revision = server_revision
+        if server_revision < defines.DBMS_MIN_REVISION_WITH_CLIENT_INFO:
+            raise errors.LogicalError('Method ClientInfo.write is called '
+                                      'for unsupported server revision')
+
+        write_binary_uint8(self.query_kind, fout)
+        if self.empty:
+            return
+
+        write_binary_str(self.initial_user, fout)
+        write_binary_str(self.initial_query_id, fout)
+        write_binary_str(self.initial_address, fout)
+
+        if (
+            revision >=
+            defines.DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME
+        ):
+            write_binary_uint64(
+                self.initial_query_start_time_microseconds, fout
+            )
+
+        write_binary_uint8(self.interface, fout)
+
+        write_binary_str(self.os_user, fout)
+        write_binary_str(self.client_hostname, fout)
+        write_binary_str(self.client_name, fout)
+        write_varint(self.client_version_major, fout)
+        write_varint(self.client_version_minor, fout)
+        write_varint(self.client_revision, fout)
+
+        if revision >= defines.DBMS_MIN_REVISION_WITH_QUOTA_KEY_IN_CLIENT_INFO:
+            write_binary_str(self.quota_key, fout)
+
+        if revision >= \
+                defines.DBMS_MIN_PROTOCOL_VERSION_WITH_DISTRIBUTED_DEPTH:
+            write_varint(self.distributed_depth, fout)
+
+        if revision >= defines.DBMS_MIN_REVISION_WITH_VERSION_PATCH:
+            write_varint(self.client_version_patch, fout)
+
+        if revision >= defines.DBMS_MIN_REVISION_WITH_OPENTELEMETRY:
+            if self.client_trace_context.trace_id is not None:
+                # Have OpenTelemetry header.
+                write_binary_uint8(1, fout)
+                write_binary_uint128(self.client_trace_context.trace_id, fout)
+                write_binary_uint64(self.client_trace_context.span_id, fout)
+                write_binary_str(self.client_trace_context.tracestate, fout)
+                write_binary_uint8(self.client_trace_context.trace_flags,
+                                   fout)
+            else:
+                # Don't have OpenTelemetry header.
+                write_binary_uint8(0, fout)
+
+        if revision >= defines.DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS:
+            write_varint(0, fout)  # collaborate_with_initiator
+            write_varint(0, fout)  # count_participating_replicas
+            write_varint(0, fout)  # number_of_current_replica
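A note on the encoding used above: write_varint (shipped in this wheel as the compiled extension clickhouse_driver/varint.cpython-37m-darwin.so) emits the unsigned LEB128 variable-length integers used throughout the ClickHouse native protocol. A minimal pure-Python sketch of that encoding, for illustration only (the helper name and the BytesIO usage are ours, not the package's):

    from io import BytesIO

    def write_varint_sketch(number, buf):
        # Emit 7 bits per byte, least-significant group first;
        # the high bit of each byte flags a continuation.
        while True:
            towrite = number & 0x7f
            number >>= 7
            if number:
                buf.write(bytes([towrite | 0x80]))
            else:
                buf.write(bytes([towrite]))
                break

    buf = BytesIO()
    write_varint_sketch(300, buf)
    assert buf.getvalue() == b'\xac\x02'  # 300 -> 0xAC (continuation), 0x02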
clickhouse_driver/columns/arraycolumn.py
@@ -0,0 +1,161 @@
+from itertools import chain
+from struct import Struct
+
+from .base import Column
+from .intcolumn import UInt64Column
+from ..util.helpers import pairwise
+
+
+class ArrayColumn(Column):
+    """
+    Nested arrays are written in flattened form after information about
+    their sizes (offsets, really).
+    One element of an array of arrays can be represented as a tree:
+
+    (0 depth)          [[3, 4], [5, 6]]
+                        |               |
+    (1 depth)      [3, 4]           [5, 6]
+                   |    |           |    |
+    (leaf)        3     4          5     6
+
+    Offsets (sizes) are written in breadth-first order. In the example
+    above the following sequence of offsets is written: 2 -> 2 -> 4
+    1) size of the whole array: 2
+    2) size of array 1 at depth=1: 2
+    3) size of array 2 plus the sizes of all arrays before it at
+       depth=1: 2 + 2 = 4
+
+    After the sizes comes the flattened data: 3 -> 4 -> 5 -> 6
+    """
+    py_types = (list, tuple)
+
+    def __init__(self, nested_column, **kwargs):
+        self.init_kwargs = kwargs
+        self.size_column = UInt64Column(**kwargs)
+        self.nested_column = nested_column
+        self._write_depth_0_size = True
+        super(ArrayColumn, self).__init__(**kwargs)
+        self.null_value = []
+
+    def write_data(self, data, buf):
+        # A column of Array(T) is stored in "compact" format and passed to
+        # the server wrapped into another Array, without the size of the
+        # wrapper array.
+        self.nested_column = ArrayColumn(
+            self.nested_column, **self.init_kwargs
+        )
+        self.nested_column.nullable = self.nullable
+        self.nullable = False
+        self._write_depth_0_size = False
+        self._write(data, buf)
+
+    def read_data(self, n_rows, buf):
+        self.nested_column = ArrayColumn(
+            self.nested_column, **self.init_kwargs
+        )
+        self.nested_column.nullable = self.nullable
+        self.nullable = False
+        return self._read(n_rows, buf)[0]
+
+    def _write_sizes(self, value, buf):
+        nulls_map = []
+
+        column = self
+        sizes = [len(value)] if self._write_depth_0_size else []
+
+        while True:
+            nested_column = column.nested_column
+            if not isinstance(nested_column, ArrayColumn):
+                if column.nullable:
+                    nulls_map = [x is None for x in value]
+                break
+
+            offset = 0
+            new_value = []
+            for x in value:
+                offset += len(x)
+                sizes.append(offset)
+                new_value.extend(x)
+
+            value = new_value
+            column = nested_column
+
+        if nulls_map:
+            self._write_nulls_map(nulls_map, buf)
+
+        ns = Struct('<{}Q'.format(len(sizes)))
+        buf.write(ns.pack(*sizes))
+
+    def _write_data(self, value, buf):
+        if self.nullable:
+            value = value or []
+
+        if isinstance(self.nested_column, ArrayColumn):
+            value = list(chain.from_iterable(value))
+
+        if value:
+            self.nested_column._write_data(value, buf)
+
+    def _write_nulls_data(self, value, buf):
+        if self.nullable:
+            value = value or []
+
+        if isinstance(self.nested_column, ArrayColumn):
+            value = list(chain.from_iterable(value))
+            self.nested_column._write_nulls_data(value, buf)
+        else:
+            if self.nested_column.nullable:
+                self.nested_column._write_nulls_map(value, buf)
+
+    def _write(self, value, buf):
+        value = self.prepare_items(value)
+        self._write_sizes(value, buf)
+        self._write_nulls_data(value, buf)
+        self._write_data(value, buf)
+
+    def read_state_prefix(self, buf):
+        super(ArrayColumn, self).read_state_prefix(buf)
+
+        self.nested_column.read_state_prefix(buf)
+
+    def write_state_prefix(self, buf):
+        super(ArrayColumn, self).write_state_prefix(buf)
+
+        self.nested_column.write_state_prefix(buf)
+
+    def _read(self, size, buf):
+        slices_series = [[0, size]]
+        nested_column = self.nested_column
+
+        cur_level_slice_size = size
+        cur_level_slice = None
+        while isinstance(nested_column, ArrayColumn):
+            if cur_level_slice is None:
+                cur_level_slice = [0]
+            ns = Struct('<{}Q'.format(cur_level_slice_size))
+            nested_sizes = ns.unpack(buf.read(ns.size))
+            cur_level_slice.extend(nested_sizes)
+            slices_series.append(cur_level_slice)
+            cur_level_slice = None
+            cur_level_slice_size = nested_sizes[-1] if len(nested_sizes) > 0 \
+                else 0
+            nested_column = nested_column.nested_column
+
+        n_items = cur_level_slice_size if size > 0 else 0
+        nulls_map = None
+        if nested_column.nullable:
+            nulls_map = self._read_nulls_map(n_items, buf)
+
+        data = []
+        if n_items:
+            data = list(nested_column._read_data(
+                n_items, buf, nulls_map=nulls_map
+            ))
+
+        # Build nested structure.
+        for slices in reversed(slices_series):
+            data = [data[begin:end] for begin, end in pairwise(slices)]
+
+        return tuple(data)
+
+
+def create_array_column(spec, column_by_spec_getter, column_options):
+    inner = spec[6:-1]
+    return ArrayColumn(column_by_spec_getter(inner), **column_options)
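The offsets scheme described in the ArrayColumn docstring can be checked with a few lines of plain Python. The following is an illustrative sketch of the flattening that _write_sizes performs (the helper name flatten_with_offsets is ours, not the package's):

    def flatten_with_offsets(rows):
        # One list of cumulative offsets per nesting depth, breadth-first.
        levels = []
        value = rows
        while value and isinstance(value[0], list):
            offsets, flat, total = [], [], 0
            for x in value:
                total += len(x)
                offsets.append(total)
                flat.extend(x)
            levels.append(offsets)
            value = flat
        return levels, value  # offsets per depth, then the flattened leaves

    levels, leaves = flatten_with_offsets([[[3, 4], [5, 6]]])
    assert levels == [[2], [2, 4]]  # the 2 -> 2 -> 4 sequence from the docstring
    assert leaves == [3, 4, 5, 6]   # flattened data: 3 -> 4 -> 5 -> 6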
clickhouse_driver/columns/base.py
@@ -0,0 +1,221 @@
+from struct import Struct, error as struct_error
+
+from . import exceptions
+from ..varint import read_varint
+
+
+class CommonSerialization(object):
+    def __init__(self, column):
+        self.column = column
+        super(CommonSerialization, self).__init__()
+
+    def read_sparse(self, n_items, buf):
+        return n_items
+
+    def apply_sparse(self, items):
+        return items
+
+
+class SparseSerialization(CommonSerialization):
+
+    def __init__(self, *args, **kwargs):
+        self.sparse_indexes = []
+        self.items_total = None
+        super(SparseSerialization, self).__init__(*args, **kwargs)
+
+    def read_sparse(self, n_items, buf):
+        sparse_indexes = []
+        items_total = 0
+        non_default_items = 0
+
+        END_OF_GRANULE_FLAG = 1 << 62
+        end_of_granule = False
+
+        while not end_of_granule:
+            group_size = read_varint(buf)
+            end_of_granule = group_size & END_OF_GRANULE_FLAG
+            group_size &= ~END_OF_GRANULE_FLAG
+
+            items_total += group_size + 1
+            if not end_of_granule:
+                non_default_items += 1
+                sparse_indexes.append(items_total)
+
+        self.sparse_indexes = sparse_indexes
+        self.items_total = items_total
+
+        return non_default_items
+
+    def apply_sparse(self, items):
+        default = self.column.null_value
+        if self.column.after_read_items:
+            default = self.column.after_read_items([default])[0]
+
+        rv = [default] * (self.items_total - 1)
+        for item_number, i in enumerate(self.sparse_indexes):
+            rv[i - 1] = items[item_number]
+
+        return rv
+
+
+class Column(object):
+    ch_type = None
+    py_types = None
+
+    check_item = None
+    after_read_items = None
+    before_write_items = None
+
+    types_check_enabled = False
+
+    null_value = 0
+
+    def __init__(self, types_check=False, has_custom_serialization=False,
+                 **kwargs):
+        self.nullable = False
+        self.types_check_enabled = types_check
+        self.has_custom_serialization = has_custom_serialization
+        self.serialization = CommonSerialization(self)
+        self.input_null_as_default = False
+
+        self.context = kwargs['context']
+        self.input_null_as_default = self.context.client_settings \
+            .get('input_format_null_as_default', False)
+
+        super(Column, self).__init__()
+
+    def make_null_struct(self, n_items):
+        return Struct('<{}B'.format(n_items))
+
+    def _read_nulls_map(self, n_items, buf):
+        s = self.make_null_struct(n_items)
+        return s.unpack(buf.read(s.size))
+
+    def _write_nulls_map(self, items, buf):
+        s = self.make_null_struct(len(items))
+        items = [x is None for x in items]
+        buf.write(s.pack(*items))
+
+    def check_item_type(self, value):
+        if not isinstance(value, self.py_types):
+            raise exceptions.ColumnTypeMismatchException(value)
+
+    def prepare_items(self, items):
+        nullable = self.nullable
+        null_value = self.null_value
+        null_as_default = self.input_null_as_default
+
+        check_item = self.check_item
+        if self.types_check_enabled:
+            check_item_type = self.check_item_type
+        else:
+            check_item_type = False
+
+        if (not (self.nullable or null_as_default) and not check_item_type and
+                not check_item and not self.before_write_items):
+            return items
+
+        nulls_map = [False] * len(items) if self.nullable else None
+        for i, x in enumerate(items):
+            if x is None:
+                if nullable:
+                    nulls_map[i] = True
+                    x = null_value
+                elif null_as_default:
+                    x = null_value
+
+            else:
+                if check_item_type:
+                    check_item_type(x)
+
+                if check_item:
+                    check_item(x)
+
+            items[i] = x
+
+        if self.before_write_items:
+            self.before_write_items(items, nulls_map=nulls_map)
+
+        return items
+
+    def write_data(self, items, buf):
+        if self.nullable:
+            self._write_nulls_map(items, buf)
+
+        self._write_data(items, buf)
+
+    def _write_data(self, items, buf):
+        prepared = self.prepare_items(items)
+        self.write_items(prepared, buf)
+
+    def write_items(self, items, buf):
+        raise NotImplementedError
+
+    def read_data(self, n_items, buf):
+        n_items = self.serialization.read_sparse(n_items, buf)
+
+        if self.nullable:
+            nulls_map = self._read_nulls_map(n_items, buf)
+        else:
+            nulls_map = None
+
+        items = self._read_data(n_items, buf, nulls_map=nulls_map)
+        return self.serialization.apply_sparse(items)
+
+    def _read_data(self, n_items, buf, nulls_map=None):
+        items = self.read_items(n_items, buf)
+
+        if self.after_read_items:
+            return self.after_read_items(items, nulls_map)
+        elif nulls_map is not None:
+            return tuple(
+                (None if is_null else items[i])
+                for i, is_null in enumerate(nulls_map)
+            )
+        return items
+
+    def read_items(self, n_items, buf):
+        raise NotImplementedError
+
+    def read_state_prefix(self, buf):
+        if self.has_custom_serialization:
+            use_custom_serialization = read_varint(buf)
+            if use_custom_serialization:
+                self.serialization = SparseSerialization(self)
+
+    def write_state_prefix(self, buf):
+        pass
+
+
+class FormatColumn(Column):
+    """
+    Uses struct.pack for bulk items writing.
+    """
+
+    format = None
+
+    def make_struct(self, n_items):
+        return Struct('<{}{}'.format(n_items, self.format))
+
+    def write_items(self, items, buf):
+        s = self.make_struct(len(items))
+        try:
+            buf.write(s.pack(*items))
+
+        except struct_error as e:
+            raise exceptions.StructPackException(e)
+
+    def read_items(self, n_items, buf):
+        s = self.make_struct(n_items)
+        return s.unpack(buf.read(s.size))
+
+
+# How to write a new column?
+# - Check the ClickHouse documentation for the column.
+# - Wireshark and tcpdump are your friends.
+# - Use `clickhouse-client --compression 0` to see what's going on during
+#   data transmission.
+# - Check for similar existing columns and tests.
+# - Use `FormatColumn` for columns that use "simple" types under the hood.
+# - Some columns have before_write and after_read hooks.
+#   Use them to convert items in the column into "simple" types.
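The sparse stream consumed by SparseSerialization.read_sparse above is a sequence of varints, each giving a run of default values before the next non-default item, with bit 62 (END_OF_GRANULE_FLAG) marking the final run. A hedged sketch of the same decoding, operating on pre-decoded varints for brevity (the helper name decode_sparse is ours, not the package's):

    END_OF_GRANULE_FLAG = 1 << 62

    def decode_sparse(groups, non_default_items, default=0):
        indexes, items_total = [], 0
        for group_size in groups:
            end = bool(group_size & END_OF_GRANULE_FLAG)
            group_size &= ~END_OF_GRANULE_FLAG
            items_total += group_size + 1
            if not end:
                indexes.append(items_total)  # 1-based position of non-default
        rv = [default] * (items_total - 1)   # the final "+1" is not a real row
        for n, i in enumerate(indexes):
            rv[i - 1] = non_default_items[n]
        return rv

    # Column [0, 0, 7, 0, 9, 0, 0]: runs of 2 and 1 defaults before the
    # non-default items 7 and 9, then a flagged final run of 2 defaults.
    assert decode_sparse(
        [2, 1, END_OF_GRANULE_FLAG | 2], [7, 9]
    ) == [0, 0, 7, 0, 9, 0, 0]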
clickhouse_driver/columns/boolcolumn.py
@@ -0,0 +1,7 @@
+from .base import FormatColumn
+
+
+class BoolColumn(FormatColumn):
+    ch_type = 'Bool'
+    py_types = (bool, )
+    format = '?'
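BoolColumn is the smallest possible FormatColumn: everything is inherited, and reading or writing a batch reduces to a single struct call built by make_struct. For a batch of three Bools, that struct behaves as follows (illustrative only):

    from struct import Struct

    s = Struct('<3?')  # '<' little-endian, one '?' byte per bool
    assert s.pack(True, False, True) == b'\x01\x00\x01'
    assert s.unpack(b'\x01\x00\x01') == (True, False, True)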
clickhouse_driver/columns/datecolumn.py
@@ -0,0 +1,108 @@
+from os import getenv
+from datetime import date, timedelta
+
+from .base import FormatColumn
+
+
+epoch_start = date(1970, 1, 1)
+epoch_end = date(2149, 6, 6)
+
+epoch_start_date32 = date(1900, 1, 1)
+epoch_end_date32 = date(2299, 12, 31)
+
+
+class LazyLUT(dict):
+    def __init__(self, *args, _factory, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._default_factory = _factory
+
+    def __missing__(self, key):
+        return self.setdefault(key, self._default_factory(key))
+
+
+def make_date_lut_range(date_start, date_end):
+    return range(
+        (date_start - epoch_start).days,
+        (date_end - epoch_start).days + 1,
+    )
+
+
+enable_lazy_date_lut = getenv('CLICKHOUSE_DRIVER_LASY_DATE_LUT', False)
+if enable_lazy_date_lut:
+    try:
+        start, end = enable_lazy_date_lut.split(':')
+        start_date = date.fromisoformat(start)
+        end_date = date.fromisoformat(end)
+
+        date_range = make_date_lut_range(start_date, end_date)
+    except ValueError:
+        date_range = ()
+
+    # Since we initialize the lazy lut with some initially warmed values,
+    # we use an iterator rather than a dict comprehension to save memory
+    # and time.
+    _date_lut = LazyLUT(
+        ((x, epoch_start + timedelta(days=x)) for x in date_range),
+        _factory=lambda x: epoch_start + timedelta(days=x),
+    )
+    _date_lut_reverse = LazyLUT(
+        ((value, key) for key, value in _date_lut.items()),
+        _factory=lambda x: (x - epoch_start).days,
+    )
+else:
+    # If the lazy lut is not enabled, we fall back to static dict
+    # initialization. In both cases we use the same lut for both date
+    # types, since one range encompasses the other and we can avoid
+    # duplicating the overlap.
+    date_range = make_date_lut_range(epoch_start_date32, epoch_end_date32)
+    _date_lut = {x: epoch_start + timedelta(days=x) for x in date_range}
+    _date_lut_reverse = {value: key for key, value in _date_lut.items()}
+
+
+class DateColumn(FormatColumn):
+    ch_type = 'Date'
+    py_types = (date, )
+    format = 'H'
+
+    min_value = epoch_start
+    max_value = epoch_end
+
+    date_lut = _date_lut
+    date_lut_reverse = _date_lut_reverse
+
+    def before_write_items(self, items, nulls_map=None):
+        null_value = self.null_value
+
+        date_lut_reverse = self.date_lut_reverse
+        min_value = self.min_value
+        max_value = self.max_value
+
+        for i, item in enumerate(items):
+            if nulls_map and nulls_map[i]:
+                items[i] = null_value
+                continue
+
+            # Normalize datetime (and other date subclasses) to plain date.
+            if type(item) is not date:
+                item = date(item.year, item.month, item.day)
+
+            if min_value <= item <= max_value:
+                items[i] = date_lut_reverse[item]
+            else:
+                items[i] = 0
+
+    def after_read_items(self, items, nulls_map=None):
+        date_lut = self.date_lut
+
+        if nulls_map is None:
+            return tuple(date_lut[item] for item in items)
+        else:
+            return tuple(
+                (None if is_null else date_lut[items[i]])
+                for i, is_null in enumerate(nulls_map)
+            )
+
+
+class Date32Column(DateColumn):
+    ch_type = 'Date32'
+    format = 'i'
+
+    min_value = epoch_start_date32
+    max_value = epoch_end_date32
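For reference, the LUTs above just cache the Date wire format: a Date value is the day count since 1970-01-01, packed per format = 'H' as an unsigned 16-bit little-endian integer (Date32 uses a signed 32-bit count via format = 'i'). A small sketch, with values computed by hand rather than taken from the package:

    from datetime import date
    from struct import Struct

    d = date(2024, 1, 1)
    days = (d - date(1970, 1, 1)).days  # what date_lut_reverse[d] yields
    assert days == 19723
    assert Struct('<1H').pack(days) == b'\x0bM'  # 19723 = 0x4d0b, little-endian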