clickhouse_driver-0.2.10-pp311-pypy311_pp73-win_amd64.whl
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
- clickhouse_driver/__init__.py +9 -0
- clickhouse_driver/block.py +227 -0
- clickhouse_driver/blockstreamprofileinfo.py +22 -0
- clickhouse_driver/bufferedreader.pypy311-pp73-win_amd64.pyd +0 -0
- clickhouse_driver/bufferedwriter.pypy311-pp73-win_amd64.pyd +0 -0
- clickhouse_driver/client.py +812 -0
- clickhouse_driver/clientinfo.py +119 -0
- clickhouse_driver/columns/__init__.py +0 -0
- clickhouse_driver/columns/arraycolumn.py +161 -0
- clickhouse_driver/columns/base.py +221 -0
- clickhouse_driver/columns/boolcolumn.py +7 -0
- clickhouse_driver/columns/datecolumn.py +108 -0
- clickhouse_driver/columns/datetimecolumn.py +203 -0
- clickhouse_driver/columns/decimalcolumn.py +116 -0
- clickhouse_driver/columns/enumcolumn.py +129 -0
- clickhouse_driver/columns/exceptions.py +12 -0
- clickhouse_driver/columns/floatcolumn.py +34 -0
- clickhouse_driver/columns/intcolumn.py +157 -0
- clickhouse_driver/columns/intervalcolumn.py +33 -0
- clickhouse_driver/columns/ipcolumn.py +118 -0
- clickhouse_driver/columns/jsoncolumn.py +37 -0
- clickhouse_driver/columns/largeint.pypy311-pp73-win_amd64.pyd +0 -0
- clickhouse_driver/columns/lowcardinalitycolumn.py +142 -0
- clickhouse_driver/columns/mapcolumn.py +73 -0
- clickhouse_driver/columns/nestedcolumn.py +10 -0
- clickhouse_driver/columns/nothingcolumn.py +13 -0
- clickhouse_driver/columns/nullablecolumn.py +7 -0
- clickhouse_driver/columns/nullcolumn.py +15 -0
- clickhouse_driver/columns/numpy/__init__.py +0 -0
- clickhouse_driver/columns/numpy/base.py +47 -0
- clickhouse_driver/columns/numpy/boolcolumn.py +8 -0
- clickhouse_driver/columns/numpy/datecolumn.py +19 -0
- clickhouse_driver/columns/numpy/datetimecolumn.py +146 -0
- clickhouse_driver/columns/numpy/floatcolumn.py +24 -0
- clickhouse_driver/columns/numpy/intcolumn.py +43 -0
- clickhouse_driver/columns/numpy/lowcardinalitycolumn.py +96 -0
- clickhouse_driver/columns/numpy/service.py +58 -0
- clickhouse_driver/columns/numpy/stringcolumn.py +78 -0
- clickhouse_driver/columns/numpy/tuplecolumn.py +37 -0
- clickhouse_driver/columns/service.py +185 -0
- clickhouse_driver/columns/simpleaggregatefunctioncolumn.py +7 -0
- clickhouse_driver/columns/stringcolumn.py +73 -0
- clickhouse_driver/columns/tuplecolumn.py +63 -0
- clickhouse_driver/columns/util.py +61 -0
- clickhouse_driver/columns/uuidcolumn.py +64 -0
- clickhouse_driver/compression/__init__.py +32 -0
- clickhouse_driver/compression/base.py +87 -0
- clickhouse_driver/compression/lz4.py +21 -0
- clickhouse_driver/compression/lz4hc.py +9 -0
- clickhouse_driver/compression/zstd.py +20 -0
- clickhouse_driver/connection.py +825 -0
- clickhouse_driver/context.py +36 -0
- clickhouse_driver/dbapi/__init__.py +62 -0
- clickhouse_driver/dbapi/connection.py +99 -0
- clickhouse_driver/dbapi/cursor.py +370 -0
- clickhouse_driver/dbapi/errors.py +40 -0
- clickhouse_driver/dbapi/extras.py +73 -0
- clickhouse_driver/defines.py +58 -0
- clickhouse_driver/errors.py +453 -0
- clickhouse_driver/log.py +48 -0
- clickhouse_driver/numpy/__init__.py +0 -0
- clickhouse_driver/numpy/block.py +8 -0
- clickhouse_driver/numpy/helpers.py +28 -0
- clickhouse_driver/numpy/result.py +123 -0
- clickhouse_driver/opentelemetry.py +43 -0
- clickhouse_driver/progress.py +44 -0
- clickhouse_driver/protocol.py +130 -0
- clickhouse_driver/queryprocessingstage.py +8 -0
- clickhouse_driver/reader.py +69 -0
- clickhouse_driver/readhelpers.py +26 -0
- clickhouse_driver/result.py +144 -0
- clickhouse_driver/settings/__init__.py +0 -0
- clickhouse_driver/settings/available.py +405 -0
- clickhouse_driver/settings/types.py +50 -0
- clickhouse_driver/settings/writer.py +34 -0
- clickhouse_driver/streams/__init__.py +0 -0
- clickhouse_driver/streams/compressed.py +88 -0
- clickhouse_driver/streams/native.py +108 -0
- clickhouse_driver/util/__init__.py +0 -0
- clickhouse_driver/util/compat.py +39 -0
- clickhouse_driver/util/escape.py +94 -0
- clickhouse_driver/util/helpers.py +173 -0
- clickhouse_driver/varint.pypy311-pp73-win_amd64.pyd +0 -0
- clickhouse_driver/writer.py +67 -0
- clickhouse_driver-0.2.10.dist-info/METADATA +215 -0
- clickhouse_driver-0.2.10.dist-info/RECORD +89 -0
- clickhouse_driver-0.2.10.dist-info/WHEEL +5 -0
- clickhouse_driver-0.2.10.dist-info/licenses/LICENSE +21 -0
- clickhouse_driver-0.2.10.dist-info/top_level.txt +1 -0
clickhouse_driver/clientinfo.py
@@ -0,0 +1,119 @@
+import socket
+import getpass
+from time import time
+
+from . import defines
+from . import errors
+from .opentelemetry import OpenTelemetryTraceContext
+from .varint import write_varint
+from .writer import write_binary_str, write_binary_uint8, \
+    write_binary_uint64, write_binary_uint128
+
+
+class ClientInfo(object):
+    class Interface(object):
+        TCP = 1
+        HTTP = 2
+
+    class QueryKind(object):
+        # Uninitialized object.
+        NO_QUERY = 0
+
+        INITIAL_QUERY = 1
+
+        # Query that was initiated by another query for distributed query
+        # execution.
+        SECONDARY_QUERY = 2
+
+    client_version_major = defines.CLIENT_VERSION_MAJOR
+    client_version_minor = defines.CLIENT_VERSION_MINOR
+    client_version_patch = defines.CLIENT_VERSION_PATCH
+    interface = Interface.TCP
+
+    initial_user = ''
+    initial_query_id = ''
+    initial_address = '0.0.0.0:0'
+
+    def __init__(self, client_name, context, client_revision):
+        self.query_kind = ClientInfo.QueryKind.NO_QUERY
+
+        try:
+            self.os_user = getpass.getuser()
+        except (KeyError, OSError):
+            self.os_user = ''
+        self.client_hostname = socket.gethostname()
+        self.client_name = client_name
+        self.client_revision = client_revision
+
+        self.client_trace_context = OpenTelemetryTraceContext(
+            context.client_settings['opentelemetry_traceparent'],
+            context.client_settings['opentelemetry_tracestate']
+        )
+
+        self.quota_key = context.client_settings['quota_key']
+        self.distributed_depth = 0
+        self.initial_query_start_time_microseconds = int(time() * 1000000)
+
+        super(ClientInfo, self).__init__()
+
+    @property
+    def empty(self):
+        return self.query_kind == ClientInfo.QueryKind.NO_QUERY
+
+    def write(self, server_revision, fout):
+        revision = server_revision
+        if server_revision < defines.DBMS_MIN_REVISION_WITH_CLIENT_INFO:
+            raise errors.LogicalError('Method ClientInfo.write is called '
+                                      'for unsupported server revision')
+
+        write_binary_uint8(self.query_kind, fout)
+        if self.empty:
+            return
+
+        write_binary_str(self.initial_user, fout)
+        write_binary_str(self.initial_query_id, fout)
+        write_binary_str(self.initial_address, fout)
+
+        if (
+            revision >=
+            defines.DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME
+        ):
+            write_binary_uint64(
+                self.initial_query_start_time_microseconds, fout
+            )
+
+        write_binary_uint8(self.interface, fout)
+
+        write_binary_str(self.os_user, fout)
+        write_binary_str(self.client_hostname, fout)
+        write_binary_str(self.client_name, fout)
+        write_varint(self.client_version_major, fout)
+        write_varint(self.client_version_minor, fout)
+        write_varint(self.client_revision, fout)
+
+        if revision >= defines.DBMS_MIN_REVISION_WITH_QUOTA_KEY_IN_CLIENT_INFO:
+            write_binary_str(self.quota_key, fout)
+
+        if revision >= \
+                defines.DBMS_MIN_PROTOCOL_VERSION_WITH_DISTRIBUTED_DEPTH:
+            write_varint(self.distributed_depth, fout)
+
+        if revision >= defines.DBMS_MIN_REVISION_WITH_VERSION_PATCH:
+            write_varint(self.client_version_patch, fout)
+
+        if revision >= defines.DBMS_MIN_REVISION_WITH_OPENTELEMETRY:
+            if self.client_trace_context.trace_id is not None:
+                # Have OpenTelemetry header.
+                write_binary_uint8(1, fout)
+                write_binary_uint128(self.client_trace_context.trace_id, fout)
+                write_binary_uint64(self.client_trace_context.span_id, fout)
+                write_binary_str(self.client_trace_context.tracestate, fout)
+                write_binary_uint8(self.client_trace_context.trace_flags, fout)
+            else:
+                # Don't have OpenTelemetry header.
+                write_binary_uint8(0, fout)
+
+        if revision >= defines.DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS:
+            write_varint(0, fout)  # collaborate_with_initiator
+            write_varint(0, fout)  # count_participating_replicas
+            write_varint(0, fout)  # number_of_current_replica
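Aside: every variable-width field that ClientInfo.write emits above goes through write_varint, which on the ClickHouse native protocol is an unsigned LEB128 varint (seven payload bits per byte, high bit set while more bytes follow). A minimal pure-Python sketch of that encoding (the write_varint_sketch name is illustrative; it is not the package's compiled varint module):

    import io

    def write_varint_sketch(number, buf):
        # Unsigned LEB128: emit 7 bits at a time, least significant first;
        # the high bit of each byte marks that another byte follows.
        while True:
            towrite = number & 0x7f
            number >>= 7
            if number:
                buf.write(bytes((towrite | 0x80,)))
            else:
                buf.write(bytes((towrite,)))
                break

    buf = io.BytesIO()
    write_varint_sketch(300, buf)
    assert buf.getvalue() == b'\xac\x02'  # low 7 bits 0x2C | 0x80, then 0x02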
File without changes
clickhouse_driver/columns/arraycolumn.py
@@ -0,0 +1,161 @@
+from itertools import chain
+from struct import Struct
+
+from .base import Column
+from .intcolumn import UInt64Column
+from ..util.helpers import pairwise
+
+
+class ArrayColumn(Column):
+    """
+    Nested arrays written in flatten form after information about their
+    sizes (offsets really).
+    One element of array of arrays can be represented as tree:
+    (0 depth)  [[3, 4], [5, 6]]
+                  |        |
+    (1 depth)  [3, 4]   [5, 6]
+               |    |   |    |
+    (leaf)     3    4   5    6
+
+    Offsets (sizes) written in breadth-first search order. In example above
+    following sequence of offset will be written: 4 -> 2 -> 4
+    1) size of whole array: 4
+    2) size of array 1 in depth=1: 2
+    3) size of array 2 plus size of all array before in depth=1: 2 + 2 = 4
+
+    After sizes info comes flatten data: 3 -> 4 -> 5 -> 6
+    """
+    py_types = (list, tuple)
+
+    def __init__(self, nested_column, **kwargs):
+        self.init_kwargs = kwargs
+        self.size_column = UInt64Column(**kwargs)
+        self.nested_column = nested_column
+        self._write_depth_0_size = True
+        super(ArrayColumn, self).__init__(**kwargs)
+        self.null_value = []
+
+    def write_data(self, data, buf):
+        # Column of Array(T) is stored in "compact" format and passed to server
+        # wrapped into another Array without size of wrapper array.
+        self.nested_column = ArrayColumn(
+            self.nested_column, **self.init_kwargs
+        )
+        self.nested_column.nullable = self.nullable
+        self.nullable = False
+        self._write_depth_0_size = False
+        self._write(data, buf)
+
+    def read_data(self, n_rows, buf):
+        self.nested_column = ArrayColumn(
+            self.nested_column, **self.init_kwargs
+        )
+        self.nested_column.nullable = self.nullable
+        self.nullable = False
+        return self._read(n_rows, buf)[0]
+
+    def _write_sizes(self, value, buf):
+        nulls_map = []
+
+        column = self
+        sizes = [len(value)] if self._write_depth_0_size else []
+
+        while True:
+            nested_column = column.nested_column
+            if not isinstance(nested_column, ArrayColumn):
+                if column.nullable:
+                    nulls_map = [x is None for x in value]
+                break
+
+            offset = 0
+            new_value = []
+            for x in value:
+                offset += len(x)
+                sizes.append(offset)
+                new_value.extend(x)
+
+            value = new_value
+            column = nested_column
+
+        if nulls_map:
+            self._write_nulls_map(nulls_map, buf)
+
+        ns = Struct('<{}Q'.format(len(sizes)))
+        buf.write(ns.pack(*sizes))
+
+    def _write_data(self, value, buf):
+        if self.nullable:
+            value = value or []
+
+        if isinstance(self.nested_column, ArrayColumn):
+            value = list(chain.from_iterable(value))
+
+        if value:
+            self.nested_column._write_data(value, buf)
+
+    def _write_nulls_data(self, value, buf):
+        if self.nullable:
+            value = value or []
+
+        if isinstance(self.nested_column, ArrayColumn):
+            value = list(chain.from_iterable(value))
+            self.nested_column._write_nulls_data(value, buf)
+        else:
+            if self.nested_column.nullable:
+                self.nested_column._write_nulls_map(value, buf)
+
+    def _write(self, value, buf):
+        value = self.prepare_items(value)
+        self._write_sizes(value, buf)
+        self._write_nulls_data(value, buf)
+        self._write_data(value, buf)
+
+    def read_state_prefix(self, buf):
+        super(ArrayColumn, self).read_state_prefix(buf)
+
+        self.nested_column.read_state_prefix(buf)
+
+    def write_state_prefix(self, buf):
+        super(ArrayColumn, self).write_state_prefix(buf)
+
+        self.nested_column.write_state_prefix(buf)
+
+    def _read(self, size, buf):
+        slices_series = [[0, size]]
+        nested_column = self.nested_column
+
+        cur_level_slice_size = size
+        cur_level_slice = None
+        while (isinstance(nested_column, ArrayColumn)):
+            if cur_level_slice is None:
+                cur_level_slice = [0]
+            ns = Struct('<{}Q'.format(cur_level_slice_size))
+            nested_sizes = ns.unpack(buf.read(ns.size))
+            cur_level_slice.extend(nested_sizes)
+            slices_series.append(cur_level_slice)
+            cur_level_slice = None
+            cur_level_slice_size = nested_sizes[-1] if len(nested_sizes) > 0 \
+                else 0
+            nested_column = nested_column.nested_column
+
+        n_items = cur_level_slice_size if size > 0 else 0
+        nulls_map = None
+        if nested_column.nullable:
+            nulls_map = self._read_nulls_map(n_items, buf)
+
+        data = []
+        if n_items:
+            data = list(nested_column._read_data(
+                n_items, buf, nulls_map=nulls_map
+            ))
+
+        # Build nested structure.
+        for slices in reversed(slices_series):
+            data = [data[begin:end] for begin, end in pairwise(slices)]
+
+        return tuple(data)
+
+
+def create_array_column(spec, column_by_spec_getter, column_options):
+    inner = spec[6:-1]
+    return ArrayColumn(column_by_spec_getter(inner), **column_options)
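To make the docstring's offset scheme concrete, here is a small stand-alone sketch (illustrative only, not part of the package) that flattens one Array(Array(T)) row level by level, the same way _write_sizes accumulates running offsets; the depth-0 row count that write_data suppresses is omitted:

    # One row holding the docstring's value [[3, 4], [5, 6]].
    rows = [[[3, 4], [5, 6]]]

    sizes, level = [], rows
    while level and isinstance(level[0], list):
        offset, flattened = 0, []
        for sub in level:
            offset += len(sub)
            sizes.append(offset)   # cumulative offset, not individual length
            flattened.extend(sub)
        level = flattened

    print(sizes)  # [2, 2, 4]: the row holds 2 arrays, ending at offsets 2 and 4
    print(level)  # [3, 4, 5, 6]: the flattened data written after the sizes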
clickhouse_driver/columns/base.py
@@ -0,0 +1,221 @@
+from struct import Struct, error as struct_error
+
+from . import exceptions
+from ..varint import read_varint
+
+
+class CommonSerialization(object):
+    def __init__(self, column):
+        self.column = column
+        super(CommonSerialization, self).__init__()
+
+    def read_sparse(self, n_items, buf):
+        return n_items
+
+    def apply_sparse(self, items):
+        return items
+
+
+class SparseSerialization(CommonSerialization):
+
+    def __init__(self, *args, **kwargs):
+        self.sparse_indexes = []
+        self.items_total = None
+        super(SparseSerialization, self).__init__(*args, **kwargs)
+
+    def read_sparse(self, n_items, buf):
+        sparse_indexes = []
+        items_total = 0
+        non_default_items = 0
+
+        END_OF_GRANULE_FLAG = 1 << 62
+        end_of_granule = False
+
+        while not end_of_granule:
+            group_size = read_varint(buf)
+            end_of_granule = group_size & END_OF_GRANULE_FLAG
+            group_size &= ~END_OF_GRANULE_FLAG
+
+            items_total += group_size + 1
+            if not end_of_granule:
+                non_default_items += 1
+                sparse_indexes.append(items_total)
+
+        self.sparse_indexes = sparse_indexes
+        self.items_total = items_total
+
+        return non_default_items
+
+    def apply_sparse(self, items):
+        default = self.column.null_value
+        if self.column.after_read_items:
+            default = self.column.after_read_items([default])[0]
+
+        rv = [default] * (self.items_total - 1)
+        for item_number, i in enumerate(self.sparse_indexes):
+            rv[i - 1] = items[item_number]
+
+        return rv
+
+
+class Column(object):
+    ch_type = None
+    py_types = None
+
+    check_item = None
+    after_read_items = None
+    before_write_items = None
+
+    types_check_enabled = False
+
+    null_value = 0
+
+    def __init__(self, types_check=False, has_custom_serialization=False,
+                 **kwargs):
+        self.nullable = False
+        self.types_check_enabled = types_check
+        self.has_custom_serialization = has_custom_serialization
+        self.serialization = CommonSerialization(self)
+        self.input_null_as_default = False
+
+        self.context = kwargs['context']
+        self.input_null_as_default = self.context.client_settings \
+            .get('input_format_null_as_default', False)
+
+        super(Column, self).__init__()
+
+    def make_null_struct(self, n_items):
+        return Struct('<{}B'.format(n_items))
+
+    def _read_nulls_map(self, n_items, buf):
+        s = self.make_null_struct(n_items)
+        return s.unpack(buf.read(s.size))
+
+    def _write_nulls_map(self, items, buf):
+        s = self.make_null_struct(len(items))
+        items = [x is None for x in items]
+        buf.write(s.pack(*items))
+
+    def check_item_type(self, value):
+        if not isinstance(value, self.py_types):
+            raise exceptions.ColumnTypeMismatchException(value)
+
+    def prepare_items(self, items):
+        nullable = self.nullable
+        null_value = self.null_value
+        null_as_default = self.input_null_as_default
+
+        check_item = self.check_item
+        if self.types_check_enabled:
+            check_item_type = self.check_item_type
+        else:
+            check_item_type = False
+
+        if (not (self.nullable or null_as_default) and not check_item_type and
+                not check_item and not self.before_write_items):
+            return items
+
+        nulls_map = [False] * len(items) if self.nullable else None
+        for i, x in enumerate(items):
+            if x is None:
+                if nullable:
+                    nulls_map[i] = True
+                    x = null_value
+                elif null_as_default:
+                    x = null_value
+
+            else:
+                if check_item_type:
+                    check_item_type(x)
+
+                if check_item:
+                    check_item(x)
+
+            items[i] = x
+
+        if self.before_write_items:
+            self.before_write_items(items, nulls_map=nulls_map)
+
+        return items
+
+    def write_data(self, items, buf):
+        if self.nullable:
+            self._write_nulls_map(items, buf)
+
+        self._write_data(items, buf)
+
+    def _write_data(self, items, buf):
+        prepared = self.prepare_items(items)
+        self.write_items(prepared, buf)
+
+    def write_items(self, items, buf):
+        raise NotImplementedError
+
+    def read_data(self, n_items, buf):
+        n_items = self.serialization.read_sparse(n_items, buf)
+
+        if self.nullable:
+            nulls_map = self._read_nulls_map(n_items, buf)
+        else:
+            nulls_map = None
+
+        items = self._read_data(n_items, buf, nulls_map=nulls_map)
+        return self.serialization.apply_sparse(items)
+
+    def _read_data(self, n_items, buf, nulls_map=None):
+        items = self.read_items(n_items, buf)
+
+        if self.after_read_items:
+            return self.after_read_items(items, nulls_map)
+        elif nulls_map is not None:
+            return tuple(
+                (None if is_null else items[i])
+                for i, is_null in enumerate(nulls_map)
+            )
+        return items
+
+    def read_items(self, n_items, buf):
+        raise NotImplementedError
+
+    def read_state_prefix(self, buf):
+        if self.has_custom_serialization:
+            use_custom_serialization = read_varint(buf)
+            if use_custom_serialization:
+                self.serialization = SparseSerialization(self)
+
+    def write_state_prefix(self, buf):
+        pass
+
+
+class FormatColumn(Column):
+    """
+    Uses struct.pack for bulk items writing.
+    """
+
+    format = None
+
+    def make_struct(self, n_items):
+        return Struct('<{}{}'.format(n_items, self.format))
+
+    def write_items(self, items, buf):
+        s = self.make_struct(len(items))
+        try:
+            buf.write(s.pack(*items))
+
+        except struct_error as e:
+            raise exceptions.StructPackException(e)
+
+    def read_items(self, n_items, buf):
+        s = self.make_struct(n_items)
+        return s.unpack(buf.read(s.size))
+
+
+# How to write new column?
+# - Check ClickHouse documentation for column
+# - Wireshark and tcpdump are your friends.
+# - Use `clickhouse-client --compression 0` to see what's going on data
+#   transmission.
+# - Check for similar existing columns and tests.
+# - Use `FormatColumn` for columns that use "simple" types under the hood.
+# - Some columns have before_write and after_read hooks.
+#   Use them to convert items in column into "simple" types.
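Following the checklist above, a new fixed-width type usually reduces to a FormatColumn subclass plus a struct format character. A minimal sketch (Int16ColumnSketch and FakeContext are hypothetical names for illustration; the package's intcolumn.py ships the real integer columns):

    import io
    from clickhouse_driver.columns.base import FormatColumn

    class Int16ColumnSketch(FormatColumn):
        # Nulls handling, sparse decoding and type checks all come from the
        # base classes; a fixed-width type only declares its struct format.
        ch_type = 'Int16'
        py_types = (int, )
        format = 'h'  # packed little-endian via Struct('<{n}h')

    class FakeContext(object):
        client_settings = {}  # hypothetical stand-in for the real Context

    col = Int16ColumnSketch(context=FakeContext())
    buf = io.BytesIO()
    col.write_data([1, -2, 3], buf)
    assert buf.getvalue() == b'\x01\x00\xfe\xff\x03\x00'

    buf.seek(0)
    assert col.read_data(3, buf) == (1, -2, 3)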
clickhouse_driver/columns/datecolumn.py
@@ -0,0 +1,108 @@
+from os import getenv
+from datetime import date, timedelta
+
+from .base import FormatColumn
+
+
+epoch_start = date(1970, 1, 1)
+epoch_end = date(2149, 6, 6)
+
+epoch_start_date32 = date(1900, 1, 1)
+epoch_end_date32 = date(2299, 12, 31)
+
+
+class LazyLUT(dict):
+    def __init__(self, *args, _factory, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._default_factory = _factory
+
+    def __missing__(self, key):
+        return self.setdefault(key, self._default_factory(key))
+
+
+def make_date_lut_range(date_start, date_end):
+    return range(
+        (date_start - epoch_start).days,
+        (date_end - epoch_start).days + 1,
+    )
+
+
+enable_lazy_date_lut = getenv('CLICKHOUSE_DRIVER_LASY_DATE_LUT', False)
+if enable_lazy_date_lut:
+    try:
+        start, end = enable_lazy_date_lut.split(':')
+        start_date = date.fromisoformat(start)
+        end_date = date.fromisoformat(end)
+
+        date_range = make_date_lut_range(start_date, end_date)
+    except ValueError:
+        date_range = ()
+
+    # Since we initialize lazy lut with some initially warmed values,
+    # we use iterator and not dict comprehension for memory & time optimization
+    _date_lut = LazyLUT(
+        ((x, epoch_start + timedelta(days=x)) for x in date_range),
+        _factory=lambda x: epoch_start + timedelta(days=x),
+    )
+    _date_lut_reverse = LazyLUT(
+        ((value, key) for key, value in _date_lut.items()),
+        _factory=lambda x: (x - epoch_start).days,
+    )
+else:
+    # If lazy lut is not enabled, we fallback to static dict initialization
+    # In both cases, we use same lut for both data types,
+    # since one encompasses the other and we can avoid duplicating overlap
+    date_range = make_date_lut_range(epoch_start_date32, epoch_end_date32)
+    _date_lut = {x: epoch_start + timedelta(days=x) for x in date_range}
+    _date_lut_reverse = {value: key for key, value in _date_lut.items()}
+
+
+class DateColumn(FormatColumn):
+    ch_type = 'Date'
+    py_types = (date, )
+    format = 'H'
+
+    min_value = epoch_start
+    max_value = epoch_end
+
+    date_lut = _date_lut
+    date_lut_reverse = _date_lut_reverse
+
+    def before_write_items(self, items, nulls_map=None):
+        null_value = self.null_value
+
+        date_lut_reverse = self.date_lut_reverse
+        min_value = self.min_value
+        max_value = self.max_value
+
+        for i, item in enumerate(items):
+            if nulls_map and nulls_map[i]:
+                items[i] = null_value
+                continue
+
+            if type(item) != date:
+                item = date(item.year, item.month, item.day)
+
+            if min_value <= item <= max_value:
+                items[i] = date_lut_reverse[item]
+            else:
+                items[i] = 0
+
+    def after_read_items(self, items, nulls_map=None):
+        date_lut = self.date_lut
+
+        if nulls_map is None:
+            return tuple(date_lut[item] for item in items)
+        else:
+            return tuple(
+                (None if is_null else date_lut[items[i]])
+                for i, is_null in enumerate(nulls_map)
+            )
+
+
+class Date32Column(DateColumn):
+    ch_type = 'Date32'
+    format = 'i'
+
+    min_value = epoch_start_date32
+    max_value = epoch_end_date32