clickhouse-driver 0.2.5-cp38-cp38-win32.whl → 0.2.7-cp38-cp38-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clickhouse_driver/__init__.py +1 -1
- clickhouse_driver/block.py +3 -2
- clickhouse_driver/bufferedreader.cp38-win32.pyd +0 -0
- clickhouse_driver/bufferedwriter.cp38-win32.pyd +0 -0
- clickhouse_driver/client.py +120 -16
- clickhouse_driver/clientinfo.py +2 -2
- clickhouse_driver/columns/arraycolumn.py +15 -6
- clickhouse_driver/columns/base.py +71 -7
- clickhouse_driver/columns/datecolumn.py +2 -2
- clickhouse_driver/columns/jsoncolumn.py +37 -0
- clickhouse_driver/columns/largeint.cp38-win32.pyd +0 -0
- clickhouse_driver/columns/lowcardinalitycolumn.py +23 -4
- clickhouse_driver/columns/mapcolumn.py +6 -2
- clickhouse_driver/columns/nestedcolumn.py +2 -13
- clickhouse_driver/columns/numpy/datetimecolumn.py +16 -16
- clickhouse_driver/columns/numpy/lowcardinalitycolumn.py +2 -2
- clickhouse_driver/columns/service.py +12 -2
- clickhouse_driver/columns/tuplecolumn.py +31 -5
- clickhouse_driver/columns/uuidcolumn.py +1 -1
- clickhouse_driver/connection.py +104 -15
- clickhouse_driver/defines.py +9 -1
- clickhouse_driver/log.py +7 -3
- clickhouse_driver/progress.py +8 -2
- clickhouse_driver/settings/writer.py +7 -2
- clickhouse_driver/streams/native.py +18 -6
- clickhouse_driver/util/compat.py +12 -0
- clickhouse_driver/util/escape.py +35 -7
- clickhouse_driver/varint.cp38-win32.pyd +0 -0
- {clickhouse_driver-0.2.5.dist-info → clickhouse_driver-0.2.7.dist-info}/METADATA +201 -202
- {clickhouse_driver-0.2.5.dist-info → clickhouse_driver-0.2.7.dist-info}/RECORD +33 -32
- {clickhouse_driver-0.2.5.dist-info → clickhouse_driver-0.2.7.dist-info}/WHEEL +1 -1
- {clickhouse_driver-0.2.5.dist-info → clickhouse_driver-0.2.7.dist-info}/LICENSE +0 -0
- {clickhouse_driver-0.2.5.dist-info → clickhouse_driver-0.2.7.dist-info}/top_level.txt +0 -0
clickhouse_driver/__init__.py
CHANGED
clickhouse_driver/block.py
CHANGED
@@ -1,7 +1,7 @@
+from .columns.util import get_inner_spec, get_inner_columns_with_types
 from .reader import read_varint, read_binary_uint8, read_binary_int32
 from .varint import write_varint
 from .writer import write_binary_uint8, write_binary_int32
-from .columns import nestedcolumn


 class BlockInfo(object):
@@ -172,7 +172,8 @@ class RowOrientedBlock(BaseBlock):
         for name, type_ in columns_with_types:
             cwt = None
             if type_.startswith('Nested'):
-                cwt = nestedcolumn.get_columns_with_types(type_)
+                inner_spec = get_inner_spec('Nested', type_)
+                cwt = get_inner_columns_with_types(inner_spec)
             columns_with_cwt.append((name, cwt))

         for i, row in enumerate(data):
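The Nested handling above now goes through the shared spec helpers instead of the removed nestedcolumn wrappers. A hedged sketch of what the two helpers return; the shapes are inferred from how RowOrientedBlock consumes them, not from any documented contract:

from clickhouse_driver.columns.util import (
    get_inner_spec, get_inner_columns_with_types
)

spec = 'Nested(id UInt32, name String)'
inner_spec = get_inner_spec('Nested', spec)
print(inner_spec)
# expected: 'id UInt32, name String'
print(get_inner_columns_with_types(inner_spec))
# expected: [('id', 'UInt32'), ('name', 'String')]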
clickhouse_driver/bufferedreader.cp38-win32.pyd
CHANGED (binary file)

clickhouse_driver/bufferedwriter.cp38-win32.pyd
CHANGED (binary file)
clickhouse_driver/client.py
CHANGED
@@ -57,6 +57,17 @@ class Client(object):
         * ``round_robin`` -- If ``alt_hosts`` are provided the query will be
           executed on host picked with round-robin algorithm.
           New in version *0.2.5*.
+        * ``namedtuple_as_json`` -- Controls named tuple and nested types
+          deserialization. To interpret these columns alongside
+          with ``allow_experimental_object_type=1`` as Python
+          tuple set ``namedtuple_as_json`` to ``False``.
+          Default: True.
+          New in version *0.2.6*.
+        * ``server_side_params`` -- Specifies on which side query parameters
+          should be rendered into placeholders.
+          Default: False. Means that parameters are rendered
+          on driver's side.
+          New in version *0.2.7*.
     """

     available_client_settings = (
@@ -67,7 +78,9 @@ class Client(object):
         'opentelemetry_traceparent',
         'opentelemetry_tracestate',
         'quota_key',
-        'input_format_null_as_default'
+        'input_format_null_as_default',
+        'namedtuple_as_json',
+        'server_side_params'
     )

     def __init__(self, *args, **kwargs):
@@ -97,6 +110,12 @@ class Client(object):
             ),
             'input_format_null_as_default': self.settings.pop(
                 'input_format_null_as_default', False
+            ),
+            'namedtuple_as_json': self.settings.pop(
+                'namedtuple_as_json', True
+            ),
+            'server_side_params': self.settings.pop(
+                'server_side_params', False
             )
         }

@@ -125,13 +144,19 @@ class Client(object):
             url = urlparse('clickhouse://' + host)

             connection_kwargs = kwargs.copy()
-            if len(args) >= 2:
-                # port as positional argument
+            num_args = len(args)
+            if num_args >= 2:
+                # host and port as positional arguments
                 connection_args = (url.hostname, url.port) + args[2:]
-            else:
-                # port as keyword argument
+            elif num_args >= 1:
+                # host as positional and port as keyword argument
                 connection_args = (url.hostname, ) + args[1:]
                 connection_kwargs['port'] = url.port
+            else:
+                # host and port as keyword arguments
+                connection_args = tuple()
+                connection_kwargs['host'] = url.hostname
+                connection_kwargs['port'] = url.port

             connection = Connection(*connection_args, **connection_kwargs)
             self.connections.append(connection)
@@ -438,7 +463,7 @@ class Client(object):

     def query_dataframe(
             self, query, params=None, external_tables=None, query_id=None,
-            settings=None):
+            settings=None, replace_nonwords=True):
         """
         *New in version 0.2.0.*

@@ -453,6 +478,8 @@ class Client(object):
                          ClickHouse server will generate it.
         :param settings: dictionary of query settings.
                          Defaults to ``None`` (no additional settings).
+        :param replace_nonwords: boolean to replace non-words in column names
+                                 to underscores. Defaults to ``True``.
         :return: pandas DataFrame.
         """

@@ -467,7 +494,10 @@ class Client(object):
             settings=settings
         )

-        columns = [re.sub(r'\W', '_', name) for name, type_ in columns]
+        columns = [name for name, type_ in columns]
+        if replace_nonwords:
+            columns = [re.sub(r'\W', '_', x) for x in columns]
+
         return pd.DataFrame(
             {col: d for d, col in zip(data, columns)}, columns=columns
         )
@@ -529,7 +559,7 @@ class Client(object):
             query, params, self.connection.context
         )

-        self.connection.send_query(query, query_id=query_id)
+        self.connection.send_query(query, query_id=query_id, params=params)
         self.connection.send_external_tables(external_tables,
                                              types_check=types_check)
         return self.receive_result(with_column_types=with_column_types,
@@ -544,8 +574,7 @@ class Client(object):
         query = self.substitute_params(
             query, params, self.connection.context
         )
-
-        self.connection.send_query(query, query_id=query_id)
+        self.connection.send_query(query, query_id=query_id, params=params)
         self.connection.send_external_tables(external_tables,
                                              types_check=types_check)
         return self.receive_result(with_column_types=with_column_types,
@@ -561,7 +590,7 @@ class Client(object):
             query, params, self.connection.context
         )

-        self.connection.send_query(query, query_id=query_id)
+        self.connection.send_query(query, query_id=query_id, params=params)
         self.connection.send_external_tables(external_tables,
                                              types_check=types_check)
         return self.iter_receive_result(with_column_types=with_column_types)
@@ -572,12 +601,12 @@ class Client(object):
         self.connection.send_query(query_without_data, query_id=query_id)
         self.connection.send_external_tables(external_tables,
                                              types_check=types_check)
-
         sample_block = self.receive_sample_block()
+
         if sample_block:
             rv = self.send_data(sample_block, data,
                                 types_check=types_check, columnar=columnar)
-            self.receive_end_of_query()
+            self.receive_end_of_insert_query()
             return rv

     def receive_sample_block(self):
@@ -631,8 +660,15 @@ class Client(object):
             self.connection.send_data(block)
             inserted_rows += block.num_rows

+            # Starting from the specific revision there are profile events
+            # sent by server in response to each inserted block
+            self.receive_profile_events()
+
         # Empty block means end of data.
         self.connection.send_data(block_cls())
+        # If enabled by revision profile events are also sent after empty block
+        self.receive_profile_events()
+
         return inserted_rows

     def receive_end_of_query(self):
@@ -643,7 +679,7 @@ class Client(object):
                 break

             elif packet.type == ServerPacketTypes.PROGRESS:
-                continue
+                self.last_query.store_progress(packet.progress)

             elif packet.type == ServerPacketTypes.EXCEPTION:
                 raise packet.exception
@@ -655,11 +691,64 @@ class Client(object):
                 pass

             elif packet.type == ServerPacketTypes.PROFILE_EVENTS:
-                continue
+                self.last_query.store_profile(packet.profile_info)

             else:
                 message = self.connection.unexpected_packet_message(
-                    'Exception, EndOfStream or Log', packet.type
+                    'Exception, EndOfStream, Progress, TableColumns, '
+                    'ProfileEvents or Log', packet.type
+                )
+                raise errors.UnexpectedPacketFromServerError(message)
+
+    def receive_end_of_insert_query(self):
+        while True:
+            packet = self.connection.receive_packet()
+
+            if packet.type == ServerPacketTypes.END_OF_STREAM:
+                break
+
+            elif packet.type == ServerPacketTypes.LOG:
+                log_block(packet.block)
+
+            elif packet.type == ServerPacketTypes.PROGRESS:
+                self.last_query.store_progress(packet.progress)
+
+            elif packet.type == ServerPacketTypes.EXCEPTION:
+                raise packet.exception
+
+            else:
+                message = self.connection.unexpected_packet_message(
+                    'EndOfStream, Log, Progress or Exception', packet.type
+                )
+                raise errors.UnexpectedPacketFromServerError(message)
+
+    def receive_profile_events(self):
+        revision = self.connection.server_info.used_revision
+        if (
+            revision <
+            defines.DBMS_MIN_PROTOCOL_VERSION_WITH_PROFILE_EVENTS_IN_INSERT
+        ):
+            return None
+
+        while True:
+            packet = self.connection.receive_packet()
+
+            if packet.type == ServerPacketTypes.PROFILE_EVENTS:
+                self.last_query.store_profile(packet.profile_info)
+                break
+
+            elif packet.type == ServerPacketTypes.PROGRESS:
+                self.last_query.store_progress(packet.progress)
+
+            elif packet.type == ServerPacketTypes.LOG:
+                log_block(packet.block)
+
+            elif packet.type == ServerPacketTypes.EXCEPTION:
+                raise packet.exception
+
+            else:
+                message = self.connection.unexpected_packet_message(
+                    'ProfileEvents, Progress, Log or Exception', packet.type
                 )
                 raise errors.UnexpectedPacketFromServerError(message)

@@ -686,6 +775,10 @@ class Client(object):
         # prints: SELECT 1234, 'bar'
         print(substituted_query)
         """
+        # In case of server side templating we don't substitute here.
+        if self.connection.context.client_settings['server_side_params']:
+            return query
+
         if not isinstance(params, dict):
             raise ValueError('Parameters are expected in dict form')

@@ -773,6 +866,17 @@ class Client(object):
             elif name == 'settings_is_important':
                 kwargs[name] = asbool(value)

+            elif name == 'tcp_keepalive':
+                try:
+                    kwargs[name] = asbool(value)
+                except ValueError:
+                    parts = value.split(',')
+                    kwargs[name] = (
+                        float(parts[0]), float(parts[1]), int(parts[2])
+                    )
+            elif name == 'client_revision':
+                kwargs[name] = int(value)
+
             # ssl
             elif name == 'verify':
                 kwargs[name] = asbool(value)
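Taken together, the client changes are easiest to see from the calling side. A minimal usage sketch; the host, the keepalive triple and the revision number are placeholders, and server_side_params assumes a server recent enough to render query parameters itself:

from clickhouse_driver import Client

client = Client('localhost', settings={
    'namedtuple_as_json': False,  # read named tuples/nested as plain tuples
    'server_side_params': True,   # ship params to the server unsubstituted
})

# Column names can now be kept verbatim in DataFrames:
df = client.query_dataframe('SELECT 1 AS "weird name!"',
                            replace_nonwords=False)

# The new DSN options parsed above: tcp_keepalive as a bool or an
# "idle,interval,probes" triple, plus an explicit client_revision.
client2 = Client.from_url(
    'clickhouse://localhost?tcp_keepalive=7200.0,60.0,3&client_revision=54459'
)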
clickhouse_driver/clientinfo.py
CHANGED
@@ -28,14 +28,13 @@ class ClientInfo(object):
     client_version_major = defines.CLIENT_VERSION_MAJOR
     client_version_minor = defines.CLIENT_VERSION_MINOR
     client_version_patch = defines.CLIENT_VERSION_PATCH
-    client_revision = defines.CLIENT_REVISION
     interface = Interface.TCP

     initial_user = ''
     initial_query_id = ''
     initial_address = '0.0.0.0:0'

-    def __init__(self, client_name, context):
+    def __init__(self, client_name, context, client_revision):
         self.query_kind = ClientInfo.QueryKind.NO_QUERY

         try:
@@ -44,6 +43,7 @@ class ClientInfo(object):
         self.os_user = ''
         self.client_hostname = socket.gethostname()
         self.client_name = client_name
+        self.client_revision = client_revision

         self.client_trace_context = OpenTelemetryTraceContext(
             context.client_settings['opentelemetry_traceparent'],
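ClientInfo no longer hardcodes defines.CLIENT_REVISION; the connection passes the negotiated (and now optionally capped) revision in. A one-line sketch of pinning it from user code; 54459 is an illustrative revision number:

from clickhouse_driver import Client

client = Client('localhost', client_revision=54459)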
clickhouse_driver/columns/arraycolumn.py
CHANGED

@@ -28,7 +28,8 @@ class ArrayColumn(Column):
     py_types = (list, tuple)

     def __init__(self, nested_column, **kwargs):
-        self.size_column = UInt64Column()
+        self.init_kwargs = kwargs
+        self.size_column = UInt64Column(**kwargs)
         self.nested_column = nested_column
         self._write_depth_0_size = True
         super(ArrayColumn, self).__init__(**kwargs)
@@ -37,17 +38,21 @@ class ArrayColumn(Column):
     def write_data(self, data, buf):
         # Column of Array(T) is stored in "compact" format and passed to server
         # wrapped into another Array without size of wrapper array.
-        self.nested_column = ArrayColumn(self.nested_column)
+        self.nested_column = ArrayColumn(
+            self.nested_column, **self.init_kwargs
+        )
         self.nested_column.nullable = self.nullable
         self.nullable = False
         self._write_depth_0_size = False
         self._write(data, buf)

-    def read_data(self, rows, buf):
-        self.nested_column = ArrayColumn(self.nested_column)
+    def read_data(self, n_rows, buf):
+        self.nested_column = ArrayColumn(
+            self.nested_column, **self.init_kwargs
+        )
         self.nested_column.nullable = self.nullable
         self.nullable = False
-        return self._read(rows, buf)[0]
+        return self._read(n_rows, buf)[0]

     def _write_sizes(self, value, buf):
         nulls_map = []
@@ -106,9 +111,13 @@ class ArrayColumn(Column):
         self._write_data(value, buf)

     def read_state_prefix(self, buf):
-        self.nested_column.read_state_prefix(buf)
+        super(ArrayColumn, self).read_state_prefix(buf)
+
+        self.nested_column.read_state_prefix(buf)

     def write_state_prefix(self, buf):
+        super(ArrayColumn, self).write_state_prefix(buf)
+
         self.nested_column.write_state_prefix(buf)

     def _read(self, size, buf):
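The recurring pattern in this and the following column files is forwarding **kwargs (kept as init_kwargs) into every nested column built on the fly, since the base Column now expects a context among its kwargs. A rough illustration of a query that exercises it; the table and data are made up:

from clickhouse_driver import Client

client = Client('localhost', settings={'input_format_null_as_default': 1})
client.execute('CREATE TABLE arr (v Array(Array(Int32))) ENGINE = Memory')
# Each nesting level wraps itself in a fresh ArrayColumn; init_kwargs
# keeps the connection context attached at every depth.
client.execute('INSERT INTO arr (v) VALUES', [([[1, 2], [3]],)])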
clickhouse_driver/columns/base.py
CHANGED

@@ -1,6 +1,61 @@
 from struct import Struct, error as struct_error

 from . import exceptions
+from ..varint import read_varint
+
+
+class CommonSerialization(object):
+    def __init__(self, column):
+        self.column = column
+        super(CommonSerialization, self).__init__()
+
+    def read_sparse(self, n_items, buf):
+        return n_items
+
+    def apply_sparse(self, items):
+        return items
+
+
+class SparseSerialization(CommonSerialization):
+
+    def __init__(self, *args, **kwargs):
+        self.sparse_indexes = []
+        self.items_total = None
+        super(SparseSerialization, self).__init__(*args, **kwargs)
+
+    def read_sparse(self, n_items, buf):
+        sparse_indexes = []
+        items_total = 0
+        non_default_items = 0
+
+        END_OF_GRANULE_FLAG = 1 << 62
+        end_of_granule = False
+
+        while not end_of_granule:
+            group_size = read_varint(buf)
+            end_of_granule = group_size & END_OF_GRANULE_FLAG
+            group_size &= ~END_OF_GRANULE_FLAG
+
+            items_total += group_size + 1
+            if not end_of_granule:
+                non_default_items += 1
+                sparse_indexes.append(items_total)
+
+        self.sparse_indexes = sparse_indexes
+        self.items_total = items_total
+
+        return non_default_items
+
+    def apply_sparse(self, items):
+        default = self.column.null_value
+        if self.column.after_read_items:
+            default = self.column.after_read_items([default])[0]
+
+        rv = [default] * (self.items_total - 1)
+        for item_number, i in enumerate(self.sparse_indexes):
+            rv[i - 1] = items[item_number]
+
+        return rv


 class Column(object):
@@ -15,14 +70,17 @@ class Column(object):

     null_value = 0

-    def __init__(self, types_check=False, **kwargs):
+    def __init__(self, types_check=False, has_custom_serialization=False,
+                 **kwargs):
         self.nullable = False
         self.types_check_enabled = types_check
+        self.has_custom_serialization = has_custom_serialization
+        self.serialization = CommonSerialization(self)
         self.input_null_as_default = False
-        if 'context' in kwargs:
-            self.input_null_as_default = kwargs['context'] \
-                .client_settings \
-                .get('input_format_null_as_default', False)
+
+        self.context = kwargs['context']
+        self.input_null_as_default = self.context.client_settings \
+            .get('input_format_null_as_default', False)

         super(Column, self).__init__()

@@ -94,12 +152,15 @@ class Column(object):
         raise NotImplementedError

     def read_data(self, n_items, buf):
+        n_items = self.serialization.read_sparse(n_items, buf)
+
         if self.nullable:
             nulls_map = self._read_nulls_map(n_items, buf)
         else:
             nulls_map = None

-        return self._read_data(n_items, buf, nulls_map=nulls_map)
+        items = self._read_data(n_items, buf, nulls_map=nulls_map)
+        return self.serialization.apply_sparse(items)

     def _read_data(self, n_items, buf, nulls_map=None):
         items = self.read_items(n_items, buf)
@@ -117,7 +178,10 @@ class Column(object):
         raise NotImplementedError

     def read_state_prefix(self, buf):
-        pass
+        if self.has_custom_serialization:
+            use_custom_serialization = read_varint(buf)
+            if use_custom_serialization:
+                self.serialization = SparseSerialization(self)

     def write_state_prefix(self, buf):
         pass
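The sparse stream decoded above is a sequence of varint group sizes (runs of default values between non-default ones), with bit 62 marking the last group of a granule. A standalone re-implementation of that decoding for illustration, using a local pure-Python varint codec rather than the driver's compiled one:

from io import BytesIO

END_OF_GRANULE_FLAG = 1 << 62

def read_varint(buf):
    # LEB128-style varint reader over a file-like object.
    shift, result = 0, 0
    while True:
        b = buf.read(1)[0]
        result |= (b & 0x7f) << shift
        if not (b & 0x80):
            return result
        shift += 7

def write_varint(value, buf):
    while True:
        towrite = value & 0x7f
        value >>= 7
        buf.write(bytes([towrite | 0x80 if value else towrite]))
        if not value:
            break

# Encode three groups: 2 defaults before the first value, 0 defaults
# before the second, then 3 trailing defaults closing the granule.
buf = BytesIO()
write_varint(2, buf)
write_varint(0, buf)
write_varint(3 | END_OF_GRANULE_FLAG, buf)
buf.seek(0)

sparse_indexes, items_total = [], 0
end_of_granule = False
while not end_of_granule:
    group_size = read_varint(buf)
    end_of_granule = bool(group_size & END_OF_GRANULE_FLAG)
    group_size &= ~END_OF_GRANULE_FLAG
    items_total += group_size + 1
    if not end_of_granule:
        sparse_indexes.append(items_total)

# Non-default values read from the column data fill these positions;
# '' stands in for the column's default value here.
values = ['a', 'b']
rv = [''] * (items_total - 1)
for n, i in enumerate(sparse_indexes):
    rv[i - 1] = values[n]
print(rv)  # ['', '', 'a', 'b', '', '', '']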
clickhouse_driver/columns/datecolumn.py
CHANGED

@@ -6,7 +6,7 @@ from .base import FormatColumn
 epoch_start = date(1970, 1, 1)
 epoch_end = date(2149, 6, 6)

-epoch_start_date32 = date(1925, 1, 1)
+epoch_start_date32 = date(1900, 1, 1)
 epoch_end_date32 = date(2283, 11, 11)


@@ -34,7 +34,7 @@ class DateColumn(FormatColumn):
                 items[i] = null_value
                 continue

-            if type(item) != date:
+            if type(item) is not date:
                 item = date(item.year, item.month, item.day)

             if min_value <= item <= max_value:
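The first hunk simply widens the accepted Date32 range; in plain Python terms:

from datetime import date

epoch_start_date32 = date(1900, 1, 1)
epoch_end_date32 = date(2283, 11, 11)

d = date(1907, 6, 15)
print(epoch_start_date32 <= d <= epoch_end_date32)
# True; the old 1925 lower bound rejected this value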
clickhouse_driver/columns/jsoncolumn.py
ADDED

@@ -0,0 +1,37 @@
+from .base import Column
+from .stringcolumn import String
+from ..reader import read_binary_uint8, read_binary_str
+from ..util.compat import json
+from ..writer import write_binary_uint8
+
+
+class JsonColumn(Column):
+    py_types = (dict, )
+
+    # No NULL value actually
+    null_value = {}
+
+    def __init__(self, column_by_spec_getter, **kwargs):
+        self.column_by_spec_getter = column_by_spec_getter
+        self.string_column = String(**kwargs)
+        super(JsonColumn, self).__init__(**kwargs)
+
+    def write_state_prefix(self, buf):
+        # Read in binary format.
+        # Write in text format.
+        write_binary_uint8(1, buf)
+
+    def read_items(self, n_items, buf):
+        read_binary_uint8(buf)
+        spec = read_binary_str(buf)
+        col = self.column_by_spec_getter(spec)
+        col.read_state_prefix(buf)
+        return col.read_data(n_items, buf)
+
+    def write_items(self, items, buf):
+        items = [x if isinstance(x, str) else json.dumps(x) for x in items]
+        self.string_column.write_items(items, buf)
+
+
+def create_json_column(spec, column_by_spec_getter, column_options):
+    return JsonColumn(column_by_spec_getter, **column_options)
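A hedged end-to-end sketch for the new column type; it assumes a server recent enough for the experimental JSON/Object type and a throwaway table:

from clickhouse_driver import Client

client = Client('localhost', settings={'allow_experimental_object_type': 1})
client.execute('CREATE TABLE t (obj JSON) ENGINE = Memory')
# write_items accepts dicts (serialized via json.dumps) or ready-made strings
client.execute('INSERT INTO t (obj) VALUES', [({'a': 1, 'b': 'x'},)])
print(client.execute('SELECT obj FROM t'))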
clickhouse_driver/columns/largeint.cp38-win32.pyd
CHANGED (binary file)
clickhouse_driver/columns/lowcardinalitycolumn.py
CHANGED

@@ -35,25 +35,32 @@ class LowCardinalityColumn(Column):
     serialization_type = has_additional_keys_bit | need_update_dictionary

     def __init__(self, nested_column, **kwargs):
+        self.init_kwargs = kwargs
         self.nested_column = nested_column
         super(LowCardinalityColumn, self).__init__(**kwargs)

     def read_state_prefix(self, buf):
-        read_binary_uint64(buf)
+        super(LowCardinalityColumn, self).read_state_prefix(buf)
+
+        read_binary_uint64(buf)

     def write_state_prefix(self, buf):
+        super(LowCardinalityColumn, self).write_state_prefix(buf)
+
         # KeysSerializationVersion. See ClickHouse docs.
         write_binary_int64(1, buf)

     def _write_data(self, items, buf):
         index, keys = [], []
         key_by_index_element = {}
+        nested_is_nullable = False

         if self.nested_column.nullable:
             # First element represents NULL if column is nullable.
             index.append(self.nested_column.null_value)
             # Prevent null map writing. Reset nested column nullable flag.
             self.nested_column.nullable = False
+            nested_is_nullable = True

         for x in items:
             if x is None:
@@ -87,14 +94,26 @@ class LowCardinalityColumn(Column):
             return

         int_type = int(log(len(index), 2) / 8)
-        int_column = self.int_types[int_type]()
+        int_column = self.int_types[int_type](**self.init_kwargs)

         serialization_type = self.serialization_type | int_type

         write_binary_int64(serialization_type, buf)
         write_binary_int64(len(index), buf)

-        self.nested_column.write_data(index, buf)
+        if nested_is_nullable:
+            # Given we reset nested column nullable flag above,
+            # we need to write null map manually. If to invoke
+            # write_data method, it will cause an exception,
+            # because `prepare_data` may not be able to handle
+            # null value correctly.
+            self.nested_column.write_items(
+                [self.nested_column.null_value], buf)
+            # Remove null map from index, because it is already written.
+            index_to_write = index[1:]
+            self.nested_column.write_data(index_to_write, buf)
+        else:
+            self.nested_column.write_data(index, buf)
         write_binary_int64(len(items), buf)
         int_column.write_items(keys, buf)

@@ -106,7 +125,7 @@ class LowCardinalityColumn(Column):

         # Lowest byte contains info about key type.
         key_type = serialization_type & 0xf
-        keys_column = self.int_types[key_type]()
+        keys_column = self.int_types[key_type](**self.init_kwargs)

         nullable = self.nested_column.nullable
         # Prevent null map reading. Reset nested column nullable flag.
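A sketch of the case the nullable branch above fixes: a LowCardinality dictionary whose first slot is the null marker, now written manually so prepare_data never sees None. The table name and values are illustrative:

from clickhouse_driver import Client

client = Client('localhost')
client.execute(
    'CREATE TABLE lc (s LowCardinality(Nullable(String))) ENGINE = Memory'
)
client.execute('INSERT INTO lc (s) VALUES', [('x',), (None,)])
print(client.execute('SELECT s FROM lc'))  # [('x',), (None,)]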
clickhouse_driver/columns/mapcolumn.py
CHANGED

@@ -13,16 +13,20 @@ class MapColumn(Column):
     null_value = {}

     def __init__(self, key_column, value_column, **kwargs):
-        self.offset_column = UInt64Column()
+        self.offset_column = UInt64Column(**kwargs)
         self.key_column = key_column
         self.value_column = value_column
         super(MapColumn, self).__init__(**kwargs)

     def read_state_prefix(self, buf):
+        super(MapColumn, self).read_state_prefix(buf)
+
         self.key_column.read_state_prefix(buf)
         self.value_column.read_state_prefix(buf)

     def write_state_prefix(self, buf):
+        super(MapColumn, self).write_state_prefix(buf)
+
         self.key_column.write_state_prefix(buf)
         self.value_column.write_state_prefix(buf)

@@ -57,7 +61,7 @@ class MapColumn(Column):


 def create_map_column(spec, column_by_spec_getter, column_options):
-    # Match commas outside of parentheses so we don't match the comma in
+    # Match commas outside of parentheses, so we don't match the comma in
     # Decimal types.
     key, value = comma_re.split(spec[4:-1])
     key_column = column_by_spec_getter(key.strip())
clickhouse_driver/columns/nestedcolumn.py
CHANGED

@@ -1,21 +1,10 @@

 from .arraycolumn import create_array_column
-from .util import get_inner_spec, get_inner_columns, \
-    get_inner_columns_with_types
+from .util import get_inner_spec


 def create_nested_column(spec, column_by_spec_getter, column_options):
     return create_array_column(
-        'Array(Tuple({}))'.format(','.join(get_nested_columns(spec))),
+        'Array(Tuple({}))'.format(get_inner_spec('Nested', spec)),
         column_by_spec_getter, column_options
     )
-
-
-def get_nested_columns(spec):
-    inner_spec = get_inner_spec('Nested', spec)
-    return get_inner_columns(inner_spec)
-
-
-def get_columns_with_types(spec):
-    inner_spec = get_inner_spec('Nested', spec)
-    return get_inner_columns_with_types(inner_spec)