clickhouse-driver 0.2.5__cp311-cp311-musllinux_1_1_aarch64.whl → 0.2.9__cp311-cp311-musllinux_1_1_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clickhouse_driver/__init__.py +1 -1
- clickhouse_driver/block.py +3 -2
- clickhouse_driver/bufferedreader.cpython-311-aarch64-linux-musl.so +0 -0
- clickhouse_driver/bufferedwriter.cpython-311-aarch64-linux-musl.so +0 -0
- clickhouse_driver/client.py +119 -99
- clickhouse_driver/clientinfo.py +2 -2
- clickhouse_driver/columns/arraycolumn.py +15 -6
- clickhouse_driver/columns/base.py +71 -7
- clickhouse_driver/columns/datecolumn.py +52 -13
- clickhouse_driver/columns/datetimecolumn.py +3 -2
- clickhouse_driver/columns/enumcolumn.py +27 -17
- clickhouse_driver/columns/jsoncolumn.py +37 -0
- clickhouse_driver/columns/largeint.cpython-311-aarch64-linux-musl.so +0 -0
- clickhouse_driver/columns/lowcardinalitycolumn.py +23 -4
- clickhouse_driver/columns/mapcolumn.py +9 -2
- clickhouse_driver/columns/nestedcolumn.py +2 -13
- clickhouse_driver/columns/numpy/datetimecolumn.py +21 -18
- clickhouse_driver/columns/numpy/lowcardinalitycolumn.py +2 -2
- clickhouse_driver/columns/service.py +12 -2
- clickhouse_driver/columns/tuplecolumn.py +31 -5
- clickhouse_driver/columns/util.py +2 -1
- clickhouse_driver/columns/uuidcolumn.py +1 -1
- clickhouse_driver/connection.py +117 -19
- clickhouse_driver/defines.py +12 -1
- clickhouse_driver/log.py +7 -3
- clickhouse_driver/numpy/helpers.py +5 -2
- clickhouse_driver/progress.py +15 -3
- clickhouse_driver/protocol.py +19 -3
- clickhouse_driver/settings/writer.py +7 -2
- clickhouse_driver/streams/native.py +24 -6
- clickhouse_driver/util/compat.py +12 -0
- clickhouse_driver/util/escape.py +36 -8
- clickhouse_driver/util/helpers.py +114 -0
- clickhouse_driver/varint.cpython-311-aarch64-linux-musl.so +0 -0
- {clickhouse_driver-0.2.5.dist-info → clickhouse_driver-0.2.9.dist-info}/METADATA +8 -8
- {clickhouse_driver-0.2.5.dist-info → clickhouse_driver-0.2.9.dist-info}/RECORD +71 -70
- {clickhouse_driver-0.2.5.dist-info → clickhouse_driver-0.2.9.dist-info}/WHEEL +1 -1
- {clickhouse_driver-0.2.5.dist-info → clickhouse_driver-0.2.9.dist-info}/LICENSE +0 -0
- {clickhouse_driver-0.2.5.dist-info → clickhouse_driver-0.2.9.dist-info}/top_level.txt +0 -0
clickhouse_driver/__init__.py
CHANGED
clickhouse_driver/block.py
CHANGED
@@ -1,7 +1,7 @@
+from .columns.util import get_inner_spec, get_inner_columns_with_types
 from .reader import read_varint, read_binary_uint8, read_binary_int32
 from .varint import write_varint
 from .writer import write_binary_uint8, write_binary_int32
-from .columns import nestedcolumn


 class BlockInfo(object):
@@ -172,7 +172,8 @@ class RowOrientedBlock(BaseBlock):
         for name, type_ in columns_with_types:
             cwt = None
             if type_.startswith('Nested'):
-                cwt = nestedcolumn.get_columns_with_types(type_)
+                inner_spec = get_inner_spec('Nested', type_)
+                cwt = get_inner_columns_with_types(inner_spec)
             columns_with_cwt.append((name, cwt))

         for i, row in enumerate(data):
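Note: for orientation, this is roughly what the two helpers imported above do for a typical Nested type. Only the call site is shown in the hunk; the return values below are inferred from the names and usage, not part of the diff.

    from clickhouse_driver.columns.util import (
        get_inner_spec, get_inner_columns_with_types
    )

    type_ = 'Nested(a Int32, b String)'
    inner_spec = get_inner_spec('Nested', type_)
    # inferred: 'a Int32, b String'
    cwt = get_inner_columns_with_types(inner_spec)
    # inferred: [('a', 'Int32'), ('b', 'String')]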
clickhouse_driver/bufferedreader.cpython-311-aarch64-linux-musl.so
CHANGED
Binary file

clickhouse_driver/bufferedwriter.cpython-311-aarch64-linux-musl.so
CHANGED
Binary file
clickhouse_driver/client.py
CHANGED
@@ -1,10 +1,9 @@
 import re
-import ssl
 from collections import deque
 from contextlib import contextmanager
 from time import time
 import types
-from urllib.parse import urlparse, parse_qs, unquote
+from urllib.parse import urlparse

 from . import errors, defines
 from .block import ColumnOrientedBlock, RowOrientedBlock
@@ -15,7 +14,7 @@ from .result import (
     IterQueryResult, ProgressQueryResult, QueryResult, QueryInfo
 )
 from .util.escape import escape_params
-from .util.helpers import column_chunks, chunks, asbool
+from .util.helpers import column_chunks, chunks, parse_url


 class Client(object):
@@ -57,6 +56,17 @@ class Client(object):
     * ``round_robin`` -- If ``alt_hosts`` are provided the query will be
       executed on host picked with round-robin algorithm.
       New in version *0.2.5*.
+    * ``namedtuple_as_json`` -- Controls named tuple and nested types
+      deserialization. To interpret these column alongside
+      with ``allow_experimental_object_type=1`` as Python
+      tuple set ``namedtuple_as_json`` to ``False``.
+      Default: True.
+      New in version *0.2.6*.
+    * ``server_side_params`` -- Species on which side query parameters
+      should be rendered into placeholders.
+      Default: False. Means that parameters are rendered
+      on driver's side.
+      New in version *0.2.7*.
     """

     available_client_settings = (
@@ -67,7 +77,9 @@ class Client(object):
         'opentelemetry_traceparent',
         'opentelemetry_tracestate',
         'quota_key',
-        'input_format_null_as_default'
+        'input_format_null_as_default',
+        'namedtuple_as_json',
+        'server_side_params'
     )

     def __init__(self, *args, **kwargs):
@@ -97,6 +109,12 @@ class Client(object):
             ),
             'input_format_null_as_default': self.settings.pop(
                 'input_format_null_as_default', False
+            ),
+            'namedtuple_as_json': self.settings.pop(
+                'namedtuple_as_json', True
+            ),
+            'server_side_params': self.settings.pop(
+                'server_side_params', False
             )
         }
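Note: both new entries are popped from the settings dict like the existing client settings, so they can be supplied at construction time. A minimal usage sketch (the host is illustrative):

    from clickhouse_driver import Client

    client = Client(
        'localhost',
        settings={
            'namedtuple_as_json': False,  # read named tuple/JSON columns as plain tuples
            'server_side_params': True,   # let the server render query placeholders
        }
    )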
@@ -125,13 +143,19 @@ class Client(object):
             url = urlparse('clickhouse://' + host)

             connection_kwargs = kwargs.copy()
-            if len(args) > 2:
-                # port as positional argument
+            num_args = len(args)
+            if num_args >= 2:
+                # host and port as positional arguments
                 connection_args = (url.hostname, url.port) + args[2:]
-            else:
-                # port as keyword argument
+            elif num_args >= 1:
+                # host as positional and port as keyword argument
                 connection_args = (url.hostname, ) + args[1:]
                 connection_kwargs['port'] = url.port
+            else:
+                # host and port as keyword arguments
+                connection_args = tuple()
+                connection_kwargs['host'] = url.hostname
+                connection_kwargs['port'] = url.port

             connection = Connection(*connection_args, **connection_kwargs)
             self.connections.append(connection)
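Note: the rewritten branching distinguishes the three ways host and port can be supplied when round_robin is used with alt_hosts. A sketch of the call shapes each branch covers (hosts and ports are illustrative):

    Client('host1', 9000, alt_hosts='host2:9000', round_robin=True)       # num_args >= 2
    Client('host1', port=9000, alt_hosts='host2', round_robin=True)       # num_args >= 1
    Client(host='host1', port=9000, alt_hosts='host2', round_robin=True)  # new else branch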
@@ -287,6 +311,7 @@ class Client(object):
     def disconnect_on_error(self, query, settings):
         try:
             self.establish_connection(settings)
+            self.connection.server_info.session_timezone = None

             yield

@@ -438,7 +463,7 @@ class Client(object):

     def query_dataframe(
             self, query, params=None, external_tables=None, query_id=None,
-            settings=None):
+            settings=None, replace_nonwords=True):
         """
         *New in version 0.2.0.*

@@ -453,6 +478,8 @@ class Client(object):
                        ClickHouse server will generate it.
         :param settings: dictionary of query settings.
                          Defaults to ``None`` (no additional settings).
+        :param replace_nonwords: boolean to replace non-words in column names
+                                 to underscores. Defaults to ``True``.
         :return: pandas DataFrame.
         """

@@ -467,7 +494,10 @@ class Client(object):
             settings=settings
         )

-        columns = [re.sub(r'\W', '_', name) for name, type_ in columns]
+        columns = [name for name, type_ in columns]
+        if replace_nonwords:
+            columns = [re.sub(r'\W', '_', x) for x in columns]
+
         return pd.DataFrame(
             {col: d for d, col in zip(data, columns)}, columns=columns
         )
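Note: a short sketch of the new parameter, assuming a pandas-enabled client; the column alias is illustrative:

    df = client.query_dataframe(
        'SELECT 1 AS "dotted.name"',
        replace_nonwords=False,  # keep "dotted.name" instead of "dotted_name"
    )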
@@ -529,7 +559,7 @@ class Client(object):
             query, params, self.connection.context
         )

-        self.connection.send_query(query, query_id=query_id)
+        self.connection.send_query(query, query_id=query_id, params=params)
         self.connection.send_external_tables(external_tables,
                                              types_check=types_check)
         return self.receive_result(with_column_types=with_column_types,
@@ -544,8 +574,7 @@ class Client(object):
         query = self.substitute_params(
             query, params, self.connection.context
         )
-
-        self.connection.send_query(query, query_id=query_id)
+        self.connection.send_query(query, query_id=query_id, params=params)
         self.connection.send_external_tables(external_tables,
                                              types_check=types_check)
         return self.receive_result(with_column_types=with_column_types,
@@ -561,7 +590,7 @@ class Client(object):
             query, params, self.connection.context
         )

-        self.connection.send_query(query, query_id=query_id)
+        self.connection.send_query(query, query_id=query_id, params=params)
         self.connection.send_external_tables(external_tables,
                                              types_check=types_check)
         return self.iter_receive_result(with_column_types=with_column_types)
@@ -572,12 +601,12 @@ class Client(object):
         self.connection.send_query(query_without_data, query_id=query_id)
         self.connection.send_external_tables(external_tables,
                                              types_check=types_check)
-
         sample_block = self.receive_sample_block()
+
         if sample_block:
             rv = self.send_data(sample_block, data,
                                 types_check=types_check, columnar=columnar)
-            self.receive_end_of_query()
+            self.receive_end_of_insert_query()
             return rv

     def receive_sample_block(self):
@@ -631,8 +660,15 @@ class Client(object):
             self.connection.send_data(block)
             inserted_rows += block.num_rows

+            # Starting from the specific revision there are profile events
+            # sent by server in response to each inserted block
+            self.receive_profile_events()
+
         # Empty block means end of data.
         self.connection.send_data(block_cls())
+        # If enabled by revision profile events are also sent after empty block
+        self.receive_profile_events()
+
         return inserted_rows

     def receive_end_of_query(self):
@@ -643,7 +679,7 @@ class Client(object):
                 break

             elif packet.type == ServerPacketTypes.PROGRESS:
-                continue
+                self.last_query.store_progress(packet.progress)

             elif packet.type == ServerPacketTypes.EXCEPTION:
                 raise packet.exception
@@ -655,11 +691,68 @@ class Client(object):
                 pass

             elif packet.type == ServerPacketTypes.PROFILE_EVENTS:
+                self.last_query.store_profile(packet.profile_info)
+
+            else:
+                message = self.connection.unexpected_packet_message(
+                    'Exception, EndOfStream, Progress, TableColumns, '
+                    'ProfileEvents or Log', packet.type
+                )
+                raise errors.UnexpectedPacketFromServerError(message)
+
+    def receive_end_of_insert_query(self):
+        while True:
+            packet = self.connection.receive_packet()
+
+            if packet.type == ServerPacketTypes.END_OF_STREAM:
+                break
+
+            elif packet.type == ServerPacketTypes.LOG:
+                log_block(packet.block)
+
+            elif packet.type == ServerPacketTypes.PROGRESS:
+                self.last_query.store_progress(packet.progress)
+
+            elif packet.type == ServerPacketTypes.EXCEPTION:
+                raise packet.exception
+
+            else:
+                message = self.connection.unexpected_packet_message(
+                    'EndOfStream, Log, Progress or Exception', packet.type
+                )
+                raise errors.UnexpectedPacketFromServerError(message)
+
+    def receive_profile_events(self):
+        revision = self.connection.server_info.used_revision
+        if (
+            revision <
+            defines.DBMS_MIN_PROTOCOL_VERSION_WITH_PROFILE_EVENTS_IN_INSERT
+        ):
+            return None
+
+        while True:
+            packet = self.connection.receive_packet()
+
+            if packet.type == ServerPacketTypes.PROFILE_EVENTS:
+                self.last_query.store_profile(packet.profile_info)
+                break
+
+            elif packet.type == ServerPacketTypes.PROGRESS:
+                self.last_query.store_progress(packet.progress)
+
+            elif packet.type == ServerPacketTypes.LOG:
+                log_block(packet.block)
+
+            elif packet.type == ServerPacketTypes.EXCEPTION:
+                raise packet.exception
+
+            elif packet.type == ServerPacketTypes.TIMEZONE_UPDATE:
                 pass

             else:
                 message = self.connection.unexpected_packet_message(
-                    'Exception, EndOfStream or Log', packet.type
+                    'ProfileEvents, Progress, Log, Exception or '
+                    'TimezoneUpdate', packet.type
                 )
                 raise errors.UnexpectedPacketFromServerError(message)

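Note: since receive_profile_events() stores what it reads on last_query, per-insert server counters become observable after execute(). A hedged sketch; the attribute names follow the store_profile/store_progress calls above, and the table is illustrative:

    client.execute('INSERT INTO t (x) VALUES', [(1,), (2,)])
    info = client.last_query
    print(info.profile_info)  # filled from ProfileEvents packets, when the server revision sends them
    print(info.progress)      # updated from Progress packets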
@@ -686,6 +779,10 @@ class Client(object):
             # prints: SELECT 1234, 'bar'
             print(substituted_query)
         """
+        # In case of server side templating we don't substitute here.
+        if self.connection.context.client_settings['server_side_params']:
+            return query
+
         if not isinstance(params, dict):
             raise ValueError('Parameters are expected in dict form')

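Note: with server_side_params enabled, substitute_params returns the query untouched and the raw params travel with send_query (the params=params changes above), so placeholders use ClickHouse's server-side {name:Type} syntax. A sketch:

    client = Client('localhost', settings={'server_side_params': True})
    client.execute('SELECT {num:Int32} * 2', params={'num': 21})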
@@ -703,90 +800,13 @@ class Client(object):
         clickhouses://[user:password]@localhost:9440/default

         Three URL schemes are supported:
-        clickhouse:// creates a normal TCP socket connection
-        clickhouses:// creates a SSL wrapped TCP socket connection
+
+        * clickhouse:// creates a normal TCP socket connection
+        * clickhouses:// creates a SSL wrapped TCP socket connection

         Any additional querystring arguments will be passed along to
         the Connection class's initializer.
         """
-        url = urlparse(url)
-
-        settings = {}
-        kwargs = {}
-
-        host = url.hostname
-
-        if url.port is not None:
-            kwargs['port'] = url.port
-
-        path = url.path.replace('/', '', 1)
-        if path:
-            kwargs['database'] = path
-
-        if url.username is not None:
-            kwargs['user'] = unquote(url.username)
-
-        if url.password is not None:
-            kwargs['password'] = unquote(url.password)
-
-        if url.scheme == 'clickhouses':
-            kwargs['secure'] = True
-
-        compression_algs = {'lz4', 'lz4hc', 'zstd'}
-        timeouts = {
-            'connect_timeout',
-            'send_receive_timeout',
-            'sync_request_timeout'
-        }
-
-        for name, value in parse_qs(url.query).items():
-            if not value or not len(value):
-                continue
-
-            value = value[0]
-
-            if name == 'compression':
-                value = value.lower()
-                if value in compression_algs:
-                    kwargs[name] = value
-                else:
-                    kwargs[name] = asbool(value)
-
-            elif name == 'secure':
-                kwargs[name] = asbool(value)
-
-            elif name == 'use_numpy':
-                settings[name] = asbool(value)
-
-            elif name == 'round_robin':
-                kwargs[name] = asbool(value)
-
-            elif name == 'client_name':
-                kwargs[name] = value
-
-            elif name in timeouts:
-                kwargs[name] = float(value)
-
-            elif name == 'compress_block_size':
-                kwargs[name] = int(value)
-
-            elif name == 'settings_is_important':
-                kwargs[name] = asbool(value)
-
-            # ssl
-            elif name == 'verify':
-                kwargs[name] = asbool(value)
-            elif name == 'ssl_version':
-                kwargs[name] = getattr(ssl, value)
-            elif name in ['ca_certs', 'ciphers', 'keyfile', 'certfile',
-                          'server_hostname']:
-                kwargs[name] = value
-            elif name == 'alt_hosts':
-                kwargs['alt_hosts'] = value
-            else:
-                settings[name] = value
-
-        if settings:
-            kwargs['settings'] = settings
+        host, kwargs = parse_url(url)

         return cls(host, **kwargs)
clickhouse_driver/clientinfo.py
CHANGED
@@ -28,14 +28,13 @@ class ClientInfo(object):
     client_version_major = defines.CLIENT_VERSION_MAJOR
     client_version_minor = defines.CLIENT_VERSION_MINOR
     client_version_patch = defines.CLIENT_VERSION_PATCH
-    client_revision = defines.CLIENT_REVISION
     interface = Interface.TCP

     initial_user = ''
     initial_query_id = ''
     initial_address = '0.0.0.0:0'

-    def __init__(self, client_name, context):
+    def __init__(self, client_name, context, client_revision):
         self.query_kind = ClientInfo.QueryKind.NO_QUERY

         try:
@@ -44,6 +43,7 @@ class ClientInfo(object):
             self.os_user = ''
         self.client_hostname = socket.gethostname()
         self.client_name = client_name
+        self.client_revision = client_revision

         self.client_trace_context = OpenTelemetryTraceContext(
             context.client_settings['opentelemetry_traceparent'],
clickhouse_driver/columns/arraycolumn.py
CHANGED
@@ -28,7 +28,8 @@ class ArrayColumn(Column):
     py_types = (list, tuple)

     def __init__(self, nested_column, **kwargs):
-        self.size_column = UInt64Column()
+        self.init_kwargs = kwargs
+        self.size_column = UInt64Column(**kwargs)
         self.nested_column = nested_column
         self._write_depth_0_size = True
         super(ArrayColumn, self).__init__(**kwargs)
@@ -37,17 +38,21 @@ class ArrayColumn(Column):
     def write_data(self, data, buf):
         # Column of Array(T) is stored in "compact" format and passed to server
         # wrapped into another Array without size of wrapper array.
-        self.nested_column = ArrayColumn(self.nested_column)
+        self.nested_column = ArrayColumn(
+            self.nested_column, **self.init_kwargs
+        )
         self.nested_column.nullable = self.nullable
         self.nullable = False
         self._write_depth_0_size = False
         self._write(data, buf)

-    def read_data(self, rows, buf):
-        self.nested_column = ArrayColumn(self.nested_column)
+    def read_data(self, n_rows, buf):
+        self.nested_column = ArrayColumn(
+            self.nested_column, **self.init_kwargs
+        )
         self.nested_column.nullable = self.nullable
         self.nullable = False
-        return self._read(rows, buf)[0]
+        return self._read(n_rows, buf)[0]

     def _write_sizes(self, value, buf):
         nulls_map = []
@@ -106,9 +111,13 @@ class ArrayColumn(Column):
         self._write_data(value, buf)

     def read_state_prefix(self, buf):
-        self.nested_column.read_state_prefix(buf)
+        super(ArrayColumn, self).read_state_prefix(buf)
+
+        self.nested_column.read_state_prefix(buf)

     def write_state_prefix(self, buf):
+        super(ArrayColumn, self).write_state_prefix(buf)
+
         self.nested_column.write_state_prefix(buf)

     def _read(self, size, buf):
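Note on init_kwargs: Column.__init__ now reads kwargs['context'] unconditionally (see the base.py hunk below), so the temporary wrapper ArrayColumn built inside write_data/read_data must be constructed with the original keyword arguments. A generic sketch of the pattern, with hypothetical names:

    class Wrapper(object):
        def __init__(self, inner, **kwargs):
            # remember construction kwargs so re-wrapping preserves
            # required options such as the context
            self.init_kwargs = kwargs
            self.inner = inner

        def rewrap(self):
            return Wrapper(self.inner, **self.init_kwargs)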
clickhouse_driver/columns/base.py
CHANGED
@@ -1,6 +1,61 @@
 from struct import Struct, error as struct_error

 from . import exceptions
+from ..varint import read_varint
+
+
+class CommonSerialization(object):
+    def __init__(self, column):
+        self.column = column
+        super(CommonSerialization, self).__init__()
+
+    def read_sparse(self, n_items, buf):
+        return n_items
+
+    def apply_sparse(self, items):
+        return items
+
+
+class SparseSerialization(CommonSerialization):
+
+    def __init__(self, *args, **kwargs):
+        self.sparse_indexes = []
+        self.items_total = None
+        super(SparseSerialization, self).__init__(*args, **kwargs)
+
+    def read_sparse(self, n_items, buf):
+        sparse_indexes = []
+        items_total = 0
+        non_default_items = 0
+
+        END_OF_GRANULE_FLAG = 1 << 62
+        end_of_granule = False
+
+        while not end_of_granule:
+            group_size = read_varint(buf)
+            end_of_granule = group_size & END_OF_GRANULE_FLAG
+            group_size &= ~END_OF_GRANULE_FLAG
+
+            items_total += group_size + 1
+            if not end_of_granule:
+                non_default_items += 1
+                sparse_indexes.append(items_total)
+
+        self.sparse_indexes = sparse_indexes
+        self.items_total = items_total
+
+        return non_default_items
+
+    def apply_sparse(self, items):
+        default = self.column.null_value
+        if self.column.after_read_items:
+            default = self.column.after_read_items([default])[0]
+
+        rv = [default] * (self.items_total - 1)
+        for item_number, i in enumerate(self.sparse_indexes):
+            rv[i - 1] = items[item_number]
+
+        return rv


 class Column(object):
@@ -15,14 +70,17 @@ class Column(object):

     null_value = 0

-    def __init__(self, types_check=False, **kwargs):
+    def __init__(self, types_check=False, has_custom_serialization=False,
+                 **kwargs):
         self.nullable = False
         self.types_check_enabled = types_check
+        self.has_custom_serialization = has_custom_serialization
+        self.serialization = CommonSerialization(self)
         self.input_null_as_default = False
-        if 'context' in kwargs:
-            self.context = kwargs['context']
-            self.input_null_as_default = self.context.client_settings \
-                .get('input_format_null_as_default', False)
+
+        self.context = kwargs['context']
+        self.input_null_as_default = self.context.client_settings \
+            .get('input_format_null_as_default', False)

         super(Column, self).__init__()

@@ -94,12 +152,15 @@ class Column(object):
         raise NotImplementedError

     def read_data(self, n_items, buf):
+        n_items = self.serialization.read_sparse(n_items, buf)
+
         if self.nullable:
             nulls_map = self._read_nulls_map(n_items, buf)
         else:
             nulls_map = None

-        return self._read_data(n_items, buf, nulls_map=nulls_map)
+        items = self._read_data(n_items, buf, nulls_map=nulls_map)
+        return self.serialization.apply_sparse(items)

     def _read_data(self, n_items, buf, nulls_map=None):
         items = self.read_items(n_items, buf)
@@ -117,7 +178,10 @@ class Column(object):
         raise NotImplementedError

     def read_state_prefix(self, buf):
-        pass
+        if self.has_custom_serialization:
+            use_custom_serialization = read_varint(buf)
+            if use_custom_serialization:
+                self.serialization = SparseSerialization(self)

     def write_state_prefix(self, buf):
         pass
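Note: to make the sparse format concrete: the stream is a sequence of varints, each the count of default values preceding one non-default item, with bit 62 flagging the end of a granule. A self-contained sketch of the same decoding applied to a plain list of group sizes:

    END_OF_GRANULE_FLAG = 1 << 62

    def decode_sparse_indexes(group_sizes):
        # group_sizes: the varint values as they come off the wire
        sparse_indexes, items_total = [], 0
        for raw in group_sizes:
            end_of_granule = raw & END_OF_GRANULE_FLAG
            items_total += (raw & ~END_OF_GRANULE_FLAG) + 1
            if not end_of_granule:
                sparse_indexes.append(items_total)
        return sparse_indexes, items_total

    # [2, 0, 1 | END_OF_GRANULE_FLAG] -> ([3, 4], 6): a column of five items
    # (items_total - 1) with non-default values at 1-based positions 3 and 4
    print(decode_sparse_indexes([2, 0, 1 | END_OF_GRANULE_FLAG]))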
clickhouse_driver/columns/datecolumn.py
CHANGED
@@ -1,3 +1,4 @@
+from os import getenv
 from datetime import date, timedelta

 from .base import FormatColumn
@@ -6,8 +7,54 @@ from .base import FormatColumn
 epoch_start = date(1970, 1, 1)
 epoch_end = date(2149, 6, 6)

-epoch_start_date32 = date(1925, 1, 1)
-epoch_end_date32 = date(2283, 11, 11)
+epoch_start_date32 = date(1900, 1, 1)
+epoch_end_date32 = date(2299, 12, 31)
+
+
+class LazyLUT(dict):
+    def __init__(self, *args, _factory, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._default_factory = _factory
+
+    def __missing__(self, key):
+        return self.setdefault(key, self._default_factory(key))
+
+
+def make_date_lut_range(date_start, date_end):
+    return range(
+        (date_start - epoch_start).days,
+        (date_end - epoch_start).days + 1,
+    )
+
+
+enable_lazy_date_lut = getenv('CLICKHOUSE_DRIVER_LASY_DATE_LUT', False)
+if enable_lazy_date_lut:
+    try:
+        start, end = enable_lazy_date_lut.split(':')
+        start_date = date.fromisoformat(start)
+        end_date = date.fromisoformat(end)
+
+        date_range = make_date_lut_range(start_date, end_date)
+    except ValueError:
+        date_range = ()
+
+    # Since we initialize lazy lut with some initially warmed values,
+    # we use iterator and not dict comprehension for memory & time optimization
+    _date_lut = LazyLUT(
+        ((x, epoch_start + timedelta(days=x)) for x in date_range),
+        _factory=lambda x: epoch_start + timedelta(days=x),
+    )
+    _date_lut_reverse = LazyLUT(
+        ((value, key) for key, value in _date_lut.items()),
+        _factory=lambda x: (x - epoch_start).days,
+    )
+else:
+    # If lazy lut is not enabled, we fallback to static dict initialization
+    # In both cases, we use same lut for both data types,
+    # since one encompasses the other and we can avoid duplicating overlap
+    date_range = make_date_lut_range(epoch_start_date32, epoch_end_date32)
+    _date_lut = {x: epoch_start + timedelta(days=x) for x in date_range}
+    _date_lut_reverse = {value: key for key, value in _date_lut.items()}


 class DateColumn(FormatColumn):
@@ -18,9 +65,8 @@ class DateColumn(FormatColumn):
     min_value = epoch_start
     max_value = epoch_end

-    date_lut_days = (epoch_end - epoch_start).days + 1
-    date_lut = {x: epoch_start + timedelta(x) for x in range(date_lut_days)}
-    date_lut_reverse = {value: key for key, value in date_lut.items()}
+    date_lut = _date_lut
+    date_lut_reverse = _date_lut_reverse

     def before_write_items(self, items, nulls_map=None):
         null_value = self.null_value
@@ -34,7 +80,7 @@ class DateColumn(FormatColumn):
                 items[i] = null_value
                 continue

-            if type(item) != date:
+            if type(item) is not date:
                 item = date(item.year, item.month, item.day)

             if min_value <= item <= max_value:
@@ -60,10 +106,3 @@ class Date32Column(DateColumn):

     min_value = epoch_start_date32
     max_value = epoch_end_date32
-
-    date_lut_days = (epoch_end_date32 - epoch_start).days + 1
-    date_lut = {
-        x: epoch_start + timedelta(x)
-        for x in range((epoch_start_date32 - epoch_start).days, date_lut_days)
-    }
-    date_lut_reverse = {value: key for key, value in date_lut.items()}
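Note: the lazy LUT is opted into via an environment variable read at module import time, so it must be set before clickhouse_driver is imported. A sketch; the variable name, including its spelling, is taken verbatim from the hunk above:

    import os

    # 'start:end' in ISO format pre-warms the LUT for that range;
    # any other non-empty value yields a fully lazy LUT
    os.environ['CLICKHOUSE_DRIVER_LASY_DATE_LUT'] = '2000-01-01:2050-12-31'

    from clickhouse_driver import Client  # import after setting the variable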
clickhouse_driver/columns/datetimecolumn.py
CHANGED
@@ -193,8 +193,9 @@ def create_datetime_column(spec, column_options):
     else:
         if not context.settings.get('use_client_time_zone', False):
             local_timezone = get_localzone_name_compat()
-            if local_timezone != context.server_info.timezone:
-                tz_name = context.server_info.timezone
+            remote_timezone = context.server_info.get_timezone()
+            if local_timezone != remote_timezone:
+                tz_name = remote_timezone

     if tz_name:
         timezone = get_timezone(tz_name)