clickhouse_driver-0.2.5-cp311-cp311-musllinux_1_1_aarch64.whl → clickhouse_driver-0.2.9-cp311-cp311-musllinux_1_1_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. clickhouse_driver/__init__.py +1 -1
  2. clickhouse_driver/block.py +3 -2
  3. clickhouse_driver/bufferedreader.cpython-311-aarch64-linux-musl.so +0 -0
  4. clickhouse_driver/bufferedwriter.cpython-311-aarch64-linux-musl.so +0 -0
  5. clickhouse_driver/client.py +119 -99
  6. clickhouse_driver/clientinfo.py +2 -2
  7. clickhouse_driver/columns/arraycolumn.py +15 -6
  8. clickhouse_driver/columns/base.py +71 -7
  9. clickhouse_driver/columns/datecolumn.py +52 -13
  10. clickhouse_driver/columns/datetimecolumn.py +3 -2
  11. clickhouse_driver/columns/enumcolumn.py +27 -17
  12. clickhouse_driver/columns/jsoncolumn.py +37 -0
  13. clickhouse_driver/columns/largeint.cpython-311-aarch64-linux-musl.so +0 -0
  14. clickhouse_driver/columns/lowcardinalitycolumn.py +23 -4
  15. clickhouse_driver/columns/mapcolumn.py +9 -2
  16. clickhouse_driver/columns/nestedcolumn.py +2 -13
  17. clickhouse_driver/columns/numpy/datetimecolumn.py +21 -18
  18. clickhouse_driver/columns/numpy/lowcardinalitycolumn.py +2 -2
  19. clickhouse_driver/columns/service.py +12 -2
  20. clickhouse_driver/columns/tuplecolumn.py +31 -5
  21. clickhouse_driver/columns/util.py +2 -1
  22. clickhouse_driver/columns/uuidcolumn.py +1 -1
  23. clickhouse_driver/connection.py +117 -19
  24. clickhouse_driver/defines.py +12 -1
  25. clickhouse_driver/log.py +7 -3
  26. clickhouse_driver/numpy/helpers.py +5 -2
  27. clickhouse_driver/progress.py +15 -3
  28. clickhouse_driver/protocol.py +19 -3
  29. clickhouse_driver/settings/writer.py +7 -2
  30. clickhouse_driver/streams/native.py +24 -6
  31. clickhouse_driver/util/compat.py +12 -0
  32. clickhouse_driver/util/escape.py +36 -8
  33. clickhouse_driver/util/helpers.py +114 -0
  34. clickhouse_driver/varint.cpython-311-aarch64-linux-musl.so +0 -0
  35. {clickhouse_driver-0.2.5.dist-info → clickhouse_driver-0.2.9.dist-info}/METADATA +8 -8
  36. {clickhouse_driver-0.2.5.dist-info → clickhouse_driver-0.2.9.dist-info}/RECORD +71 -70
  37. {clickhouse_driver-0.2.5.dist-info → clickhouse_driver-0.2.9.dist-info}/WHEEL +1 -1
  38. {clickhouse_driver-0.2.5.dist-info → clickhouse_driver-0.2.9.dist-info}/LICENSE +0 -0
  39. {clickhouse_driver-0.2.5.dist-info → clickhouse_driver-0.2.9.dist-info}/top_level.txt +0 -0
clickhouse_driver/__init__.py
@@ -3,7 +3,7 @@ from .client import Client
 from .dbapi import connect
 
 
-VERSION = (0, 2, 5)
+VERSION = (0, 2, 9)
 __version__ = '.'.join(str(x) for x in VERSION)
 
 __all__ = ['Client', 'connect']
clickhouse_driver/block.py
@@ -1,7 +1,7 @@
+from .columns.util import get_inner_spec, get_inner_columns_with_types
 from .reader import read_varint, read_binary_uint8, read_binary_int32
 from .varint import write_varint
 from .writer import write_binary_uint8, write_binary_int32
-from .columns import nestedcolumn
 
 
 class BlockInfo(object):
@@ -172,7 +172,8 @@ class RowOrientedBlock(BaseBlock):
         for name, type_ in columns_with_types:
             cwt = None
             if type_.startswith('Nested'):
-                cwt = nestedcolumn.get_columns_with_types(type_)
+                inner_spec = get_inner_spec('Nested', type_)
+                cwt = get_inner_columns_with_types(inner_spec)
             columns_with_cwt.append((name, cwt))
 
         for i, row in enumerate(data):
clickhouse_driver/client.py
@@ -1,10 +1,9 @@
 import re
-import ssl
 from collections import deque
 from contextlib import contextmanager
 from time import time
 import types
-from urllib.parse import urlparse, parse_qs, unquote
+from urllib.parse import urlparse
 
 from . import errors, defines
 from .block import ColumnOrientedBlock, RowOrientedBlock
@@ -15,7 +14,7 @@ from .result import (
     IterQueryResult, ProgressQueryResult, QueryResult, QueryInfo
 )
 from .util.escape import escape_params
-from .util.helpers import column_chunks, chunks, asbool
+from .util.helpers import column_chunks, chunks, parse_url
 
 
 class Client(object):
@@ -57,6 +56,17 @@ class Client(object):
     * ``round_robin`` -- If ``alt_hosts`` are provided the query will be
       executed on host picked with round-robin algorithm.
       New in version *0.2.5*.
+    * ``namedtuple_as_json`` -- Controls named tuple and nested types
+      deserialization. To interpret these columns alongside
+      ``allow_experimental_object_type=1`` as Python tuples,
+      set ``namedtuple_as_json`` to ``False``.
+      Default: True.
+      New in version *0.2.6*.
+    * ``server_side_params`` -- Specifies on which side query parameters
+      should be rendered into placeholders.
+      Default: False, meaning parameters are rendered
+      on the driver's side.
+      New in version *0.2.7*.
     """
 
     available_client_settings = (
@@ -67,7 +77,9 @@ class Client(object):
         'opentelemetry_traceparent',
         'opentelemetry_tracestate',
         'quota_key',
-        'input_format_null_as_default'
+        'input_format_null_as_default',
+        'namedtuple_as_json',
+        'server_side_params'
     )
 
     def __init__(self, *args, **kwargs):
@@ -97,6 +109,12 @@ class Client(object):
             ),
             'input_format_null_as_default': self.settings.pop(
                 'input_format_null_as_default', False
+            ),
+            'namedtuple_as_json': self.settings.pop(
+                'namedtuple_as_json', True
+            ),
+            'server_side_params': self.settings.pop(
+                'server_side_params', False
             )
         }
 
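For reference, a minimal sketch of how the two new settings surface in client code; the host and the chosen values are illustrative, not from this diff:

```python
from clickhouse_driver import Client

# Hypothetical configuration exercising the settings popped above.
client = Client(
    'localhost',
    settings={
        # Deserialize named tuple / nested columns as plain Python tuples
        # instead of JSON-style objects (default is True).
        'namedtuple_as_json': False,
        # Let the server render query parameters into placeholders
        # (default is False: rendering happens on the driver's side).
        'server_side_params': True,
    },
)
```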
@@ -125,13 +143,19 @@ class Client(object):
             url = urlparse('clickhouse://' + host)
 
             connection_kwargs = kwargs.copy()
-            if len(args) > 2:
-                # port as positional argument
+            num_args = len(args)
+            if num_args >= 2:
+                # host and port as positional arguments
                 connection_args = (url.hostname, url.port) + args[2:]
-            else:
-                # port as keyword argument
+            elif num_args >= 1:
+                # host as positional and port as keyword argument
                 connection_args = (url.hostname, ) + args[1:]
                 connection_kwargs['port'] = url.port
+            else:
+                # host and port as keyword arguments
+                connection_args = tuple()
+                connection_kwargs['host'] = url.hostname
+                connection_kwargs['port'] = url.port
 
             connection = Connection(*connection_args, **connection_kwargs)
             self.connections.append(connection)
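The new `else` branch makes host and port fully keyword-capable for each round-robin connection. A sketch of the feature this code path serves; hostnames are placeholders:

```python
# One Connection per host is built by the loop above; queries then
# rotate between them.
client = Client(
    'ch1.example.com',
    alt_hosts='ch2.example.com:9000,ch3.example.com',
    round_robin=True,
)
```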
@@ -287,6 +311,7 @@ class Client(object):
     def disconnect_on_error(self, query, settings):
         try:
             self.establish_connection(settings)
+            self.connection.server_info.session_timezone = None
 
             yield
 
@@ -438,7 +463,7 @@ class Client(object):
 
     def query_dataframe(
             self, query, params=None, external_tables=None, query_id=None,
-            settings=None):
+            settings=None, replace_nonwords=True):
         """
         *New in version 0.2.0.*
 
@@ -453,6 +478,8 @@ class Client(object):
                        ClickHouse server will generate it.
         :param settings: dictionary of query settings.
                          Defaults to ``None`` (no additional settings).
+        :param replace_nonwords: boolean to replace non-words in column names
+                                 to underscores. Defaults to ``True``.
         :return: pandas DataFrame.
         """
 
@@ -467,7 +494,10 @@ class Client(object):
             settings=settings
         )
 
-        columns = [re.sub(r'\W', '_', name) for name, type_ in columns]
+        columns = [name for name, type_ in columns]
+        if replace_nonwords:
+            columns = [re.sub(r'\W', '_', x) for x in columns]
+
         return pd.DataFrame(
             {col: d for d, col in zip(data, columns)}, columns=columns
        )
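A sketch of the new flag in use; the column aliases are invented to show the effect of the default sanitization:

```python
# With the default replace_nonwords=True both aliases collapse to "a_b"
# (and clash); passing False keeps the original headers.
df = client.query_dataframe(
    'SELECT 1 AS "a b", 2 AS "a.b"',
    replace_nonwords=False,
)
```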
@@ -529,7 +559,7 @@ class Client(object):
             query, params, self.connection.context
         )
 
-        self.connection.send_query(query, query_id=query_id)
+        self.connection.send_query(query, query_id=query_id, params=params)
         self.connection.send_external_tables(external_tables,
                                              types_check=types_check)
         return self.receive_result(with_column_types=with_column_types,
@@ -544,8 +574,7 @@ class Client(object):
         query = self.substitute_params(
             query, params, self.connection.context
         )
-
-        self.connection.send_query(query, query_id=query_id)
+        self.connection.send_query(query, query_id=query_id, params=params)
         self.connection.send_external_tables(external_tables,
                                              types_check=types_check)
         return self.receive_result(with_column_types=with_column_types,
@@ -561,7 +590,7 @@ class Client(object):
             query, params, self.connection.context
         )
 
-        self.connection.send_query(query, query_id=query_id)
+        self.connection.send_query(query, query_id=query_id, params=params)
         self.connection.send_external_tables(external_tables,
                                              types_check=types_check)
         return self.iter_receive_result(with_column_types=with_column_types)
@@ -572,12 +601,12 @@ class Client(object):
         self.connection.send_query(query_without_data, query_id=query_id)
         self.connection.send_external_tables(external_tables,
                                              types_check=types_check)
-
         sample_block = self.receive_sample_block()
+
         if sample_block:
             rv = self.send_data(sample_block, data,
                                 types_check=types_check, columnar=columnar)
-            self.receive_end_of_query()
+            self.receive_end_of_insert_query()
             return rv
 
     def receive_sample_block(self):
@@ -631,8 +660,15 @@ class Client(object):
             self.connection.send_data(block)
             inserted_rows += block.num_rows
 
+            # Starting from the specific revision there are profile events
+            # sent by server in response to each inserted block
+            self.receive_profile_events()
+
         # Empty block means end of data.
         self.connection.send_data(block_cls())
+        # If enabled by revision profile events are also sent after empty block
+        self.receive_profile_events()
+
         return inserted_rows
 
     def receive_end_of_query(self):
@@ -643,7 +679,7 @@ class Client(object):
                 break
 
             elif packet.type == ServerPacketTypes.PROGRESS:
-                continue
+                self.last_query.store_progress(packet.progress)
 
             elif packet.type == ServerPacketTypes.EXCEPTION:
                 raise packet.exception
@@ -655,11 +691,68 @@ class Client(object):
                 pass
 
             elif packet.type == ServerPacketTypes.PROFILE_EVENTS:
+                self.last_query.store_profile(packet.profile_info)
+
+            else:
+                message = self.connection.unexpected_packet_message(
+                    'Exception, EndOfStream, Progress, TableColumns, '
+                    'ProfileEvents or Log', packet.type
+                )
+                raise errors.UnexpectedPacketFromServerError(message)
+
+    def receive_end_of_insert_query(self):
+        while True:
+            packet = self.connection.receive_packet()
+
+            if packet.type == ServerPacketTypes.END_OF_STREAM:
+                break
+
+            elif packet.type == ServerPacketTypes.LOG:
+                log_block(packet.block)
+
+            elif packet.type == ServerPacketTypes.PROGRESS:
+                self.last_query.store_progress(packet.progress)
+
+            elif packet.type == ServerPacketTypes.EXCEPTION:
+                raise packet.exception
+
+            else:
+                message = self.connection.unexpected_packet_message(
+                    'EndOfStream, Log, Progress or Exception', packet.type
+                )
+                raise errors.UnexpectedPacketFromServerError(message)
+
+    def receive_profile_events(self):
+        revision = self.connection.server_info.used_revision
+        if (
+            revision <
+            defines.DBMS_MIN_PROTOCOL_VERSION_WITH_PROFILE_EVENTS_IN_INSERT
+        ):
+            return None
+
+        while True:
+            packet = self.connection.receive_packet()
+
+            if packet.type == ServerPacketTypes.PROFILE_EVENTS:
+                self.last_query.store_profile(packet.profile_info)
+                break
+
+            elif packet.type == ServerPacketTypes.PROGRESS:
+                self.last_query.store_progress(packet.progress)
+
+            elif packet.type == ServerPacketTypes.LOG:
+                log_block(packet.block)
+
+            elif packet.type == ServerPacketTypes.EXCEPTION:
+                raise packet.exception
+
+            elif packet.type == ServerPacketTypes.TIMEZONE_UPDATE:
                 pass
 
             else:
                 message = self.connection.unexpected_packet_message(
-                    'Exception, EndOfStream or Log', packet.type
+                    'ProfileEvents, Progress, Log, Exception or '
+                    'TimezoneUpdate', packet.type
                 )
                 raise errors.UnexpectedPacketFromServerError(message)
 
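Both packet loops now feed `client.last_query` instead of discarding progress and profile packets. A sketch of where that data ends up, assuming the `QueryInfo` attributes that `store_progress`/`store_profile` populate; the table name is a placeholder:

```python
client.execute('INSERT INTO t (x) VALUES', [(1,), (2,)])

# Accumulated by store_progress() / store_profile() during the loops above.
print(client.last_query.progress.rows)
print(client.last_query.profile_info)
```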
@@ -686,6 +779,10 @@ class Client(object):
             # prints: SELECT 1234, 'bar'
             print(substituted_query)
         """
+        # In case of server side templating we don't substitute here.
+        if self.connection.context.client_settings['server_side_params']:
+            return query
+
         if not isinstance(params, dict):
             raise ValueError('Parameters are expected in dict form')
 
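With `server_side_params` enabled, `substitute_params` returns the query untouched and the parameters travel with `send_query` (see the execute hunks above). A sketch of that path, assuming ClickHouse's server-side `{name:Type}` placeholder syntax:

```python
client = Client('localhost', settings={'server_side_params': True})

# The placeholder is rendered by the server, not by substitute_params().
client.execute('SELECT {num:Int32} * 2', {'num': 21})
```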
@@ -703,90 +800,13 @@ class Client(object):
         clickhouses://[user:password]@localhost:9440/default
 
         Three URL schemes are supported:
-        clickhouse:// creates a normal TCP socket connection
-        clickhouses:// creates a SSL wrapped TCP socket connection
+
+        * clickhouse:// creates a normal TCP socket connection
+        * clickhouses:// creates a SSL wrapped TCP socket connection
 
         Any additional querystring arguments will be passed along to
         the Connection class's initializer.
         """
-        url = urlparse(url)
-
-        settings = {}
-        kwargs = {}
-
-        host = url.hostname
-
-        if url.port is not None:
-            kwargs['port'] = url.port
-
-        path = url.path.replace('/', '', 1)
-        if path:
-            kwargs['database'] = path
-
-        if url.username is not None:
-            kwargs['user'] = unquote(url.username)
-
-        if url.password is not None:
-            kwargs['password'] = unquote(url.password)
-
-        if url.scheme == 'clickhouses':
-            kwargs['secure'] = True
-
-        compression_algs = {'lz4', 'lz4hc', 'zstd'}
-        timeouts = {
-            'connect_timeout',
-            'send_receive_timeout',
-            'sync_request_timeout'
-        }
-
-        for name, value in parse_qs(url.query).items():
-            if not value or not len(value):
-                continue
-
-            value = value[0]
-
-            if name == 'compression':
-                value = value.lower()
-                if value in compression_algs:
-                    kwargs[name] = value
-                else:
-                    kwargs[name] = asbool(value)
-
-            elif name == 'secure':
-                kwargs[name] = asbool(value)
-
-            elif name == 'use_numpy':
-                settings[name] = asbool(value)
-
-            elif name == 'round_robin':
-                kwargs[name] = asbool(value)
-
-            elif name == 'client_name':
-                kwargs[name] = value
-
-            elif name in timeouts:
-                kwargs[name] = float(value)
-
-            elif name == 'compress_block_size':
-                kwargs[name] = int(value)
-
-            elif name == 'settings_is_important':
-                kwargs[name] = asbool(value)
-
-            # ssl
-            elif name == 'verify':
-                kwargs[name] = asbool(value)
-            elif name == 'ssl_version':
-                kwargs[name] = getattr(ssl, value)
-            elif name in ['ca_certs', 'ciphers', 'keyfile', 'certfile',
-                          'server_hostname']:
-                kwargs[name] = value
-            elif name == 'alt_hosts':
-                kwargs['alt_hosts'] = value
-            else:
-                settings[name] = value
-
-        if settings:
-            kwargs['settings'] = settings
+        host, kwargs = parse_url(url)
 
         return cls(host, **kwargs)
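Usage is unchanged; only the parsing moved into `util.helpers.parse_url`. A sketch with placeholder credentials:

```python
# Querystring arguments are forwarded to the Connection initializer,
# exactly as the docstring above describes.
client = Client.from_url(
    'clickhouses://user:password@localhost:9440/default'
    '?connect_timeout=2&compression=lz4'
)
```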
clickhouse_driver/clientinfo.py
@@ -28,14 +28,13 @@ class ClientInfo(object):
     client_version_major = defines.CLIENT_VERSION_MAJOR
     client_version_minor = defines.CLIENT_VERSION_MINOR
     client_version_patch = defines.CLIENT_VERSION_PATCH
-    client_revision = defines.CLIENT_REVISION
     interface = Interface.TCP
 
     initial_user = ''
     initial_query_id = ''
     initial_address = '0.0.0.0:0'
 
-    def __init__(self, client_name, context):
+    def __init__(self, client_name, context, client_revision):
         self.query_kind = ClientInfo.QueryKind.NO_QUERY
 
         try:
@@ -44,6 +43,7 @@ class ClientInfo(object):
             self.os_user = ''
         self.client_hostname = socket.gethostname()
         self.client_name = client_name
+        self.client_revision = client_revision
 
         self.client_trace_context = OpenTelemetryTraceContext(
             context.client_settings['opentelemetry_traceparent'],
clickhouse_driver/columns/arraycolumn.py
@@ -28,7 +28,8 @@ class ArrayColumn(Column):
     py_types = (list, tuple)
 
     def __init__(self, nested_column, **kwargs):
-        self.size_column = UInt64Column()
+        self.init_kwargs = kwargs
+        self.size_column = UInt64Column(**kwargs)
         self.nested_column = nested_column
         self._write_depth_0_size = True
         super(ArrayColumn, self).__init__(**kwargs)
@@ -37,17 +38,21 @@ class ArrayColumn(Column):
     def write_data(self, data, buf):
         # Column of Array(T) is stored in "compact" format and passed to server
         # wrapped into another Array without size of wrapper array.
-        self.nested_column = ArrayColumn(self.nested_column)
+        self.nested_column = ArrayColumn(
+            self.nested_column, **self.init_kwargs
+        )
         self.nested_column.nullable = self.nullable
         self.nullable = False
         self._write_depth_0_size = False
         self._write(data, buf)
 
-    def read_data(self, rows, buf):
-        self.nested_column = ArrayColumn(self.nested_column)
+    def read_data(self, n_rows, buf):
+        self.nested_column = ArrayColumn(
+            self.nested_column, **self.init_kwargs
+        )
         self.nested_column.nullable = self.nullable
         self.nullable = False
-        return self._read(rows, buf)[0]
+        return self._read(n_rows, buf)[0]
 
     def _write_sizes(self, value, buf):
         nulls_map = []
@@ -106,9 +111,13 @@ class ArrayColumn(Column):
             self._write_data(value, buf)
 
     def read_state_prefix(self, buf):
-        return self.nested_column.read_state_prefix(buf)
+        super(ArrayColumn, self).read_state_prefix(buf)
+
+        self.nested_column.read_state_prefix(buf)
 
     def write_state_prefix(self, buf):
+        super(ArrayColumn, self).write_state_prefix(buf)
+
         self.nested_column.write_state_prefix(buf)
 
     def _read(self, size, buf):
clickhouse_driver/columns/base.py
@@ -1,6 +1,61 @@
 from struct import Struct, error as struct_error
 
 from . import exceptions
+from ..varint import read_varint
+
+
+class CommonSerialization(object):
+    def __init__(self, column):
+        self.column = column
+        super(CommonSerialization, self).__init__()
+
+    def read_sparse(self, n_items, buf):
+        return n_items
+
+    def apply_sparse(self, items):
+        return items
+
+
+class SparseSerialization(CommonSerialization):
+
+    def __init__(self, *args, **kwargs):
+        self.sparse_indexes = []
+        self.items_total = None
+        super(SparseSerialization, self).__init__(*args, **kwargs)
+
+    def read_sparse(self, n_items, buf):
+        sparse_indexes = []
+        items_total = 0
+        non_default_items = 0
+
+        END_OF_GRANULE_FLAG = 1 << 62
+        end_of_granule = False
+
+        while not end_of_granule:
+            group_size = read_varint(buf)
+            end_of_granule = group_size & END_OF_GRANULE_FLAG
+            group_size &= ~END_OF_GRANULE_FLAG
+
+            items_total += group_size + 1
+            if not end_of_granule:
+                non_default_items += 1
+                sparse_indexes.append(items_total)
+
+        self.sparse_indexes = sparse_indexes
+        self.items_total = items_total
+
+        return non_default_items
+
+    def apply_sparse(self, items):
+        default = self.column.null_value
+        if self.column.after_read_items:
+            default = self.column.after_read_items([default])[0]
+
+        rv = [default] * (self.items_total - 1)
+        for item_number, i in enumerate(self.sparse_indexes):
+            rv[i - 1] = items[item_number]
+
+        return rv
 
 
 class Column(object):
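To make the sparse wire format concrete: `read_sparse` walks varint group sizes (bit 62 flags the end of the granule), recording the cumulative position of each non-default item, and `apply_sparse` later re-expands the compact item list. A toy re-expansion with made-up numbers, mirroring `apply_sparse` (this is illustration, not driver code):

```python
# Values actually present in the stream and the cumulative positions
# recorded by read_sparse(); the numbers are invented.
items = ['x', 'y']
sparse_indexes = [3, 6]
items_total = 9  # includes the implicit trailing end-of-granule slot

default = 0  # stand-in for the column's null_value
rv = [default] * (items_total - 1)
for item_number, i in enumerate(sparse_indexes):
    rv[i - 1] = items[item_number]

print(rv)  # [0, 0, 'x', 0, 0, 'y', 0, 0]
```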
@@ -15,14 +70,17 @@ class Column(object):
 
     null_value = 0
 
-    def __init__(self, types_check=False, **kwargs):
+    def __init__(self, types_check=False, has_custom_serialization=False,
+                 **kwargs):
         self.nullable = False
         self.types_check_enabled = types_check
+        self.has_custom_serialization = has_custom_serialization
+        self.serialization = CommonSerialization(self)
         self.input_null_as_default = False
-        if 'context' in kwargs:
-            settings = kwargs['context'].client_settings
-            self.input_null_as_default = settings \
-                .get('input_format_null_as_default', False)
+
+        self.context = kwargs['context']
+        self.input_null_as_default = self.context.client_settings \
+            .get('input_format_null_as_default', False)
 
         super(Column, self).__init__()
 
@@ -94,12 +152,15 @@ class Column(object):
         raise NotImplementedError
 
     def read_data(self, n_items, buf):
+        n_items = self.serialization.read_sparse(n_items, buf)
+
         if self.nullable:
             nulls_map = self._read_nulls_map(n_items, buf)
         else:
             nulls_map = None
 
-        return self._read_data(n_items, buf, nulls_map=nulls_map)
+        items = self._read_data(n_items, buf, nulls_map=nulls_map)
+        return self.serialization.apply_sparse(items)
 
     def _read_data(self, n_items, buf, nulls_map=None):
         items = self.read_items(n_items, buf)
@@ -117,7 +178,10 @@ class Column(object):
         raise NotImplementedError
 
     def read_state_prefix(self, buf):
-        pass
+        if self.has_custom_serialization:
+            use_custom_serialization = read_varint(buf)
+            if use_custom_serialization:
+                self.serialization = SparseSerialization(self)
 
     def write_state_prefix(self, buf):
         pass
clickhouse_driver/columns/datecolumn.py
@@ -1,3 +1,4 @@
+from os import getenv
 from datetime import date, timedelta
 
 from .base import FormatColumn
@@ -6,8 +7,54 @@ from .base import FormatColumn
 epoch_start = date(1970, 1, 1)
 epoch_end = date(2149, 6, 6)
 
-epoch_start_date32 = date(1925, 1, 1)
-epoch_end_date32 = date(2283, 11, 11)
+epoch_start_date32 = date(1900, 1, 1)
+epoch_end_date32 = date(2299, 12, 31)
+
+
+class LazyLUT(dict):
+    def __init__(self, *args, _factory, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._default_factory = _factory
+
+    def __missing__(self, key):
+        return self.setdefault(key, self._default_factory(key))
+
+
+def make_date_lut_range(date_start, date_end):
+    return range(
+        (date_start - epoch_start).days,
+        (date_end - epoch_start).days + 1,
+    )
+
+
+enable_lazy_date_lut = getenv('CLICKHOUSE_DRIVER_LASY_DATE_LUT', False)
+if enable_lazy_date_lut:
+    try:
+        start, end = enable_lazy_date_lut.split(':')
+        start_date = date.fromisoformat(start)
+        end_date = date.fromisoformat(end)
+
+        date_range = make_date_lut_range(start_date, end_date)
+    except ValueError:
+        date_range = ()
+
+    # Since we initialize lazy lut with some initially warmed values,
+    # we use iterator and not dict comprehension for memory & time optimization
+    _date_lut = LazyLUT(
+        ((x, epoch_start + timedelta(days=x)) for x in date_range),
+        _factory=lambda x: epoch_start + timedelta(days=x),
+    )
+    _date_lut_reverse = LazyLUT(
+        ((value, key) for key, value in _date_lut.items()),
+        _factory=lambda x: (x - epoch_start).days,
+    )
+else:
+    # If lazy lut is not enabled, we fallback to static dict initialization
+    # In both cases, we use same lut for both data types,
+    # since one encompasses the other and we can avoid duplicating overlap
+    date_range = make_date_lut_range(epoch_start_date32, epoch_end_date32)
+    _date_lut = {x: epoch_start + timedelta(days=x) for x in date_range}
+    _date_lut_reverse = {value: key for key, value in _date_lut.items()}
 
 
 class DateColumn(FormatColumn):
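The lazy LUT is opt-in through the environment variable read above (spelled exactly as in the code) and accepts a `start:end` ISO date range to pre-warm; entries outside that range are computed on demand via `__missing__`. A sketch; the dates are arbitrary and the variable must be set before the driver is imported:

```python
import os

# Pre-warm the date lookup table for 2020-2025 only.
os.environ['CLICKHOUSE_DRIVER_LASY_DATE_LUT'] = '2020-01-01:2025-12-31'

from clickhouse_driver import Client  # noqa: E402
```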
@@ -18,9 +65,8 @@ class DateColumn(FormatColumn):
     min_value = epoch_start
     max_value = epoch_end
 
-    date_lut_days = (epoch_end - epoch_start).days + 1
-    date_lut = {x: epoch_start + timedelta(x) for x in range(date_lut_days)}
-    date_lut_reverse = {value: key for key, value in date_lut.items()}
+    date_lut = _date_lut
+    date_lut_reverse = _date_lut_reverse
 
     def before_write_items(self, items, nulls_map=None):
         null_value = self.null_value
@@ -34,7 +80,7 @@ class DateColumn(FormatColumn):
                 items[i] = null_value
                 continue
 
-            if type(item) != date:
+            if type(item) is not date:
                 item = date(item.year, item.month, item.day)
 
             if min_value <= item <= max_value:
@@ -60,10 +106,3 @@ class Date32Column(DateColumn):
 
     min_value = epoch_start_date32
     max_value = epoch_end_date32
-
-    date_lut_days = (epoch_end_date32 - epoch_start).days + 1
-    date_lut = {
-        x: epoch_start + timedelta(x)
-        for x in range((epoch_start_date32 - epoch_start).days, date_lut_days)
-    }
-    date_lut_reverse = {value: key for key, value in date_lut.items()}
clickhouse_driver/columns/datetimecolumn.py
@@ -193,8 +193,9 @@ def create_datetime_column(spec, column_options):
     else:
         if not context.settings.get('use_client_time_zone', False):
             local_timezone = get_localzone_name_compat()
-            if local_timezone != context.server_info.timezone:
-                tz_name = context.server_info.timezone
+            remote_timezone = context.server_info.get_timezone()
+            if local_timezone != remote_timezone:
+                tz_name = remote_timezone
 
     if tz_name:
         timezone = get_timezone(tz_name)
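The column now consults `server_info.get_timezone()` rather than the static `timezone` attribute, so a session-level timezone (note `session_timezone` being reset in the `disconnect_on_error` hunk earlier) can take effect. Client-local rendering still short-circuits this comparison; a sketch using the existing `use_client_time_zone` setting:

```python
# Force client-side timezone handling so the server/session timezone
# comparison above is skipped entirely.
client = Client('localhost', settings={'use_client_time_zone': True})
```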