clickhouse-driver 0.2.5__pp39-pypy39_pp73-win_amd64.whl → 0.2.7__pp39-pypy39_pp73-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of clickhouse-driver might be problematic.

Files changed (33)
  1. clickhouse_driver/__init__.py +1 -1
  2. clickhouse_driver/block.py +3 -2
  3. clickhouse_driver/bufferedreader.pypy39-pp73-win_amd64.pyd +0 -0
  4. clickhouse_driver/bufferedwriter.pypy39-pp73-win_amd64.pyd +0 -0
  5. clickhouse_driver/client.py +120 -16
  6. clickhouse_driver/clientinfo.py +2 -2
  7. clickhouse_driver/columns/arraycolumn.py +15 -6
  8. clickhouse_driver/columns/base.py +71 -7
  9. clickhouse_driver/columns/datecolumn.py +2 -2
  10. clickhouse_driver/columns/jsoncolumn.py +37 -0
  11. clickhouse_driver/columns/largeint.pypy39-pp73-win_amd64.pyd +0 -0
  12. clickhouse_driver/columns/lowcardinalitycolumn.py +23 -4
  13. clickhouse_driver/columns/mapcolumn.py +6 -2
  14. clickhouse_driver/columns/nestedcolumn.py +2 -13
  15. clickhouse_driver/columns/numpy/datetimecolumn.py +16 -16
  16. clickhouse_driver/columns/numpy/lowcardinalitycolumn.py +2 -2
  17. clickhouse_driver/columns/service.py +12 -2
  18. clickhouse_driver/columns/tuplecolumn.py +31 -5
  19. clickhouse_driver/columns/uuidcolumn.py +1 -1
  20. clickhouse_driver/connection.py +104 -15
  21. clickhouse_driver/defines.py +9 -1
  22. clickhouse_driver/log.py +7 -3
  23. clickhouse_driver/progress.py +8 -2
  24. clickhouse_driver/settings/writer.py +7 -2
  25. clickhouse_driver/streams/native.py +18 -6
  26. clickhouse_driver/util/compat.py +12 -0
  27. clickhouse_driver/util/escape.py +35 -7
  28. clickhouse_driver/varint.pypy39-pp73-win_amd64.pyd +0 -0
  29. {clickhouse_driver-0.2.5.dist-info → clickhouse_driver-0.2.7.dist-info}/METADATA +201 -202
  30. {clickhouse_driver-0.2.5.dist-info → clickhouse_driver-0.2.7.dist-info}/RECORD +33 -32
  31. {clickhouse_driver-0.2.5.dist-info → clickhouse_driver-0.2.7.dist-info}/WHEEL +1 -1
  32. {clickhouse_driver-0.2.5.dist-info → clickhouse_driver-0.2.7.dist-info}/LICENSE +0 -0
  33. {clickhouse_driver-0.2.5.dist-info → clickhouse_driver-0.2.7.dist-info}/top_level.txt +0 -0
clickhouse_driver/__init__.py

@@ -3,7 +3,7 @@ from .client import Client
 from .dbapi import connect
 
 
-VERSION = (0, 2, 5)
+VERSION = (0, 2, 7)
 __version__ = '.'.join(str(x) for x in VERSION)
 
 __all__ = ['Client', 'connect']
clickhouse_driver/block.py

@@ -1,7 +1,7 @@
+from .columns.util import get_inner_spec, get_inner_columns_with_types
 from .reader import read_varint, read_binary_uint8, read_binary_int32
 from .varint import write_varint
 from .writer import write_binary_uint8, write_binary_int32
-from .columns import nestedcolumn
 
 
 class BlockInfo(object):
@@ -172,7 +172,8 @@ class RowOrientedBlock(BaseBlock):
         for name, type_ in columns_with_types:
             cwt = None
             if type_.startswith('Nested'):
-                cwt = nestedcolumn.get_columns_with_types(type_)
+                inner_spec = get_inner_spec('Nested', type_)
+                cwt = get_inner_columns_with_types(inner_spec)
             columns_with_cwt.append((name, cwt))
 
         for i, row in enumerate(data):
clickhouse_driver/client.py

@@ -57,6 +57,17 @@ class Client(object):
     * ``round_robin`` -- If ``alt_hosts`` are provided the query will be
       executed on host picked with round-robin algorithm.
       New in version *0.2.5*.
+    * ``namedtuple_as_json`` -- Controls deserialization of named tuple
+      and nested types. To interpret these columns as plain Python
+      tuples when ``allow_experimental_object_type=1`` is set, set
+      ``namedtuple_as_json`` to ``False``.
+      Default: True.
+      New in version *0.2.6*.
+    * ``server_side_params`` -- Specifies on which side query parameters
+      should be rendered into placeholders.
+      Default: False, meaning parameters are rendered
+      on the driver's side.
+      New in version *0.2.7*.
     """
 
     available_client_settings = (
@@ -67,7 +78,9 @@ class Client(object):
         'opentelemetry_traceparent',
         'opentelemetry_tracestate',
         'quota_key',
-        'input_format_null_as_default'
+        'input_format_null_as_default',
+        'namedtuple_as_json',
+        'server_side_params'
     )
 
     def __init__(self, *args, **kwargs):
@@ -97,6 +110,12 @@ class Client(object):
             ),
             'input_format_null_as_default': self.settings.pop(
                 'input_format_null_as_default', False
+            ),
+            'namedtuple_as_json': self.settings.pop(
+                'namedtuple_as_json', True
+            ),
+            'server_side_params': self.settings.pop(
+                'server_side_params', False
             )
         }
 
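For orientation, a brief usage sketch (not from the package) showing where the two new settings live; both are popped from the ``settings`` dict exactly as in the hunk above:

    from clickhouse_driver import Client

    client = Client(
        'localhost',
        settings={
            # Deserialize named tuple / JSON columns as plain Python tuples.
            'namedtuple_as_json': False,
            # Let the server render query parameters into placeholders.
            'server_side_params': True,
        },
    )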
@@ -125,13 +144,19 @@ class Client(object):
                 url = urlparse('clickhouse://' + host)
 
                 connection_kwargs = kwargs.copy()
-                if len(args) > 2:
-                    # port as positional argument
+                num_args = len(args)
+                if num_args >= 2:
+                    # host and port as positional arguments
                     connection_args = (url.hostname, url.port) + args[2:]
-                else:
-                    # port as keyword argument
+                elif num_args >= 1:
+                    # host as positional and port as keyword argument
                     connection_args = (url.hostname, ) + args[1:]
                     connection_kwargs['port'] = url.port
+                else:
+                    # host and port as keyword arguments
+                    connection_args = tuple()
+                    connection_kwargs['host'] = url.hostname
+                    connection_kwargs['port'] = url.port
 
                 connection = Connection(*connection_args, **connection_kwargs)
                 self.connections.append(connection)
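A hedged sketch of the failover setup this parsing supports; host names are illustrative, and ``round_robin`` is assumed to be passed through ``settings`` like the other client settings documented above:

    client = Client(
        'ch1.example.com', port=9000,
        alt_hosts='ch2.example.com:9000,ch3.example.com',
        settings={'round_robin': True},  # rotate hosts per query
    )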
@@ -438,7 +463,7 @@ class Client(object):
 
     def query_dataframe(
             self, query, params=None, external_tables=None, query_id=None,
-            settings=None):
+            settings=None, replace_nonwords=True):
         """
         *New in version 0.2.0.*
 
@@ -453,6 +478,8 @@ class Client(object):
                           ClickHouse server will generate it.
         :param settings: dictionary of query settings.
                          Defaults to ``None`` (no additional settings).
+        :param replace_nonwords: boolean to replace non-word characters in
+                                 column names with underscores. Defaults to ``True``.
         :return: pandas DataFrame.
         """
 
@@ -467,7 +494,10 @@ class Client(object):
             settings=settings
         )
 
-        columns = [re.sub(r'\W', '_', name) for name, type_ in columns]
+        columns = [name for name, type_ in columns]
+        if replace_nonwords:
+            columns = [re.sub(r'\W', '_', x) for x in columns]
+
         return pd.DataFrame(
             {col: d for d, col in zip(data, columns)}, columns=columns
         )
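A short sketch of the new flag; the column alias is illustrative:

    # Keep the original column name instead of coercing it to 'weird_name':
    df = client.query_dataframe(
        'SELECT 1 AS "weird name"',
        replace_nonwords=False,
    )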
@@ -529,7 +559,7 @@ class Client(object):
             query, params, self.connection.context
         )
 
-        self.connection.send_query(query, query_id=query_id)
+        self.connection.send_query(query, query_id=query_id, params=params)
         self.connection.send_external_tables(external_tables,
                                              types_check=types_check)
         return self.receive_result(with_column_types=with_column_types,
@@ -544,8 +574,7 @@ class Client(object):
         query = self.substitute_params(
             query, params, self.connection.context
         )
-
-        self.connection.send_query(query, query_id=query_id)
+        self.connection.send_query(query, query_id=query_id, params=params)
         self.connection.send_external_tables(external_tables,
                                              types_check=types_check)
         return self.receive_result(with_column_types=with_column_types,
@@ -561,7 +590,7 @@ class Client(object):
             query, params, self.connection.context
         )
 
-        self.connection.send_query(query, query_id=query_id)
+        self.connection.send_query(query, query_id=query_id, params=params)
         self.connection.send_external_tables(external_tables,
                                              types_check=types_check)
         return self.iter_receive_result(with_column_types=with_column_types)
@@ -572,12 +601,12 @@ class Client(object):
         self.connection.send_query(query_without_data, query_id=query_id)
         self.connection.send_external_tables(external_tables,
                                              types_check=types_check)
-
         sample_block = self.receive_sample_block()
+
         if sample_block:
             rv = self.send_data(sample_block, data,
                                 types_check=types_check, columnar=columnar)
-            self.receive_end_of_query()
+            self.receive_end_of_insert_query()
             return rv
 
     def receive_sample_block(self):
@@ -631,8 +660,15 @@ class Client(object):
             self.connection.send_data(block)
             inserted_rows += block.num_rows
 
+            # Starting from a specific revision the server sends profile
+            # events in response to each inserted block
+            self.receive_profile_events()
+
         # Empty block means end of data.
         self.connection.send_data(block_cls())
+        # If enabled by revision, profile events also follow the empty block
+        self.receive_profile_events()
+
         return inserted_rows
 
     def receive_end_of_query(self):
@@ -643,7 +679,7 @@ class Client(object):
                 break
 
             elif packet.type == ServerPacketTypes.PROGRESS:
-                continue
+                self.last_query.store_progress(packet.progress)
 
             elif packet.type == ServerPacketTypes.EXCEPTION:
                 raise packet.exception
@@ -655,11 +691,64 @@ class Client(object):
                 pass
 
             elif packet.type == ServerPacketTypes.PROFILE_EVENTS:
-                pass
+                self.last_query.store_profile(packet.profile_info)
 
             else:
                 message = self.connection.unexpected_packet_message(
-                    'Exception, EndOfStream or Log', packet.type
+                    'Exception, EndOfStream, Progress, TableColumns, '
+                    'ProfileEvents or Log', packet.type
+                )
+                raise errors.UnexpectedPacketFromServerError(message)
+
+    def receive_end_of_insert_query(self):
+        while True:
+            packet = self.connection.receive_packet()
+
+            if packet.type == ServerPacketTypes.END_OF_STREAM:
+                break
+
+            elif packet.type == ServerPacketTypes.LOG:
+                log_block(packet.block)
+
+            elif packet.type == ServerPacketTypes.PROGRESS:
+                self.last_query.store_progress(packet.progress)
+
+            elif packet.type == ServerPacketTypes.EXCEPTION:
+                raise packet.exception
+
+            else:
+                message = self.connection.unexpected_packet_message(
+                    'EndOfStream, Log, Progress or Exception', packet.type
+                )
+                raise errors.UnexpectedPacketFromServerError(message)
+
+    def receive_profile_events(self):
+        revision = self.connection.server_info.used_revision
+        if (
+            revision <
+            defines.DBMS_MIN_PROTOCOL_VERSION_WITH_PROFILE_EVENTS_IN_INSERT
+        ):
+            return None
+
+        while True:
+            packet = self.connection.receive_packet()
+
+            if packet.type == ServerPacketTypes.PROFILE_EVENTS:
+                self.last_query.store_profile(packet.profile_info)
+                break
+
+            elif packet.type == ServerPacketTypes.PROGRESS:
+                self.last_query.store_progress(packet.progress)
+
+            elif packet.type == ServerPacketTypes.LOG:
+                log_block(packet.block)
+
+            elif packet.type == ServerPacketTypes.EXCEPTION:
+                raise packet.exception
+
+            else:
+                message = self.connection.unexpected_packet_message(
+                    'ProfileEvents, Progress, Log or Exception', packet.type
                 )
                 raise errors.UnexpectedPacketFromServerError(message)
 
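Because progress and profile packets are now stored instead of skipped, they can be inspected after a call; a hedged sketch assuming the usual ``last_query`` info object with the attributes used by ``store_progress``/``store_profile`` above (table name illustrative):

    client.execute('INSERT INTO t (x) VALUES', [(1,), (2,)])
    print(client.last_query.progress.rows)  # cumulative progress from the server
    print(client.last_query.profile_info)   # data from the last ProfileEvents packet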
@@ -686,6 +775,10 @@ class Client(object):
             # prints: SELECT 1234, 'bar'
             print(substituted_query)
         """
+        # In case of server side templating we don't substitute here.
+        if self.connection.context.client_settings['server_side_params']:
+            return query
+
         if not isinstance(params, dict):
             raise ValueError('Parameters are expected in dict form')
 
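With ``server_side_params`` enabled the query text is sent untouched together with ``params`` (see the ``send_query(..., params=params)`` changes above). A hedged sketch, assuming ClickHouse's ``{name:Type}`` placeholder syntax for server-side substitution:

    client = Client('localhost', settings={'server_side_params': True})
    client.execute('SELECT {num:Int32} + 1', {'num': 41})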
@@ -773,6 +866,17 @@ class Client(object):
             elif name == 'settings_is_important':
                 kwargs[name] = asbool(value)
 
+            elif name == 'tcp_keepalive':
+                try:
+                    kwargs[name] = asbool(value)
+                except ValueError:
+                    parts = value.split(',')
+                    kwargs[name] = (
+                        float(parts[0]), float(parts[1]), int(parts[2])
+                    )
+            elif name == 'client_revision':
+                kwargs[name] = int(value)
+
             # ssl
             elif name == 'verify':
                 kwargs[name] = asbool(value)
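A sketch of DSNs exercising the two new ``from_url`` options; the three-part keepalive form presumably maps to (idle time, interval, probe count) given the float/float/int parsing above, and the revision number is illustrative:

    client = Client.from_url('clickhouse://localhost?tcp_keepalive=true')
    client = Client.from_url('clickhouse://localhost?tcp_keepalive=10.5,2.5,3')
    client = Client.from_url('clickhouse://localhost?client_revision=54459')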
clickhouse_driver/clientinfo.py

@@ -28,14 +28,13 @@ class ClientInfo(object):
     client_version_major = defines.CLIENT_VERSION_MAJOR
     client_version_minor = defines.CLIENT_VERSION_MINOR
     client_version_patch = defines.CLIENT_VERSION_PATCH
-    client_revision = defines.CLIENT_REVISION
     interface = Interface.TCP
 
     initial_user = ''
     initial_query_id = ''
     initial_address = '0.0.0.0:0'
 
-    def __init__(self, client_name, context):
+    def __init__(self, client_name, context, client_revision):
         self.query_kind = ClientInfo.QueryKind.NO_QUERY
 
         try:
@@ -44,6 +43,7 @@ class ClientInfo(object):
             self.os_user = ''
         self.client_hostname = socket.gethostname()
         self.client_name = client_name
+        self.client_revision = client_revision
 
         self.client_trace_context = OpenTelemetryTraceContext(
             context.client_settings['opentelemetry_traceparent'],
clickhouse_driver/columns/arraycolumn.py

@@ -28,7 +28,8 @@ class ArrayColumn(Column):
     py_types = (list, tuple)
 
     def __init__(self, nested_column, **kwargs):
-        self.size_column = UInt64Column()
+        self.init_kwargs = kwargs
+        self.size_column = UInt64Column(**kwargs)
         self.nested_column = nested_column
         self._write_depth_0_size = True
         super(ArrayColumn, self).__init__(**kwargs)
@@ -37,17 +38,21 @@ class ArrayColumn(Column):
     def write_data(self, data, buf):
         # Column of Array(T) is stored in "compact" format and passed to server
         # wrapped into another Array without size of wrapper array.
-        self.nested_column = ArrayColumn(self.nested_column)
+        self.nested_column = ArrayColumn(
+            self.nested_column, **self.init_kwargs
+        )
         self.nested_column.nullable = self.nullable
         self.nullable = False
         self._write_depth_0_size = False
         self._write(data, buf)
 
-    def read_data(self, rows, buf):
-        self.nested_column = ArrayColumn(self.nested_column)
+    def read_data(self, n_rows, buf):
+        self.nested_column = ArrayColumn(
+            self.nested_column, **self.init_kwargs
+        )
         self.nested_column.nullable = self.nullable
         self.nullable = False
-        return self._read(rows, buf)[0]
+        return self._read(n_rows, buf)[0]
 
     def _write_sizes(self, value, buf):
         nulls_map = []
@@ -106,9 +111,13 @@ class ArrayColumn(Column):
         self._write_data(value, buf)
 
     def read_state_prefix(self, buf):
-        return self.nested_column.read_state_prefix(buf)
+        super(ArrayColumn, self).read_state_prefix(buf)
+
+        self.nested_column.read_state_prefix(buf)
 
     def write_state_prefix(self, buf):
+        super(ArrayColumn, self).write_state_prefix(buf)
+
         self.nested_column.write_state_prefix(buf)
 
     def _read(self, size, buf):
clickhouse_driver/columns/base.py

@@ -1,6 +1,61 @@
 from struct import Struct, error as struct_error
 
 from . import exceptions
+from ..varint import read_varint
+
+
+class CommonSerialization(object):
+    def __init__(self, column):
+        self.column = column
+        super(CommonSerialization, self).__init__()
+
+    def read_sparse(self, n_items, buf):
+        return n_items
+
+    def apply_sparse(self, items):
+        return items
+
+
+class SparseSerialization(CommonSerialization):
+
+    def __init__(self, *args, **kwargs):
+        self.sparse_indexes = []
+        self.items_total = None
+        super(SparseSerialization, self).__init__(*args, **kwargs)
+
+    def read_sparse(self, n_items, buf):
+        sparse_indexes = []
+        items_total = 0
+        non_default_items = 0
+
+        END_OF_GRANULE_FLAG = 1 << 62
+        end_of_granule = False
+
+        while not end_of_granule:
+            group_size = read_varint(buf)
+            end_of_granule = group_size & END_OF_GRANULE_FLAG
+            group_size &= ~END_OF_GRANULE_FLAG
+
+            items_total += group_size + 1
+            if not end_of_granule:
+                non_default_items += 1
+                sparse_indexes.append(items_total)
+
+        self.sparse_indexes = sparse_indexes
+        self.items_total = items_total
+
+        return non_default_items
+
+    def apply_sparse(self, items):
+        default = self.column.null_value
+        if self.column.after_read_items:
+            default = self.column.after_read_items([default])[0]
+
+        rv = [default] * (self.items_total - 1)
+        for item_number, i in enumerate(self.sparse_indexes):
+            rv[i - 1] = items[item_number]
+
+        return rv
 
 
 class Column(object):
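To make the granule encoding concrete, a standalone rehearsal of the ``read_sparse`` arithmetic over already-decoded varints (input values are illustrative, not wire data):

    END_OF_GRANULE_FLAG = 1 << 62

    def decode_groups(group_sizes):
        # Each value encodes a run of defaults followed by one stored item;
        # bit 62 marks the final run of the granule, which stores no item.
        sparse_indexes, items_total = [], 0
        for raw in group_sizes:
            end_of_granule = raw & END_OF_GRANULE_FLAG
            group_size = raw & ~END_OF_GRANULE_FLAG
            items_total += group_size + 1
            if not end_of_granule:
                sparse_indexes.append(items_total)
        return sparse_indexes, items_total

    # Stored items land at 1-based positions 3 and 4; items_total is 10,
    # and per apply_sparse above the granule holds items_total - 1 = 9 rows.
    print(decode_groups([2, 0, 5 | END_OF_GRANULE_FLAG]))  # ([3, 4], 10)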
@@ -15,14 +70,17 @@ class Column(object):
 
     null_value = 0
 
-    def __init__(self, types_check=False, **kwargs):
+    def __init__(self, types_check=False, has_custom_serialization=False,
+                 **kwargs):
         self.nullable = False
         self.types_check_enabled = types_check
+        self.has_custom_serialization = has_custom_serialization
+        self.serialization = CommonSerialization(self)
         self.input_null_as_default = False
-        if 'context' in kwargs:
-            settings = kwargs['context'].client_settings
-            self.input_null_as_default = settings \
-                .get('input_format_null_as_default', False)
+
+        self.context = kwargs['context']
+        self.input_null_as_default = self.context.client_settings \
+            .get('input_format_null_as_default', False)
 
         super(Column, self).__init__()
 
@@ -94,12 +152,15 @@ class Column(object):
         raise NotImplementedError
 
     def read_data(self, n_items, buf):
+        n_items = self.serialization.read_sparse(n_items, buf)
+
         if self.nullable:
             nulls_map = self._read_nulls_map(n_items, buf)
         else:
             nulls_map = None
 
-        return self._read_data(n_items, buf, nulls_map=nulls_map)
+        items = self._read_data(n_items, buf, nulls_map=nulls_map)
+        return self.serialization.apply_sparse(items)
 
     def _read_data(self, n_items, buf, nulls_map=None):
         items = self.read_items(n_items, buf)
@@ -117,7 +178,10 @@ class Column(object):
         raise NotImplementedError
 
     def read_state_prefix(self, buf):
-        pass
+        if self.has_custom_serialization:
+            use_custom_serialization = read_varint(buf)
+            if use_custom_serialization:
+                self.serialization = SparseSerialization(self)
 
     def write_state_prefix(self, buf):
         pass
clickhouse_driver/columns/datecolumn.py

@@ -6,7 +6,7 @@ from .base import FormatColumn
 epoch_start = date(1970, 1, 1)
 epoch_end = date(2149, 6, 6)
 
-epoch_start_date32 = date(1925, 1, 1)
+epoch_start_date32 = date(1900, 1, 1)
 epoch_end_date32 = date(2283, 11, 11)
 
@@ -34,7 +34,7 @@ class DateColumn(FormatColumn):
                 items[i] = null_value
                 continue
 
-            if type(item) != date:
+            if type(item) is not date:
                 item = date(item.year, item.month, item.day)
 
             if min_value <= item <= max_value:
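The widened ``Date32`` lower bound means pre-1925 dates now round-trip; a tiny hedged sketch reusing the client from earlier sketches (table name illustrative):

    from datetime import date

    client.execute('CREATE TABLE d32 (d Date32) ENGINE = Memory')
    client.execute('INSERT INTO d32 (d) VALUES', [(date(1900, 1, 1),)])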
clickhouse_driver/columns/jsoncolumn.py

@@ -0,0 +1,37 @@
+from .base import Column
+from .stringcolumn import String
+from ..reader import read_binary_uint8, read_binary_str
+from ..util.compat import json
+from ..writer import write_binary_uint8
+
+
+class JsonColumn(Column):
+    py_types = (dict, )
+
+    # No NULL value actually
+    null_value = {}
+
+    def __init__(self, column_by_spec_getter, **kwargs):
+        self.column_by_spec_getter = column_by_spec_getter
+        self.string_column = String(**kwargs)
+        super(JsonColumn, self).__init__(**kwargs)
+
+    def write_state_prefix(self, buf):
+        # Read in binary format.
+        # Write in text format.
+        write_binary_uint8(1, buf)
+
+    def read_items(self, n_items, buf):
+        read_binary_uint8(buf)
+        spec = read_binary_str(buf)
+        col = self.column_by_spec_getter(spec)
+        col.read_state_prefix(buf)
+        return col.read_data(n_items, buf)
+
+    def write_items(self, items, buf):
+        items = [x if isinstance(x, str) else json.dumps(x) for x in items]
+        self.string_column.write_items(items, buf)
+
+
+def create_json_column(spec, column_by_spec_getter, column_options):
+    return JsonColumn(column_by_spec_getter, **column_options)
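A hedged end-to-end sketch of the new column; ``allow_experimental_object_type`` is the server-side ClickHouse setting mentioned in the docstring above, and the table name is illustrative:

    client = Client('localhost', settings={'allow_experimental_object_type': 1})
    client.execute('CREATE TABLE t (obj JSON) ENGINE = Memory')
    client.execute('INSERT INTO t (obj) VALUES', [{'obj': {'a': 1, 'b': 'x'}}])
    print(client.execute('SELECT obj FROM t'))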
clickhouse_driver/columns/lowcardinalitycolumn.py

@@ -35,25 +35,32 @@ class LowCardinalityColumn(Column):
     serialization_type = has_additional_keys_bit | need_update_dictionary
 
     def __init__(self, nested_column, **kwargs):
+        self.init_kwargs = kwargs
         self.nested_column = nested_column
         super(LowCardinalityColumn, self).__init__(**kwargs)
 
     def read_state_prefix(self, buf):
-        return read_binary_uint64(buf)
+        super(LowCardinalityColumn, self).read_state_prefix(buf)
+
+        read_binary_uint64(buf)
 
     def write_state_prefix(self, buf):
+        super(LowCardinalityColumn, self).write_state_prefix(buf)
+
         # KeysSerializationVersion. See ClickHouse docs.
         write_binary_int64(1, buf)
 
     def _write_data(self, items, buf):
         index, keys = [], []
         key_by_index_element = {}
+        nested_is_nullable = False
 
         if self.nested_column.nullable:
             # First element represents NULL if column is nullable.
             index.append(self.nested_column.null_value)
             # Prevent null map writing. Reset nested column nullable flag.
             self.nested_column.nullable = False
+            nested_is_nullable = True
 
         for x in items:
             if x is None:
@@ -87,14 +94,26 @@ class LowCardinalityColumn(Column):
             return
 
         int_type = int(log(len(index), 2) / 8)
-        int_column = self.int_types[int_type]()
+        int_column = self.int_types[int_type](**self.init_kwargs)
 
         serialization_type = self.serialization_type | int_type
 
         write_binary_int64(serialization_type, buf)
         write_binary_int64(len(index), buf)
 
-        self.nested_column.write_data(index, buf)
+        if nested_is_nullable:
+            # Given we reset the nested column's nullable flag above,
+            # we need to write the null map manually. Invoking the
+            # write_data method would cause an exception, because
+            # `prepare_data` may not be able to handle the null
+            # value correctly.
+            self.nested_column.write_items(
+                [self.nested_column.null_value], buf)
+            # Remove null map from index, because it is already written.
+            index_to_write = index[1:]
+            self.nested_column.write_data(index_to_write, buf)
+        else:
+            self.nested_column.write_data(index, buf)
         write_binary_int64(len(items), buf)
         int_column.write_items(keys, buf)
 
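The new branch fixes writing the dictionary of a nullable LowCardinality column; a hedged repro sketch (table name illustrative):

    client.execute(
        'CREATE TABLE lc (s LowCardinality(Nullable(String))) ENGINE = Memory'
    )
    client.execute('INSERT INTO lc (s) VALUES', [('a',), (None,), ('a',)])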
@@ -106,7 +125,7 @@ class LowCardinalityColumn(Column):
 
         # Lowest byte contains info about key type.
         key_type = serialization_type & 0xf
-        keys_column = self.int_types[key_type]()
+        keys_column = self.int_types[key_type](**self.init_kwargs)
 
         nullable = self.nested_column.nullable
         # Prevent null map reading. Reset nested column nullable flag.
clickhouse_driver/columns/mapcolumn.py

@@ -13,16 +13,20 @@ class MapColumn(Column):
     null_value = {}
 
     def __init__(self, key_column, value_column, **kwargs):
-        self.offset_column = UInt64Column()
+        self.offset_column = UInt64Column(**kwargs)
         self.key_column = key_column
         self.value_column = value_column
         super(MapColumn, self).__init__(**kwargs)
 
     def read_state_prefix(self, buf):
+        super(MapColumn, self).read_state_prefix(buf)
+
         self.key_column.read_state_prefix(buf)
         self.value_column.read_state_prefix(buf)
 
     def write_state_prefix(self, buf):
+        super(MapColumn, self).write_state_prefix(buf)
+
         self.key_column.write_state_prefix(buf)
         self.value_column.write_state_prefix(buf)
 
@@ -57,7 +61,7 @@
 
 
 def create_map_column(spec, column_by_spec_getter, column_options):
-    # Match commas outside of parentheses so we don't match the comma in
+    # Match commas outside of parentheses, so we don't match the comma in
     # Decimal types.
     key, value = comma_re.split(spec[4:-1])
     key_column = column_by_spec_getter(key.strip())
clickhouse_driver/columns/nestedcolumn.py

@@ -1,21 +1,10 @@
 
 from .arraycolumn import create_array_column
-from .util import get_inner_spec, get_inner_columns, \
-    get_inner_columns_with_types
+from .util import get_inner_spec
 
 
 def create_nested_column(spec, column_by_spec_getter, column_options):
     return create_array_column(
-        'Array(Tuple({}))'.format(','.join(get_nested_columns(spec))),
+        'Array(Tuple({}))'.format(get_inner_spec('Nested', spec)),
         column_by_spec_getter, column_options
     )
-
-
-def get_nested_columns(spec):
-    inner_spec = get_inner_spec('Nested', spec)
-    return get_inner_columns(inner_spec)
-
-
-def get_columns_with_types(spec):
-    inner_spec = get_inner_spec('Nested', spec)
-    return get_inner_columns_with_types(inner_spec)
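For reference, a sketch of the helper now used directly here; the call shape matches the diff above, and the output string is illustrative:

    from clickhouse_driver.columns.util import get_inner_spec

    # Extracts the inner spec of a parametrized type:
    # 'Nested(a Int32, b String)' -> 'a Int32, b String'
    print(get_inner_spec('Nested', 'Nested(a Int32, b String)'))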