clickhouse-driver 0.2.4__cp38-cp38-musllinux_1_1_i686.whl → 0.2.8__cp38-cp38-musllinux_1_1_i686.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. clickhouse_driver/__init__.py +1 -1
  2. clickhouse_driver/block.py +3 -2
  3. clickhouse_driver/bufferedreader.cpython-38-i386-linux-gnu.so +0 -0
  4. clickhouse_driver/bufferedwriter.cpython-38-i386-linux-gnu.so +0 -0
  5. clickhouse_driver/client.py +209 -19
  6. clickhouse_driver/clientinfo.py +2 -2
  7. clickhouse_driver/columns/arraycolumn.py +16 -7
  8. clickhouse_driver/columns/base.py +71 -7
  9. clickhouse_driver/columns/datecolumn.py +52 -13
  10. clickhouse_driver/columns/jsoncolumn.py +37 -0
  11. clickhouse_driver/columns/largeint.cpython-38-i386-linux-gnu.so +0 -0
  12. clickhouse_driver/columns/lowcardinalitycolumn.py +23 -4
  13. clickhouse_driver/columns/mapcolumn.py +15 -2
  14. clickhouse_driver/columns/nestedcolumn.py +2 -13
  15. clickhouse_driver/columns/numpy/boolcolumn.py +8 -0
  16. clickhouse_driver/columns/numpy/datetimecolumn.py +18 -18
  17. clickhouse_driver/columns/numpy/lowcardinalitycolumn.py +2 -2
  18. clickhouse_driver/columns/numpy/service.py +3 -1
  19. clickhouse_driver/columns/service.py +12 -2
  20. clickhouse_driver/columns/tuplecolumn.py +31 -5
  21. clickhouse_driver/columns/uuidcolumn.py +1 -1
  22. clickhouse_driver/connection.py +123 -17
  23. clickhouse_driver/defines.py +9 -1
  24. clickhouse_driver/log.py +7 -3
  25. clickhouse_driver/progress.py +8 -2
  26. clickhouse_driver/settings/writer.py +7 -2
  27. clickhouse_driver/streams/native.py +18 -6
  28. clickhouse_driver/util/compat.py +12 -0
  29. clickhouse_driver/util/escape.py +35 -7
  30. clickhouse_driver/varint.cpython-38-i386-linux-gnu.so +0 -0
  31. {clickhouse_driver-0.2.4.dist-info → clickhouse_driver-0.2.8.dist-info}/METADATA +8 -13
  32. {clickhouse_driver-0.2.4.dist-info → clickhouse_driver-0.2.8.dist-info}/RECORD +71 -69
  33. {clickhouse_driver-0.2.4.dist-info → clickhouse_driver-0.2.8.dist-info}/WHEEL +1 -1
  34. {clickhouse_driver-0.2.4.dist-info → clickhouse_driver-0.2.8.dist-info}/LICENSE +0 -0
  35. {clickhouse_driver-0.2.4.dist-info → clickhouse_driver-0.2.8.dist-info}/top_level.txt +0 -0
clickhouse_driver/__init__.py

@@ -3,7 +3,7 @@ from .client import Client
 from .dbapi import connect
 
 
-VERSION = (0, 2, 4)
+VERSION = (0, 2, 8)
 __version__ = '.'.join(str(x) for x in VERSION)
 
 __all__ = ['Client', 'connect']
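Only the version tuple changes here. A quick, illustrative way to confirm which version is active in an environment:

    import clickhouse_driver
    print(clickhouse_driver.__version__)  # '0.2.8' for this wheel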
clickhouse_driver/block.py

@@ -1,7 +1,7 @@
+from .columns.util import get_inner_spec, get_inner_columns_with_types
 from .reader import read_varint, read_binary_uint8, read_binary_int32
 from .varint import write_varint
 from .writer import write_binary_uint8, write_binary_int32
-from .columns import nestedcolumn
 
 
 class BlockInfo(object):
@@ -172,7 +172,8 @@ class RowOrientedBlock(BaseBlock):
         for name, type_ in columns_with_types:
             cwt = None
             if type_.startswith('Nested'):
-                cwt = nestedcolumn.get_columns_with_types(type_)
+                inner_spec = get_inner_spec('Nested', type_)
+                cwt = get_inner_columns_with_types(inner_spec)
             columns_with_cwt.append((name, cwt))
 
         for i, row in enumerate(data):
clickhouse_driver/client.py

@@ -1,5 +1,6 @@
 import re
 import ssl
+from collections import deque
 from contextlib import contextmanager
 from time import time
 import types
@@ -26,7 +27,7 @@ class Client(object):
                      for the client settings, see below). Defaults to ``None``
                      (no additional settings). See all available settings in
                      `ClickHouse docs
-                     <https://clickhouse.tech/docs/en/operations/settings/settings/>`_.
+                     <https://clickhouse.com/docs/en/operations/settings/settings/>`_.
    :param \\**kwargs: All other args are passed to the
                       :py:class:`~clickhouse_driver.connection.Connection`
                       constructor.
@@ -53,6 +54,20 @@ class Client(object):
            default values if data type of this field is not
            nullable. Does not work for NumPy. Default: False.
            New in version *0.2.4*.
+          * ``round_robin`` -- If ``alt_hosts`` are provided the query will be
+            executed on host picked with round-robin algorithm.
+            New in version *0.2.5*.
+          * ``namedtuple_as_json`` -- Controls named tuple and nested types
+            deserialization. To interpret these column alongside
+            with ``allow_experimental_object_type=1`` as Python
+            tuple set ``namedtuple_as_json`` to ``False``.
+            Default: True.
+            New in version *0.2.6*.
+          * ``server_side_params`` -- Species on which side query parameters
+            should be rendered into placeholders.
+            Default: False. Means that parameters are rendered
+            on driver's side.
+            New in version *0.2.7*.
    """

    available_client_settings = (
@@ -63,7 +78,9 @@ class Client(object):
        'opentelemetry_traceparent',
        'opentelemetry_tracestate',
        'quota_key',
-        'input_format_null_as_default'
+        'input_format_null_as_default',
+        'namedtuple_as_json',
+        'server_side_params'
    )

    def __init__(self, *args, **kwargs):
@@ -93,6 +110,12 @@ class Client(object):
            ),
            'input_format_null_as_default': self.settings.pop(
                'input_format_null_as_default', False
+            ),
+            'namedtuple_as_json': self.settings.pop(
+                'namedtuple_as_json', True
+            ),
+            'server_side_params': self.settings.pop(
+                'server_side_params', False
            )
        }
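Both new client settings are consumed from the same ``settings`` dict as the existing ones, so enabling them only requires passing them at construction time. A minimal sketch (host name is a placeholder):

    from clickhouse_driver import Client

    client = Client(
        'localhost',
        settings={
            'namedtuple_as_json': False,  # read named tuple / JSON columns as plain tuples
            'server_side_params': True,   # render query parameters on the server
        },
    )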
@@ -112,9 +135,33 @@ class Client(object):
         self.iter_query_result_cls = IterQueryResult
         self.progress_query_result_cls = ProgressQueryResult
 
-        self.connection = Connection(*args, **kwargs)
-        self.connection.context.settings = self.settings
-        self.connection.context.client_settings = self.client_settings
+        round_robin = kwargs.pop('round_robin', False)
+        self.connections = deque([Connection(*args, **kwargs)])
+
+        if round_robin and 'alt_hosts' in kwargs:
+            alt_hosts = kwargs.pop('alt_hosts')
+            for host in alt_hosts.split(','):
+                url = urlparse('clickhouse://' + host)
+
+                connection_kwargs = kwargs.copy()
+                num_args = len(args)
+                if num_args >= 2:
+                    # host and port as positional arguments
+                    connection_args = (url.hostname, url.port) + args[2:]
+                elif num_args >= 1:
+                    # host as positional and port as keyword argument
+                    connection_args = (url.hostname, ) + args[1:]
+                    connection_kwargs['port'] = url.port
+                else:
+                    # host and port as keyword arguments
+                    connection_args = tuple()
+                    connection_kwargs['host'] = url.hostname
+                    connection_kwargs['port'] = url.port
+
+                connection = Connection(*connection_args, **connection_kwargs)
+                self.connections.append(connection)
+
+        self.connection = self.get_connection()
         self.reset_last_query()
         super(Client, self).__init__()
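Together with ``get_connection`` and ``establish_connection`` in the hunks that follow, this gives the client a small pool of connections to rotate through. A minimal sketch (host names are placeholders):

    from clickhouse_driver import Client

    # Each query picks the next connection from the deque; on
    # SocketTimeoutError / NetworkError the remaining hosts are tried.
    client = Client(
        'ch-node1',
        alt_hosts='ch-node2:9000,ch-node3:9000',
        round_robin=True,
    )
    client.execute('SELECT 1')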
@@ -124,7 +171,22 @@ class Client(object):
     def __exit__(self, exc_type, exc_val, exc_tb):
         self.disconnect()
 
+    def get_connection(self):
+        if hasattr(self, 'connection'):
+            self.connections.append(self.connection)
+
+        connection = self.connections.popleft()
+
+        connection.context.settings = self.settings
+        connection.context.client_settings = self.client_settings
+        return connection
+
     def disconnect(self):
+        self.disconnect_connection()
+        for connection in self.connections:
+            connection.disconnect()
+
+    def disconnect_connection(self):
         """
         Disconnects from the server.
         """
@@ -227,13 +289,29 @@ class Client(object):
         if query.lower().startswith('use '):
             self.connection.database = query[4:].strip()
 
+    def establish_connection(self, settings):
+        num_connections = len(self.connections)
+        if hasattr(self, 'connection'):
+            num_connections += 1
+
+        for i in range(num_connections):
+            try:
+                self.connection = self.get_connection()
+                self.make_query_settings(settings)
+                self.connection.force_connect()
+                self.last_query = QueryInfo()
+
+            except (errors.SocketTimeoutError, errors.NetworkError):
+                if i < num_connections - 1:
+                    continue
+                raise
+
+            return
+
     @contextmanager
     def disconnect_on_error(self, query, settings):
-        self.make_query_settings(settings)
-
         try:
-            self.connection.force_connect()
-            self.last_query = QueryInfo()
+            self.establish_connection(settings)
 
             yield
@@ -385,7 +463,7 @@ class Client(object):
 
     def query_dataframe(
             self, query, params=None, external_tables=None, query_id=None,
-            settings=None):
+            settings=None, replace_nonwords=True):
         """
         *New in version 0.2.0.*
 
@@ -400,6 +478,8 @@ class Client(object):
             ClickHouse server will generate it.
         :param settings: dictionary of query settings.
             Defaults to ``None`` (no additional settings).
+        :param replace_nonwords: boolean to replace non-words in column names
+            to underscores. Defaults to ``True``.
         :return: pandas DataFrame.
         """
 
@@ -414,8 +494,12 @@ class Client(object):
             settings=settings
         )
 
+        columns = [name for name, type_ in columns]
+        if replace_nonwords:
+            columns = [re.sub(r'\W', '_', x) for x in columns]
+
         return pd.DataFrame(
-            {re.sub(r'\W', '_', col[0]): d for d, col in zip(data, columns)}
+            {col: d for d, col in zip(data, columns)}, columns=columns
        )
 
     def insert_dataframe(
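``query_dataframe`` used to rewrite column names unconditionally; the new ``replace_nonwords`` flag makes that optional. A sketch, assuming pandas is installed and the client is set up for DataFrame use (e.g. ``settings={'use_numpy': True}``):

    client = Client('localhost', settings={'use_numpy': True})

    # Keep ClickHouse's expression-derived column names (e.g. "plus(number, 1)")
    # instead of rewriting non-word characters to underscores.
    df = client.query_dataframe(
        'SELECT number, number + 1 FROM system.numbers LIMIT 3',
        replace_nonwords=False,
    )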
@@ -452,6 +536,12 @@ class Client(object):
         rv = None
         if sample_block:
             columns = [x[0] for x in sample_block.columns_with_types]
+            # raise if any columns are missing from the dataframe
+            diff = set(columns) - set(dataframe.columns)
+            if len(diff):
+                msg = "DataFrame missing required columns: {}"
+                raise ValueError(msg.format(list(diff)))
+
             data = [dataframe[column].values for column in columns]
             rv = self.send_data(sample_block, data, columnar=True)
         self.receive_end_of_query()
547
  self.receive_end_of_query()
@@ -469,7 +559,7 @@ class Client(object):
469
559
  query, params, self.connection.context
470
560
  )
471
561
 
472
- self.connection.send_query(query, query_id=query_id)
562
+ self.connection.send_query(query, query_id=query_id, params=params)
473
563
  self.connection.send_external_tables(external_tables,
474
564
  types_check=types_check)
475
565
  return self.receive_result(with_column_types=with_column_types,
@@ -484,8 +574,7 @@ class Client(object):
484
574
  query = self.substitute_params(
485
575
  query, params, self.connection.context
486
576
  )
487
-
488
- self.connection.send_query(query, query_id=query_id)
577
+ self.connection.send_query(query, query_id=query_id, params=params)
489
578
  self.connection.send_external_tables(external_tables,
490
579
  types_check=types_check)
491
580
  return self.receive_result(with_column_types=with_column_types,
@@ -501,7 +590,7 @@ class Client(object):
501
590
  query, params, self.connection.context
502
591
  )
503
592
 
504
- self.connection.send_query(query, query_id=query_id)
593
+ self.connection.send_query(query, query_id=query_id, params=params)
505
594
  self.connection.send_external_tables(external_tables,
506
595
  types_check=types_check)
507
596
  return self.iter_receive_result(with_column_types=with_column_types)
@@ -512,12 +601,12 @@ class Client(object):
512
601
  self.connection.send_query(query_without_data, query_id=query_id)
513
602
  self.connection.send_external_tables(external_tables,
514
603
  types_check=types_check)
515
-
516
604
  sample_block = self.receive_sample_block()
605
+
517
606
  if sample_block:
518
607
  rv = self.send_data(sample_block, data,
519
608
  types_check=types_check, columnar=columnar)
520
- self.receive_end_of_query()
609
+ self.receive_end_of_insert_query()
521
610
  return rv
522
611
 
523
612
  def receive_sample_block(self):
@@ -571,8 +660,15 @@ class Client(object):
             self.connection.send_data(block)
             inserted_rows += block.num_rows
 
+            # Starting from the specific revision there are profile events
+            # sent by server in response to each inserted block
+            self.receive_profile_events()
+
         # Empty block means end of data.
         self.connection.send_data(block_cls())
+        # If enabled by revision profile events are also sent after empty block
+        self.receive_profile_events()
+
         return inserted_rows
 
     def receive_end_of_query(self):
@@ -582,6 +678,9 @@ class Client(object):
             if packet.type == ServerPacketTypes.END_OF_STREAM:
                 break
 
+            elif packet.type == ServerPacketTypes.PROGRESS:
+                self.last_query.store_progress(packet.progress)
+
             elif packet.type == ServerPacketTypes.EXCEPTION:
                 raise packet.exception
 
@@ -591,9 +690,65 @@ class Client(object):
             elif packet.type == ServerPacketTypes.TABLE_COLUMNS:
                 pass
 
+            elif packet.type == ServerPacketTypes.PROFILE_EVENTS:
+                self.last_query.store_profile(packet.profile_info)
+
+            else:
+                message = self.connection.unexpected_packet_message(
+                    'Exception, EndOfStream, Progress, TableColumns, '
+                    'ProfileEvents or Log', packet.type
+                )
+                raise errors.UnexpectedPacketFromServerError(message)
+
+    def receive_end_of_insert_query(self):
+        while True:
+            packet = self.connection.receive_packet()
+
+            if packet.type == ServerPacketTypes.END_OF_STREAM:
+                break
+
+            elif packet.type == ServerPacketTypes.LOG:
+                log_block(packet.block)
+
+            elif packet.type == ServerPacketTypes.PROGRESS:
+                self.last_query.store_progress(packet.progress)
+
+            elif packet.type == ServerPacketTypes.EXCEPTION:
+                raise packet.exception
+
             else:
                 message = self.connection.unexpected_packet_message(
-                    'Exception, EndOfStream or Log', packet.type
+                    'EndOfStream, Log, Progress or Exception', packet.type
+                )
+                raise errors.UnexpectedPacketFromServerError(message)
+
+    def receive_profile_events(self):
+        revision = self.connection.server_info.used_revision
+        if (
+            revision <
+            defines.DBMS_MIN_PROTOCOL_VERSION_WITH_PROFILE_EVENTS_IN_INSERT
+        ):
+            return None
+
+        while True:
+            packet = self.connection.receive_packet()
+
+            if packet.type == ServerPacketTypes.PROFILE_EVENTS:
+                self.last_query.store_profile(packet.profile_info)
+                break
+
+            elif packet.type == ServerPacketTypes.PROGRESS:
+                self.last_query.store_progress(packet.progress)
+
+            elif packet.type == ServerPacketTypes.LOG:
+                log_block(packet.block)
+
+            elif packet.type == ServerPacketTypes.EXCEPTION:
+                raise packet.exception
+
+            else:
+                message = self.connection.unexpected_packet_message(
+                    'ProfileEvents, Progress, Log or Exception', packet.type
                 )
                 raise errors.UnexpectedPacketFromServerError(message)
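Because INSERT responses are now drained through ``receive_profile_events``, the statistics the server sends end up on ``client.last_query``. A rough sketch (table is hypothetical; whether profile events arrive at all depends on the server revision):

    client = Client('localhost')
    client.execute(
        'INSERT INTO metrics (ts, value) VALUES',
        [(1, 0.5), (2, 0.7)],
    )

    # Filled in from PROGRESS / PROFILE_EVENTS packets when the server sends them.
    print(client.last_query.progress.rows, client.last_query.progress.bytes)
    print(client.last_query.profile_info)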
@@ -604,6 +759,26 @@ class Client(object):
         return self.receive_result(with_column_types=with_column_types)
 
     def substitute_params(self, query, params, context):
+        """
+        Substitutes parameters into a provided query.
+
+        For example::
+
+            client = Client(...)
+
+            substituted_query = client.substitute_params(
+                query='SELECT 1234, %(foo)s',
+                params={'foo': 'bar'},
+                context=client.connection.context
+            )
+
+            # prints: SELECT 1234, 'bar'
+            print(substituted_query)
+        """
+        # In case of server side templating we don't substitute here.
+        if self.connection.context.client_settings['server_side_params']:
+            return query
+
         if not isinstance(params, dict):
             raise ValueError('Parameters are expected in dict form')
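With ``server_side_params`` enabled the driver skips local substitution and ships the raw query together with the ``params`` dict (see the ``send_query(..., params=params)`` changes earlier in this file). A minimal sketch of the pass-through behaviour; the placeholder syntax the server expects is out of scope here:

    client = Client('localhost', settings={'server_side_params': True})

    # substitute_params no longer renders anything locally for this client;
    # the query text comes back unchanged.
    q = client.substitute_params(
        'SELECT %(foo)s', {'foo': 'bar'}, client.connection.context
    )
    assert q == 'SELECT %(foo)s'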
@@ -676,6 +851,9 @@ class Client(object):
             elif name == 'use_numpy':
                 settings[name] = asbool(value)
 
+            elif name == 'round_robin':
+                kwargs[name] = asbool(value)
+
             elif name == 'client_name':
                 kwargs[name] = value
 
@@ -688,12 +866,24 @@ class Client(object):
             elif name == 'settings_is_important':
                 kwargs[name] = asbool(value)
 
+            elif name == 'tcp_keepalive':
+                try:
+                    kwargs[name] = asbool(value)
+                except ValueError:
+                    parts = value.split(',')
+                    kwargs[name] = (
+                        int(parts[0]), int(parts[1]), int(parts[2])
+                    )
+            elif name == 'client_revision':
+                kwargs[name] = int(value)
+
             # ssl
             elif name == 'verify':
                 kwargs[name] = asbool(value)
             elif name == 'ssl_version':
                 kwargs[name] = getattr(ssl, value)
-            elif name in ['ca_certs', 'ciphers', 'keyfile', 'certfile']:
+            elif name in ['ca_certs', 'ciphers', 'keyfile', 'certfile',
+                          'server_hostname']:
                 kwargs[name] = value
             elif name == 'alt_hosts':
                 kwargs['alt_hosts'] = value
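The new options are also reachable through ``Client.from_url``. A sketch with placeholder hosts; per the parsing above, ``tcp_keepalive`` accepts either a boolean or an ``idle,interval,count`` triple:

    from clickhouse_driver import Client

    client = Client.from_url(
        'clickhouse://default@ch-node1:9000/default'
        '?alt_hosts=ch-node2:9000,ch-node3:9000'
        '&round_robin=true'
        '&tcp_keepalive=7200,60,3'
    )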
clickhouse_driver/clientinfo.py

@@ -28,14 +28,13 @@ class ClientInfo(object):
     client_version_major = defines.CLIENT_VERSION_MAJOR
     client_version_minor = defines.CLIENT_VERSION_MINOR
     client_version_patch = defines.CLIENT_VERSION_PATCH
-    client_revision = defines.CLIENT_REVISION
     interface = Interface.TCP
 
     initial_user = ''
     initial_query_id = ''
     initial_address = '0.0.0.0:0'
 
-    def __init__(self, client_name, context):
+    def __init__(self, client_name, context, client_revision):
         self.query_kind = ClientInfo.QueryKind.NO_QUERY
 
         try:
@@ -44,6 +43,7 @@ class ClientInfo(object):
             self.os_user = ''
         self.client_hostname = socket.gethostname()
         self.client_name = client_name
+        self.client_revision = client_revision
 
         self.client_trace_context = OpenTelemetryTraceContext(
             context.client_settings['opentelemetry_traceparent'],
clickhouse_driver/columns/arraycolumn.py

@@ -28,26 +28,31 @@ class ArrayColumn(Column):
     py_types = (list, tuple)
 
     def __init__(self, nested_column, **kwargs):
-        self.size_column = UInt64Column()
+        self.init_kwargs = kwargs
+        self.size_column = UInt64Column(**kwargs)
         self.nested_column = nested_column
         self._write_depth_0_size = True
         super(ArrayColumn, self).__init__(**kwargs)
-        self.null_value = [nested_column.null_value]
+        self.null_value = []
 
     def write_data(self, data, buf):
         # Column of Array(T) is stored in "compact" format and passed to server
         # wrapped into another Array without size of wrapper array.
-        self.nested_column = ArrayColumn(self.nested_column)
+        self.nested_column = ArrayColumn(
+            self.nested_column, **self.init_kwargs
+        )
         self.nested_column.nullable = self.nullable
         self.nullable = False
         self._write_depth_0_size = False
         self._write(data, buf)
 
-    def read_data(self, rows, buf):
-        self.nested_column = ArrayColumn(self.nested_column)
+    def read_data(self, n_rows, buf):
+        self.nested_column = ArrayColumn(
+            self.nested_column, **self.init_kwargs
+        )
         self.nested_column.nullable = self.nullable
         self.nullable = False
-        return self._read(rows, buf)[0]
+        return self._read(n_rows, buf)[0]
 
     def _write_sizes(self, value, buf):
         nulls_map = []
@@ -106,9 +111,13 @@ class ArrayColumn(Column):
         self._write_data(value, buf)
 
     def read_state_prefix(self, buf):
-        return self.nested_column.read_state_prefix(buf)
+        super(ArrayColumn, self).read_state_prefix(buf)
+
+        self.nested_column.read_state_prefix(buf)
 
     def write_state_prefix(self, buf):
+        super(ArrayColumn, self).write_state_prefix(buf)
+
         self.nested_column.write_state_prefix(buf)
 
     def _read(self, size, buf):
clickhouse_driver/columns/base.py

@@ -1,6 +1,61 @@
 from struct import Struct, error as struct_error
 
 from . import exceptions
+from ..varint import read_varint
+
+
+class CommonSerialization(object):
+    def __init__(self, column):
+        self.column = column
+        super(CommonSerialization, self).__init__()
+
+    def read_sparse(self, n_items, buf):
+        return n_items
+
+    def apply_sparse(self, items):
+        return items
+
+
+class SparseSerialization(CommonSerialization):
+
+    def __init__(self, *args, **kwargs):
+        self.sparse_indexes = []
+        self.items_total = None
+        super(SparseSerialization, self).__init__(*args, **kwargs)
+
+    def read_sparse(self, n_items, buf):
+        sparse_indexes = []
+        items_total = 0
+        non_default_items = 0
+
+        END_OF_GRANULE_FLAG = 1 << 62
+        end_of_granule = False
+
+        while not end_of_granule:
+            group_size = read_varint(buf)
+            end_of_granule = group_size & END_OF_GRANULE_FLAG
+            group_size &= ~END_OF_GRANULE_FLAG
+
+            items_total += group_size + 1
+            if not end_of_granule:
+                non_default_items += 1
+                sparse_indexes.append(items_total)
+
+        self.sparse_indexes = sparse_indexes
+        self.items_total = items_total
+
+        return non_default_items
+
+    def apply_sparse(self, items):
+        default = self.column.null_value
+        if self.column.after_read_items:
+            default = self.column.after_read_items([default])[0]
+
+        rv = [default] * (self.items_total - 1)
+        for item_number, i in enumerate(self.sparse_indexes):
+            rv[i - 1] = items[item_number]
+
+        return rv
 
 
 class Column(object):
@@ -15,14 +70,17 @@ class Column(object):
 
     null_value = 0
 
-    def __init__(self, types_check=False, **kwargs):
+    def __init__(self, types_check=False, has_custom_serialization=False,
+                 **kwargs):
         self.nullable = False
         self.types_check_enabled = types_check
+        self.has_custom_serialization = has_custom_serialization
+        self.serialization = CommonSerialization(self)
         self.input_null_as_default = False
-        if 'context' in kwargs:
-            settings = kwargs['context'].client_settings
-            self.input_null_as_default = settings \
-                .get('input_format_null_as_default', False)
+
+        self.context = kwargs['context']
+        self.input_null_as_default = self.context.client_settings \
+            .get('input_format_null_as_default', False)
 
         super(Column, self).__init__()
 
@@ -94,12 +152,15 @@ class Column(object):
         raise NotImplementedError
 
     def read_data(self, n_items, buf):
+        n_items = self.serialization.read_sparse(n_items, buf)
+
         if self.nullable:
             nulls_map = self._read_nulls_map(n_items, buf)
         else:
             nulls_map = None
 
-        return self._read_data(n_items, buf, nulls_map=nulls_map)
+        items = self._read_data(n_items, buf, nulls_map=nulls_map)
+        return self.serialization.apply_sparse(items)
 
     def _read_data(self, n_items, buf, nulls_map=None):
         items = self.read_items(n_items, buf)
@@ -117,7 +178,10 @@ class Column(object):
         raise NotImplementedError
 
     def read_state_prefix(self, buf):
-        pass
+        if self.has_custom_serialization:
+            use_custom_serialization = read_varint(buf)
+            if use_custom_serialization:
+                self.serialization = SparseSerialization(self)
 
     def write_state_prefix(self, buf):
         pass
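For orientation: ``read_sparse`` above reads the sparse stream roughly as run lengths of default values between the non-default rows, and ``apply_sparse`` expands the few decoded items back into a dense column. A standalone toy illustration of that expansion, with invented values (not part of the package):

    # A granule that expands to 10 rows, 3 of which are non-default.
    items_total = 11                 # as accumulated by read_sparse: one more than the row count
    sparse_indexes = [2, 5, 9]       # positions recorded for the non-default rows
    non_default_items = ['a', 'b', 'c']
    default = ''

    dense = [default] * (items_total - 1)
    for item_number, i in enumerate(sparse_indexes):
        dense[i - 1] = non_default_items[item_number]

    print(dense)  # ['', 'a', '', '', 'b', '', '', '', 'c', '']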