clickhouse_driver-0.2.8-cp312-cp312-musllinux_1_1_i686.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. clickhouse_driver/__init__.py +9 -0
  2. clickhouse_driver/block.py +227 -0
  3. clickhouse_driver/blockstreamprofileinfo.py +22 -0
  4. clickhouse_driver/bufferedreader.cpython-312-i386-linux-musl.so +0 -0
  5. clickhouse_driver/bufferedwriter.cpython-312-i386-linux-musl.so +0 -0
  6. clickhouse_driver/client.py +896 -0
  7. clickhouse_driver/clientinfo.py +119 -0
  8. clickhouse_driver/columns/__init__.py +0 -0
  9. clickhouse_driver/columns/arraycolumn.py +161 -0
  10. clickhouse_driver/columns/base.py +221 -0
  11. clickhouse_driver/columns/boolcolumn.py +7 -0
  12. clickhouse_driver/columns/datecolumn.py +108 -0
  13. clickhouse_driver/columns/datetimecolumn.py +202 -0
  14. clickhouse_driver/columns/decimalcolumn.py +116 -0
  15. clickhouse_driver/columns/enumcolumn.py +119 -0
  16. clickhouse_driver/columns/exceptions.py +12 -0
  17. clickhouse_driver/columns/floatcolumn.py +34 -0
  18. clickhouse_driver/columns/intcolumn.py +157 -0
  19. clickhouse_driver/columns/intervalcolumn.py +33 -0
  20. clickhouse_driver/columns/ipcolumn.py +118 -0
  21. clickhouse_driver/columns/jsoncolumn.py +37 -0
  22. clickhouse_driver/columns/largeint.cpython-312-i386-linux-musl.so +0 -0
  23. clickhouse_driver/columns/lowcardinalitycolumn.py +142 -0
  24. clickhouse_driver/columns/mapcolumn.py +73 -0
  25. clickhouse_driver/columns/nestedcolumn.py +10 -0
  26. clickhouse_driver/columns/nothingcolumn.py +13 -0
  27. clickhouse_driver/columns/nullablecolumn.py +7 -0
  28. clickhouse_driver/columns/nullcolumn.py +15 -0
  29. clickhouse_driver/columns/numpy/__init__.py +0 -0
  30. clickhouse_driver/columns/numpy/base.py +47 -0
  31. clickhouse_driver/columns/numpy/boolcolumn.py +8 -0
  32. clickhouse_driver/columns/numpy/datecolumn.py +19 -0
  33. clickhouse_driver/columns/numpy/datetimecolumn.py +143 -0
  34. clickhouse_driver/columns/numpy/floatcolumn.py +24 -0
  35. clickhouse_driver/columns/numpy/intcolumn.py +43 -0
  36. clickhouse_driver/columns/numpy/lowcardinalitycolumn.py +96 -0
  37. clickhouse_driver/columns/numpy/service.py +58 -0
  38. clickhouse_driver/columns/numpy/stringcolumn.py +78 -0
  39. clickhouse_driver/columns/numpy/tuplecolumn.py +37 -0
  40. clickhouse_driver/columns/service.py +185 -0
  41. clickhouse_driver/columns/simpleaggregatefunctioncolumn.py +7 -0
  42. clickhouse_driver/columns/stringcolumn.py +73 -0
  43. clickhouse_driver/columns/tuplecolumn.py +63 -0
  44. clickhouse_driver/columns/util.py +60 -0
  45. clickhouse_driver/columns/uuidcolumn.py +64 -0
  46. clickhouse_driver/compression/__init__.py +28 -0
  47. clickhouse_driver/compression/base.py +87 -0
  48. clickhouse_driver/compression/lz4.py +21 -0
  49. clickhouse_driver/compression/lz4hc.py +9 -0
  50. clickhouse_driver/compression/zstd.py +20 -0
  51. clickhouse_driver/connection.py +784 -0
  52. clickhouse_driver/context.py +36 -0
  53. clickhouse_driver/dbapi/__init__.py +62 -0
  54. clickhouse_driver/dbapi/connection.py +99 -0
  55. clickhouse_driver/dbapi/cursor.py +370 -0
  56. clickhouse_driver/dbapi/errors.py +40 -0
  57. clickhouse_driver/dbapi/extras.py +73 -0
  58. clickhouse_driver/defines.py +55 -0
  59. clickhouse_driver/errors.py +453 -0
  60. clickhouse_driver/log.py +48 -0
  61. clickhouse_driver/numpy/__init__.py +0 -0
  62. clickhouse_driver/numpy/block.py +8 -0
  63. clickhouse_driver/numpy/helpers.py +25 -0
  64. clickhouse_driver/numpy/result.py +123 -0
  65. clickhouse_driver/opentelemetry.py +43 -0
  66. clickhouse_driver/progress.py +38 -0
  67. clickhouse_driver/protocol.py +114 -0
  68. clickhouse_driver/queryprocessingstage.py +8 -0
  69. clickhouse_driver/reader.py +69 -0
  70. clickhouse_driver/readhelpers.py +26 -0
  71. clickhouse_driver/result.py +144 -0
  72. clickhouse_driver/settings/__init__.py +0 -0
  73. clickhouse_driver/settings/available.py +405 -0
  74. clickhouse_driver/settings/types.py +50 -0
  75. clickhouse_driver/settings/writer.py +34 -0
  76. clickhouse_driver/streams/__init__.py +0 -0
  77. clickhouse_driver/streams/compressed.py +88 -0
  78. clickhouse_driver/streams/native.py +102 -0
  79. clickhouse_driver/util/__init__.py +0 -0
  80. clickhouse_driver/util/compat.py +39 -0
  81. clickhouse_driver/util/escape.py +94 -0
  82. clickhouse_driver/util/helpers.py +57 -0
  83. clickhouse_driver/varint.cpython-312-i386-linux-musl.so +0 -0
  84. clickhouse_driver/writer.py +67 -0
  85. clickhouse_driver-0.2.8.dist-info/LICENSE +21 -0
  86. clickhouse_driver-0.2.8.dist-info/METADATA +201 -0
  87. clickhouse_driver-0.2.8.dist-info/RECORD +89 -0
  88. clickhouse_driver-0.2.8.dist-info/WHEEL +5 -0
  89. clickhouse_driver-0.2.8.dist-info/top_level.txt +1 -0
clickhouse_driver/columns/ipcolumn.py
@@ -0,0 +1,118 @@
+ from ipaddress import IPv4Address, IPv6Address, AddressValueError
+
+ from .. import errors
+ from .exceptions import ColumnTypeMismatchException
+ from .stringcolumn import ByteFixedString
+ from .intcolumn import UInt32Column
+
+
+ class IPv4Column(UInt32Column):
+     ch_type = "IPv4"
+     py_types = (str, IPv4Address, int)
+
+     def __init__(self, types_check=False, **kwargs):
+         # UIntColumn overrides before_write_item and check_item
+         # in its __init__ when types_check is True, so we force
+         # __init__ without it, then add the appropriate check method for IPv4.
+         super(UInt32Column, self).__init__(types_check=False, **kwargs)
+
+         self.types_check_enabled = types_check
+         if types_check:
+
+             def check_item(value):
+                 if isinstance(value, int) and value < 0:
+                     raise ColumnTypeMismatchException(value)
+
+                 if not isinstance(value, IPv4Address):
+                     try:
+                         value = IPv4Address(value)
+                     except AddressValueError:
+                         # Cannot parse input as a valid IPv4 address.
+                         raise ColumnTypeMismatchException(value)
+
+             self.check_item = check_item
+
+     def after_read_items(self, items, nulls_map=None):
+         if nulls_map is None:
+             return tuple(IPv4Address(item) for item in items)
+         else:
+             return tuple(
+                 (None if is_null else IPv4Address(items[i]))
+                 for i, is_null in enumerate(nulls_map)
+             )
+
+     def before_write_items(self, items, nulls_map=None):
+         null_value = self.null_value
+
+         for i, item in enumerate(items):
+             if nulls_map and nulls_map[i]:
+                 items[i] = null_value
+                 continue
+
+             # Allow IPv4 as an integer, a string or an IPv4Address object.
+             try:
+                 if isinstance(item, int):
+                     continue
+
+                 if not isinstance(item, IPv4Address):
+                     item = IPv4Address(item)
+
+                 items[i] = int(item)
+             except AddressValueError:
+                 raise errors.CannotParseDomainError(
+                     "Cannot parse IPv4 '{}'".format(item)
+                 )
+
+
+ class IPv6Column(ByteFixedString):
+     ch_type = "IPv6"
+     py_types = (str, IPv6Address, bytes)
+
+     def __init__(self, types_check=False, **kwargs):
+         super(IPv6Column, self).__init__(16, types_check=types_check, **kwargs)
+
+         if types_check:
+
+             def check_item(value):
+                 if isinstance(value, bytes) and len(value) != 16:
+                     raise ColumnTypeMismatchException(value)
+
+                 if not isinstance(value, IPv6Address):
+                     try:
+                         value = IPv6Address(value)
+                     except AddressValueError:
+                         # Cannot parse input as a valid IPv6 address.
+                         raise ColumnTypeMismatchException(value)
+
+             self.check_item = check_item
+
+     def after_read_items(self, items, nulls_map=None):
+         if nulls_map is None:
+             return tuple(IPv6Address(item) for item in items)
+         else:
+             return tuple(
+                 (None if is_null else IPv6Address(items[i]))
+                 for i, is_null in enumerate(nulls_map)
+             )
+
+     def before_write_items(self, items, nulls_map=None):
+         null_value = self.null_value
+
+         for i, item in enumerate(items):
+             if nulls_map and nulls_map[i]:
+                 items[i] = null_value
+                 continue
+
+             # Allow IPv6 as bytes or a python IPv6Address object.
+             # Bytes are raw (not encoded) in order to fit FixedString(16).
+             try:
+                 if isinstance(item, bytes):
+                     continue
+
+                 if not isinstance(item, IPv6Address):
+                     item = IPv6Address(item)
+                 items[i] = item.packed
+             except AddressValueError:
+                 raise errors.CannotParseDomainError(
+                     "Cannot parse IPv6 '{}'".format(item)
+                 )
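As the before_write_items hooks above show, IPv4 accepts int, str, or IPv4Address on insert and IPv6 accepts bytes, str, or IPv6Address, while reads come back as ipaddress objects. A minimal round-trip sketch against a local server (the table name and connection details are assumptions, not part of the package):

    from ipaddress import IPv4Address, IPv6Address
    from clickhouse_driver import Client

    client = Client('localhost')  # assumes a reachable ClickHouse server

    client.execute(
        'CREATE TABLE IF NOT EXISTS ips (v4 IPv4, v6 IPv6) ENGINE = Memory'
    )

    # Mixed input forms, all accepted by before_write_items above.
    client.execute('INSERT INTO ips (v4, v6) VALUES', [
        (IPv4Address('10.0.0.1'), IPv6Address('::1')),
        ('192.168.0.1', '2001:db8::1'),  # strings are parsed on write
    ])

    # after_read_items converts raw values back to ipaddress objects.
    print(client.execute('SELECT v4, v6 FROM ips'))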
clickhouse_driver/columns/jsoncolumn.py
@@ -0,0 +1,37 @@
+ from .base import Column
+ from .stringcolumn import String
+ from ..reader import read_binary_uint8, read_binary_str
+ from ..util.compat import json
+ from ..writer import write_binary_uint8
+
+
+ class JsonColumn(Column):
+     py_types = (dict, )
+
+     # No NULL value actually
+     null_value = {}
+
+     def __init__(self, column_by_spec_getter, **kwargs):
+         self.column_by_spec_getter = column_by_spec_getter
+         self.string_column = String(**kwargs)
+         super(JsonColumn, self).__init__(**kwargs)
+
+     def write_state_prefix(self, buf):
+         # Read in binary format.
+         # Write in text format.
+         write_binary_uint8(1, buf)
+
+     def read_items(self, n_items, buf):
+         read_binary_uint8(buf)
+         spec = read_binary_str(buf)
+         col = self.column_by_spec_getter(spec)
+         col.read_state_prefix(buf)
+         return col.read_data(n_items, buf)
+
+     def write_items(self, items, buf):
+         items = [x if isinstance(x, str) else json.dumps(x) for x in items]
+         self.string_column.write_items(items, buf)
+
+
+ def create_json_column(spec, column_by_spec_getter, column_options):
+     return JsonColumn(column_by_spec_getter, **column_options)
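Per write_items above, dicts are serialized with json.dumps and written in text format, while reads are dispatched to whatever column spec the server reports back. A usage sketch, assuming a server build where the experimental JSON/Object type is available (the table name and setting are illustrative):

    from clickhouse_driver import Client

    # The JSON type is experimental and must be enabled server-side.
    client = Client('localhost',
                    settings={'allow_experimental_object_type': 1})

    client.execute(
        'CREATE TABLE IF NOT EXISTS events (data JSON) ENGINE = Memory'
    )

    # Dicts go through json.dumps and are sent as strings.
    client.execute('INSERT INTO events (data) VALUES', [
        ({'user': {'id': 1, 'name': 'alice'}, 'tags': ['a', 'b']},),
    ])

    print(client.execute('SELECT data FROM events'))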
clickhouse_driver/columns/lowcardinalitycolumn.py
@@ -0,0 +1,142 @@
+ from math import log
+
+ from ..reader import read_binary_uint64
+ from ..writer import write_binary_int64
+ from .base import Column
+ from .intcolumn import UInt8Column, UInt16Column, UInt32Column, UInt64Column
+
+
+ def create_low_cardinality_column(spec, column_by_spec_getter, column_options):
+     inner = spec[15:-1]
+     nested = column_by_spec_getter(inner)
+     return LowCardinalityColumn(nested, **column_options)
+
+
+ class LowCardinalityColumn(Column):
+     """
+     Stores column as index (unique elements) and keys.
+     Good for de-duplication of large values with low cardinality.
+     """
+     int_types = {
+         0: UInt8Column,
+         1: UInt16Column,
+         2: UInt32Column,
+         3: UInt64Column
+     }
+
+     # Need to read additional keys.
+     # Additional keys are stored before indexes as value N and N keys
+     # after them.
+     has_additional_keys_bit = 1 << 9
+     # Need to update dictionary.
+     # It means that previous granule has different dictionary.
+     need_update_dictionary = 1 << 10
+
+     serialization_type = has_additional_keys_bit | need_update_dictionary
+
+     def __init__(self, nested_column, **kwargs):
+         self.init_kwargs = kwargs
+         self.nested_column = nested_column
+         super(LowCardinalityColumn, self).__init__(**kwargs)
+
+     def read_state_prefix(self, buf):
+         super(LowCardinalityColumn, self).read_state_prefix(buf)
+
+         read_binary_uint64(buf)
+
+     def write_state_prefix(self, buf):
+         super(LowCardinalityColumn, self).write_state_prefix(buf)
+
+         # KeysSerializationVersion. See ClickHouse docs.
+         write_binary_int64(1, buf)
+
+     def _write_data(self, items, buf):
+         index, keys = [], []
+         key_by_index_element = {}
+         nested_is_nullable = False
+
+         if self.nested_column.nullable:
+             # First element represents NULL if column is nullable.
+             index.append(self.nested_column.null_value)
+             # Prevent null map writing. Reset nested column nullable flag.
+             self.nested_column.nullable = False
+             nested_is_nullable = True
+
+             for x in items:
+                 if x is None:
+                     # Zero element for null.
+                     keys.append(0)
+
+                 else:
+                     key = key_by_index_element.get(x)
+                     # Get key from index or add it to index.
+                     if key is None:
+                         key = len(key_by_index_element)
+                         key_by_index_element[x] = key
+                         index.append(x)
+
+                     keys.append(key + 1)
+         else:
+             for x in items:
+                 key = key_by_index_element.get(x)
+
+                 # Get key from index or add it to index.
+                 if key is None:
+                     key = len(key_by_index_element)
+                     key_by_index_element[x] = len(key_by_index_element)
+                     index.append(x)
+
+                 keys.append(key)
+
+         # Do not write anything for empty column.
+         # May happen while writing empty arrays.
+         if not len(index):
+             return
+
+         int_type = int(log(len(index), 2) / 8)
+         int_column = self.int_types[int_type](**self.init_kwargs)
+
+         serialization_type = self.serialization_type | int_type
+
+         write_binary_int64(serialization_type, buf)
+         write_binary_int64(len(index), buf)
+
+         if nested_is_nullable:
+             # Given we reset the nested column's nullable flag above,
+             # we need to write the null map manually. Invoking the
+             # write_data method instead would cause an exception,
+             # because `prepare_data` may not be able to handle the
+             # null value correctly.
+             self.nested_column.write_items(
+                 [self.nested_column.null_value], buf)
+             # Remove null map from index, because it is already written.
+             index_to_write = index[1:]
+             self.nested_column.write_data(index_to_write, buf)
+         else:
+             self.nested_column.write_data(index, buf)
+         write_binary_int64(len(items), buf)
+         int_column.write_items(keys, buf)
+
+     def _read_data(self, n_items, buf, nulls_map=None):
+         if not n_items:
+             return tuple()
+
+         serialization_type = read_binary_uint64(buf)
+
+         # Lowest byte contains info about key type.
+         key_type = serialization_type & 0xf
+         keys_column = self.int_types[key_type](**self.init_kwargs)
+
+         nullable = self.nested_column.nullable
+         # Prevent null map reading. Reset nested column nullable flag.
+         self.nested_column.nullable = False
+
+         index_size = read_binary_uint64(buf)
+         index = self.nested_column.read_data(index_size, buf)
+         if nullable:
+             index = (None, ) + index[1:]
+
+         read_binary_uint64(buf)  # number of keys
+         keys = keys_column.read_data(n_items, buf)
+
+         return tuple(index[x] for x in keys)
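The key width in _write_data is derived from the dictionary size: int(log(len(index), 2) / 8) selects UInt8 keys while the index holds fewer than 256 unique elements, UInt16 below 65536, and so on (exact powers of two round up to the next width). A standalone check of that arithmetic:

    from math import log

    int_types = {0: 'UInt8', 1: 'UInt16', 2: 'UInt32', 3: 'UInt64'}

    for index_len in (2, 255, 256, 65535, 65536, 70000):
        width = int(log(index_len, 2) / 8)
        print(index_len, '->', int_types[width])
    # 2 -> UInt8, 255 -> UInt8, 256 -> UInt16,
    # 65535 -> UInt16, 65536 -> UInt32, 70000 -> UInt32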
clickhouse_driver/columns/mapcolumn.py
@@ -0,0 +1,73 @@
+ import re
+ from .base import Column
+ from .intcolumn import UInt64Column
+ from ..util.helpers import pairwise
+
+
+ comma_re = re.compile(r',(?![^()]*\))')
+
+
+ class MapColumn(Column):
+     py_types = (dict, )
+
+     null_value = {}
+
+     def __init__(self, key_column, value_column, **kwargs):
+         self.offset_column = UInt64Column(**kwargs)
+         self.key_column = key_column
+         self.value_column = value_column
+         super(MapColumn, self).__init__(**kwargs)
+
+     def read_state_prefix(self, buf):
+         super(MapColumn, self).read_state_prefix(buf)
+
+         self.key_column.read_state_prefix(buf)
+         self.value_column.read_state_prefix(buf)
+
+     def write_state_prefix(self, buf):
+         super(MapColumn, self).write_state_prefix(buf)
+
+         self.key_column.write_state_prefix(buf)
+         self.value_column.write_state_prefix(buf)
+
+     def read_items(self, n_items, buf):
+         if not n_items:
+             return [{}]
+
+         offsets = list(self.offset_column.read_items(n_items, buf))
+         last_offset = offsets[-1]
+         keys = self.key_column.read_data(last_offset, buf)
+         values = self.value_column.read_data(last_offset, buf)
+
+         offsets.insert(0, 0)
+
+         return [
+             dict(zip(keys[begin:end], values[begin:end]))
+             for begin, end in pairwise(offsets)
+         ]
+
+     def write_items(self, items, buf):
+         offsets = []
+         keys = []
+         values = []
+
+         total = 0
+         for x in items:
+             total += len(x)
+             offsets.append(total)
+             keys.extend(x.keys())
+             values.extend(x.values())
+
+         self.offset_column.write_items(offsets, buf)
+         self.key_column.write_data(keys, buf)
+         self.value_column.write_data(values, buf)
+
+
+ def create_map_column(spec, column_by_spec_getter, column_options):
+     # Match commas outside of parentheses, so we don't match the comma in
+     # Decimal types.
+     key, value = comma_re.split(spec[4:-1])
+     key_column = column_by_spec_getter(key.strip())
+     value_column = column_by_spec_getter(value.strip())
+
+     return MapColumn(key_column, value_column, **column_options)
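The comma_re pattern splits only on the top-level comma of a Map(key, value) spec: the negative lookahead (?![^()]*\)) refuses to split when the comma is followed by a closing parenthesis with no opening one in between, i.e. when it sits inside a parameterized type. A quick standalone check:

    import re

    comma_re = re.compile(r',(?![^()]*\))')

    # Top-level comma splits; the comma inside Decimal(18, 4) does not.
    print(comma_re.split('String, Decimal(18, 4)'))
    # ['String', ' Decimal(18, 4)']
    print(comma_re.split('UInt64, Array(UInt8)'))
    # ['UInt64', ' Array(UInt8)']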
clickhouse_driver/columns/nestedcolumn.py
@@ -0,0 +1,10 @@
+
+ from .arraycolumn import create_array_column
+ from .util import get_inner_spec
+
+
+ def create_nested_column(spec, column_by_spec_getter, column_options):
+     return create_array_column(
+         'Array(Tuple({}))'.format(get_inner_spec('Nested', spec)),
+         column_by_spec_getter, column_options
+     )
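Nested columns are thus handled entirely by rewriting the spec into an array of tuples and delegating to the array machinery: Nested(id UInt32, name String) becomes Array(Tuple(id UInt32, name String)). A toy illustration of the rewrite, using a simplified stand-in for get_inner_spec (the real helper in columns/util.py also copes with nested parentheses):

    def get_inner_spec(prefix, spec):
        # Simplified stand-in: strip 'Nested(' and the trailing ')'.
        return spec[len(prefix) + 1:-1]

    spec = 'Nested(id UInt32, name String)'
    print('Array(Tuple({}))'.format(get_inner_spec('Nested', spec)))
    # Array(Tuple(id UInt32, name String))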
clickhouse_driver/columns/nothingcolumn.py
@@ -0,0 +1,13 @@
+ from .intcolumn import FormatColumn
+
+
+ class NothingColumn(FormatColumn):
+     ch_type = 'Nothing'
+     format = 'B'
+
+     @property
+     def size(self):
+         return 1
+
+     def after_read_items(self, items, nulls_map=None):
+         return (None, ) * len(items)
clickhouse_driver/columns/nullablecolumn.py
@@ -0,0 +1,7 @@
+
+
+ def create_nullable_column(spec, column_by_spec_getter):
+     inner = spec[9:-1]
+     nested = column_by_spec_getter(inner)
+     nested.nullable = True
+     return nested
clickhouse_driver/columns/nullcolumn.py
@@ -0,0 +1,15 @@
+ from .intcolumn import FormatColumn
+
+
+ # TODO: Drop Null column support in future.
+ # Compatibility with old servers.
+ class NullColumn(FormatColumn):
+     ch_type = 'Null'
+     format = 'B'
+
+     @property
+     def size(self):
+         return 1
+
+     def after_read_items(self, items, nulls_map=None):
+         return (None, ) * len(items)
clickhouse_driver/columns/numpy/__init__.py (file without changes)
clickhouse_driver/columns/numpy/base.py
@@ -0,0 +1,47 @@
+ import numpy as np
+ import pandas as pd
+
+ from ..base import Column
+
+
+ class NumpyColumn(Column):
+     dtype = None
+
+     normalize_null_value = True
+
+     def read_items(self, n_items, buf):
+         data = buf.read(n_items * self.dtype.itemsize)
+         return np.frombuffer(data, self.dtype.newbyteorder('<'), n_items)
+
+     def write_items(self, items, buf):
+         buf.write(items.astype(self.dtype.newbyteorder('<')).tobytes())
+
+     def _write_nulls_map(self, items, buf):
+         s = self.make_null_struct(len(items))
+         nulls_map = self._get_nulls_map(items)
+         buf.write(s.pack(*nulls_map))
+
+     def _get_nulls_map(self, items):
+         return [bool(x) for x in pd.isnull(items)]
+
+     def _read_data(self, n_items, buf, nulls_map=None):
+         items = self.read_items(n_items, buf)
+
+         if self.after_read_items:
+             return self.after_read_items(items, nulls_map)
+         elif nulls_map is not None:
+             items = np.array(items, dtype=object)
+             np.place(items, nulls_map, None)
+
+         return items
+
+     def prepare_items(self, items):
+         nulls_map = pd.isnull(items)
+
+         # Always replace null values with null_value for proper inserts
+         # into non-nullable columns.
+         if isinstance(items, np.ndarray) and self.normalize_null_value:
+             items = np.array(items)
+             np.place(items, nulls_map, self.null_value)
+
+         return items
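read_items above is a plain fixed-width read: n_items * itemsize bytes are pulled from the buffer and reinterpreted as a little-endian array without per-element copying. A standalone sketch against an in-memory buffer (the UInt32 dtype is just an example):

    import io

    import numpy as np

    dtype = np.dtype(np.uint32)
    # Two little-endian UInt32 values: 1 and 2.
    buf = io.BytesIO(b'\x01\x00\x00\x00\x02\x00\x00\x00')

    n_items = 2
    data = buf.read(n_items * dtype.itemsize)
    print(np.frombuffer(data, dtype.newbyteorder('<'), n_items))  # [1 2]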
clickhouse_driver/columns/numpy/boolcolumn.py
@@ -0,0 +1,8 @@
+ import numpy as np
+
+ from .base import NumpyColumn
+
+
+ class NumpyBoolColumn(NumpyColumn):
+     dtype = np.dtype(np.bool_)
+     ch_type = 'Bool'
clickhouse_driver/columns/numpy/datecolumn.py
@@ -0,0 +1,19 @@
+ import numpy as np
+
+ from .base import NumpyColumn
+
+
+ class NumpyDateColumn(NumpyColumn):
+     dtype = np.dtype(np.uint16)
+     ch_type = 'Date'
+
+     null_value = np.datetime64(0, 'Y')
+
+     def read_items(self, n_items, buf):
+         data = super(NumpyDateColumn, self).read_items(n_items, buf)
+         return data.astype('datetime64[D]')
+
+     def write_items(self, items, buf):
+         super(NumpyDateColumn, self).write_items(
+             items.astype('datetime64[D]'), buf
+         )
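ClickHouse Date is a UInt16 count of days since the Unix epoch, which is why the round trip above is a bare astype('datetime64[D]') in each direction. A small sketch of the reinterpretation (the sample day counts are made up):

    import numpy as np

    days = np.array([0, 19723], dtype=np.uint16)  # days since 1970-01-01
    print(days.astype('datetime64[D]'))  # ['1970-01-01' '2024-01-01']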
clickhouse_driver/columns/numpy/datetimecolumn.py
@@ -0,0 +1,143 @@
+ import numpy as np
+ import pandas as pd
+ from pytz import timezone as get_timezone
+
+ from .base import NumpyColumn
+ from ...util.compat import get_localzone_name_compat
+
+
+ class NumpyDateTimeColumnBase(NumpyColumn):
+     datetime_dtype = None
+
+     null_value = np.datetime64(0, 'Y')
+
+     def __init__(self, timezone=None, offset_naive=True, local_timezone=None,
+                  **kwargs):
+         self.timezone = timezone
+         self.offset_naive = offset_naive
+         self.local_timezone = local_timezone
+         super(NumpyDateTimeColumnBase, self).__init__(**kwargs)
+
+     def apply_timezones_after_read(self, dt):
+         timezone = self.timezone if self.timezone else self.local_timezone
+
+         if self.offset_naive and timezone.zone != 'UTC':
+             ts = pd.to_datetime(dt, utc=True).tz_convert(timezone)
+             ts = ts.tz_localize(None)
+             return ts.to_numpy(self.datetime_dtype)
+
+         return dt
+
+     def apply_timezones_before_write(self, items):
+         if isinstance(items, pd.DatetimeIndex):
+             ts = items
+         else:
+             timezone = self.timezone if self.timezone else self.local_timezone
+             ts = pd.to_datetime(items).tz_localize(timezone)
+
+         ts = ts.tz_convert('UTC')
+         return ts.tz_localize(None).to_numpy(self.datetime_dtype)
+
+     def is_items_integer(self, items):
+         return (
+             isinstance(items, np.ndarray) and
+             np.issubdtype(items.dtype, np.integer)
+         )
+
+
+ class NumpyDateTimeColumn(NumpyDateTimeColumnBase):
+     dtype = np.dtype(np.uint32)
+     datetime_dtype = 'datetime64[s]'
+
+     def write_items(self, items, buf):
+         # Write ints 'as is'.
+         if self.is_items_integer(items):
+             super(NumpyDateTimeColumn, self).write_items(items, buf)
+             return
+
+         items = self.apply_timezones_before_write(items)
+
+         super(NumpyDateTimeColumn, self).write_items(items, buf)
+
+     def read_items(self, n_items, buf):
+         items = super(NumpyDateTimeColumn, self).read_items(n_items, buf)
+         return self.apply_timezones_after_read(items.astype('datetime64[s]'))
+
+
+ class NumpyDateTime64Column(NumpyDateTimeColumnBase):
+     dtype = np.dtype(np.int64)
+     datetime_dtype = 'datetime64[ns]'
+
+     max_scale = 9
+
+     def __init__(self, scale=0, **kwargs):
+         self.scale = scale
+         super(NumpyDateTime64Column, self).__init__(**kwargs)
+
+     def read_items(self, n_items, buf):
+         # ClickHouse: t seconds is represented as t * 10^scale.
+         # datetime64[ns]: t seconds is represented as t * 10^9.
+         # Since 0 <= scale <= 9, multiply by the integer 10^(9 - scale).
+         items = super(NumpyDateTime64Column, self).read_items(n_items, buf)
+
+         tmp = np.copy(items)
+         tmp *= 10 ** (9 - self.scale)
+         dt = tmp.view(dtype='datetime64[ns]')
+
+         return self.apply_timezones_after_read(dt)
+
+     def write_items(self, items, buf):
+         # Write ints 'as is'.
+         if self.is_items_integer(items):
+             super(NumpyDateTime64Column, self).write_items(items, buf)
+             return
+
+         scale = 10 ** self.scale
+         frac_scale = 10 ** (self.max_scale - self.scale)
+
+         items = self.apply_timezones_before_write(items)
+
+         seconds = items.astype('datetime64[s]')
+         microseconds = (items - seconds).astype(dtype='timedelta64[ns]') \
+             .astype(np.uint32) // frac_scale
+
+         items = seconds.astype(self.dtype) * scale + microseconds
+
+         super(NumpyDateTime64Column, self).write_items(items, buf)
+
+
+ def create_numpy_datetime_column(spec, column_options):
+     if spec.startswith('DateTime64'):
+         cls = NumpyDateTime64Column
+         spec = spec[11:-1]
+         params = spec.split(',', 1)
+         column_options['scale'] = int(params[0])
+         if len(params) > 1:
+             spec = params[1].strip() + ')'
+     else:
+         cls = NumpyDateTimeColumn
+         spec = spec[9:]
+
+     context = column_options['context']
+
+     tz_name = None
+     offset_naive = True
+
+     # As NumPy does not use the local timezone when converting timestamps
+     # to datetimes, we always need to detect the local timezone for manual
+     # conversion.
+     local_tz_name = get_localzone_name_compat()
+
+     # Use column's timezone if it's specified.
+     if spec and spec[-1] == ')':
+         tz_name = spec[1:-2]
+         offset_naive = False
+     else:
+         if not context.settings.get('use_client_time_zone', False):
+             if local_tz_name != context.server_info.timezone:
+                 tz_name = context.server_info.timezone
+
+     timezone = get_timezone(tz_name) if tz_name else None
+     local_timezone = get_timezone(local_tz_name) if local_tz_name else None
+
+     return cls(timezone=timezone, offset_naive=offset_naive,
+                local_timezone=local_timezone, **column_options)
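The scale handling in read_items is pure integer arithmetic: DateTime64(scale) stores t seconds as t * 10^scale, and datetime64[ns] wants t * 10^9, so raw ticks are multiplied by 10^(9 - scale) and reinterpreted. A self-contained sketch for scale 3 (the sample tick value is made up):

    import numpy as np

    scale = 3  # DateTime64(3), i.e. millisecond ticks
    raw = np.array([1700000000123], dtype=np.int64)  # made-up raw value

    tmp = np.copy(raw)
    tmp *= 10 ** (9 - scale)  # milliseconds -> nanoseconds
    print(tmp.view(dtype='datetime64[ns]'))
    # ['2023-11-14T22:13:20.123000000']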
clickhouse_driver/columns/numpy/floatcolumn.py
@@ -0,0 +1,24 @@
+ import numpy as np
+
+ from .base import NumpyColumn
+
+ # normalize_null_value = False due to float('nan').
+ # With normalization pandas.isnull would treat float('nan') as a NULL value.
+
+
+ class NumpyFloat32Column(NumpyColumn):
+     dtype = np.dtype(np.float32)
+     ch_type = 'Float32'
+     normalize_null_value = False
+
+     def _get_nulls_map(self, items):
+         return [x is None for x in items]
+
+
+ class NumpyFloat64Column(NumpyColumn):
+     dtype = np.dtype(np.float64)
+     ch_type = 'Float64'
+     normalize_null_value = False
+
+     def _get_nulls_map(self, items):
+         return [x is None for x in items]
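The _get_nulls_map override means that for float columns only Python None counts as NULL, while NaN survives as an ordinary float value; the base class's pd.isnull check would flag both. A quick illustration of the difference:

    import pandas as pd

    items = [1.5, float('nan'), None]

    # Base NumpyColumn behaviour: NaN and None both read as missing.
    print([bool(x) for x in pd.isnull(items)])  # [False, True, True]

    # Float column override: only None marks a NULL.
    print([x is None for x in items])           # [False, False, True]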