clickhouse-driver 0.2.1__cp39-cp39-win_amd64.whl → 0.2.8__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. clickhouse_driver/__init__.py +9 -9
  2. clickhouse_driver/block.py +227 -195
  3. clickhouse_driver/blockstreamprofileinfo.py +22 -22
  4. clickhouse_driver/bufferedreader.cp39-win_amd64.pyd +0 -0
  5. clickhouse_driver/bufferedwriter.cp39-win_amd64.pyd +0 -0
  6. clickhouse_driver/client.py +896 -666
  7. clickhouse_driver/clientinfo.py +119 -80
  8. clickhouse_driver/columns/arraycolumn.py +161 -150
  9. clickhouse_driver/columns/base.py +221 -147
  10. clickhouse_driver/columns/boolcolumn.py +7 -0
  11. clickhouse_driver/columns/datecolumn.py +108 -49
  12. clickhouse_driver/columns/datetimecolumn.py +202 -207
  13. clickhouse_driver/columns/decimalcolumn.py +116 -118
  14. clickhouse_driver/columns/enumcolumn.py +119 -119
  15. clickhouse_driver/columns/exceptions.py +12 -12
  16. clickhouse_driver/columns/floatcolumn.py +34 -34
  17. clickhouse_driver/columns/intcolumn.py +157 -157
  18. clickhouse_driver/columns/intervalcolumn.py +33 -33
  19. clickhouse_driver/columns/ipcolumn.py +118 -118
  20. clickhouse_driver/columns/jsoncolumn.py +37 -0
  21. clickhouse_driver/columns/largeint.cp39-win_amd64.pyd +0 -0
  22. clickhouse_driver/columns/lowcardinalitycolumn.py +142 -123
  23. clickhouse_driver/columns/mapcolumn.py +73 -58
  24. clickhouse_driver/columns/nestedcolumn.py +10 -0
  25. clickhouse_driver/columns/nothingcolumn.py +13 -13
  26. clickhouse_driver/columns/nullablecolumn.py +7 -7
  27. clickhouse_driver/columns/nullcolumn.py +15 -15
  28. clickhouse_driver/columns/numpy/base.py +47 -14
  29. clickhouse_driver/columns/numpy/boolcolumn.py +8 -0
  30. clickhouse_driver/columns/numpy/datecolumn.py +19 -12
  31. clickhouse_driver/columns/numpy/datetimecolumn.py +143 -145
  32. clickhouse_driver/columns/numpy/floatcolumn.py +24 -13
  33. clickhouse_driver/columns/numpy/intcolumn.py +43 -43
  34. clickhouse_driver/columns/numpy/lowcardinalitycolumn.py +96 -83
  35. clickhouse_driver/columns/numpy/service.py +58 -80
  36. clickhouse_driver/columns/numpy/stringcolumn.py +78 -76
  37. clickhouse_driver/columns/numpy/tuplecolumn.py +37 -0
  38. clickhouse_driver/columns/service.py +185 -131
  39. clickhouse_driver/columns/simpleaggregatefunctioncolumn.py +7 -7
  40. clickhouse_driver/columns/stringcolumn.py +73 -73
  41. clickhouse_driver/columns/tuplecolumn.py +63 -65
  42. clickhouse_driver/columns/util.py +60 -0
  43. clickhouse_driver/columns/uuidcolumn.py +64 -64
  44. clickhouse_driver/compression/__init__.py +28 -28
  45. clickhouse_driver/compression/base.py +87 -52
  46. clickhouse_driver/compression/lz4.py +21 -55
  47. clickhouse_driver/compression/lz4hc.py +9 -9
  48. clickhouse_driver/compression/zstd.py +20 -51
  49. clickhouse_driver/connection.py +784 -632
  50. clickhouse_driver/context.py +36 -36
  51. clickhouse_driver/dbapi/__init__.py +62 -62
  52. clickhouse_driver/dbapi/connection.py +99 -96
  53. clickhouse_driver/dbapi/cursor.py +370 -368
  54. clickhouse_driver/dbapi/errors.py +40 -40
  55. clickhouse_driver/dbapi/extras.py +73 -0
  56. clickhouse_driver/defines.py +55 -42
  57. clickhouse_driver/errors.py +453 -446
  58. clickhouse_driver/log.py +48 -44
  59. clickhouse_driver/numpy/block.py +8 -8
  60. clickhouse_driver/numpy/helpers.py +25 -25
  61. clickhouse_driver/numpy/result.py +123 -123
  62. clickhouse_driver/opentelemetry.py +43 -0
  63. clickhouse_driver/progress.py +38 -32
  64. clickhouse_driver/protocol.py +114 -105
  65. clickhouse_driver/queryprocessingstage.py +8 -8
  66. clickhouse_driver/reader.py +69 -69
  67. clickhouse_driver/readhelpers.py +26 -26
  68. clickhouse_driver/result.py +144 -144
  69. clickhouse_driver/settings/available.py +405 -405
  70. clickhouse_driver/settings/types.py +50 -50
  71. clickhouse_driver/settings/writer.py +34 -29
  72. clickhouse_driver/streams/compressed.py +88 -88
  73. clickhouse_driver/streams/native.py +102 -90
  74. clickhouse_driver/util/compat.py +39 -0
  75. clickhouse_driver/util/escape.py +94 -55
  76. clickhouse_driver/util/helpers.py +57 -57
  77. clickhouse_driver/varint.cp39-win_amd64.pyd +0 -0
  78. clickhouse_driver/writer.py +67 -67
  79. {clickhouse_driver-0.2.1.dist-info → clickhouse_driver-0.2.8.dist-info}/LICENSE +21 -21
  80. clickhouse_driver-0.2.8.dist-info/METADATA +201 -0
  81. clickhouse_driver-0.2.8.dist-info/RECORD +89 -0
  82. {clickhouse_driver-0.2.1.dist-info → clickhouse_driver-0.2.8.dist-info}/WHEEL +1 -1
  83. clickhouse_driver-0.2.1.dist-info/METADATA +0 -24
  84. clickhouse_driver-0.2.1.dist-info/RECORD +0 -80
  85. {clickhouse_driver-0.2.1.dist-info → clickhouse_driver-0.2.8.dist-info}/top_level.txt +0 -0
@@ -1,43 +1,43 @@
1
- import numpy as np
2
-
3
- from .base import NumpyColumn
4
-
5
-
6
- class NumpyInt8Column(NumpyColumn):
7
- dtype = np.dtype(np.int8)
8
- ch_type = 'Int8'
9
-
10
-
11
- class NumpyUInt8Column(NumpyColumn):
12
- dtype = np.dtype(np.uint8)
13
- ch_type = 'UInt8'
14
-
15
-
16
- class NumpyInt16Column(NumpyColumn):
17
- dtype = np.dtype(np.int16)
18
- ch_type = 'Int16'
19
-
20
-
21
- class NumpyUInt16Column(NumpyColumn):
22
- dtype = np.dtype(np.uint16)
23
- ch_type = 'UInt16'
24
-
25
-
26
- class NumpyInt32Column(NumpyColumn):
27
- dtype = np.dtype(np.int32)
28
- ch_type = 'Int32'
29
-
30
-
31
- class NumpyUInt32Column(NumpyColumn):
32
- dtype = np.dtype(np.uint32)
33
- ch_type = 'UInt32'
34
-
35
-
36
- class NumpyInt64Column(NumpyColumn):
37
- dtype = np.dtype(np.int64)
38
- ch_type = 'Int64'
39
-
40
-
41
- class NumpyUInt64Column(NumpyColumn):
42
- dtype = np.dtype(np.uint64)
43
- ch_type = 'UInt64'
1
+ import numpy as np
2
+
3
+ from .base import NumpyColumn
4
+
5
+
6
+ class NumpyInt8Column(NumpyColumn):
7
+ dtype = np.dtype(np.int8)
8
+ ch_type = 'Int8'
9
+
10
+
11
+ class NumpyUInt8Column(NumpyColumn):
12
+ dtype = np.dtype(np.uint8)
13
+ ch_type = 'UInt8'
14
+
15
+
16
+ class NumpyInt16Column(NumpyColumn):
17
+ dtype = np.dtype(np.int16)
18
+ ch_type = 'Int16'
19
+
20
+
21
+ class NumpyUInt16Column(NumpyColumn):
22
+ dtype = np.dtype(np.uint16)
23
+ ch_type = 'UInt16'
24
+
25
+
26
+ class NumpyInt32Column(NumpyColumn):
27
+ dtype = np.dtype(np.int32)
28
+ ch_type = 'Int32'
29
+
30
+
31
+ class NumpyUInt32Column(NumpyColumn):
32
+ dtype = np.dtype(np.uint32)
33
+ ch_type = 'UInt32'
34
+
35
+
36
+ class NumpyInt64Column(NumpyColumn):
37
+ dtype = np.dtype(np.int64)
38
+ ch_type = 'Int64'
39
+
40
+
41
+ class NumpyUInt64Column(NumpyColumn):
42
+ dtype = np.dtype(np.uint64)
43
+ ch_type = 'UInt64'
@@ -1,83 +1,96 @@
1
- from math import log
2
-
3
- import pandas as pd
4
-
5
- from ..lowcardinalitycolumn import LowCardinalityColumn
6
- from ...reader import read_binary_uint64
7
- from ...writer import write_binary_int64
8
- from .intcolumn import (
9
- NumpyUInt8Column, NumpyUInt16Column, NumpyUInt32Column, NumpyUInt64Column
10
- )
11
-
12
-
13
- class NumpyLowCardinalityColumn(LowCardinalityColumn):
14
- int_types = {
15
- 0: NumpyUInt8Column,
16
- 1: NumpyUInt16Column,
17
- 2: NumpyUInt32Column,
18
- 3: NumpyUInt64Column
19
- }
20
-
21
- def __init__(self, nested_column, **kwargs):
22
- super(NumpyLowCardinalityColumn, self).__init__(nested_column,
23
- **kwargs)
24
-
25
- def _write_data(self, items, buf):
26
- # TODO: nullable support
27
-
28
- # Do not write anything for empty column.
29
- # May happen while writing empty arrays.
30
- if not len(items):
31
- return
32
-
33
- c = pd.Categorical(items)
34
-
35
- int_type = int(log(len(c.codes), 2) / 8)
36
- int_column = self.int_types[int_type]()
37
-
38
- serialization_type = self.serialization_type | int_type
39
-
40
- index = c.categories
41
- keys = c.codes
42
-
43
- write_binary_int64(serialization_type, buf)
44
- write_binary_int64(len(index), buf)
45
-
46
- self.nested_column.write_data(index.to_numpy(items.dtype), buf)
47
- write_binary_int64(len(items), buf)
48
- int_column.write_data(keys, buf)
49
-
50
- def _read_data(self, n_items, buf, nulls_map=None):
51
- if not n_items:
52
- return tuple()
53
-
54
- serialization_type = read_binary_uint64(buf)
55
-
56
- # Lowest byte contains info about key type.
57
- key_type = serialization_type & 0xf
58
- keys_column = self.int_types[key_type]()
59
-
60
- nullable = self.nested_column.nullable
61
- # Prevent null map reading. Reset nested column nullable flag.
62
- self.nested_column.nullable = False
63
-
64
- index_size = read_binary_uint64(buf)
65
- index = self.nested_column.read_data(index_size, buf)
66
-
67
- read_binary_uint64(buf) # number of keys
68
- keys = keys_column.read_data(n_items, buf)
69
-
70
- if nullable:
71
- # Shift all codes by one ("No value" code is -1 for pandas
72
- # categorical) and drop corresponding first index
73
- # this is analog of original operation:
74
- # index = (None, ) + index[1:]
75
- keys = keys - 1
76
- index = index[1:]
77
- return pd.Categorical.from_codes(keys, index)
78
-
79
-
80
- def create_numpy_low_cardinality_column(spec, column_by_spec_getter):
81
- inner = spec[15:-1]
82
- nested = column_by_spec_getter(inner)
83
- return NumpyLowCardinalityColumn(nested)
1
+ from math import log
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
+ from ..lowcardinalitycolumn import LowCardinalityColumn
7
+ from ...reader import read_binary_uint64
8
+ from ...writer import write_binary_int64
9
+ from .intcolumn import (
10
+ NumpyUInt8Column, NumpyUInt16Column, NumpyUInt32Column, NumpyUInt64Column
11
+ )
12
+
13
+
14
+ class NumpyLowCardinalityColumn(LowCardinalityColumn):
15
+ int_types = {
16
+ 0: NumpyUInt8Column,
17
+ 1: NumpyUInt16Column,
18
+ 2: NumpyUInt32Column,
19
+ 3: NumpyUInt64Column
20
+ }
21
+
22
+ def __init__(self, nested_column, **kwargs):
23
+ super(NumpyLowCardinalityColumn, self).__init__(nested_column,
24
+ **kwargs)
25
+
26
+ def _write_data(self, items, buf):
27
+ # Do not write anything for empty column.
28
+ # May happen while writing empty arrays.
29
+ if not len(items):
30
+ return
31
+
32
+ # Replace nans with defaults if not nullable.
33
+ if isinstance(items, np.ndarray) and not self.nested_column.nullable:
34
+ nulls = pd.isnull(items)
35
+ items = np.where(nulls, self.nested_column.null_value, items)
36
+
37
+ c = pd.Categorical(items)
38
+
39
+ int_type = int(log(len(c.codes), 2) / 8)
40
+ int_column = self.int_types[int_type](**self.init_kwargs)
41
+
42
+ serialization_type = self.serialization_type | int_type
43
+
44
+ index = c.categories
45
+ keys = c.codes
46
+
47
+ if self.nested_column.nullable:
48
+ # First element represents NULL if column is nullable.
49
+ index = index.insert(0, self.nested_column.null_value)
50
+ keys = keys + 1
51
+ # Prevent null map writing. Reset nested column nullable flag.
52
+ self.nested_column.nullable = False
53
+
54
+ write_binary_int64(serialization_type, buf)
55
+ write_binary_int64(len(index), buf)
56
+
57
+ self.nested_column.write_data(index.to_numpy(items.dtype), buf)
58
+ write_binary_int64(len(items), buf)
59
+ int_column.write_items(keys, buf)
60
+
61
+ def _read_data(self, n_items, buf, nulls_map=None):
62
+ if not n_items:
63
+ return tuple()
64
+
65
+ serialization_type = read_binary_uint64(buf)
66
+
67
+ # Lowest byte contains info about key type.
68
+ key_type = serialization_type & 0xf
69
+ keys_column = self.int_types[key_type](**self.init_kwargs)
70
+
71
+ nullable = self.nested_column.nullable
72
+ # Prevent null map reading. Reset nested column nullable flag.
73
+ self.nested_column.nullable = False
74
+
75
+ index_size = read_binary_uint64(buf)
76
+ index = self.nested_column.read_data(index_size, buf)
77
+
78
+ read_binary_uint64(buf) # number of keys
79
+ keys = keys_column.read_data(n_items, buf)
80
+
81
+ if nullable:
82
+ # Shift all codes by one ("No value" code is -1 for pandas
83
+ # categorical) and drop corresponding first index
84
+ # this is analog of original operation:
85
+ # index = (None, ) + index[1:]
86
+ keys = np.array(keys, dtype='int64') # deal with possible overflow
87
+ keys = keys - 1
88
+ index = index[1:]
89
+ return pd.Categorical.from_codes(keys, index)
90
+
91
+
92
+ def create_numpy_low_cardinality_column(spec, column_by_spec_getter,
93
+ column_options):
94
+ inner = spec[15:-1]
95
+ nested = column_by_spec_getter(inner)
96
+ return NumpyLowCardinalityColumn(nested, **column_options)
@@ -1,80 +1,58 @@
1
- from ... import errors
2
- from ..arraycolumn import create_array_column
3
- from .datecolumn import NumpyDateColumn
4
- from .datetimecolumn import create_numpy_datetime_column
5
- from ..decimalcolumn import create_decimal_column
6
- from ..enumcolumn import create_enum_column
7
- from .floatcolumn import NumpyFloat32Column, NumpyFloat64Column
8
- from .intcolumn import (
9
- NumpyInt8Column, NumpyInt16Column, NumpyInt32Column, NumpyInt64Column,
10
- NumpyUInt8Column, NumpyUInt16Column, NumpyUInt32Column, NumpyUInt64Column
11
- )
12
- from .lowcardinalitycolumn import create_numpy_low_cardinality_column
13
- from ..nothingcolumn import NothingColumn
14
- from ..nullcolumn import NullColumn
15
- # from .nullablecolumn import create_nullable_column
16
- from ..simpleaggregatefunctioncolumn import (
17
- create_simple_aggregate_function_column
18
- )
19
- from .stringcolumn import create_string_column
20
- from ..tuplecolumn import create_tuple_column
21
- from ..uuidcolumn import UUIDColumn
22
- from ..intervalcolumn import (
23
- IntervalYearColumn, IntervalMonthColumn, IntervalWeekColumn,
24
- IntervalDayColumn, IntervalHourColumn, IntervalMinuteColumn,
25
- IntervalSecondColumn
26
- )
27
- from ..ipcolumn import IPv4Column, IPv6Column
28
-
29
- column_by_type = {c.ch_type: c for c in [
30
- NumpyDateColumn,
31
- NumpyFloat32Column, NumpyFloat64Column,
32
- NumpyInt8Column, NumpyInt16Column, NumpyInt32Column, NumpyInt64Column,
33
- NumpyUInt8Column, NumpyUInt16Column, NumpyUInt32Column, NumpyUInt64Column,
34
- NothingColumn, NullColumn, UUIDColumn,
35
- IntervalYearColumn, IntervalMonthColumn, IntervalWeekColumn,
36
- IntervalDayColumn, IntervalHourColumn, IntervalMinuteColumn,
37
- IntervalSecondColumn, IPv4Column, IPv6Column
38
- ]}
39
-
40
-
41
- def get_numpy_column_by_spec(spec, column_options):
42
- def create_column_with_options(x):
43
- return get_numpy_column_by_spec(x, column_options)
44
-
45
- if spec == 'String' or spec.startswith('FixedString'):
46
- return create_string_column(spec, column_options)
47
-
48
- elif spec.startswith('Enum'):
49
- return create_enum_column(spec, column_options)
50
-
51
- elif spec.startswith('DateTime'):
52
- return create_numpy_datetime_column(spec, column_options)
53
-
54
- elif spec.startswith('Decimal'):
55
- return create_decimal_column(spec, column_options)
56
-
57
- elif spec.startswith('Array'):
58
- return create_array_column(spec, create_column_with_options)
59
-
60
- elif spec.startswith('Tuple'):
61
- return create_tuple_column(spec, create_column_with_options)
62
-
63
- # elif spec.startswith('Nullable'):
64
- # return create_nullable_column(spec, create_column_with_options)
65
-
66
- elif spec.startswith('LowCardinality'):
67
- return create_numpy_low_cardinality_column(spec,
68
- create_column_with_options)
69
-
70
- elif spec.startswith('SimpleAggregateFunction'):
71
- return create_simple_aggregate_function_column(
72
- spec, create_column_with_options)
73
-
74
- else:
75
- try:
76
- cls = column_by_type[spec]
77
- return cls(**column_options)
78
-
79
- except KeyError as e:
80
- raise errors.UnknownTypeError('Unknown type {}'.format(e.args[0]))
1
+ from ..service import aliases
2
+ from ... import errors
3
+ from .datecolumn import NumpyDateColumn
4
+ from .datetimecolumn import create_numpy_datetime_column
5
+ from .floatcolumn import NumpyFloat32Column, NumpyFloat64Column
6
+ from .intcolumn import (
7
+ NumpyInt8Column, NumpyInt16Column, NumpyInt32Column, NumpyInt64Column,
8
+ NumpyUInt8Column, NumpyUInt16Column, NumpyUInt32Column, NumpyUInt64Column
9
+ )
10
+ from .boolcolumn import NumpyBoolColumn
11
+ from .lowcardinalitycolumn import create_numpy_low_cardinality_column
12
+ from .stringcolumn import create_string_column
13
+ from .tuplecolumn import create_tuple_column
14
+ from ..nullablecolumn import create_nullable_column
15
+
16
+ column_by_type = {c.ch_type: c for c in [
17
+ NumpyDateColumn,
18
+ NumpyFloat32Column, NumpyFloat64Column,
19
+ NumpyInt8Column, NumpyInt16Column, NumpyInt32Column, NumpyInt64Column,
20
+ NumpyUInt8Column, NumpyUInt16Column, NumpyUInt32Column, NumpyUInt64Column,
21
+ NumpyBoolColumn
22
+ ]}
23
+
24
+
25
+ def get_numpy_column_by_spec(spec, column_options):
26
+ def create_column_with_options(x):
27
+ return get_numpy_column_by_spec(x, column_options)
28
+
29
+ if spec == 'String' or spec.startswith('FixedString'):
30
+ return create_string_column(spec, column_options)
31
+
32
+ elif spec.startswith('DateTime'):
33
+ return create_numpy_datetime_column(spec, column_options)
34
+
35
+ elif spec.startswith('Tuple'):
36
+ return create_tuple_column(
37
+ spec, create_column_with_options, column_options
38
+ )
39
+
40
+ elif spec.startswith('Nullable'):
41
+ return create_nullable_column(spec, create_column_with_options)
42
+
43
+ elif spec.startswith('LowCardinality'):
44
+ return create_numpy_low_cardinality_column(
45
+ spec, create_column_with_options, column_options
46
+ )
47
+ else:
48
+ for alias, primitive in aliases:
49
+ if spec.startswith(alias):
50
+ return create_column_with_options(
51
+ primitive + spec[len(alias):]
52
+ )
53
+
54
+ if spec in column_by_type:
55
+ cls = column_by_type[spec]
56
+ return cls(**column_options)
57
+
58
+ raise errors.UnknownTypeError('Unknown type {}'.format(spec))
@@ -1,76 +1,78 @@
1
- import numpy as np
2
-
3
- from ... import defines
4
- from .base import NumpyColumn
5
-
6
-
7
- class NumpyStringColumn(NumpyColumn):
8
- dtype = np.dtype('object')
9
-
10
- default_encoding = defines.STRINGS_ENCODING
11
-
12
- def __init__(self, encoding=default_encoding, **kwargs):
13
- self.encoding = encoding
14
- super(NumpyStringColumn, self).__init__(**kwargs)
15
-
16
- def read_items(self, n_items, buf):
17
- return np.array(
18
- buf.read_strings(n_items, encoding=self.encoding), dtype=self.dtype
19
- )
20
-
21
- def write_items(self, items, buf):
22
- return buf.write_strings(items.tolist(), encoding=self.encoding)
23
-
24
-
25
- class NumpyByteStringColumn(NumpyColumn):
26
- def read_items(self, n_items, buf):
27
- return np.array(buf.read_strings(n_items), dtype=self.dtype)
28
-
29
- def write_items(self, items, buf):
30
- return buf.write_strings(items.tolist())
31
-
32
-
33
- class NumpyFixedString(NumpyStringColumn):
34
- def __init__(self, length, **kwargs):
35
- self.length = length
36
- super(NumpyFixedString, self).__init__(**kwargs)
37
-
38
- def read_items(self, n_items, buf):
39
- return np.array(buf.read_fixed_strings(
40
- n_items, self.length, encoding=self.encoding
41
- ), dtype=self.dtype)
42
-
43
- def write_items(self, items, buf):
44
- return buf.write_fixed_strings(
45
- items.tolist(), self.length, encoding=self.encoding
46
- )
47
-
48
-
49
- class NumpyByteFixedString(NumpyByteStringColumn):
50
- def __init__(self, length, **kwargs):
51
- self.length = length
52
- super(NumpyByteFixedString, self).__init__(**kwargs)
53
-
54
- def read_items(self, n_items, buf):
55
- return np.array(
56
- buf.read_fixed_strings(n_items, self.length), dtype=self.dtype
57
- )
58
-
59
- def write_items(self, items, buf):
60
- return buf.write_fixed_strings(items.tolist(), self.length)
61
-
62
-
63
- def create_string_column(spec, column_options):
64
- client_settings = column_options['context'].client_settings
65
- strings_as_bytes = client_settings['strings_as_bytes']
66
- encoding = client_settings.get(
67
- 'strings_encoding', NumpyStringColumn.default_encoding
68
- )
69
-
70
- if spec == 'String':
71
- cls = NumpyByteStringColumn if strings_as_bytes else NumpyStringColumn
72
- return cls(encoding=encoding, **column_options)
73
- else:
74
- length = int(spec[12:-1])
75
- cls = NumpyByteFixedString if strings_as_bytes else NumpyFixedString
76
- return cls(length, encoding=encoding, **column_options)
1
+ import numpy as np
2
+
3
+ from ... import defines
4
+ from .base import NumpyColumn
5
+
6
+
7
+ class NumpyStringColumn(NumpyColumn):
8
+ null_value = ''
9
+
10
+ default_encoding = defines.STRINGS_ENCODING
11
+
12
+ def __init__(self, encoding=default_encoding, **kwargs):
13
+ self.encoding = encoding
14
+ super(NumpyStringColumn, self).__init__(**kwargs)
15
+
16
+ def read_items(self, n_items, buf):
17
+ return np.array(
18
+ buf.read_strings(n_items, encoding=self.encoding), dtype=self.dtype
19
+ )
20
+
21
+ def write_items(self, items, buf):
22
+ return buf.write_strings(items.tolist(), encoding=self.encoding)
23
+
24
+
25
+ class NumpyByteStringColumn(NumpyColumn):
26
+ null_value = b''
27
+
28
+ def read_items(self, n_items, buf):
29
+ return np.array(buf.read_strings(n_items), dtype=self.dtype)
30
+
31
+ def write_items(self, items, buf):
32
+ return buf.write_strings(items.tolist())
33
+
34
+
35
+ class NumpyFixedString(NumpyStringColumn):
36
+ def __init__(self, length, **kwargs):
37
+ self.length = length
38
+ super(NumpyFixedString, self).__init__(**kwargs)
39
+
40
+ def read_items(self, n_items, buf):
41
+ return np.array(buf.read_fixed_strings(
42
+ n_items, self.length, encoding=self.encoding
43
+ ), dtype=self.dtype)
44
+
45
+ def write_items(self, items, buf):
46
+ return buf.write_fixed_strings(
47
+ items.tolist(), self.length, encoding=self.encoding
48
+ )
49
+
50
+
51
+ class NumpyByteFixedString(NumpyByteStringColumn):
52
+ def __init__(self, length, **kwargs):
53
+ self.length = length
54
+ super(NumpyByteFixedString, self).__init__(**kwargs)
55
+
56
+ def read_items(self, n_items, buf):
57
+ return np.array(
58
+ buf.read_fixed_strings(n_items, self.length), dtype=self.dtype
59
+ )
60
+
61
+ def write_items(self, items, buf):
62
+ return buf.write_fixed_strings(items.tolist(), self.length)
63
+
64
+
65
+ def create_string_column(spec, column_options):
66
+ client_settings = column_options['context'].client_settings
67
+ strings_as_bytes = client_settings['strings_as_bytes']
68
+ encoding = client_settings.get(
69
+ 'strings_encoding', NumpyStringColumn.default_encoding
70
+ )
71
+
72
+ if spec == 'String':
73
+ cls = NumpyByteStringColumn if strings_as_bytes else NumpyStringColumn
74
+ return cls(encoding=encoding, **column_options)
75
+ else:
76
+ length = int(spec[12:-1])
77
+ cls = NumpyByteFixedString if strings_as_bytes else NumpyFixedString
78
+ return cls(length, encoding=encoding, **column_options)
@@ -0,0 +1,37 @@
1
+ import numpy as np
2
+
3
+ from .base import NumpyColumn
4
+ from ..util import get_inner_spec, get_inner_columns
5
+
6
+
7
+ class TupleColumn(NumpyColumn):
8
+ def __init__(self, nested_columns, **kwargs):
9
+ self.nested_columns = nested_columns
10
+ super(TupleColumn, self).__init__(**kwargs)
11
+
12
+ def write_data(self, items, buf):
13
+ names = items.dtype.names
14
+ for i, (x, name) in enumerate(zip(self.nested_columns, names)):
15
+ x.write_data(items[name], buf)
16
+
17
+ def write_items(self, items, buf):
18
+ return self.write_data(items, buf)
19
+
20
+ def read_data(self, n_items, buf):
21
+ data = [x.read_data(n_items, buf) for x in self.nested_columns]
22
+ dtype = [('f{}'.format(i), x.dtype) for i, x in enumerate(data)]
23
+ rv = np.empty(n_items, dtype=dtype)
24
+ for i, x in enumerate(data):
25
+ rv['f{}'.format(i)] = x
26
+ return rv
27
+
28
+ def read_items(self, n_items, buf):
29
+ return self.read_data(n_items, buf)
30
+
31
+
32
+ def create_tuple_column(spec, column_by_spec_getter, column_options):
33
+ inner_spec = get_inner_spec('Tuple', spec)
34
+ columns = get_inner_columns(inner_spec)
35
+
36
+ return TupleColumn([column_by_spec_getter(x) for x in columns],
37
+ **column_options)