clickhouse-driver 0.2.1__cp39-cp39-win_amd64.whl → 0.2.10__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clickhouse_driver/__init__.py +9 -9
- clickhouse_driver/block.py +227 -195
- clickhouse_driver/blockstreamprofileinfo.py +22 -22
- clickhouse_driver/bufferedreader.cp39-win_amd64.pyd +0 -0
- clickhouse_driver/bufferedwriter.cp39-win_amd64.pyd +0 -0
- clickhouse_driver/client.py +812 -666
- clickhouse_driver/clientinfo.py +119 -80
- clickhouse_driver/columns/arraycolumn.py +161 -150
- clickhouse_driver/columns/base.py +221 -147
- clickhouse_driver/columns/boolcolumn.py +7 -0
- clickhouse_driver/columns/datecolumn.py +108 -49
- clickhouse_driver/columns/datetimecolumn.py +203 -207
- clickhouse_driver/columns/decimalcolumn.py +116 -118
- clickhouse_driver/columns/enumcolumn.py +129 -119
- clickhouse_driver/columns/exceptions.py +12 -12
- clickhouse_driver/columns/floatcolumn.py +34 -34
- clickhouse_driver/columns/intcolumn.py +157 -157
- clickhouse_driver/columns/intervalcolumn.py +33 -33
- clickhouse_driver/columns/ipcolumn.py +118 -118
- clickhouse_driver/columns/jsoncolumn.py +37 -0
- clickhouse_driver/columns/largeint.cp39-win_amd64.pyd +0 -0
- clickhouse_driver/columns/lowcardinalitycolumn.py +142 -123
- clickhouse_driver/columns/mapcolumn.py +73 -58
- clickhouse_driver/columns/nestedcolumn.py +10 -0
- clickhouse_driver/columns/nothingcolumn.py +13 -13
- clickhouse_driver/columns/nullablecolumn.py +7 -7
- clickhouse_driver/columns/nullcolumn.py +15 -15
- clickhouse_driver/columns/numpy/base.py +47 -14
- clickhouse_driver/columns/numpy/boolcolumn.py +8 -0
- clickhouse_driver/columns/numpy/datecolumn.py +19 -12
- clickhouse_driver/columns/numpy/datetimecolumn.py +146 -145
- clickhouse_driver/columns/numpy/floatcolumn.py +24 -13
- clickhouse_driver/columns/numpy/intcolumn.py +43 -43
- clickhouse_driver/columns/numpy/lowcardinalitycolumn.py +96 -83
- clickhouse_driver/columns/numpy/service.py +58 -80
- clickhouse_driver/columns/numpy/stringcolumn.py +78 -76
- clickhouse_driver/columns/numpy/tuplecolumn.py +37 -0
- clickhouse_driver/columns/service.py +185 -131
- clickhouse_driver/columns/simpleaggregatefunctioncolumn.py +7 -7
- clickhouse_driver/columns/stringcolumn.py +73 -73
- clickhouse_driver/columns/tuplecolumn.py +63 -65
- clickhouse_driver/columns/util.py +61 -0
- clickhouse_driver/columns/uuidcolumn.py +64 -64
- clickhouse_driver/compression/__init__.py +32 -28
- clickhouse_driver/compression/base.py +87 -52
- clickhouse_driver/compression/lz4.py +21 -55
- clickhouse_driver/compression/lz4hc.py +9 -9
- clickhouse_driver/compression/zstd.py +20 -51
- clickhouse_driver/connection.py +825 -632
- clickhouse_driver/context.py +36 -36
- clickhouse_driver/dbapi/__init__.py +62 -62
- clickhouse_driver/dbapi/connection.py +99 -96
- clickhouse_driver/dbapi/cursor.py +370 -368
- clickhouse_driver/dbapi/errors.py +40 -40
- clickhouse_driver/dbapi/extras.py +73 -0
- clickhouse_driver/defines.py +58 -42
- clickhouse_driver/errors.py +453 -446
- clickhouse_driver/log.py +48 -44
- clickhouse_driver/numpy/block.py +8 -8
- clickhouse_driver/numpy/helpers.py +28 -25
- clickhouse_driver/numpy/result.py +123 -123
- clickhouse_driver/opentelemetry.py +43 -0
- clickhouse_driver/progress.py +44 -32
- clickhouse_driver/protocol.py +130 -105
- clickhouse_driver/queryprocessingstage.py +8 -8
- clickhouse_driver/reader.py +69 -69
- clickhouse_driver/readhelpers.py +26 -26
- clickhouse_driver/result.py +144 -144
- clickhouse_driver/settings/available.py +405 -405
- clickhouse_driver/settings/types.py +50 -50
- clickhouse_driver/settings/writer.py +34 -29
- clickhouse_driver/streams/compressed.py +88 -88
- clickhouse_driver/streams/native.py +108 -90
- clickhouse_driver/util/compat.py +39 -0
- clickhouse_driver/util/escape.py +94 -55
- clickhouse_driver/util/helpers.py +173 -57
- clickhouse_driver/varint.cp39-win_amd64.pyd +0 -0
- clickhouse_driver/writer.py +67 -67
- clickhouse_driver-0.2.10.dist-info/METADATA +215 -0
- clickhouse_driver-0.2.10.dist-info/RECORD +89 -0
- {clickhouse_driver-0.2.1.dist-info → clickhouse_driver-0.2.10.dist-info}/WHEEL +1 -1
- {clickhouse_driver-0.2.1.dist-info → clickhouse_driver-0.2.10.dist-info/licenses}/LICENSE +21 -21
- clickhouse_driver-0.2.1.dist-info/METADATA +0 -24
- clickhouse_driver-0.2.1.dist-info/RECORD +0 -80
- {clickhouse_driver-0.2.1.dist-info → clickhouse_driver-0.2.10.dist-info}/top_level.txt +0 -0
|
@@ -1,43 +1,43 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
|
|
3
|
-
from .base import NumpyColumn
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
class NumpyInt8Column(NumpyColumn):
|
|
7
|
-
dtype = np.dtype(np.int8)
|
|
8
|
-
ch_type = 'Int8'
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class NumpyUInt8Column(NumpyColumn):
|
|
12
|
-
dtype = np.dtype(np.uint8)
|
|
13
|
-
ch_type = 'UInt8'
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
class NumpyInt16Column(NumpyColumn):
|
|
17
|
-
dtype = np.dtype(np.int16)
|
|
18
|
-
ch_type = 'Int16'
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
class NumpyUInt16Column(NumpyColumn):
|
|
22
|
-
dtype = np.dtype(np.uint16)
|
|
23
|
-
ch_type = 'UInt16'
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
class NumpyInt32Column(NumpyColumn):
|
|
27
|
-
dtype = np.dtype(np.int32)
|
|
28
|
-
ch_type = 'Int32'
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
class NumpyUInt32Column(NumpyColumn):
|
|
32
|
-
dtype = np.dtype(np.uint32)
|
|
33
|
-
ch_type = 'UInt32'
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
class NumpyInt64Column(NumpyColumn):
|
|
37
|
-
dtype = np.dtype(np.int64)
|
|
38
|
-
ch_type = 'Int64'
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
class NumpyUInt64Column(NumpyColumn):
|
|
42
|
-
dtype = np.dtype(np.uint64)
|
|
43
|
-
ch_type = 'UInt64'
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
from .base import NumpyColumn
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class NumpyInt8Column(NumpyColumn):
|
|
7
|
+
dtype = np.dtype(np.int8)
|
|
8
|
+
ch_type = 'Int8'
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class NumpyUInt8Column(NumpyColumn):
|
|
12
|
+
dtype = np.dtype(np.uint8)
|
|
13
|
+
ch_type = 'UInt8'
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class NumpyInt16Column(NumpyColumn):
|
|
17
|
+
dtype = np.dtype(np.int16)
|
|
18
|
+
ch_type = 'Int16'
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class NumpyUInt16Column(NumpyColumn):
|
|
22
|
+
dtype = np.dtype(np.uint16)
|
|
23
|
+
ch_type = 'UInt16'
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class NumpyInt32Column(NumpyColumn):
|
|
27
|
+
dtype = np.dtype(np.int32)
|
|
28
|
+
ch_type = 'Int32'
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class NumpyUInt32Column(NumpyColumn):
|
|
32
|
+
dtype = np.dtype(np.uint32)
|
|
33
|
+
ch_type = 'UInt32'
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class NumpyInt64Column(NumpyColumn):
|
|
37
|
+
dtype = np.dtype(np.int64)
|
|
38
|
+
ch_type = 'Int64'
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class NumpyUInt64Column(NumpyColumn):
|
|
42
|
+
dtype = np.dtype(np.uint64)
|
|
43
|
+
ch_type = 'UInt64'
|
|
@@ -1,83 +1,96 @@
|
|
|
1
|
-
from math import log
|
|
2
|
-
|
|
3
|
-
import
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
from
|
|
7
|
-
from ...
|
|
8
|
-
from
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
#
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
serialization_type
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
1
|
+
from math import log
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
from ..lowcardinalitycolumn import LowCardinalityColumn
|
|
7
|
+
from ...reader import read_binary_uint64
|
|
8
|
+
from ...writer import write_binary_int64
|
|
9
|
+
from .intcolumn import (
|
|
10
|
+
NumpyUInt8Column, NumpyUInt16Column, NumpyUInt32Column, NumpyUInt64Column
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class NumpyLowCardinalityColumn(LowCardinalityColumn):
|
|
15
|
+
int_types = {
|
|
16
|
+
0: NumpyUInt8Column,
|
|
17
|
+
1: NumpyUInt16Column,
|
|
18
|
+
2: NumpyUInt32Column,
|
|
19
|
+
3: NumpyUInt64Column
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
def __init__(self, nested_column, **kwargs):
|
|
23
|
+
super(NumpyLowCardinalityColumn, self).__init__(nested_column,
|
|
24
|
+
**kwargs)
|
|
25
|
+
|
|
26
|
+
def _write_data(self, items, buf):
|
|
27
|
+
# Do not write anything for empty column.
|
|
28
|
+
# May happen while writing empty arrays.
|
|
29
|
+
if not len(items):
|
|
30
|
+
return
|
|
31
|
+
|
|
32
|
+
# Replace NaNs with defaults if not nullable.
|
|
33
|
+
if isinstance(items, np.ndarray) and not self.nested_column.nullable:
|
|
34
|
+
nulls = pd.isnull(items)
|
|
35
|
+
items = np.where(nulls, self.nested_column.null_value, items)
|
|
36
|
+
|
|
37
|
+
c = pd.Categorical(items)
|
|
38
|
+
|
|
39
|
+
int_type = int(log(len(c.codes), 2) / 8)
|
|
40
|
+
int_column = self.int_types[int_type](**self.init_kwargs)
|
|
41
|
+
|
|
42
|
+
serialization_type = self.serialization_type | int_type
|
|
43
|
+
|
|
44
|
+
index = c.categories
|
|
45
|
+
keys = c.codes
|
|
46
|
+
|
|
47
|
+
if self.nested_column.nullable:
|
|
48
|
+
# First element represents NULL if column is nullable.
|
|
49
|
+
index = index.insert(0, self.nested_column.null_value)
|
|
50
|
+
keys = keys + 1
|
|
51
|
+
# Prevent null map writing. Reset nested column nullable flag.
|
|
52
|
+
self.nested_column.nullable = False
|
|
53
|
+
|
|
54
|
+
write_binary_int64(serialization_type, buf)
|
|
55
|
+
write_binary_int64(len(index), buf)
|
|
56
|
+
|
|
57
|
+
self.nested_column.write_data(index.to_numpy(items.dtype), buf)
|
|
58
|
+
write_binary_int64(len(items), buf)
|
|
59
|
+
int_column.write_items(keys, buf)
|
|
60
|
+
|
|
61
|
+
def _read_data(self, n_items, buf, nulls_map=None):
|
|
62
|
+
if not n_items:
|
|
63
|
+
return tuple()
|
|
64
|
+
|
|
65
|
+
serialization_type = read_binary_uint64(buf)
|
|
66
|
+
|
|
67
|
+
# Lowest byte contains info about key type.
|
|
68
|
+
key_type = serialization_type & 0xf
|
|
69
|
+
keys_column = self.int_types[key_type](**self.init_kwargs)
|
|
70
|
+
|
|
71
|
+
nullable = self.nested_column.nullable
|
|
72
|
+
# Prevent null map reading. Reset nested column nullable flag.
|
|
73
|
+
self.nested_column.nullable = False
|
|
74
|
+
|
|
75
|
+
index_size = read_binary_uint64(buf)
|
|
76
|
+
index = self.nested_column.read_data(index_size, buf)
|
|
77
|
+
|
|
78
|
+
read_binary_uint64(buf) # number of keys
|
|
79
|
+
keys = keys_column.read_data(n_items, buf)
|
|
80
|
+
|
|
81
|
+
if nullable:
|
|
82
|
+
# Shift all codes by one ("No value" code is -1 for pandas
|
|
83
|
+
# categorical) and drop corresponding first index
|
|
84
|
+
# this is analogous to the original operation:
|
|
85
|
+
# index = (None, ) + index[1:]
|
|
86
|
+
keys = np.array(keys, dtype='int64') # deal with possible overflow
|
|
87
|
+
keys = keys - 1
|
|
88
|
+
index = index[1:]
|
|
89
|
+
return pd.Categorical.from_codes(keys, index)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def create_numpy_low_cardinality_column(spec, column_by_spec_getter,
|
|
93
|
+
column_options):
|
|
94
|
+
inner = spec[15:-1]
|
|
95
|
+
nested = column_by_spec_getter(inner)
|
|
96
|
+
return NumpyLowCardinalityColumn(nested, **column_options)
|
|
@@ -1,80 +1,58 @@
|
|
|
1
|
-
from
|
|
2
|
-
from
|
|
3
|
-
from .datecolumn import NumpyDateColumn
|
|
4
|
-
from .datetimecolumn import create_numpy_datetime_column
|
|
5
|
-
from
|
|
6
|
-
from
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
from .
|
|
13
|
-
from
|
|
14
|
-
from ..
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
)
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
elif spec.startswith('Tuple'):
|
|
61
|
-
return create_tuple_column(spec, create_column_with_options)
|
|
62
|
-
|
|
63
|
-
# elif spec.startswith('Nullable'):
|
|
64
|
-
# return create_nullable_column(spec, create_column_with_options)
|
|
65
|
-
|
|
66
|
-
elif spec.startswith('LowCardinality'):
|
|
67
|
-
return create_numpy_low_cardinality_column(spec,
|
|
68
|
-
create_column_with_options)
|
|
69
|
-
|
|
70
|
-
elif spec.startswith('SimpleAggregateFunction'):
|
|
71
|
-
return create_simple_aggregate_function_column(
|
|
72
|
-
spec, create_column_with_options)
|
|
73
|
-
|
|
74
|
-
else:
|
|
75
|
-
try:
|
|
76
|
-
cls = column_by_type[spec]
|
|
77
|
-
return cls(**column_options)
|
|
78
|
-
|
|
79
|
-
except KeyError as e:
|
|
80
|
-
raise errors.UnknownTypeError('Unknown type {}'.format(e.args[0]))
|
|
1
|
+
from ..service import aliases
|
|
2
|
+
from ... import errors
|
|
3
|
+
from .datecolumn import NumpyDateColumn
|
|
4
|
+
from .datetimecolumn import create_numpy_datetime_column
|
|
5
|
+
from .floatcolumn import NumpyFloat32Column, NumpyFloat64Column
|
|
6
|
+
from .intcolumn import (
|
|
7
|
+
NumpyInt8Column, NumpyInt16Column, NumpyInt32Column, NumpyInt64Column,
|
|
8
|
+
NumpyUInt8Column, NumpyUInt16Column, NumpyUInt32Column, NumpyUInt64Column
|
|
9
|
+
)
|
|
10
|
+
from .boolcolumn import NumpyBoolColumn
|
|
11
|
+
from .lowcardinalitycolumn import create_numpy_low_cardinality_column
|
|
12
|
+
from .stringcolumn import create_string_column
|
|
13
|
+
from .tuplecolumn import create_tuple_column
|
|
14
|
+
from ..nullablecolumn import create_nullable_column
|
|
15
|
+
|
|
16
|
+
column_by_type = {c.ch_type: c for c in [
|
|
17
|
+
NumpyDateColumn,
|
|
18
|
+
NumpyFloat32Column, NumpyFloat64Column,
|
|
19
|
+
NumpyInt8Column, NumpyInt16Column, NumpyInt32Column, NumpyInt64Column,
|
|
20
|
+
NumpyUInt8Column, NumpyUInt16Column, NumpyUInt32Column, NumpyUInt64Column,
|
|
21
|
+
NumpyBoolColumn
|
|
22
|
+
]}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def get_numpy_column_by_spec(spec, column_options):
|
|
26
|
+
def create_column_with_options(x):
|
|
27
|
+
return get_numpy_column_by_spec(x, column_options)
|
|
28
|
+
|
|
29
|
+
if spec == 'String' or spec.startswith('FixedString'):
|
|
30
|
+
return create_string_column(spec, column_options)
|
|
31
|
+
|
|
32
|
+
elif spec.startswith('DateTime'):
|
|
33
|
+
return create_numpy_datetime_column(spec, column_options)
|
|
34
|
+
|
|
35
|
+
elif spec.startswith('Tuple'):
|
|
36
|
+
return create_tuple_column(
|
|
37
|
+
spec, create_column_with_options, column_options
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
elif spec.startswith('Nullable'):
|
|
41
|
+
return create_nullable_column(spec, create_column_with_options)
|
|
42
|
+
|
|
43
|
+
elif spec.startswith('LowCardinality'):
|
|
44
|
+
return create_numpy_low_cardinality_column(
|
|
45
|
+
spec, create_column_with_options, column_options
|
|
46
|
+
)
|
|
47
|
+
else:
|
|
48
|
+
for alias, primitive in aliases:
|
|
49
|
+
if spec.startswith(alias):
|
|
50
|
+
return create_column_with_options(
|
|
51
|
+
primitive + spec[len(alias):]
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
if spec in column_by_type:
|
|
55
|
+
cls = column_by_type[spec]
|
|
56
|
+
return cls(**column_options)
|
|
57
|
+
|
|
58
|
+
raise errors.UnknownTypeError('Unknown type {}'.format(spec))
|
|
@@ -1,76 +1,78 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
|
|
3
|
-
from ... import defines
|
|
4
|
-
from .base import NumpyColumn
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
class NumpyStringColumn(NumpyColumn):
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
default_encoding = defines.STRINGS_ENCODING
|
|
11
|
-
|
|
12
|
-
def __init__(self, encoding=default_encoding, **kwargs):
|
|
13
|
-
self.encoding = encoding
|
|
14
|
-
super(NumpyStringColumn, self).__init__(**kwargs)
|
|
15
|
-
|
|
16
|
-
def read_items(self, n_items, buf):
|
|
17
|
-
return np.array(
|
|
18
|
-
buf.read_strings(n_items, encoding=self.encoding), dtype=self.dtype
|
|
19
|
-
)
|
|
20
|
-
|
|
21
|
-
def write_items(self, items, buf):
|
|
22
|
-
return buf.write_strings(items.tolist(), encoding=self.encoding)
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
class NumpyByteStringColumn(NumpyColumn):
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
from ... import defines
|
|
4
|
+
from .base import NumpyColumn
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class NumpyStringColumn(NumpyColumn):
|
|
8
|
+
null_value = ''
|
|
9
|
+
|
|
10
|
+
default_encoding = defines.STRINGS_ENCODING
|
|
11
|
+
|
|
12
|
+
def __init__(self, encoding=default_encoding, **kwargs):
|
|
13
|
+
self.encoding = encoding
|
|
14
|
+
super(NumpyStringColumn, self).__init__(**kwargs)
|
|
15
|
+
|
|
16
|
+
def read_items(self, n_items, buf):
|
|
17
|
+
return np.array(
|
|
18
|
+
buf.read_strings(n_items, encoding=self.encoding), dtype=self.dtype
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
def write_items(self, items, buf):
|
|
22
|
+
return buf.write_strings(items.tolist(), encoding=self.encoding)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class NumpyByteStringColumn(NumpyColumn):
|
|
26
|
+
null_value = b''
|
|
27
|
+
|
|
28
|
+
def read_items(self, n_items, buf):
|
|
29
|
+
return np.array(buf.read_strings(n_items), dtype=self.dtype)
|
|
30
|
+
|
|
31
|
+
def write_items(self, items, buf):
|
|
32
|
+
return buf.write_strings(items.tolist())
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class NumpyFixedString(NumpyStringColumn):
|
|
36
|
+
def __init__(self, length, **kwargs):
|
|
37
|
+
self.length = length
|
|
38
|
+
super(NumpyFixedString, self).__init__(**kwargs)
|
|
39
|
+
|
|
40
|
+
def read_items(self, n_items, buf):
|
|
41
|
+
return np.array(buf.read_fixed_strings(
|
|
42
|
+
n_items, self.length, encoding=self.encoding
|
|
43
|
+
), dtype=self.dtype)
|
|
44
|
+
|
|
45
|
+
def write_items(self, items, buf):
|
|
46
|
+
return buf.write_fixed_strings(
|
|
47
|
+
items.tolist(), self.length, encoding=self.encoding
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class NumpyByteFixedString(NumpyByteStringColumn):
|
|
52
|
+
def __init__(self, length, **kwargs):
|
|
53
|
+
self.length = length
|
|
54
|
+
super(NumpyByteFixedString, self).__init__(**kwargs)
|
|
55
|
+
|
|
56
|
+
def read_items(self, n_items, buf):
|
|
57
|
+
return np.array(
|
|
58
|
+
buf.read_fixed_strings(n_items, self.length), dtype=self.dtype
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
def write_items(self, items, buf):
|
|
62
|
+
return buf.write_fixed_strings(items.tolist(), self.length)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def create_string_column(spec, column_options):
|
|
66
|
+
client_settings = column_options['context'].client_settings
|
|
67
|
+
strings_as_bytes = client_settings['strings_as_bytes']
|
|
68
|
+
encoding = client_settings.get(
|
|
69
|
+
'strings_encoding', NumpyStringColumn.default_encoding
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
if spec == 'String':
|
|
73
|
+
cls = NumpyByteStringColumn if strings_as_bytes else NumpyStringColumn
|
|
74
|
+
return cls(encoding=encoding, **column_options)
|
|
75
|
+
else:
|
|
76
|
+
length = int(spec[12:-1])
|
|
77
|
+
cls = NumpyByteFixedString if strings_as_bytes else NumpyFixedString
|
|
78
|
+
return cls(length, encoding=encoding, **column_options)
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
from .base import NumpyColumn
|
|
4
|
+
from ..util import get_inner_spec, get_inner_columns
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class TupleColumn(NumpyColumn):
|
|
8
|
+
def __init__(self, nested_columns, **kwargs):
|
|
9
|
+
self.nested_columns = nested_columns
|
|
10
|
+
super(TupleColumn, self).__init__(**kwargs)
|
|
11
|
+
|
|
12
|
+
def write_data(self, items, buf):
|
|
13
|
+
names = items.dtype.names
|
|
14
|
+
for i, (x, name) in enumerate(zip(self.nested_columns, names)):
|
|
15
|
+
x.write_data(items[name], buf)
|
|
16
|
+
|
|
17
|
+
def write_items(self, items, buf):
|
|
18
|
+
return self.write_data(items, buf)
|
|
19
|
+
|
|
20
|
+
def read_data(self, n_items, buf):
|
|
21
|
+
data = [x.read_data(n_items, buf) for x in self.nested_columns]
|
|
22
|
+
dtype = [('f{}'.format(i), x.dtype) for i, x in enumerate(data)]
|
|
23
|
+
rv = np.empty(n_items, dtype=dtype)
|
|
24
|
+
for i, x in enumerate(data):
|
|
25
|
+
rv['f{}'.format(i)] = x
|
|
26
|
+
return rv
|
|
27
|
+
|
|
28
|
+
def read_items(self, n_items, buf):
|
|
29
|
+
return self.read_data(n_items, buf)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def create_tuple_column(spec, column_by_spec_getter, column_options):
|
|
33
|
+
inner_spec = get_inner_spec('Tuple', spec)
|
|
34
|
+
columns = get_inner_columns(inner_spec)
|
|
35
|
+
|
|
36
|
+
return TupleColumn([column_by_spec_getter(x) for x in columns],
|
|
37
|
+
**column_options)
|