singlestoredb 0.4.0__py3-none-any.whl → 1.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- singlestoredb/__init__.py +33 -1
- singlestoredb/alchemy/__init__.py +90 -0
- singlestoredb/auth.py +5 -1
- singlestoredb/config.py +116 -14
- singlestoredb/connection.py +483 -516
- singlestoredb/converters.py +238 -135
- singlestoredb/exceptions.py +30 -2
- singlestoredb/functions/__init__.py +1 -0
- singlestoredb/functions/decorator.py +142 -0
- singlestoredb/functions/dtypes.py +1639 -0
- singlestoredb/functions/ext/__init__.py +2 -0
- singlestoredb/functions/ext/arrow.py +375 -0
- singlestoredb/functions/ext/asgi.py +661 -0
- singlestoredb/functions/ext/json.py +427 -0
- singlestoredb/functions/ext/mmap.py +306 -0
- singlestoredb/functions/ext/rowdat_1.py +744 -0
- singlestoredb/functions/signature.py +673 -0
- singlestoredb/fusion/__init__.py +11 -0
- singlestoredb/fusion/graphql.py +213 -0
- singlestoredb/fusion/handler.py +621 -0
- singlestoredb/fusion/handlers/stage.py +257 -0
- singlestoredb/fusion/handlers/utils.py +162 -0
- singlestoredb/fusion/handlers/workspace.py +412 -0
- singlestoredb/fusion/registry.py +164 -0
- singlestoredb/fusion/result.py +399 -0
- singlestoredb/http/__init__.py +27 -0
- singlestoredb/{http.py → http/connection.py} +555 -154
- singlestoredb/management/__init__.py +3 -0
- singlestoredb/management/billing_usage.py +148 -0
- singlestoredb/management/cluster.py +14 -6
- singlestoredb/management/manager.py +100 -38
- singlestoredb/management/organization.py +188 -0
- singlestoredb/management/region.py +5 -5
- singlestoredb/management/utils.py +281 -2
- singlestoredb/management/workspace.py +1344 -49
- singlestoredb/{clients/pymysqlsv → mysql}/__init__.py +16 -21
- singlestoredb/{clients/pymysqlsv → mysql}/_auth.py +39 -8
- singlestoredb/{clients/pymysqlsv → mysql}/charset.py +26 -23
- singlestoredb/{clients/pymysqlsv/connections.py → mysql/connection.py} +532 -165
- singlestoredb/{clients/pymysqlsv → mysql}/constants/CLIENT.py +0 -1
- singlestoredb/{clients/pymysqlsv → mysql}/constants/COMMAND.py +0 -1
- singlestoredb/{clients/pymysqlsv → mysql}/constants/CR.py +0 -2
- singlestoredb/{clients/pymysqlsv → mysql}/constants/ER.py +0 -1
- singlestoredb/{clients/pymysqlsv → mysql}/constants/FIELD_TYPE.py +1 -1
- singlestoredb/{clients/pymysqlsv → mysql}/constants/FLAG.py +0 -1
- singlestoredb/{clients/pymysqlsv → mysql}/constants/SERVER_STATUS.py +0 -1
- singlestoredb/mysql/converters.py +271 -0
- singlestoredb/{clients/pymysqlsv → mysql}/cursors.py +228 -112
- singlestoredb/mysql/err.py +92 -0
- singlestoredb/{clients/pymysqlsv → mysql}/optionfile.py +5 -4
- singlestoredb/{clients/pymysqlsv → mysql}/protocol.py +49 -20
- singlestoredb/mysql/tests/__init__.py +19 -0
- singlestoredb/{clients/pymysqlsv → mysql}/tests/base.py +32 -12
- singlestoredb/mysql/tests/conftest.py +37 -0
- singlestoredb/{clients/pymysqlsv → mysql}/tests/test_DictCursor.py +11 -7
- singlestoredb/{clients/pymysqlsv → mysql}/tests/test_SSCursor.py +17 -12
- singlestoredb/{clients/pymysqlsv → mysql}/tests/test_basic.py +32 -24
- singlestoredb/{clients/pymysqlsv → mysql}/tests/test_connection.py +130 -119
- singlestoredb/{clients/pymysqlsv → mysql}/tests/test_converters.py +9 -7
- singlestoredb/mysql/tests/test_cursor.py +141 -0
- singlestoredb/{clients/pymysqlsv → mysql}/tests/test_err.py +3 -2
- singlestoredb/{clients/pymysqlsv → mysql}/tests/test_issues.py +35 -27
- singlestoredb/{clients/pymysqlsv → mysql}/tests/test_load_local.py +13 -11
- singlestoredb/{clients/pymysqlsv → mysql}/tests/test_nextset.py +7 -3
- singlestoredb/{clients/pymysqlsv → mysql}/tests/test_optionfile.py +2 -1
- singlestoredb/{clients/pymysqlsv → mysql}/tests/thirdparty/__init__.py +1 -1
- singlestoredb/mysql/tests/thirdparty/test_MySQLdb/__init__.py +9 -0
- singlestoredb/{clients/pymysqlsv → mysql}/tests/thirdparty/test_MySQLdb/capabilities.py +19 -17
- singlestoredb/{clients/pymysqlsv → mysql}/tests/thirdparty/test_MySQLdb/dbapi20.py +31 -22
- singlestoredb/{clients/pymysqlsv → mysql}/tests/thirdparty/test_MySQLdb/test_MySQLdb_capabilities.py +3 -4
- singlestoredb/{clients/pymysqlsv → mysql}/tests/thirdparty/test_MySQLdb/test_MySQLdb_dbapi20.py +24 -20
- singlestoredb/{clients/pymysqlsv → mysql}/tests/thirdparty/test_MySQLdb/test_MySQLdb_nonstandard.py +4 -4
- singlestoredb/{clients/pymysqlsv → mysql}/times.py +3 -4
- singlestoredb/pytest.py +283 -0
- singlestoredb/tests/empty.sql +0 -0
- singlestoredb/tests/ext_funcs/__init__.py +385 -0
- singlestoredb/tests/test.sql +210 -0
- singlestoredb/tests/test2.sql +1 -0
- singlestoredb/tests/test_basics.py +482 -115
- singlestoredb/tests/test_config.py +13 -13
- singlestoredb/tests/test_connection.py +241 -305
- singlestoredb/tests/test_dbapi.py +27 -0
- singlestoredb/tests/test_ext_func.py +1193 -0
- singlestoredb/tests/test_ext_func_data.py +1101 -0
- singlestoredb/tests/test_fusion.py +465 -0
- singlestoredb/tests/test_http.py +32 -26
- singlestoredb/tests/test_management.py +588 -8
- singlestoredb/tests/test_plugin.py +33 -0
- singlestoredb/tests/test_results.py +11 -12
- singlestoredb/tests/test_udf.py +687 -0
- singlestoredb/tests/utils.py +3 -2
- singlestoredb/utils/config.py +58 -0
- singlestoredb/utils/debug.py +13 -0
- singlestoredb/utils/mogrify.py +151 -0
- singlestoredb/utils/results.py +4 -1
- singlestoredb-1.0.4.dist-info/METADATA +139 -0
- singlestoredb-1.0.4.dist-info/RECORD +112 -0
- {singlestoredb-0.4.0.dist-info → singlestoredb-1.0.4.dist-info}/WHEEL +1 -1
- singlestoredb-1.0.4.dist-info/entry_points.txt +2 -0
- singlestoredb/clients/pymysqlsv/converters.py +0 -365
- singlestoredb/clients/pymysqlsv/err.py +0 -144
- singlestoredb/clients/pymysqlsv/tests/__init__.py +0 -19
- singlestoredb/clients/pymysqlsv/tests/test_cursor.py +0 -133
- singlestoredb/clients/pymysqlsv/tests/thirdparty/test_MySQLdb/__init__.py +0 -9
- singlestoredb/drivers/__init__.py +0 -45
- singlestoredb/drivers/base.py +0 -198
- singlestoredb/drivers/cymysql.py +0 -38
- singlestoredb/drivers/http.py +0 -47
- singlestoredb/drivers/mariadb.py +0 -40
- singlestoredb/drivers/mysqlconnector.py +0 -49
- singlestoredb/drivers/mysqldb.py +0 -60
- singlestoredb/drivers/pymysql.py +0 -37
- singlestoredb/drivers/pymysqlsv.py +0 -35
- singlestoredb/drivers/pyodbc.py +0 -65
- singlestoredb-0.4.0.dist-info/METADATA +0 -111
- singlestoredb-0.4.0.dist-info/RECORD +0 -86
- /singlestoredb/{clients → fusion/handlers}/__init__.py +0 -0
- /singlestoredb/{clients/pymysqlsv → mysql}/constants/__init__.py +0 -0
- {singlestoredb-0.4.0.dist-info → singlestoredb-1.0.4.dist-info}/LICENSE +0 -0
- {singlestoredb-0.4.0.dist-info → singlestoredb-1.0.4.dist-info}/top_level.txt +0 -0
singlestoredb/functions/ext/rowdat_1.py (new file)

@@ -0,0 +1,744 @@
#!/usr/bin/env python3
import struct
import warnings
from io import BytesIO
from typing import Any
from typing import List
from typing import Optional
from typing import Sequence
from typing import Tuple

from ...config import get_option
from ..dtypes import DEFAULT_VALUES
from ..dtypes import NUMPY_TYPE_MAP
from ..dtypes import PANDAS_TYPE_MAP
from ..dtypes import POLARS_TYPE_MAP
from ..dtypes import PYARROW_TYPE_MAP

try:
    import numpy as np
    has_numpy = True
except ImportError:
    has_numpy = False

try:
    import polars as pl
    has_polars = True
except ImportError:
    has_polars = False

try:
    import pandas as pd
    has_pandas = True
except ImportError:
    has_pandas = False

try:
    import pyarrow as pa
    import pyarrow.compute as pc
    has_pyarrow = True
except ImportError:
    has_pyarrow = False

from ...mysql.constants import FIELD_TYPE as ft

has_accel = False
try:
    if not get_option('pure_python'):
        import _singlestoredb_accel
        has_accel = True
except ImportError:
    warnings.warn(
        'could not load accelerated data reader for external functions; '
        'using pure Python implementation.',
        RuntimeWarning,
    )
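# Sign convention used throughout the tables below: a positive key is the
# MySQL FIELD_TYPE code for the signed (numeric) or text (string) variant,
# and its negation denotes the unsigned numeric or binary string variant of
# the same type.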
numeric_formats = {
    ft.TINY: '<b',
    -ft.TINY: '<B',
    ft.SHORT: '<h',
    -ft.SHORT: '<H',
    ft.INT24: '<i',
    -ft.INT24: '<I',
    ft.LONG: '<i',
    -ft.LONG: '<I',
    ft.LONGLONG: '<q',
    -ft.LONGLONG: '<Q',
    ft.FLOAT: '<f',
    ft.DOUBLE: '<d',
}
numeric_sizes = {
    ft.TINY: 1,
    -ft.TINY: 1,
    ft.SHORT: 2,
    -ft.SHORT: 2,
    ft.INT24: 4,
    -ft.INT24: 4,
    ft.LONG: 4,
    -ft.LONG: 4,
    ft.LONGLONG: 8,
    -ft.LONGLONG: 8,
    ft.FLOAT: 4,
    ft.DOUBLE: 8,
}
medium_int_types = set([ft.INT24, -ft.INT24])
int_types = set([
    ft.TINY, -ft.TINY, ft.SHORT, -ft.SHORT, ft.INT24, -ft.INT24,
    ft.LONG, -ft.LONG, ft.LONGLONG, -ft.LONGLONG,
])
string_types = set([15, 245, 247, 248, 249, 250, 251, 252, 253, 254])
binary_types = set([-x for x in string_types])

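# Wire format implemented by the readers and writers below: each record is an
# int64 little-endian row ID followed by one field per column. A field is a
# one-byte NULL flag (b'\x01' means NULL) and then the value: numerics are
# fixed-width per numeric_sizes; strings and binary values are an int64
# little-endian length followed by that many bytes.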
def _load(
    colspec: List[Tuple[str, int]],
    data: bytes,
) -> Tuple[List[int], List[Any]]:
    '''
    Convert bytes in rowdat_1 format into rows of data.

    Parameters
    ----------
    colspec : List[Tuple[str, int]]
        A list of column names and data types
    data : bytes
        The data in rowdat_1 format

    Returns
    -------
    Tuple[List[int], List[Any]]

    '''
    data_len = len(data)
    data_io = BytesIO(data)
    row_ids = []
    rows = []
    val = None
    while data_io.tell() < data_len:
        row_ids.append(struct.unpack('<q', data_io.read(8))[0])
        row = []
        for _, ctype in colspec:
            is_null = data_io.read(1) == b'\x01'
            if ctype in numeric_formats:
                val = struct.unpack(
                    numeric_formats[ctype],
                    data_io.read(numeric_sizes[ctype]),
                )[0]
            elif ctype in string_types:
                slen = struct.unpack('<q', data_io.read(8))[0]
                val = data_io.read(slen).decode('utf-8')
            elif ctype in binary_types:
                slen = struct.unpack('<q', data_io.read(8))[0]
                val = data_io.read(slen)
            else:
                raise TypeError(f'unrecognized column type: {ctype}')
            row.append(None if is_null else val)
        rows.append(row)
    return row_ids, rows

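# Columnar variant of _load: returns one (values, null_mask) pair per column.
# NULL fields still carry a value on the wire, so the placeholder from
# DEFAULT_VALUES lands in the values list and the mask records which entries
# were NULL.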
def _load_vectors(
    colspec: List[Tuple[str, int]],
    data: bytes,
) -> Tuple[List[int], List[Tuple[Sequence[Any], Optional[Sequence[Any]]]]]:
    '''
    Convert bytes in rowdat_1 format into columns of data.

    Parameters
    ----------
    colspec : List[Tuple[str, int]]
        A list of column names and data types
    data : bytes
        The data in rowdat_1 format

    Returns
    -------
    Tuple[List[int], List[Tuple[Any, Any]]]

    '''
    data_len = len(data)
    data_io = BytesIO(data)
    row_ids = []
    cols: List[Any] = [[] for _ in colspec]
    masks: List[Any] = [[] for _ in colspec]
    val = None
    while data_io.tell() < data_len:
        row_ids.append(struct.unpack('<q', data_io.read(8))[0])
        for i, (_, ctype) in enumerate(colspec):
            default = DEFAULT_VALUES[ctype]
            is_null = data_io.read(1) == b'\x01'
            if ctype in numeric_formats:
                val = struct.unpack(
                    numeric_formats[ctype],
                    data_io.read(numeric_sizes[ctype]),
                )[0]
            elif ctype in string_types:
                slen = struct.unpack('<q', data_io.read(8))[0]
                val = data_io.read(slen).decode('utf-8')
            elif ctype in binary_types:
                slen = struct.unpack('<q', data_io.read(8))[0]
                val = data_io.read(slen)
            else:
                raise TypeError(f'unrecognized column type: {ctype}')
            cols[i].append(default if is_null else val)
            masks[i].append(True if is_null else False)
    return row_ids, [(x, y) for x, y in zip(cols, masks)]


def _load_pandas(
    colspec: List[Tuple[str, int]],
    data: bytes,
) -> Tuple[
    'pd.Series[np.int64]',
    List[Tuple['pd.Series[Any]', 'pd.Series[np.bool_]']],
]:
    '''
    Convert bytes in rowdat_1 format into rows of data.

    Parameters
    ----------
    colspec : List[Tuple[str, int]]
        A list of column names and data types
    data : bytes
        The data in rowdat_1 format

    Returns
    -------
    Tuple[pd.Series[int], List[Tuple[pd.Series[Any], pd.Series[bool]]]]

    '''
    if not has_pandas or not has_numpy:
        raise RuntimeError('pandas must be installed for this operation')

    row_ids, cols = _load_vectors(colspec, data)
    index = pd.Series(row_ids)
    return pd.Series(row_ids, dtype=np.int64), [
        (
            pd.Series(data, index=index, name=name, dtype=PANDAS_TYPE_MAP[dtype]),
            pd.Series(mask, index=index, dtype=np.bool_),
        )
        for (data, mask), (name, dtype) in zip(cols, colspec)
    ]


def _load_polars(
    colspec: List[Tuple[str, int]],
    data: bytes,
) -> Tuple[
    'pl.Series[pl.Int64]',
    List[Tuple['pl.Series[Any]', 'pl.Series[pl.Boolean]']],
]:
    '''
    Convert bytes in rowdat_1 format into rows of data.

    Parameters
    ----------
    colspec : List[Tuple[str, int]]
        A list of column names and data types
    data : bytes
        The data in rowdat_1 format

    Returns
    -------
    Tuple[polars.Series[int], List[polars.Series[Any]]]

    '''
    if not has_polars:
        raise RuntimeError('polars must be installed for this operation')

    row_ids, cols = _load_vectors(colspec, data)
    return pl.Series(None, row_ids, dtype=pl.Int64), \
        [
            (
                pl.Series(name=name, values=data, dtype=POLARS_TYPE_MAP[dtype]),
                pl.Series(values=mask, dtype=pl.Boolean),
            )
            for (data, mask), (name, dtype) in zip(cols, colspec)
        ]


def _load_numpy(
    colspec: List[Tuple[str, int]],
    data: bytes,
) -> Tuple[
    'np.typing.NDArray[np.int64]',
    List[Tuple['np.typing.NDArray[Any]', 'np.typing.NDArray[np.bool_]']],
]:
    '''
    Convert bytes in rowdat_1 format into rows of data.

    Parameters
    ----------
    colspec : List[Tuple[str, int]]
        A list of column names and data types
    data : bytes
        The data in rowdat_1 format

    Returns
    -------
    Tuple[np.ndarray[int], List[np.ndarray[Any]]]

    '''
    if not has_numpy:
        raise RuntimeError('numpy must be installed for this operation')

    row_ids, cols = _load_vectors(colspec, data)
    return np.asarray(row_ids, dtype=np.int64), \
        [
            (
                np.asarray(data, dtype=NUMPY_TYPE_MAP[dtype]),
                np.asarray(mask, dtype=np.bool_),
            )
            for (data, mask), (name, dtype) in zip(cols, colspec)
        ]


def _load_arrow(
    colspec: List[Tuple[str, int]],
    data: bytes,
) -> Tuple[
    'pa.Array[pa.int64()]',
    List[Tuple['pa.Array[Any]', 'pa.Array[pa.bool_()]']],
]:
    '''
    Convert bytes in rowdat_1 format into rows of data.

    Parameters
    ----------
    colspec : List[Tuple[str, int]]
        A list of column names and data types
    data : bytes
        The data in rowdat_1 format

    Returns
    -------
    Tuple[pyarrow.Array[int], List[pyarrow.Array[Any]]]

    '''
    if not has_pyarrow:
        raise RuntimeError('pyarrow must be installed for this operation')

    row_ids, cols = _load_vectors(colspec, data)
    return pa.array(row_ids, type=pa.int64()), \
        [
            (
                pa.array(
                    data, type=PYARROW_TYPE_MAP[dtype],
                    mask=pa.array(mask, type=pa.bool_()),
                ),
                pa.array(mask, type=pa.bool_()),
            )
            for (data, mask), (name, dtype) in zip(cols, colspec)
        ]

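# Serializers. A NULL value is written as a set NULL flag followed by the
# type's default value, so every field keeps a parseable width. MEDIUMINT
# (INT24) has no struct code of its own; it is packed as a 4-byte integer
# after an explicit range check.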
def _dump(
    returns: List[int],
    row_ids: List[int],
    rows: List[List[Any]],
) -> bytes:
    '''
    Convert a list of lists of data into rowdat_1 format.

    Parameters
    ----------
    returns : List[int]
        The returned data types
    row_ids : List[int]
        The row IDs
    rows : List[List[Any]]
        The rows of data to serialize

    Returns
    -------
    bytes

    '''
    out = BytesIO()

    if len(rows) == 0 or len(row_ids) == 0:
        return out.getbuffer()

    for row_id, *values in zip(row_ids, *list(zip(*rows))):
        out.write(struct.pack('<q', row_id))
        for rtype, value in zip(returns, values):
            out.write(b'\x01' if value is None else b'\x00')
            default = DEFAULT_VALUES[rtype]
            if rtype in numeric_formats:
                if value is None:
                    out.write(struct.pack(numeric_formats[rtype], default))
                else:
                    if rtype in int_types:
                        if rtype == ft.INT24:
                            if int(value) > 8388607 or int(value) < -8388608:
                                raise ValueError(
                                    'value is outside range of MEDIUMINT',
                                )
                        elif rtype == -ft.INT24:
                            if int(value) > 16777215 or int(value) < 0:
                                raise ValueError(
                                    'value is outside range of UNSIGNED MEDIUMINT',
                                )
                        out.write(struct.pack(numeric_formats[rtype], int(value)))
                    else:
                        out.write(struct.pack(numeric_formats[rtype], float(value)))
            elif rtype in string_types:
                if value is None:
                    out.write(struct.pack('<q', 0))
                else:
                    sval = value.encode('utf-8')
                    out.write(struct.pack('<q', len(sval)))
                    out.write(sval)
            elif rtype in binary_types:
                if value is None:
                    out.write(struct.pack('<q', 0))
                else:
                    out.write(struct.pack('<q', len(value)))
                    out.write(value)
            else:
                raise TypeError(f'unrecognized column type: {rtype}')

    return out.getbuffer()


def _dump_vectors(
    returns: List[int],
    row_ids: List[int],
    cols: List[Tuple[Sequence[Any], Optional[Sequence[Any]]]],
) -> bytes:
    '''
    Convert a list of columns of data into rowdat_1 format.

    Parameters
    ----------
    returns : List[int]
        The returned data types
    row_ids : List[int]
        The row IDs
    cols : List[Tuple[Any, Any]]
        The columns of data and masks to serialize

    Returns
    -------
    bytes

    '''
    out = BytesIO()

    if len(cols) == 0 or len(row_ids) == 0:
        return out.getbuffer()

    for j, row_id in enumerate(row_ids):

        out.write(struct.pack('<q', row_id))

        for i, rtype in enumerate(returns):
            value = cols[i][0][j]
            if cols[i][1] is not None:
                is_null = cols[i][1][j]  # type: ignore
            else:
                is_null = False

            out.write(b'\x01' if is_null or value is None else b'\x00')
            default = DEFAULT_VALUES[rtype]
            try:
                if rtype in numeric_formats:
                    if value is None:
                        out.write(struct.pack(numeric_formats[rtype], default))
                    else:
                        if rtype in int_types:
                            if rtype == ft.INT24:
                                if int(value) > 8388607 or int(value) < -8388608:
                                    raise ValueError(
                                        'value is outside range of MEDIUMINT',
                                    )
                            elif rtype == -ft.INT24:
                                if int(value) > 16777215 or int(value) < 0:
                                    raise ValueError(
                                        'value is outside range of UNSIGNED MEDIUMINT',
                                    )
                            out.write(struct.pack(numeric_formats[rtype], int(value)))
                        else:
                            out.write(struct.pack(numeric_formats[rtype], float(value)))
                elif rtype in string_types:
                    if value is None:
                        out.write(struct.pack('<q', 0))
                    else:
                        sval = value.encode('utf-8')
                        out.write(struct.pack('<q', len(sval)))
                        out.write(sval)
                elif rtype in binary_types:
                    if value is None:
                        out.write(struct.pack('<q', 0))
                    else:
                        out.write(struct.pack('<q', len(value)))
                        out.write(value)
                else:
                    raise TypeError(f'unrecognized column type: {rtype}')

            except struct.error as exc:
                raise ValueError(str(exc))

    return out.getbuffer()


def _dump_arrow(
    returns: List[int],
    row_ids: 'pa.Array[int]',
    cols: List[Tuple['pa.Array[Any]', 'pa.Array[bool]']],
) -> bytes:
    if not has_pyarrow:
        raise RuntimeError('pyarrow must be installed for this operation')

    return _dump_vectors(
        returns,
        row_ids.tolist(),
        [(x.tolist(), y.tolist() if y is not None else None) for x, y in cols],
    )


def _dump_numpy(
    returns: List[int],
    row_ids: 'np.typing.NDArray[np.int64]',
    cols: List[Tuple['np.typing.NDArray[Any]', 'np.typing.NDArray[np.bool_]']],
) -> bytes:
    if not has_numpy:
        raise RuntimeError('numpy must be installed for this operation')

    return _dump_vectors(
        returns,
        row_ids.tolist(),
        [(x.tolist(), y.tolist() if y is not None else None) for x, y in cols],
    )


def _dump_pandas(
    returns: List[int],
    row_ids: 'pd.Series[np.int64]',
    cols: List[Tuple['pd.Series[Any]', 'pd.Series[np.bool_]']],
) -> bytes:
    if not has_pandas or not has_numpy:
        raise RuntimeError('pandas must be installed for this operation')

    return _dump_vectors(
        returns,
        row_ids.to_list(),
        [(x.to_list(), y.to_list() if y is not None else None) for x, y in cols],
    )


def _dump_polars(
    returns: List[int],
    row_ids: 'pl.Series[pl.Int64]',
    cols: List[Tuple['pl.Series[Any]', 'pl.Series[pl.Boolean]']],
) -> bytes:
    if not has_polars:
        raise RuntimeError('polars must be installed for this operation')

    return _dump_vectors(
        returns,
        row_ids.to_list(),
        [(x.to_list(), y.to_list() if y is not None else None) for x, y in cols],
    )


def _load_numpy_accel(
    colspec: List[Tuple[str, int]],
    data: bytes,
) -> Tuple[
    'np.typing.NDArray[np.int64]',
    List[Tuple['np.typing.NDArray[Any]', 'np.typing.NDArray[np.bool_]']],
]:
    if not has_numpy:
        raise RuntimeError('numpy must be installed for this operation')
    if not has_accel:
        raise RuntimeError('could not load SingleStoreDB extension')

    return _singlestoredb_accel.load_rowdat_1_numpy(colspec, data)


def _dump_numpy_accel(
    returns: List[int],
    row_ids: 'np.typing.NDArray[np.int64]',
    cols: List[Tuple['np.typing.NDArray[Any]', 'np.typing.NDArray[np.bool_]']],
) -> bytes:
    if not has_numpy:
        raise RuntimeError('numpy must be installed for this operation')
    if not has_accel:
        raise RuntimeError('could not load SingleStoreDB extension')

    return _singlestoredb_accel.dump_rowdat_1_numpy(returns, row_ids, cols)


def _load_pandas_accel(
    colspec: List[Tuple[str, int]],
    data: bytes,
) -> Tuple[
    'pd.Series[np.int64]',
    List[Tuple['pd.Series[Any]', 'pd.Series[np.bool_]']],
]:
    if not has_pandas or not has_numpy:
        raise RuntimeError('pandas must be installed for this operation')
    if not has_accel:
        raise RuntimeError('could not load SingleStoreDB extension')

    numpy_ids, numpy_cols = _singlestoredb_accel.load_rowdat_1_numpy(colspec, data)
    cols = [
        (
            pd.Series(data, name=name, dtype=PANDAS_TYPE_MAP[dtype]),
            pd.Series(mask, dtype=np.bool_),
        )
        for (name, dtype), (data, mask) in zip(colspec, numpy_cols)
    ]
    return pd.Series(numpy_ids, dtype=np.int64), cols


def _dump_pandas_accel(
    returns: List[int],
    row_ids: 'pd.Series[np.int64]',
    cols: List[Tuple['pd.Series[Any]', 'pd.Series[np.bool_]']],
) -> bytes:
    if not has_pandas or not has_numpy:
        raise RuntimeError('pandas must be installed for this operation')
    if not has_accel:
        raise RuntimeError('could not load SingleStoreDB extension')

    numpy_ids = row_ids.to_numpy()
    numpy_cols = [
        (
            data.to_numpy(),
            mask.to_numpy() if mask is not None else None,
        )
        for data, mask in cols
    ]
    return _singlestoredb_accel.dump_rowdat_1_numpy(returns, numpy_ids, numpy_cols)


def _load_polars_accel(
    colspec: List[Tuple[str, int]],
    data: bytes,
) -> Tuple[
    'pl.Series[pl.Int64]',
    List[Tuple['pl.Series[Any]', 'pl.Series[pl.Boolean]']],
]:
    if not has_polars:
        raise RuntimeError('polars must be installed for this operation')
    if not has_accel:
        raise RuntimeError('could not load SingleStoreDB extension')

    numpy_ids, numpy_cols = _singlestoredb_accel.load_rowdat_1_numpy(colspec, data)
    cols = [
        (
            pl.Series(
                name=name, values=data.tolist()
                if dtype in string_types or dtype in binary_types else data,
                dtype=POLARS_TYPE_MAP[dtype],
            ),
            pl.Series(values=mask, dtype=pl.Boolean),
        )
        for (name, dtype), (data, mask) in zip(colspec, numpy_cols)
    ]
    return pl.Series(values=numpy_ids, dtype=pl.Int64), cols


def _dump_polars_accel(
    returns: List[int],
    row_ids: 'pl.Series[pl.Int64]',
    cols: List[Tuple['pl.Series[Any]', 'pl.Series[pl.Boolean]']],
) -> bytes:
    if not has_polars:
        raise RuntimeError('polars must be installed for this operation')
    if not has_accel:
        raise RuntimeError('could not load SingleStoreDB extension')

    numpy_ids = row_ids.to_numpy()
    numpy_cols = [
        (
            data.to_numpy(),
            mask.to_numpy() if mask is not None else None,
        )
        for data, mask in cols
    ]
    return _singlestoredb_accel.dump_rowdat_1_numpy(returns, numpy_ids, numpy_cols)


def _load_arrow_accel(
    colspec: List[Tuple[str, int]],
    data: bytes,
) -> Tuple[
    'pa.Array[pa.int64()]',
    List[Tuple['pa.Array[Any]', 'pa.Array[pa.bool_()]']],
]:
    if not has_pyarrow:
        raise RuntimeError('pyarrow must be installed for this operation')
    if not has_accel:
        raise RuntimeError('could not load SingleStoreDB extension')

    numpy_ids, numpy_cols = _singlestoredb_accel.load_rowdat_1_numpy(colspec, data)
    cols = [
        (
            pa.array(data, type=PYARROW_TYPE_MAP[dtype], mask=mask),
            pa.array(mask, type=pa.bool_()),
        )
        for (data, mask), (name, dtype) in zip(numpy_cols, colspec)
    ]
    return pa.array(numpy_ids, type=pa.int64()), cols

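# For Arrow data, the NULL mask passed to the C extension marks an entry NULL
# when the value itself is null or when the corresponding mask entry is null;
# _create_arrow_mask OR's the two into a single numpy boolean array.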
def _create_arrow_mask(
    data: 'pa.Array[Any]',
    mask: 'pa.Array[pa.bool_()]',
) -> 'pa.Array[pa.bool_()]':
    if mask is None:
        return data.is_null().to_numpy(zero_copy_only=False)
    return pc.or_(data.is_null(), mask.is_null()).to_numpy(zero_copy_only=False)


def _dump_arrow_accel(
    returns: List[int],
    row_ids: 'pa.Array[pa.int64()]',
    cols: List[Tuple['pa.Array[Any]', 'pa.Array[pa.bool_()]']],
) -> bytes:
    if not has_pyarrow:
        raise RuntimeError('pyarrow must be installed for this operation')
    if not has_accel:
        raise RuntimeError('could not load SingleStoreDB extension')

    numpy_cols = [
        (
            data.fill_null(DEFAULT_VALUES[dtype]).to_numpy(zero_copy_only=False),
            _create_arrow_mask(data, mask),
        )
        for (data, mask), dtype in zip(cols, returns)
    ]
    return _singlestoredb_accel.dump_rowdat_1_numpy(
        returns, row_ids.to_numpy(), numpy_cols,
    )

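# Bind the public load/dump names. When the C extension is unavailable, the
# pure Python implementations above are used and the *_accel names become
# aliases for them; otherwise everything routes through the extension.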
if not has_accel:
    load = _load_accel = _load
    dump = _dump_accel = _dump
    load_pandas = _load_pandas_accel = _load_pandas  # noqa: F811
    dump_pandas = _dump_pandas_accel = _dump_pandas  # noqa: F811
    load_numpy = _load_numpy_accel = _load_numpy  # noqa: F811
    dump_numpy = _dump_numpy_accel = _dump_numpy  # noqa: F811
    load_arrow = _load_arrow_accel = _load_arrow  # noqa: F811
    dump_arrow = _dump_arrow_accel = _dump_arrow  # noqa: F811
    load_polars = _load_polars_accel = _load_polars  # noqa: F811
    dump_polars = _dump_polars_accel = _dump_polars  # noqa: F811

else:
    _load_accel = _singlestoredb_accel.load_rowdat_1
    _dump_accel = _singlestoredb_accel.dump_rowdat_1
    load = _load_accel
    dump = _dump_accel
    load_pandas = _load_pandas_accel
    dump_pandas = _dump_pandas_accel
    load_numpy = _load_numpy_accel
    dump_numpy = _dump_numpy_accel
    load_arrow = _load_arrow_accel
    dump_arrow = _dump_arrow_accel
    load_polars = _load_polars_accel
    dump_polars = _dump_polars_accel
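To illustrate the format this file implements, here is a minimal round-trip sketch using the pure Python serializers (the column names, row IDs, and values are made up for the example; ft.STRING is code 254, one of the codes in string_types; the public load/dump names may route to the C extension instead):

from singlestoredb.functions.ext import rowdat_1
from singlestoredb.mysql.constants import FIELD_TYPE as ft

# Illustrative column spec: (name, type code) pairs.
colspec = [('id', ft.LONGLONG), ('score', ft.DOUBLE), ('label', ft.STRING)]
returns = [ctype for _, ctype in colspec]

# None becomes a set NULL flag followed by the type's default value.
rows = [[1, 0.5, 'a'], [2, None, 'b']]
payload = rowdat_1._dump(returns, [0, 1], rows)

# Decoding restores the row IDs and rows, with NULLs back as None.
row_ids, decoded = rowdat_1._load(colspec, bytes(payload))
assert row_ids == [0, 1]
assert decoded == rows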