singlestoredb 0.4.0__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff compares the content of publicly available package versions as released to their public registries. It is provided for informational purposes only and reflects the changes between the two versions exactly as they appear in those registries.

Potentially problematic release: this version of singlestoredb might be problematic.

Files changed (120)
  1. singlestoredb/__init__.py +33 -1
  2. singlestoredb/alchemy/__init__.py +90 -0
  3. singlestoredb/auth.py +5 -1
  4. singlestoredb/config.py +116 -14
  5. singlestoredb/connection.py +483 -516
  6. singlestoredb/converters.py +238 -135
  7. singlestoredb/exceptions.py +30 -2
  8. singlestoredb/functions/__init__.py +1 -0
  9. singlestoredb/functions/decorator.py +142 -0
  10. singlestoredb/functions/dtypes.py +1639 -0
  11. singlestoredb/functions/ext/__init__.py +2 -0
  12. singlestoredb/functions/ext/arrow.py +375 -0
  13. singlestoredb/functions/ext/asgi.py +661 -0
  14. singlestoredb/functions/ext/json.py +427 -0
  15. singlestoredb/functions/ext/mmap.py +306 -0
  16. singlestoredb/functions/ext/rowdat_1.py +744 -0
  17. singlestoredb/functions/signature.py +673 -0
  18. singlestoredb/fusion/__init__.py +11 -0
  19. singlestoredb/fusion/graphql.py +213 -0
  20. singlestoredb/fusion/handler.py +621 -0
  21. singlestoredb/fusion/handlers/stage.py +257 -0
  22. singlestoredb/fusion/handlers/utils.py +162 -0
  23. singlestoredb/fusion/handlers/workspace.py +412 -0
  24. singlestoredb/fusion/registry.py +164 -0
  25. singlestoredb/fusion/result.py +399 -0
  26. singlestoredb/http/__init__.py +27 -0
  27. singlestoredb/{http.py → http/connection.py} +555 -154
  28. singlestoredb/management/__init__.py +3 -0
  29. singlestoredb/management/billing_usage.py +148 -0
  30. singlestoredb/management/cluster.py +14 -6
  31. singlestoredb/management/manager.py +100 -38
  32. singlestoredb/management/organization.py +188 -0
  33. singlestoredb/management/region.py +5 -5
  34. singlestoredb/management/utils.py +281 -2
  35. singlestoredb/management/workspace.py +1344 -49
  36. singlestoredb/{clients/pymysqlsv → mysql}/__init__.py +16 -21
  37. singlestoredb/{clients/pymysqlsv → mysql}/_auth.py +39 -8
  38. singlestoredb/{clients/pymysqlsv → mysql}/charset.py +26 -23
  39. singlestoredb/{clients/pymysqlsv/connections.py → mysql/connection.py} +532 -165
  40. singlestoredb/{clients/pymysqlsv → mysql}/constants/CLIENT.py +0 -1
  41. singlestoredb/{clients/pymysqlsv → mysql}/constants/COMMAND.py +0 -1
  42. singlestoredb/{clients/pymysqlsv → mysql}/constants/CR.py +0 -2
  43. singlestoredb/{clients/pymysqlsv → mysql}/constants/ER.py +0 -1
  44. singlestoredb/{clients/pymysqlsv → mysql}/constants/FIELD_TYPE.py +1 -1
  45. singlestoredb/{clients/pymysqlsv → mysql}/constants/FLAG.py +0 -1
  46. singlestoredb/{clients/pymysqlsv → mysql}/constants/SERVER_STATUS.py +0 -1
  47. singlestoredb/mysql/converters.py +271 -0
  48. singlestoredb/{clients/pymysqlsv → mysql}/cursors.py +228 -112
  49. singlestoredb/mysql/err.py +92 -0
  50. singlestoredb/{clients/pymysqlsv → mysql}/optionfile.py +5 -4
  51. singlestoredb/{clients/pymysqlsv → mysql}/protocol.py +49 -20
  52. singlestoredb/mysql/tests/__init__.py +19 -0
  53. singlestoredb/{clients/pymysqlsv → mysql}/tests/base.py +32 -12
  54. singlestoredb/mysql/tests/conftest.py +37 -0
  55. singlestoredb/{clients/pymysqlsv → mysql}/tests/test_DictCursor.py +11 -7
  56. singlestoredb/{clients/pymysqlsv → mysql}/tests/test_SSCursor.py +17 -12
  57. singlestoredb/{clients/pymysqlsv → mysql}/tests/test_basic.py +32 -24
  58. singlestoredb/{clients/pymysqlsv → mysql}/tests/test_connection.py +130 -119
  59. singlestoredb/{clients/pymysqlsv → mysql}/tests/test_converters.py +9 -7
  60. singlestoredb/mysql/tests/test_cursor.py +141 -0
  61. singlestoredb/{clients/pymysqlsv → mysql}/tests/test_err.py +3 -2
  62. singlestoredb/{clients/pymysqlsv → mysql}/tests/test_issues.py +35 -27
  63. singlestoredb/{clients/pymysqlsv → mysql}/tests/test_load_local.py +13 -11
  64. singlestoredb/{clients/pymysqlsv → mysql}/tests/test_nextset.py +7 -3
  65. singlestoredb/{clients/pymysqlsv → mysql}/tests/test_optionfile.py +2 -1
  66. singlestoredb/{clients/pymysqlsv → mysql}/tests/thirdparty/__init__.py +1 -1
  67. singlestoredb/mysql/tests/thirdparty/test_MySQLdb/__init__.py +9 -0
  68. singlestoredb/{clients/pymysqlsv → mysql}/tests/thirdparty/test_MySQLdb/capabilities.py +19 -17
  69. singlestoredb/{clients/pymysqlsv → mysql}/tests/thirdparty/test_MySQLdb/dbapi20.py +31 -22
  70. singlestoredb/{clients/pymysqlsv → mysql}/tests/thirdparty/test_MySQLdb/test_MySQLdb_capabilities.py +3 -4
  71. singlestoredb/{clients/pymysqlsv → mysql}/tests/thirdparty/test_MySQLdb/test_MySQLdb_dbapi20.py +24 -20
  72. singlestoredb/{clients/pymysqlsv → mysql}/tests/thirdparty/test_MySQLdb/test_MySQLdb_nonstandard.py +4 -4
  73. singlestoredb/{clients/pymysqlsv → mysql}/times.py +3 -4
  74. singlestoredb/pytest.py +283 -0
  75. singlestoredb/tests/empty.sql +0 -0
  76. singlestoredb/tests/ext_funcs/__init__.py +385 -0
  77. singlestoredb/tests/test.sql +210 -0
  78. singlestoredb/tests/test2.sql +1 -0
  79. singlestoredb/tests/test_basics.py +482 -115
  80. singlestoredb/tests/test_config.py +13 -13
  81. singlestoredb/tests/test_connection.py +241 -305
  82. singlestoredb/tests/test_dbapi.py +27 -0
  83. singlestoredb/tests/test_ext_func.py +1193 -0
  84. singlestoredb/tests/test_ext_func_data.py +1101 -0
  85. singlestoredb/tests/test_fusion.py +465 -0
  86. singlestoredb/tests/test_http.py +32 -26
  87. singlestoredb/tests/test_management.py +588 -8
  88. singlestoredb/tests/test_plugin.py +33 -0
  89. singlestoredb/tests/test_results.py +11 -12
  90. singlestoredb/tests/test_udf.py +687 -0
  91. singlestoredb/tests/utils.py +3 -2
  92. singlestoredb/utils/config.py +58 -0
  93. singlestoredb/utils/debug.py +13 -0
  94. singlestoredb/utils/mogrify.py +151 -0
  95. singlestoredb/utils/results.py +4 -1
  96. singlestoredb-1.0.4.dist-info/METADATA +139 -0
  97. singlestoredb-1.0.4.dist-info/RECORD +112 -0
  98. {singlestoredb-0.4.0.dist-info → singlestoredb-1.0.4.dist-info}/WHEEL +1 -1
  99. singlestoredb-1.0.4.dist-info/entry_points.txt +2 -0
  100. singlestoredb/clients/pymysqlsv/converters.py +0 -365
  101. singlestoredb/clients/pymysqlsv/err.py +0 -144
  102. singlestoredb/clients/pymysqlsv/tests/__init__.py +0 -19
  103. singlestoredb/clients/pymysqlsv/tests/test_cursor.py +0 -133
  104. singlestoredb/clients/pymysqlsv/tests/thirdparty/test_MySQLdb/__init__.py +0 -9
  105. singlestoredb/drivers/__init__.py +0 -45
  106. singlestoredb/drivers/base.py +0 -198
  107. singlestoredb/drivers/cymysql.py +0 -38
  108. singlestoredb/drivers/http.py +0 -47
  109. singlestoredb/drivers/mariadb.py +0 -40
  110. singlestoredb/drivers/mysqlconnector.py +0 -49
  111. singlestoredb/drivers/mysqldb.py +0 -60
  112. singlestoredb/drivers/pymysql.py +0 -37
  113. singlestoredb/drivers/pymysqlsv.py +0 -35
  114. singlestoredb/drivers/pyodbc.py +0 -65
  115. singlestoredb-0.4.0.dist-info/METADATA +0 -111
  116. singlestoredb-0.4.0.dist-info/RECORD +0 -86
  117. /singlestoredb/{clients → fusion/handlers}/__init__.py +0 -0
  118. /singlestoredb/{clients/pymysqlsv → mysql}/constants/__init__.py +0 -0
  119. {singlestoredb-0.4.0.dist-info → singlestoredb-1.0.4.dist-info}/LICENSE +0 -0
  120. {singlestoredb-0.4.0.dist-info → singlestoredb-1.0.4.dist-info}/top_level.txt +0 -0
singlestoredb/functions/ext/rowdat_1.py (new file)
@@ -0,0 +1,744 @@
+ #!/usr/bin/env python3
+ import struct
+ import warnings
+ from io import BytesIO
+ from typing import Any
+ from typing import List
+ from typing import Optional
+ from typing import Sequence
+ from typing import Tuple
+
+ from ...config import get_option
+ from ..dtypes import DEFAULT_VALUES
+ from ..dtypes import NUMPY_TYPE_MAP
+ from ..dtypes import PANDAS_TYPE_MAP
+ from ..dtypes import POLARS_TYPE_MAP
+ from ..dtypes import PYARROW_TYPE_MAP
+
+ try:
+     import numpy as np
+     has_numpy = True
+ except ImportError:
+     has_numpy = False
+
+ try:
+     import polars as pl
+     has_polars = True
+ except ImportError:
+     has_polars = False
+
+ try:
+     import pandas as pd
+     has_pandas = True
+ except ImportError:
+     has_pandas = False
+
+ try:
+     import pyarrow as pa
+     import pyarrow.compute as pc
+     has_pyarrow = True
+ except ImportError:
+     has_pyarrow = False
+
+ from ...mysql.constants import FIELD_TYPE as ft
+
+ has_accel = False
+ try:
+     if not get_option('pure_python'):
+         import _singlestoredb_accel
+         has_accel = True
+ except ImportError:
+     warnings.warn(
+         'could not load accelerated data reader for external functions; '
+         'using pure Python implementation.',
+         RuntimeWarning,
+     )
+
+ numeric_formats = {
+     ft.TINY: '<b',
+     -ft.TINY: '<B',
+     ft.SHORT: '<h',
+     -ft.SHORT: '<H',
+     ft.INT24: '<i',
+     -ft.INT24: '<I',
+     ft.LONG: '<i',
+     -ft.LONG: '<I',
+     ft.LONGLONG: '<q',
+     -ft.LONGLONG: '<Q',
+     ft.FLOAT: '<f',
+     ft.DOUBLE: '<d',
+ }
+ numeric_sizes = {
+     ft.TINY: 1,
+     -ft.TINY: 1,
+     ft.SHORT: 2,
+     -ft.SHORT: 2,
+     ft.INT24: 4,
+     -ft.INT24: 4,
+     ft.LONG: 4,
+     -ft.LONG: 4,
+     ft.LONGLONG: 8,
+     -ft.LONGLONG: 8,
+     ft.FLOAT: 4,
+     ft.DOUBLE: 8,
+ }
+ medium_int_types = set([ft.INT24, -ft.INT24])
+ int_types = set([
+     ft.TINY, -ft.TINY, ft.SHORT, -ft.SHORT, ft.INT24, -ft.INT24,
+     ft.LONG, -ft.LONG, ft.LONGLONG, -ft.LONGLONG,
+ ])
+ string_types = set([15, 245, 247, 248, 249, 250, 251, 252, 253, 254])
+ binary_types = set([-x for x in string_types])
+
+
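
For orientation, each serialized row under the tables above is an int64 row ID followed by, per column, a NULL byte (b'\x01' for NULL, b'\x00' otherwise) and then the value: fixed-width for numeric types, an int64 length prefix plus payload for string and binary types. A hand-packed sketch of one row, assuming column types ft.LONGLONG (BIGINT) and 15 (VARCHAR, one of the string_types listed above):

    import struct

    row = (
        struct.pack('<q', 1)       # row ID (int64, little-endian)
        + b'\x00'                  # column 'a' is not NULL
        + struct.pack('<q', 42)    # BIGINT payload: fixed 8 bytes ('<q')
        + b'\x00'                  # column 'b' is not NULL
        + struct.pack('<q', 5)     # string length as int64...
        + 'hello'.encode('utf-8')  # ...followed by the UTF-8 payload
    )
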
+ def _load(
+     colspec: List[Tuple[str, int]],
+     data: bytes,
+ ) -> Tuple[List[int], List[Any]]:
+     '''
+     Convert bytes in rowdat_1 format into rows of data.
+
+     Parameters
+     ----------
+     colspec : List[Tuple[str, int]]
+         A list of column names and data type codes
+     data : bytes
+         The data in rowdat_1 format
+
+     Returns
+     -------
+     Tuple[List[int], List[Any]]
+
+     '''
+     data_len = len(data)
+     data_io = BytesIO(data)
+     row_ids = []
+     rows = []
+     val = None
+     while data_io.tell() < data_len:
+         row_ids.append(struct.unpack('<q', data_io.read(8))[0])
+         row = []
+         for _, ctype in colspec:
+             is_null = data_io.read(1) == b'\x01'
+             if ctype in numeric_formats:
+                 val = struct.unpack(
+                     numeric_formats[ctype],
+                     data_io.read(numeric_sizes[ctype]),
+                 )[0]
+             elif ctype in string_types:
+                 slen = struct.unpack('<q', data_io.read(8))[0]
+                 val = data_io.read(slen).decode('utf-8')
+             elif ctype in binary_types:
+                 slen = struct.unpack('<q', data_io.read(8))[0]
+                 val = data_io.read(slen)
+             else:
+                 raise TypeError(f'unrecognized column type: {ctype}')
+             row.append(None if is_null else val)
+         rows.append(row)
+     return row_ids, rows
+
+
+ def _load_vectors(
+     colspec: List[Tuple[str, int]],
+     data: bytes,
+ ) -> Tuple[List[int], List[Tuple[Sequence[Any], Optional[Sequence[Any]]]]]:
+     '''
+     Convert bytes in rowdat_1 format into columns of data.
+
+     Parameters
+     ----------
+     colspec : List[Tuple[str, int]]
+         A list of column names and data type codes
+     data : bytes
+         The data in rowdat_1 format
+
+     Returns
+     -------
+     Tuple[List[int], List[Tuple[Any, Any]]]
+
+     '''
+     data_len = len(data)
+     data_io = BytesIO(data)
+     row_ids = []
+     cols: List[Any] = [[] for _ in colspec]
+     masks: List[Any] = [[] for _ in colspec]
+     val = None
+     while data_io.tell() < data_len:
+         row_ids.append(struct.unpack('<q', data_io.read(8))[0])
+         for i, (_, ctype) in enumerate(colspec):
+             default = DEFAULT_VALUES[ctype]
+             is_null = data_io.read(1) == b'\x01'
+             if ctype in numeric_formats:
+                 val = struct.unpack(
+                     numeric_formats[ctype],
+                     data_io.read(numeric_sizes[ctype]),
+                 )[0]
+             elif ctype in string_types:
+                 slen = struct.unpack('<q', data_io.read(8))[0]
+                 val = data_io.read(slen).decode('utf-8')
+             elif ctype in binary_types:
+                 slen = struct.unpack('<q', data_io.read(8))[0]
+                 val = data_io.read(slen)
+             else:
+                 raise TypeError(f'unrecognized column type: {ctype}')
+             cols[i].append(default if is_null else val)
+             masks[i].append(True if is_null else False)
+     return row_ids, [(x, y) for x, y in zip(cols, masks)]
+
+
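
Feeding the hand-packed row from the sketch above through these two loaders shows the row-wise versus column-wise shapes:

    # Row-wise: one list per row, with NULLs as None.
    ids, rows = _load([('a', ft.LONGLONG), ('b', 15)], row)
    # ids == [1]; rows == [[42, 'hello']]

    # Column-wise: one (values, null-mask) pair per column.
    ids, cols = _load_vectors([('a', ft.LONGLONG), ('b', 15)], row)
    # cols == [([42], [False]), (['hello'], [False])]
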
+ def _load_pandas(
+     colspec: List[Tuple[str, int]],
+     data: bytes,
+ ) -> Tuple[
+     'pd.Series[np.int64]',
+     List[Tuple['pd.Series[Any]', 'pd.Series[np.bool_]']],
+ ]:
+     '''
+     Convert bytes in rowdat_1 format into columns of data.
+
+     Parameters
+     ----------
+     colspec : List[Tuple[str, int]]
+         A list of column names and data type codes
+     data : bytes
+         The data in rowdat_1 format
+
+     Returns
+     -------
+     Tuple[pd.Series[int], List[Tuple[pd.Series[Any], pd.Series[bool]]]]
+
+     '''
+     if not has_pandas or not has_numpy:
+         raise RuntimeError('pandas must be installed for this operation')
+
+     row_ids, cols = _load_vectors(colspec, data)
+     index = pd.Series(row_ids)
+     return pd.Series(row_ids, dtype=np.int64), [
+         (
+             pd.Series(data, index=index, name=name, dtype=PANDAS_TYPE_MAP[dtype]),
+             pd.Series(mask, index=index, dtype=np.bool_),
+         )
+         for (data, mask), (name, dtype) in zip(cols, colspec)
+     ]
+
+
+ def _load_polars(
+     colspec: List[Tuple[str, int]],
+     data: bytes,
+ ) -> Tuple[
+     'pl.Series[pl.Int64]',
+     List[Tuple['pl.Series[Any]', 'pl.Series[pl.Boolean]']],
+ ]:
+     '''
+     Convert bytes in rowdat_1 format into columns of data.
+
+     Parameters
+     ----------
+     colspec : List[Tuple[str, int]]
+         A list of column names and data type codes
+     data : bytes
+         The data in rowdat_1 format
+
+     Returns
+     -------
+     Tuple[pl.Series[int], List[Tuple[pl.Series[Any], pl.Series[bool]]]]
+
+     '''
+     if not has_polars:
+         raise RuntimeError('polars must be installed for this operation')
+
+     row_ids, cols = _load_vectors(colspec, data)
+     return pl.Series(None, row_ids, dtype=pl.Int64), \
+         [
+             (
+                 pl.Series(name=name, values=data, dtype=POLARS_TYPE_MAP[dtype]),
+                 pl.Series(values=mask, dtype=pl.Boolean),
+             )
+             for (data, mask), (name, dtype) in zip(cols, colspec)
+         ]
+
+
+ def _load_numpy(
+     colspec: List[Tuple[str, int]],
+     data: bytes,
+ ) -> Tuple[
+     'np.typing.NDArray[np.int64]',
+     List[Tuple['np.typing.NDArray[Any]', 'np.typing.NDArray[np.bool_]']],
+ ]:
+     '''
+     Convert bytes in rowdat_1 format into columns of data.
+
+     Parameters
+     ----------
+     colspec : List[Tuple[str, int]]
+         A list of column names and data type codes
+     data : bytes
+         The data in rowdat_1 format
+
+     Returns
+     -------
+     Tuple[np.ndarray[int], List[Tuple[np.ndarray[Any], np.ndarray[bool]]]]
+
+     '''
+     if not has_numpy:
+         raise RuntimeError('numpy must be installed for this operation')
+
+     row_ids, cols = _load_vectors(colspec, data)
+     return np.asarray(row_ids, dtype=np.int64), \
+         [
+             (
+                 np.asarray(data, dtype=NUMPY_TYPE_MAP[dtype]),
+                 np.asarray(mask, dtype=np.bool_),
+             )
+             for (data, mask), (name, dtype) in zip(cols, colspec)
+         ]
+
+
+ def _load_arrow(
+     colspec: List[Tuple[str, int]],
+     data: bytes,
+ ) -> Tuple[
+     'pa.Array[pa.int64()]',
+     List[Tuple['pa.Array[Any]', 'pa.Array[pa.bool_()]']],
+ ]:
+     '''
+     Convert bytes in rowdat_1 format into columns of data.
+
+     Parameters
+     ----------
+     colspec : List[Tuple[str, int]]
+         A list of column names and data type codes
+     data : bytes
+         The data in rowdat_1 format
+
+     Returns
+     -------
+     Tuple[pa.Array[int], List[Tuple[pa.Array[Any], pa.Array[bool]]]]
+
+     '''
+     if not has_pyarrow:
+         raise RuntimeError('pyarrow must be installed for this operation')
+
+     row_ids, cols = _load_vectors(colspec, data)
+     return pa.array(row_ids, type=pa.int64()), \
+         [
+             (
+                 pa.array(
+                     data, type=PYARROW_TYPE_MAP[dtype],
+                     mask=pa.array(mask, type=pa.bool_()),
+                 ),
+                 pa.array(mask, type=pa.bool_()),
+             )
+             for (data, mask), (name, dtype) in zip(cols, colspec)
+         ]
+
+
+ def _dump(
+     returns: List[int],
+     row_ids: List[int],
+     rows: List[List[Any]],
+ ) -> bytes:
+     '''
+     Convert a list of lists of data into rowdat_1 format.
+
+     Parameters
+     ----------
+     returns : List[int]
+         The return data type codes
+     row_ids : List[int]
+         The row IDs
+     rows : List[List[Any]]
+         The rows of data to serialize
+
+     Returns
+     -------
+     bytes
+
+     '''
+     out = BytesIO()
+
+     if len(rows) == 0 or len(row_ids) == 0:
+         return out.getbuffer()
+
+     for row_id, *values in zip(row_ids, *list(zip(*rows))):
+         out.write(struct.pack('<q', row_id))
+         for rtype, value in zip(returns, values):
+             out.write(b'\x01' if value is None else b'\x00')
+             default = DEFAULT_VALUES[rtype]
+             if rtype in numeric_formats:
+                 if value is None:
+                     out.write(struct.pack(numeric_formats[rtype], default))
+                 else:
+                     if rtype in int_types:
+                         if rtype == ft.INT24:
+                             if int(value) > 8388607 or int(value) < -8388608:
+                                 raise ValueError(
+                                     'value is outside range of MEDIUMINT',
+                                 )
+                         elif rtype == -ft.INT24:
+                             if int(value) > 16777215 or int(value) < 0:
+                                 raise ValueError(
+                                     'value is outside range of UNSIGNED MEDIUMINT',
+                                 )
+                         out.write(struct.pack(numeric_formats[rtype], int(value)))
+                     else:
+                         out.write(struct.pack(numeric_formats[rtype], float(value)))
+             elif rtype in string_types:
+                 if value is None:
+                     out.write(struct.pack('<q', 0))
+                 else:
+                     sval = value.encode('utf-8')
+                     out.write(struct.pack('<q', len(sval)))
+                     out.write(sval)
+             elif rtype in binary_types:
+                 if value is None:
+                     out.write(struct.pack('<q', 0))
+                 else:
+                     out.write(struct.pack('<q', len(value)))
+                     out.write(value)
+             else:
+                 raise TypeError(f'unrecognized column type: {rtype}')
+
+     return out.getbuffer()
+
+
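
A round trip through the pure Python serializers above might look as follows; note that _dump returns a view over its internal BytesIO buffer, so it is normalized with bytes() before being parsed again (a sketch, again assuming the ft.LONGLONG and 15 type codes):

    payload = _dump(
        [ft.LONGLONG, 15],           # return type codes
        [1, 2],                      # row IDs
        [[42, 'hi'], [None, None]],  # NULLs travel as a flag plus placeholder
    )
    ids, rows = _load([('x', ft.LONGLONG), ('y', 15)], bytes(payload))
    # ids == [1, 2]; rows == [[42, 'hi'], [None, None]]
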
+ def _dump_vectors(
+     returns: List[int],
+     row_ids: List[int],
+     cols: List[Tuple[Sequence[Any], Optional[Sequence[Any]]]],
+ ) -> bytes:
+     '''
+     Convert a list of columns of data into rowdat_1 format.
+
+     Parameters
+     ----------
+     returns : List[int]
+         The return data type codes
+     row_ids : List[int]
+         The row IDs
+     cols : List[Tuple[Any, Any]]
+         The columns of data and masks to serialize
+
+     Returns
+     -------
+     bytes
+
+     '''
+     out = BytesIO()
+
+     if len(cols) == 0 or len(row_ids) == 0:
+         return out.getbuffer()
+
+     for j, row_id in enumerate(row_ids):
+
+         out.write(struct.pack('<q', row_id))
+
+         for i, rtype in enumerate(returns):
+             value = cols[i][0][j]
+             if cols[i][1] is not None:
+                 is_null = cols[i][1][j]  # type: ignore
+             else:
+                 is_null = False
+
+             out.write(b'\x01' if is_null or value is None else b'\x00')
+             default = DEFAULT_VALUES[rtype]
+             try:
+                 if rtype in numeric_formats:
+                     if value is None:
+                         out.write(struct.pack(numeric_formats[rtype], default))
+                     else:
+                         if rtype in int_types:
+                             if rtype == ft.INT24:
+                                 if int(value) > 8388607 or int(value) < -8388608:
+                                     raise ValueError(
+                                         'value is outside range of MEDIUMINT',
+                                     )
+                             elif rtype == -ft.INT24:
+                                 if int(value) > 16777215 or int(value) < 0:
+                                     raise ValueError(
+                                         'value is outside range of UNSIGNED MEDIUMINT',
+                                     )
+                             out.write(struct.pack(numeric_formats[rtype], int(value)))
+                         else:
+                             out.write(struct.pack(numeric_formats[rtype], float(value)))
+                 elif rtype in string_types:
+                     if value is None:
+                         out.write(struct.pack('<q', 0))
+                     else:
+                         sval = value.encode('utf-8')
+                         out.write(struct.pack('<q', len(sval)))
+                         out.write(sval)
+                 elif rtype in binary_types:
+                     if value is None:
+                         out.write(struct.pack('<q', 0))
+                     else:
+                         out.write(struct.pack('<q', len(value)))
+                         out.write(value)
+                 else:
+                     raise TypeError(f'unrecognized column type: {rtype}')
+
+             except struct.error as exc:
+                 raise ValueError(str(exc))
+
+     return out.getbuffer()
+
+
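
The column-wise serializer accepts the (values, mask) shape that _load_vectors produces; masked slots are written as NULLs regardless of the placeholder sitting in the values list. Continuing the sketch above:

    payload2 = _dump_vectors(
        [ft.LONGLONG, 15],
        [1, 2],
        [([42, 0], [False, True]), (['hi', ''], [False, True])],
    )
    ids, rows = _load([('x', ft.LONGLONG), ('y', 15)], bytes(payload2))
    # rows == [[42, 'hi'], [None, None]], matching the row-wise round trip
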
+ def _dump_arrow(
+     returns: List[int],
+     row_ids: 'pa.Array[int]',
+     cols: List[Tuple['pa.Array[Any]', 'pa.Array[bool]']],
+ ) -> bytes:
+     if not has_pyarrow:
+         raise RuntimeError('pyarrow must be installed for this operation')
+
+     return _dump_vectors(
+         returns,
+         row_ids.tolist(),
+         [(x.tolist(), y.tolist() if y is not None else None) for x, y in cols],
+     )
+
+
+ def _dump_numpy(
+     returns: List[int],
+     row_ids: 'np.typing.NDArray[np.int64]',
+     cols: List[Tuple['np.typing.NDArray[Any]', 'np.typing.NDArray[np.bool_]']],
+ ) -> bytes:
+     if not has_numpy:
+         raise RuntimeError('numpy must be installed for this operation')
+
+     return _dump_vectors(
+         returns,
+         row_ids.tolist(),
+         [(x.tolist(), y.tolist() if y is not None else None) for x, y in cols],
+     )
+
+
+ def _dump_pandas(
+     returns: List[int],
+     row_ids: 'pd.Series[np.int64]',
+     cols: List[Tuple['pd.Series[Any]', 'pd.Series[np.bool_]']],
+ ) -> bytes:
+     if not has_pandas or not has_numpy:
+         raise RuntimeError('pandas must be installed for this operation')
+
+     return _dump_vectors(
+         returns,
+         row_ids.to_list(),
+         [(x.to_list(), y.to_list() if y is not None else None) for x, y in cols],
+     )
+
+
+ def _dump_polars(
+     returns: List[int],
+     row_ids: 'pl.Series[pl.Int64]',
+     cols: List[Tuple['pl.Series[Any]', 'pl.Series[pl.Boolean]']],
+ ) -> bytes:
+     if not has_polars:
+         raise RuntimeError('polars must be installed for this operation')
+
+     return _dump_vectors(
+         returns,
+         row_ids.to_list(),
+         [(x.to_list(), y.to_list() if y is not None else None) for x, y in cols],
+     )
+
+
+ def _load_numpy_accel(
+     colspec: List[Tuple[str, int]],
+     data: bytes,
+ ) -> Tuple[
+     'np.typing.NDArray[np.int64]',
+     List[Tuple['np.typing.NDArray[Any]', 'np.typing.NDArray[np.bool_]']],
+ ]:
+     if not has_numpy:
+         raise RuntimeError('numpy must be installed for this operation')
+     if not has_accel:
+         raise RuntimeError('could not load SingleStoreDB extension')
+
+     return _singlestoredb_accel.load_rowdat_1_numpy(colspec, data)
+
+
+ def _dump_numpy_accel(
+     returns: List[int],
+     row_ids: 'np.typing.NDArray[np.int64]',
+     cols: List[Tuple['np.typing.NDArray[Any]', 'np.typing.NDArray[np.bool_]']],
+ ) -> bytes:
+     if not has_numpy:
+         raise RuntimeError('numpy must be installed for this operation')
+     if not has_accel:
+         raise RuntimeError('could not load SingleStoreDB extension')
+
+     return _singlestoredb_accel.dump_rowdat_1_numpy(returns, row_ids, cols)
+
+
+ def _load_pandas_accel(
+     colspec: List[Tuple[str, int]],
+     data: bytes,
+ ) -> Tuple[
+     'pd.Series[np.int64]',
+     List[Tuple['pd.Series[Any]', 'pd.Series[np.bool_]']],
+ ]:
+     if not has_pandas or not has_numpy:
+         raise RuntimeError('pandas must be installed for this operation')
+     if not has_accel:
+         raise RuntimeError('could not load SingleStoreDB extension')
+
+     numpy_ids, numpy_cols = _singlestoredb_accel.load_rowdat_1_numpy(colspec, data)
+     cols = [
+         (
+             pd.Series(data, name=name, dtype=PANDAS_TYPE_MAP[dtype]),
+             pd.Series(mask, dtype=np.bool_),
+         )
+         for (name, dtype), (data, mask) in zip(colspec, numpy_cols)
+     ]
+     return pd.Series(numpy_ids, dtype=np.int64), cols
+
+
+ def _dump_pandas_accel(
+     returns: List[int],
+     row_ids: 'pd.Series[np.int64]',
+     cols: List[Tuple['pd.Series[Any]', 'pd.Series[np.bool_]']],
+ ) -> bytes:
+     if not has_pandas or not has_numpy:
+         raise RuntimeError('pandas must be installed for this operation')
+     if not has_accel:
+         raise RuntimeError('could not load SingleStoreDB extension')
+
+     numpy_ids = row_ids.to_numpy()
+     numpy_cols = [
+         (
+             data.to_numpy(),
+             mask.to_numpy() if mask is not None else None,
+         )
+         for data, mask in cols
+     ]
+     return _singlestoredb_accel.dump_rowdat_1_numpy(returns, numpy_ids, numpy_cols)
+
+
+ def _load_polars_accel(
+     colspec: List[Tuple[str, int]],
+     data: bytes,
+ ) -> Tuple[
+     'pl.Series[pl.Int64]',
+     List[Tuple['pl.Series[Any]', 'pl.Series[pl.Boolean]']],
+ ]:
+     if not has_polars:
+         raise RuntimeError('polars must be installed for this operation')
+     if not has_accel:
+         raise RuntimeError('could not load SingleStoreDB extension')
+
+     numpy_ids, numpy_cols = _singlestoredb_accel.load_rowdat_1_numpy(colspec, data)
+     cols = [
+         (
+             pl.Series(
+                 name=name, values=data.tolist()
+                 if dtype in string_types or dtype in binary_types else data,
+                 dtype=POLARS_TYPE_MAP[dtype],
+             ),
+             pl.Series(values=mask, dtype=pl.Boolean),
+         )
+         for (name, dtype), (data, mask) in zip(colspec, numpy_cols)
+     ]
+     return pl.Series(values=numpy_ids, dtype=pl.Int64), cols
+
+
+ def _dump_polars_accel(
+     returns: List[int],
+     row_ids: 'pl.Series[pl.Int64]',
+     cols: List[Tuple['pl.Series[Any]', 'pl.Series[pl.Boolean]']],
+ ) -> bytes:
+     if not has_polars:
+         raise RuntimeError('polars must be installed for this operation')
+     if not has_accel:
+         raise RuntimeError('could not load SingleStoreDB extension')
+
+     numpy_ids = row_ids.to_numpy()
+     numpy_cols = [
+         (
+             data.to_numpy(),
+             mask.to_numpy() if mask is not None else None,
+         )
+         for data, mask in cols
+     ]
+     return _singlestoredb_accel.dump_rowdat_1_numpy(returns, numpy_ids, numpy_cols)
+
+
+ def _load_arrow_accel(
+     colspec: List[Tuple[str, int]],
+     data: bytes,
+ ) -> Tuple[
+     'pa.Array[pa.int64()]',
+     List[Tuple['pa.Array[Any]', 'pa.Array[pa.bool_()]']],
+ ]:
+     if not has_pyarrow:
+         raise RuntimeError('pyarrow must be installed for this operation')
+     if not has_accel:
+         raise RuntimeError('could not load SingleStoreDB extension')
+
+     numpy_ids, numpy_cols = _singlestoredb_accel.load_rowdat_1_numpy(colspec, data)
+     cols = [
+         (
+             pa.array(data, type=PYARROW_TYPE_MAP[dtype], mask=mask),
+             pa.array(mask, type=pa.bool_()),
+         )
+         for (data, mask), (name, dtype) in zip(numpy_cols, colspec)
+     ]
+     return pa.array(numpy_ids, type=pa.int64()), cols
+
+
+ def _create_arrow_mask(
+     data: 'pa.Array[Any]',
+     mask: 'pa.Array[pa.bool_()]',
+ ) -> 'pa.Array[pa.bool_()]':
+     if mask is None:
+         return data.is_null().to_numpy(zero_copy_only=False)
+     return pc.or_(data.is_null(), mask.is_null()).to_numpy(zero_copy_only=False)
+
+
+ def _dump_arrow_accel(
+     returns: List[int],
+     row_ids: 'pa.Array[pa.int64()]',
+     cols: List[Tuple['pa.Array[Any]', 'pa.Array[pa.bool_()]']],
+ ) -> bytes:
+     if not has_pyarrow:
+         raise RuntimeError('pyarrow must be installed for this operation')
+     if not has_accel:
+         raise RuntimeError('could not load SingleStoreDB extension')
+
+     numpy_cols = [
+         (
+             data.fill_null(DEFAULT_VALUES[dtype]).to_numpy(zero_copy_only=False),
+             _create_arrow_mask(data, mask),
+         )
+         for (data, mask), dtype in zip(cols, returns)
+     ]
+     return _singlestoredb_accel.dump_rowdat_1_numpy(
+         returns, row_ids.to_numpy(), numpy_cols,
+     )
+
+
+ if not has_accel:
+     load = _load_accel = _load
+     dump = _dump_accel = _dump
+     load_pandas = _load_pandas_accel = _load_pandas  # noqa: F811
+     dump_pandas = _dump_pandas_accel = _dump_pandas  # noqa: F811
+     load_numpy = _load_numpy_accel = _load_numpy  # noqa: F811
+     dump_numpy = _dump_numpy_accel = _dump_numpy  # noqa: F811
+     load_arrow = _load_arrow_accel = _load_arrow  # noqa: F811
+     dump_arrow = _dump_arrow_accel = _dump_arrow  # noqa: F811
+     load_polars = _load_polars_accel = _load_polars  # noqa: F811
+     dump_polars = _dump_polars_accel = _dump_polars  # noqa: F811
+
+ else:
+     _load_accel = _singlestoredb_accel.load_rowdat_1
+     _dump_accel = _singlestoredb_accel.dump_rowdat_1
+     load = _load_accel
+     dump = _dump_accel
+     load_pandas = _load_pandas_accel
+     dump_pandas = _dump_pandas_accel
+     load_numpy = _load_numpy_accel
+     dump_numpy = _dump_numpy_accel
+     load_arrow = _load_arrow_accel
+     dump_arrow = _dump_arrow_accel
+     load_polars = _load_polars_accel
+     dump_polars = _dump_polars_accel
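
The module therefore exposes one public surface (load, dump, and the load_/dump_ variants for numpy, pandas, polars, and pyarrow) that transparently prefers the C extension whenever _singlestoredb_accel is importable and the pure_python option is off. A usage sketch, assuming the accelerated entry points accept the same arguments as the pure Python functions they alias:

    from singlestoredb.functions.ext import rowdat_1
    from singlestoredb.mysql.constants import FIELD_TYPE as ft

    # The same call works whether or not the accelerated reader loaded.
    payload = rowdat_1.dump([ft.LONGLONG], [1], [[123]])
    ids, rows = rowdat_1.load([('n', ft.LONGLONG)], bytes(payload))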