singlestoredb 1.10.0__cp38-abi3-win32.whl → 1.12.0__cp38-abi3-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of singlestoredb might be problematic. Click here for more details.
- _singlestoredb_accel.pyd +0 -0
- singlestoredb/__init__.py +1 -1
- singlestoredb/config.py +6 -0
- singlestoredb/connection.py +7 -0
- singlestoredb/converters.py +5 -5
- singlestoredb/functions/__init__.py +1 -0
- singlestoredb/functions/decorator.py +258 -69
- singlestoredb/functions/ext/asgi.py +121 -27
- singlestoredb/functions/signature.py +100 -9
- singlestoredb/fusion/handlers/export.py +58 -2
- singlestoredb/fusion/handlers/files.py +6 -6
- singlestoredb/fusion/handlers/models.py +250 -0
- singlestoredb/fusion/handlers/utils.py +5 -5
- singlestoredb/fusion/result.py +1 -1
- singlestoredb/http/connection.py +4 -0
- singlestoredb/management/export.py +30 -7
- singlestoredb/management/files.py +89 -26
- singlestoredb/mysql/connection.py +25 -19
- singlestoredb/server/__init__.py +0 -0
- singlestoredb/server/docker.py +455 -0
- singlestoredb/server/free_tier.py +267 -0
- singlestoredb/tests/test_udf.py +84 -32
- singlestoredb/utils/events.py +16 -0
- {singlestoredb-1.10.0.dist-info → singlestoredb-1.12.0.dist-info}/METADATA +3 -1
- {singlestoredb-1.10.0.dist-info → singlestoredb-1.12.0.dist-info}/RECORD +29 -25
- {singlestoredb-1.10.0.dist-info → singlestoredb-1.12.0.dist-info}/LICENSE +0 -0
- {singlestoredb-1.10.0.dist-info → singlestoredb-1.12.0.dist-info}/WHEEL +0 -0
- {singlestoredb-1.10.0.dist-info → singlestoredb-1.12.0.dist-info}/entry_points.txt +0 -0
- {singlestoredb-1.10.0.dist-info → singlestoredb-1.12.0.dist-info}/top_level.txt +0 -0
_singlestoredb_accel.pyd
CHANGED
|
Binary file
|
singlestoredb/__init__.py
CHANGED
singlestoredb/config.py
CHANGED
|
@@ -201,6 +201,12 @@ register_option(
|
|
|
201
201
|
environ='SINGLESTOREDB_BUFFERED',
|
|
202
202
|
)
|
|
203
203
|
|
|
204
|
+
register_option(
|
|
205
|
+
'parse_json', 'bool', check_bool, True,
|
|
206
|
+
'Parse JSON values into Python objects?',
|
|
207
|
+
environ='SINGLESTOREDB_PARSE_JSON',
|
|
208
|
+
)
|
|
209
|
+
|
|
204
210
|
register_option(
|
|
205
211
|
'connect_timeout', 'int', check_int, 10,
|
|
206
212
|
'The timeout for connecting to the database in seconds. '
|
singlestoredb/connection.py
CHANGED
|
@@ -541,10 +541,16 @@ class Cursor(metaclass=abc.ABCMeta):
|
|
|
541
541
|
|
|
542
542
|
Examples
|
|
543
543
|
--------
|
|
544
|
+
Query with no parameters
|
|
545
|
+
|
|
544
546
|
>>> cur.execute('select * from mytable')
|
|
545
547
|
|
|
548
|
+
Query with positional parameters
|
|
549
|
+
|
|
546
550
|
>>> cur.execute('select * from mytable where id < %s', [100])
|
|
547
551
|
|
|
552
|
+
Query with named parameters
|
|
553
|
+
|
|
548
554
|
>>> cur.execute('select * from mytable where id < %(max)s', dict(max=100))
|
|
549
555
|
|
|
550
556
|
Returns
|
|
@@ -1317,6 +1323,7 @@ def connect(
|
|
|
1317
1323
|
track_env: Optional[bool] = None,
|
|
1318
1324
|
enable_extended_data_types: Optional[bool] = None,
|
|
1319
1325
|
vector_data_format: Optional[str] = None,
|
|
1326
|
+
parse_json: Optional[bool] = None,
|
|
1320
1327
|
) -> Connection:
|
|
1321
1328
|
"""
|
|
1322
1329
|
Return a SingleStoreDB connection.
|
singlestoredb/converters.py
CHANGED
|
@@ -594,7 +594,7 @@ def float32_vector_or_none(x: Optional[bytes]) -> Optional[Any]:
|
|
|
594
594
|
if has_numpy:
|
|
595
595
|
return numpy.frombuffer(x, dtype=numpy.float32)
|
|
596
596
|
|
|
597
|
-
return struct.unpack(f'<{len(x)
|
|
597
|
+
return struct.unpack(f'<{len(x)//4}f', x)
|
|
598
598
|
|
|
599
599
|
|
|
600
600
|
def float64_vector_json_or_none(x: Optional[str]) -> Optional[Any]:
|
|
@@ -650,7 +650,7 @@ def float64_vector_or_none(x: Optional[bytes]) -> Optional[Any]:
|
|
|
650
650
|
if has_numpy:
|
|
651
651
|
return numpy.frombuffer(x, dtype=numpy.float64)
|
|
652
652
|
|
|
653
|
-
return struct.unpack(f'<{len(x)
|
|
653
|
+
return struct.unpack(f'<{len(x)//8}d', x)
|
|
654
654
|
|
|
655
655
|
|
|
656
656
|
def int8_vector_json_or_none(x: Optional[str]) -> Optional[Any]:
|
|
@@ -762,7 +762,7 @@ def int16_vector_or_none(x: Optional[bytes]) -> Optional[Any]:
|
|
|
762
762
|
if has_numpy:
|
|
763
763
|
return numpy.frombuffer(x, dtype=numpy.int16)
|
|
764
764
|
|
|
765
|
-
return struct.unpack(f'<{len(x)
|
|
765
|
+
return struct.unpack(f'<{len(x)//2}h', x)
|
|
766
766
|
|
|
767
767
|
|
|
768
768
|
def int32_vector_json_or_none(x: Optional[str]) -> Optional[Any]:
|
|
@@ -818,7 +818,7 @@ def int32_vector_or_none(x: Optional[bytes]) -> Optional[Any]:
|
|
|
818
818
|
if has_numpy:
|
|
819
819
|
return numpy.frombuffer(x, dtype=numpy.int32)
|
|
820
820
|
|
|
821
|
-
return struct.unpack(f'<{len(x)
|
|
821
|
+
return struct.unpack(f'<{len(x)//4}l', x)
|
|
822
822
|
|
|
823
823
|
|
|
824
824
|
def int64_vector_json_or_none(x: Optional[str]) -> Optional[Any]:
|
|
@@ -875,7 +875,7 @@ def int64_vector_or_none(x: Optional[bytes]) -> Optional[Any]:
|
|
|
875
875
|
if has_numpy:
|
|
876
876
|
return numpy.frombuffer(x, dtype=numpy.int64)
|
|
877
877
|
|
|
878
|
-
return struct.unpack(f'<{len(x)
|
|
878
|
+
return struct.unpack(f'<{len(x)//8}l', x)
|
|
879
879
|
|
|
880
880
|
|
|
881
881
|
def bson_or_none(x: Optional[bytes]) -> Optional[Any]:
|
|
@@ -1,12 +1,36 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
import datetime
|
|
1
3
|
import functools
|
|
4
|
+
import inspect
|
|
2
5
|
from typing import Any
|
|
3
6
|
from typing import Callable
|
|
4
7
|
from typing import Dict
|
|
5
8
|
from typing import List
|
|
6
9
|
from typing import Optional
|
|
10
|
+
from typing import Tuple
|
|
7
11
|
from typing import Union
|
|
8
12
|
|
|
13
|
+
from . import dtypes
|
|
9
14
|
from .dtypes import DataType
|
|
15
|
+
from .signature import simplify_dtype
|
|
16
|
+
|
|
17
|
+
try:
|
|
18
|
+
import pydantic
|
|
19
|
+
has_pydantic = True
|
|
20
|
+
except ImportError:
|
|
21
|
+
has_pydantic = False
|
|
22
|
+
|
|
23
|
+
python_type_map: Dict[Any, Callable[..., str]] = {
|
|
24
|
+
str: dtypes.TEXT,
|
|
25
|
+
int: dtypes.BIGINT,
|
|
26
|
+
float: dtypes.DOUBLE,
|
|
27
|
+
bool: dtypes.BOOL,
|
|
28
|
+
bytes: dtypes.BINARY,
|
|
29
|
+
bytearray: dtypes.BINARY,
|
|
30
|
+
datetime.datetime: dtypes.DATETIME,
|
|
31
|
+
datetime.date: dtypes.DATE,
|
|
32
|
+
datetime.timedelta: dtypes.TIME,
|
|
33
|
+
}
|
|
10
34
|
|
|
11
35
|
|
|
12
36
|
def listify(x: Any) -> List[Any]:
|
|
@@ -18,12 +42,170 @@ def listify(x: Any) -> List[Any]:
|
|
|
18
42
|
return [x]
|
|
19
43
|
|
|
20
44
|
|
|
45
|
+
def process_annotation(annotation: Any) -> Tuple[Any, bool]:
|
|
46
|
+
types = simplify_dtype(annotation)
|
|
47
|
+
if isinstance(types, list):
|
|
48
|
+
nullable = False
|
|
49
|
+
if type(None) in types:
|
|
50
|
+
nullable = True
|
|
51
|
+
types = [x for x in types if x is not type(None)]
|
|
52
|
+
if len(types) > 1:
|
|
53
|
+
raise ValueError(f'multiple types not supported: {annotation}')
|
|
54
|
+
return types[0], nullable
|
|
55
|
+
return types, True
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def process_types(params: Any) -> Any:
|
|
59
|
+
if params is None:
|
|
60
|
+
return params, []
|
|
61
|
+
|
|
62
|
+
elif isinstance(params, (list, tuple)):
|
|
63
|
+
params = list(params)
|
|
64
|
+
for i, item in enumerate(params):
|
|
65
|
+
if params[i] in python_type_map:
|
|
66
|
+
params[i] = python_type_map[params[i]]()
|
|
67
|
+
elif callable(item):
|
|
68
|
+
params[i] = item()
|
|
69
|
+
for item in params:
|
|
70
|
+
if not isinstance(item, str):
|
|
71
|
+
raise TypeError(f'unrecognized type for parameter: {item}')
|
|
72
|
+
return params, []
|
|
73
|
+
|
|
74
|
+
elif isinstance(params, dict):
|
|
75
|
+
names = []
|
|
76
|
+
params = dict(params)
|
|
77
|
+
for k, v in list(params.items()):
|
|
78
|
+
names.append(k)
|
|
79
|
+
if params[k] in python_type_map:
|
|
80
|
+
params[k] = python_type_map[params[k]]()
|
|
81
|
+
elif callable(v):
|
|
82
|
+
params[k] = v()
|
|
83
|
+
for item in params.values():
|
|
84
|
+
if not isinstance(item, str):
|
|
85
|
+
raise TypeError(f'unrecognized type for parameter: {item}')
|
|
86
|
+
return params, names
|
|
87
|
+
|
|
88
|
+
elif dataclasses.is_dataclass(params):
|
|
89
|
+
names = []
|
|
90
|
+
out = []
|
|
91
|
+
for item in dataclasses.fields(params):
|
|
92
|
+
typ, nullable = process_annotation(item.type)
|
|
93
|
+
sql_type = process_types(typ)[0]
|
|
94
|
+
if not nullable:
|
|
95
|
+
sql_type = sql_type.replace('NULL', 'NOT NULL')
|
|
96
|
+
out.append(sql_type)
|
|
97
|
+
names.append(item.name)
|
|
98
|
+
return out, names
|
|
99
|
+
|
|
100
|
+
elif has_pydantic and inspect.isclass(params) \
|
|
101
|
+
and issubclass(params, pydantic.BaseModel):
|
|
102
|
+
names = []
|
|
103
|
+
out = []
|
|
104
|
+
for name, item in params.model_fields.items():
|
|
105
|
+
typ, nullable = process_annotation(item.annotation)
|
|
106
|
+
sql_type = process_types(typ)[0]
|
|
107
|
+
if not nullable:
|
|
108
|
+
sql_type = sql_type.replace('NULL', 'NOT NULL')
|
|
109
|
+
out.append(sql_type)
|
|
110
|
+
names.append(name)
|
|
111
|
+
return out, names
|
|
112
|
+
|
|
113
|
+
elif params in python_type_map:
|
|
114
|
+
return python_type_map[params](), []
|
|
115
|
+
|
|
116
|
+
elif callable(params):
|
|
117
|
+
return params(), []
|
|
118
|
+
|
|
119
|
+
elif isinstance(params, str):
|
|
120
|
+
return params, []
|
|
121
|
+
|
|
122
|
+
raise TypeError(f'unrecognized data type for args: {params}')
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _func(
|
|
126
|
+
func: Optional[Callable[..., Any]] = None,
|
|
127
|
+
*,
|
|
128
|
+
name: Optional[str] = None,
|
|
129
|
+
args: Optional[
|
|
130
|
+
Union[
|
|
131
|
+
DataType,
|
|
132
|
+
List[DataType],
|
|
133
|
+
Dict[str, DataType],
|
|
134
|
+
'pydantic.BaseModel',
|
|
135
|
+
type,
|
|
136
|
+
]
|
|
137
|
+
] = None,
|
|
138
|
+
returns: Optional[
|
|
139
|
+
Union[
|
|
140
|
+
str,
|
|
141
|
+
List[DataType],
|
|
142
|
+
List[type],
|
|
143
|
+
'pydantic.BaseModel',
|
|
144
|
+
type,
|
|
145
|
+
]
|
|
146
|
+
] = None,
|
|
147
|
+
data_format: Optional[str] = None,
|
|
148
|
+
include_masks: bool = False,
|
|
149
|
+
function_type: str = 'udf',
|
|
150
|
+
output_fields: Optional[List[str]] = None,
|
|
151
|
+
) -> Callable[..., Any]:
|
|
152
|
+
"""Generic wrapper for UDF and TVF decorators."""
|
|
153
|
+
args, _ = process_types(args)
|
|
154
|
+
returns, fields = process_types(returns)
|
|
155
|
+
|
|
156
|
+
if not output_fields and fields:
|
|
157
|
+
output_fields = fields
|
|
158
|
+
|
|
159
|
+
if isinstance(returns, list) \
|
|
160
|
+
and isinstance(output_fields, list) \
|
|
161
|
+
and len(output_fields) != len(returns):
|
|
162
|
+
raise ValueError(
|
|
163
|
+
'The number of output fields must match the number of return types',
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
if include_masks and data_format == 'python':
|
|
167
|
+
raise RuntimeError(
|
|
168
|
+
'include_masks is only valid when using '
|
|
169
|
+
'vectors for input parameters',
|
|
170
|
+
)
|
|
171
|
+
|
|
172
|
+
_singlestoredb_attrs = { # type: ignore
|
|
173
|
+
k: v for k, v in dict(
|
|
174
|
+
name=name,
|
|
175
|
+
args=args,
|
|
176
|
+
returns=returns,
|
|
177
|
+
data_format=data_format,
|
|
178
|
+
include_masks=include_masks,
|
|
179
|
+
function_type=function_type,
|
|
180
|
+
output_fields=output_fields or None,
|
|
181
|
+
).items() if v is not None
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
# No func was specified, this is an uncalled decorator that will get
|
|
185
|
+
# called later, so the wrapper must be created with the func passed
|
|
186
|
+
# in at that time.
|
|
187
|
+
if func is None:
|
|
188
|
+
def decorate(func: Callable[..., Any]) -> Callable[..., Any]:
|
|
189
|
+
def wrapper(*args: Any, **kwargs: Any) -> Callable[..., Any]:
|
|
190
|
+
return func(*args, **kwargs) # type: ignore
|
|
191
|
+
wrapper._singlestoredb_attrs = _singlestoredb_attrs # type: ignore
|
|
192
|
+
return functools.wraps(func)(wrapper)
|
|
193
|
+
return decorate
|
|
194
|
+
|
|
195
|
+
def wrapper(*args: Any, **kwargs: Any) -> Callable[..., Any]:
|
|
196
|
+
return func(*args, **kwargs) # type: ignore
|
|
197
|
+
|
|
198
|
+
wrapper._singlestoredb_attrs = _singlestoredb_attrs # type: ignore
|
|
199
|
+
|
|
200
|
+
return functools.wraps(func)(wrapper)
|
|
201
|
+
|
|
202
|
+
|
|
21
203
|
def udf(
|
|
22
204
|
func: Optional[Callable[..., Any]] = None,
|
|
23
205
|
*,
|
|
24
206
|
name: Optional[str] = None,
|
|
25
207
|
args: Optional[Union[DataType, List[DataType], Dict[str, DataType]]] = None,
|
|
26
|
-
returns: Optional[str] = None,
|
|
208
|
+
returns: Optional[Union[str, List[DataType], List[type]]] = None,
|
|
27
209
|
data_format: Optional[str] = None,
|
|
28
210
|
include_masks: bool = False,
|
|
29
211
|
) -> Callable[..., Any]:
|
|
@@ -64,79 +246,86 @@ def udf(
|
|
|
64
246
|
Callable
|
|
65
247
|
|
|
66
248
|
"""
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
args
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
raise TypeError(f'unrecognized type for parameter: {item}')
|
|
77
|
-
elif isinstance(args, dict):
|
|
78
|
-
args = dict(args)
|
|
79
|
-
for k, v in list(args.items()):
|
|
80
|
-
if callable(v):
|
|
81
|
-
args[k] = v()
|
|
82
|
-
for item in args.values():
|
|
83
|
-
if not isinstance(item, str):
|
|
84
|
-
raise TypeError(f'unrecognized type for parameter: {item}')
|
|
85
|
-
elif callable(args):
|
|
86
|
-
args = args()
|
|
87
|
-
elif isinstance(args, str):
|
|
88
|
-
args = args
|
|
89
|
-
else:
|
|
90
|
-
raise TypeError(f'unrecognized data type for args: {args}')
|
|
91
|
-
|
|
92
|
-
if returns is None:
|
|
93
|
-
pass
|
|
94
|
-
elif callable(returns):
|
|
95
|
-
returns = returns()
|
|
96
|
-
elif isinstance(returns, str):
|
|
97
|
-
returns = returns
|
|
98
|
-
else:
|
|
99
|
-
raise TypeError(f'unrecognized return type: {returns}')
|
|
100
|
-
|
|
101
|
-
if returns is not None and not isinstance(returns, str):
|
|
102
|
-
raise TypeError(f'unrecognized return type: {returns}')
|
|
249
|
+
return _func(
|
|
250
|
+
func=func,
|
|
251
|
+
name=name,
|
|
252
|
+
args=args,
|
|
253
|
+
returns=returns,
|
|
254
|
+
data_format=data_format,
|
|
255
|
+
include_masks=include_masks,
|
|
256
|
+
function_type='udf',
|
|
257
|
+
)
|
|
103
258
|
|
|
104
|
-
if include_masks and data_format == 'python':
|
|
105
|
-
raise RuntimeError(
|
|
106
|
-
'include_masks is only valid when using '
|
|
107
|
-
'vectors for input parameters',
|
|
108
|
-
)
|
|
109
259
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
returns=returns,
|
|
115
|
-
data_format=data_format,
|
|
116
|
-
include_masks=include_masks,
|
|
117
|
-
).items() if v is not None
|
|
118
|
-
}
|
|
260
|
+
udf.pandas = functools.partial(udf, data_format='pandas') # type: ignore
|
|
261
|
+
udf.polars = functools.partial(udf, data_format='polars') # type: ignore
|
|
262
|
+
udf.arrow = functools.partial(udf, data_format='arrow') # type: ignore
|
|
263
|
+
udf.numpy = functools.partial(udf, data_format='numpy') # type: ignore
|
|
119
264
|
|
|
120
|
-
# No func was specified, this is an uncalled decorator that will get
|
|
121
|
-
# called later, so the wrapper much be created with the func passed
|
|
122
|
-
# in at that time.
|
|
123
|
-
if func is None:
|
|
124
|
-
def decorate(func: Callable[..., Any]) -> Callable[..., Any]:
|
|
125
|
-
def wrapper(*args: Any, **kwargs: Any) -> Callable[..., Any]:
|
|
126
|
-
return func(*args, **kwargs) # type: ignore
|
|
127
|
-
wrapper._singlestoredb_attrs = _singlestoredb_attrs # type: ignore
|
|
128
|
-
return functools.wraps(func)(wrapper)
|
|
129
|
-
return decorate
|
|
130
265
|
|
|
131
|
-
|
|
132
|
-
|
|
266
|
+
def tvf(
|
|
267
|
+
func: Optional[Callable[..., Any]] = None,
|
|
268
|
+
*,
|
|
269
|
+
name: Optional[str] = None,
|
|
270
|
+
args: Optional[Union[DataType, List[DataType], Dict[str, DataType]]] = None,
|
|
271
|
+
returns: Optional[Union[str, List[DataType], List[type]]] = None,
|
|
272
|
+
data_format: Optional[str] = None,
|
|
273
|
+
include_masks: bool = False,
|
|
274
|
+
output_fields: Optional[List[str]] = None,
|
|
275
|
+
) -> Callable[..., Any]:
|
|
276
|
+
"""
|
|
277
|
+
Apply attributes to a TVF.
|
|
133
278
|
|
|
134
|
-
|
|
279
|
+
Parameters
|
|
280
|
+
----------
|
|
281
|
+
func : callable, optional
|
|
282
|
+
The TVF to apply parameters to
|
|
283
|
+
name : str, optional
|
|
284
|
+
The name to use for the TVF in the database
|
|
285
|
+
args : str | Callable | List[str | Callable] | Dict[str, str | Callable], optional
|
|
286
|
+
Specifies the data types of the function arguments. Typically,
|
|
287
|
+
the function data types are derived from the function parameter
|
|
288
|
+
annotations. These annotations can be overridden. If the function
|
|
289
|
+
takes a single type for all parameters, `args` can be set to a
|
|
290
|
+
SQL string describing all parameters. If the function takes more
|
|
291
|
+
than one parameter and all of the parameters are being manually
|
|
292
|
+
defined, a list of SQL strings may be used (one for each parameter).
|
|
293
|
+
A dictionary of SQL strings may be used to specify a parameter type
|
|
294
|
+
for a subset of parameters; the keys are the names of the
|
|
295
|
+
function parameters. Callables may also be used for datatypes. This
|
|
296
|
+
is primarily for using the functions in the ``dtypes`` module that
|
|
297
|
+
are associated with SQL types with all default options (e.g., ``dt.FLOAT``).
|
|
298
|
+
returns : str, optional
|
|
299
|
+
Specifies the return data type of the function. If not specified,
|
|
300
|
+
the type annotation from the function is used.
|
|
301
|
+
data_format : str, optional
|
|
302
|
+
The data format of each parameter: python, pandas, arrow, polars
|
|
303
|
+
include_masks : bool, optional
|
|
304
|
+
Should boolean masks be included with each input parameter to indicate
|
|
305
|
+
which elements are NULL? This is only used when input parameters are
|
|
306
|
+
configured to a vector type (numpy, pandas, polars, arrow).
|
|
307
|
+
output_fields : List[str], optional
|
|
308
|
+
The names of the output fields for the TVF. If not specified, the
|
|
309
|
+
names are generated.
|
|
135
310
|
|
|
136
|
-
|
|
311
|
+
Returns
|
|
312
|
+
-------
|
|
313
|
+
Callable
|
|
137
314
|
|
|
315
|
+
"""
|
|
316
|
+
return _func(
|
|
317
|
+
func=func,
|
|
318
|
+
name=name,
|
|
319
|
+
args=args,
|
|
320
|
+
returns=returns,
|
|
321
|
+
data_format=data_format,
|
|
322
|
+
include_masks=include_masks,
|
|
323
|
+
function_type='tvf',
|
|
324
|
+
output_fields=output_fields,
|
|
325
|
+
)
|
|
138
326
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
327
|
+
|
|
328
|
+
tvf.pandas = functools.partial(tvf, data_format='pandas') # type: ignore
|
|
329
|
+
tvf.polars = functools.partial(tvf, data_format='polars') # type: ignore
|
|
330
|
+
tvf.arrow = functools.partial(tvf, data_format='arrow') # type: ignore
|
|
331
|
+
tvf.numpy = functools.partial(tvf, data_format='numpy') # type: ignore
|
|
@@ -24,6 +24,7 @@ Example
|
|
|
24
24
|
"""
|
|
25
25
|
import argparse
|
|
26
26
|
import asyncio
|
|
27
|
+
import dataclasses
|
|
27
28
|
import importlib.util
|
|
28
29
|
import io
|
|
29
30
|
import itertools
|
|
@@ -136,6 +137,14 @@ def get_func_names(funcs: str) -> List[Tuple[str, str]]:
|
|
|
136
137
|
return out
|
|
137
138
|
|
|
138
139
|
|
|
140
|
+
def as_tuple(x: Any) -> Any:
|
|
141
|
+
if hasattr(x, 'model_fields'):
|
|
142
|
+
return tuple(x.model_fields.values())
|
|
143
|
+
if dataclasses.is_dataclass(x):
|
|
144
|
+
return dataclasses.astuple(x)
|
|
145
|
+
return x
|
|
146
|
+
|
|
147
|
+
|
|
139
148
|
def make_func(
|
|
140
149
|
name: str,
|
|
141
150
|
func: Callable[..., Any],
|
|
@@ -158,32 +167,104 @@ def make_func(
|
|
|
158
167
|
attrs = getattr(func, '_singlestoredb_attrs', {})
|
|
159
168
|
data_format = attrs.get('data_format') or 'python'
|
|
160
169
|
include_masks = attrs.get('include_masks', False)
|
|
170
|
+
function_type = attrs.get('function_type', 'udf').lower()
|
|
161
171
|
info: Dict[str, Any] = {}
|
|
162
172
|
|
|
163
|
-
if
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
+
if function_type == 'tvf':
|
|
174
|
+
if data_format == 'python':
|
|
175
|
+
async def do_func(
|
|
176
|
+
row_ids: Sequence[int],
|
|
177
|
+
rows: Sequence[Sequence[Any]],
|
|
178
|
+
) -> Tuple[
|
|
179
|
+
Sequence[int],
|
|
180
|
+
List[Tuple[Any]],
|
|
181
|
+
]:
|
|
182
|
+
'''Call function on given rows of data.'''
|
|
183
|
+
out_ids: List[int] = []
|
|
184
|
+
out = []
|
|
185
|
+
for i, res in zip(row_ids, func_map(func, rows)):
|
|
186
|
+
out.extend(as_tuple(res))
|
|
187
|
+
out_ids.extend([row_ids[i]] * (len(out)-len(out_ids)))
|
|
188
|
+
return out_ids, out
|
|
189
|
+
|
|
190
|
+
else:
|
|
191
|
+
# Vector formats use the same function wrapper
|
|
192
|
+
async def do_func( # type: ignore
|
|
193
|
+
row_ids: Sequence[int],
|
|
194
|
+
cols: Sequence[Tuple[Sequence[Any], Optional[Sequence[bool]]]],
|
|
195
|
+
) -> Tuple[Sequence[int], List[Tuple[Any, ...]]]:
|
|
196
|
+
'''Call function on given cols of data.'''
|
|
197
|
+
if include_masks:
|
|
198
|
+
out = func(*cols)
|
|
199
|
+
assert isinstance(out, tuple)
|
|
200
|
+
return row_ids, [out]
|
|
201
|
+
|
|
202
|
+
out = []
|
|
203
|
+
res = func(*[x[0] for x in cols])
|
|
204
|
+
rtype = str(type(res)).lower()
|
|
205
|
+
|
|
206
|
+
# Map tables / dataframes to a list of columns
|
|
207
|
+
if 'dataframe' in rtype:
|
|
208
|
+
res = [res[x] for x in res.columns]
|
|
209
|
+
elif 'table' in rtype:
|
|
210
|
+
res = res.columns
|
|
211
|
+
|
|
212
|
+
for vec in res:
|
|
213
|
+
# C extension only supports Python objects as strings
|
|
214
|
+
if data_format == 'numpy' and str(vec.dtype)[:2] in ['<U', '<S']:
|
|
215
|
+
vec = vec.astype(object)
|
|
216
|
+
out.append((vec, None))
|
|
217
|
+
|
|
218
|
+
# NOTE: There is no way to determine which row ID belongs to
|
|
219
|
+
# each result row, so we just have to use the same
|
|
220
|
+
# row ID for all rows in the result.
|
|
221
|
+
if data_format == 'polars':
|
|
222
|
+
import polars as pl
|
|
223
|
+
array_cls = pl.Series
|
|
224
|
+
elif data_format == 'arrow':
|
|
225
|
+
import pyarrow as pa
|
|
226
|
+
array_cls = pa.array
|
|
227
|
+
elif data_format == 'pandas':
|
|
228
|
+
import pandas as pd
|
|
229
|
+
array_cls = pd.Series
|
|
230
|
+
else:
|
|
231
|
+
import numpy as np
|
|
232
|
+
array_cls = np.array
|
|
233
|
+
|
|
234
|
+
return array_cls([row_ids[0]] * len(out[0][0])), out
|
|
173
235
|
|
|
174
236
|
else:
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
237
|
+
if data_format == 'python':
|
|
238
|
+
async def do_func(
|
|
239
|
+
row_ids: Sequence[int],
|
|
240
|
+
rows: Sequence[Sequence[Any]],
|
|
241
|
+
) -> Tuple[
|
|
242
|
+
Sequence[int],
|
|
243
|
+
List[Tuple[Any]],
|
|
244
|
+
]:
|
|
245
|
+
'''Call function on given rows of data.'''
|
|
246
|
+
return row_ids, [as_tuple(x) for x in zip(func_map(func, rows))]
|
|
247
|
+
|
|
248
|
+
else:
|
|
249
|
+
# Vector formats use the same function wrapper
|
|
250
|
+
async def do_func( # type: ignore
|
|
251
|
+
row_ids: Sequence[int],
|
|
252
|
+
cols: Sequence[Tuple[Sequence[Any], Optional[Sequence[bool]]]],
|
|
253
|
+
) -> Tuple[Sequence[int], List[Tuple[Any, ...]]]:
|
|
254
|
+
'''Call function on given cols of data.'''
|
|
255
|
+
if include_masks:
|
|
256
|
+
out = func(*cols)
|
|
257
|
+
assert isinstance(out, tuple)
|
|
258
|
+
return row_ids, [out]
|
|
259
|
+
|
|
260
|
+
out = func(*[x[0] for x in cols])
|
|
261
|
+
|
|
262
|
+
# Multiple return values
|
|
263
|
+
if isinstance(out, tuple):
|
|
264
|
+
return row_ids, [(x, None) for x in out]
|
|
265
|
+
|
|
266
|
+
# Single return value
|
|
267
|
+
return row_ids, [(out, None)]
|
|
187
268
|
|
|
188
269
|
do_func.__name__ = name
|
|
189
270
|
do_func.__doc__ = func.__doc__
|
|
@@ -196,6 +277,9 @@ def make_func(
|
|
|
196
277
|
# Set data format
|
|
197
278
|
info['data_format'] = data_format
|
|
198
279
|
|
|
280
|
+
# Set function type
|
|
281
|
+
info['function_type'] = function_type
|
|
282
|
+
|
|
199
283
|
# Setup argument types for rowdat_1 parser
|
|
200
284
|
colspec = []
|
|
201
285
|
for x in sig['args']:
|
|
@@ -205,11 +289,21 @@ def make_func(
|
|
|
205
289
|
colspec.append((x['name'], rowdat_1_type_map[dtype]))
|
|
206
290
|
info['colspec'] = colspec
|
|
207
291
|
|
|
292
|
+
def parse_return_type(s: str) -> List[str]:
|
|
293
|
+
if s.startswith('tuple['):
|
|
294
|
+
return s[6:-1].split(',')
|
|
295
|
+
if s.startswith('array[tuple['):
|
|
296
|
+
return s[12:-2].split(',')
|
|
297
|
+
return [s]
|
|
298
|
+
|
|
208
299
|
# Setup return type
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
300
|
+
returns = []
|
|
301
|
+
for x in parse_return_type(sig['returns']['dtype']):
|
|
302
|
+
dtype = x.replace('?', '')
|
|
303
|
+
if dtype not in rowdat_1_type_map:
|
|
304
|
+
raise TypeError(f'no data type mapping for {dtype}')
|
|
305
|
+
returns.append(rowdat_1_type_map[dtype])
|
|
306
|
+
info['returns'] = returns
|
|
213
307
|
|
|
214
308
|
return do_func, info
|
|
215
309
|
|
|
@@ -233,7 +327,7 @@ class Application(object):
|
|
|
233
327
|
* Function aliases : <pkg1>.[<func1@alias1,func2@alias2,...]
|
|
234
328
|
* Multiple packages : <pkg1>.<func1>:<pkg2>.<func2>
|
|
235
329
|
app_mode : str, optional
|
|
236
|
-
The mode of operation for the application: remote or collocated
|
|
330
|
+
The mode of operation for the application: remote, managed, or collocated
|
|
237
331
|
url : str, optional
|
|
238
332
|
The URL of the function API
|
|
239
333
|
data_format : str, optional
|