singlestoredb 1.12.4__cp38-abi3-win32.whl → 1.13.1__cp38-abi3-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of singlestoredb might be problematic. Click here for more details.
- _singlestoredb_accel.pyd +0 -0
- singlestoredb/__init__.py +1 -1
- singlestoredb/ai/__init__.py +1 -0
- singlestoredb/ai/chat.py +26 -0
- singlestoredb/ai/embeddings.py +18 -15
- singlestoredb/apps/__init__.py +1 -0
- singlestoredb/apps/_config.py +6 -0
- singlestoredb/apps/_connection_info.py +8 -0
- singlestoredb/apps/_python_udfs.py +85 -0
- singlestoredb/config.py +14 -2
- singlestoredb/functions/__init__.py +15 -1
- singlestoredb/functions/decorator.py +102 -252
- singlestoredb/functions/dtypes.py +545 -198
- singlestoredb/functions/ext/asgi.py +421 -129
- singlestoredb/functions/ext/json.py +29 -36
- singlestoredb/functions/ext/mmap.py +1 -1
- singlestoredb/functions/ext/rowdat_1.py +50 -70
- singlestoredb/functions/signature.py +816 -144
- singlestoredb/functions/typing.py +41 -0
- singlestoredb/functions/utils.py +421 -0
- singlestoredb/http/connection.py +3 -1
- singlestoredb/management/inference_api.py +101 -0
- singlestoredb/management/manager.py +6 -1
- singlestoredb/management/organization.py +17 -0
- singlestoredb/management/utils.py +2 -2
- singlestoredb/tests/ext_funcs/__init__.py +476 -237
- singlestoredb/tests/test_ext_func.py +192 -3
- singlestoredb/tests/test_management.py +5 -5
- singlestoredb/tests/test_udf.py +101 -131
- singlestoredb/tests/test_udf_returns.py +459 -0
- {singlestoredb-1.12.4.dist-info → singlestoredb-1.13.1.dist-info}/METADATA +2 -1
- {singlestoredb-1.12.4.dist-info → singlestoredb-1.13.1.dist-info}/RECORD +36 -30
- {singlestoredb-1.12.4.dist-info → singlestoredb-1.13.1.dist-info}/LICENSE +0 -0
- {singlestoredb-1.12.4.dist-info → singlestoredb-1.13.1.dist-info}/WHEEL +0 -0
- {singlestoredb-1.12.4.dist-info → singlestoredb-1.13.1.dist-info}/entry_points.txt +0 -0
- {singlestoredb-1.12.4.dist-info → singlestoredb-1.13.1.dist-info}/top_level.txt +0 -0
|
@@ -26,6 +26,7 @@ import argparse
|
|
|
26
26
|
import asyncio
|
|
27
27
|
import dataclasses
|
|
28
28
|
import importlib.util
|
|
29
|
+
import inspect
|
|
29
30
|
import io
|
|
30
31
|
import itertools
|
|
31
32
|
import json
|
|
@@ -36,6 +37,7 @@ import secrets
|
|
|
36
37
|
import sys
|
|
37
38
|
import tempfile
|
|
38
39
|
import textwrap
|
|
40
|
+
import typing
|
|
39
41
|
import urllib
|
|
40
42
|
import zipfile
|
|
41
43
|
import zipimport
|
|
@@ -62,6 +64,8 @@ from ...config import get_option
|
|
|
62
64
|
from ...mysql.constants import FIELD_TYPE as ft
|
|
63
65
|
from ..signature import get_signature
|
|
64
66
|
from ..signature import signature_to_sql
|
|
67
|
+
from ..typing import Masked
|
|
68
|
+
from ..typing import Table
|
|
65
69
|
|
|
66
70
|
try:
|
|
67
71
|
import cloudpickle
|
|
@@ -148,20 +152,286 @@ def as_tuple(x: Any) -> Any:
|
|
|
148
152
|
if has_pydantic and isinstance(x, BaseModel):
|
|
149
153
|
return tuple(x.model_dump().values())
|
|
150
154
|
if dataclasses.is_dataclass(x):
|
|
151
|
-
return dataclasses.astuple(x)
|
|
152
|
-
|
|
155
|
+
return dataclasses.astuple(x) # type: ignore
|
|
156
|
+
if isinstance(x, dict):
|
|
157
|
+
return tuple(x.values())
|
|
158
|
+
return tuple(x)
|
|
153
159
|
|
|
154
160
|
|
|
155
161
|
def as_list_of_tuples(x: Any) -> Any:
|
|
156
162
|
"""Convert object to a list of tuples."""
|
|
163
|
+
if isinstance(x, Table):
|
|
164
|
+
x = x[0]
|
|
157
165
|
if isinstance(x, (list, tuple)) and len(x) > 0:
|
|
166
|
+
if isinstance(x[0], (list, tuple)):
|
|
167
|
+
return x
|
|
158
168
|
if has_pydantic and isinstance(x[0], BaseModel):
|
|
159
169
|
return [tuple(y.model_dump().values()) for y in x]
|
|
160
170
|
if dataclasses.is_dataclass(x[0]):
|
|
161
171
|
return [dataclasses.astuple(y) for y in x]
|
|
172
|
+
if isinstance(x[0], dict):
|
|
173
|
+
return [tuple(y.values()) for y in x]
|
|
174
|
+
return [(y,) for y in x]
|
|
162
175
|
return x
|
|
163
176
|
|
|
164
177
|
|
|
178
|
+
def get_dataframe_columns(df: Any) -> List[Any]:
|
|
179
|
+
"""Return columns of data from a dataframe/table."""
|
|
180
|
+
if isinstance(df, Table):
|
|
181
|
+
if len(df) == 1:
|
|
182
|
+
df = df[0]
|
|
183
|
+
else:
|
|
184
|
+
return list(df)
|
|
185
|
+
|
|
186
|
+
if isinstance(df, Masked):
|
|
187
|
+
return [df]
|
|
188
|
+
|
|
189
|
+
if isinstance(df, tuple):
|
|
190
|
+
return list(df)
|
|
191
|
+
|
|
192
|
+
rtype = str(type(df)).lower()
|
|
193
|
+
if 'dataframe' in rtype:
|
|
194
|
+
return [df[x] for x in df.columns]
|
|
195
|
+
elif 'table' in rtype:
|
|
196
|
+
return df.columns
|
|
197
|
+
elif 'series' in rtype:
|
|
198
|
+
return [df]
|
|
199
|
+
elif 'array' in rtype:
|
|
200
|
+
return [df]
|
|
201
|
+
elif 'tuple' in rtype:
|
|
202
|
+
return list(df)
|
|
203
|
+
|
|
204
|
+
raise TypeError(
|
|
205
|
+
'Unsupported data type for dataframe columns: '
|
|
206
|
+
f'{rtype}',
|
|
207
|
+
)
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def get_array_class(data_format: str) -> Callable[..., Any]:
|
|
211
|
+
"""
|
|
212
|
+
Get the array class for the current data format.
|
|
213
|
+
|
|
214
|
+
"""
|
|
215
|
+
if data_format == 'polars':
|
|
216
|
+
import polars as pl
|
|
217
|
+
array_cls = pl.Series
|
|
218
|
+
elif data_format == 'arrow':
|
|
219
|
+
import pyarrow as pa
|
|
220
|
+
array_cls = pa.array
|
|
221
|
+
elif data_format == 'pandas':
|
|
222
|
+
import pandas as pd
|
|
223
|
+
array_cls = pd.Series
|
|
224
|
+
else:
|
|
225
|
+
import numpy as np
|
|
226
|
+
array_cls = np.array
|
|
227
|
+
return array_cls
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def get_masked_params(func: Callable[..., Any]) -> List[bool]:
|
|
231
|
+
"""
|
|
232
|
+
Get the list of masked parameters for the function.
|
|
233
|
+
|
|
234
|
+
Parameters
|
|
235
|
+
----------
|
|
236
|
+
func : Callable
|
|
237
|
+
The function to call as the endpoint
|
|
238
|
+
|
|
239
|
+
Returns
|
|
240
|
+
-------
|
|
241
|
+
List[bool]
|
|
242
|
+
Boolean list of masked parameters
|
|
243
|
+
|
|
244
|
+
"""
|
|
245
|
+
params = inspect.signature(func).parameters
|
|
246
|
+
return [typing.get_origin(x.annotation) is Masked for x in params.values()]
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def build_tuple(x: Any) -> Any:
|
|
250
|
+
"""Convert object to tuple."""
|
|
251
|
+
return tuple(x) if isinstance(x, Masked) else (x, None)
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def build_udf_endpoint(
|
|
255
|
+
func: Callable[..., Any],
|
|
256
|
+
returns_data_format: str,
|
|
257
|
+
) -> Callable[..., Any]:
|
|
258
|
+
"""
|
|
259
|
+
Build a UDF endpoint for scalar / list types (row-based).
|
|
260
|
+
|
|
261
|
+
Parameters
|
|
262
|
+
----------
|
|
263
|
+
func : Callable
|
|
264
|
+
The function to call as the endpoint
|
|
265
|
+
returns_data_format : str
|
|
266
|
+
The format of the return values
|
|
267
|
+
|
|
268
|
+
Returns
|
|
269
|
+
-------
|
|
270
|
+
Callable
|
|
271
|
+
The function endpoint
|
|
272
|
+
|
|
273
|
+
"""
|
|
274
|
+
if returns_data_format in ['scalar', 'list']:
|
|
275
|
+
|
|
276
|
+
async def do_func(
|
|
277
|
+
row_ids: Sequence[int],
|
|
278
|
+
rows: Sequence[Sequence[Any]],
|
|
279
|
+
) -> Tuple[Sequence[int], List[Tuple[Any, ...]]]:
|
|
280
|
+
'''Call function on given rows of data.'''
|
|
281
|
+
return row_ids, [as_tuple(x) for x in zip(func_map(func, rows))]
|
|
282
|
+
|
|
283
|
+
return do_func
|
|
284
|
+
|
|
285
|
+
return build_vector_udf_endpoint(func, returns_data_format)
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def build_vector_udf_endpoint(
|
|
289
|
+
func: Callable[..., Any],
|
|
290
|
+
returns_data_format: str,
|
|
291
|
+
) -> Callable[..., Any]:
|
|
292
|
+
"""
|
|
293
|
+
Build a UDF endpoint for vector formats (column-based).
|
|
294
|
+
|
|
295
|
+
Parameters
|
|
296
|
+
----------
|
|
297
|
+
func : Callable
|
|
298
|
+
The function to call as the endpoint
|
|
299
|
+
returns_data_format : str
|
|
300
|
+
The format of the return values
|
|
301
|
+
|
|
302
|
+
Returns
|
|
303
|
+
-------
|
|
304
|
+
Callable
|
|
305
|
+
The function endpoint
|
|
306
|
+
|
|
307
|
+
"""
|
|
308
|
+
masks = get_masked_params(func)
|
|
309
|
+
array_cls = get_array_class(returns_data_format)
|
|
310
|
+
|
|
311
|
+
async def do_func(
|
|
312
|
+
row_ids: Sequence[int],
|
|
313
|
+
cols: Sequence[Tuple[Sequence[Any], Optional[Sequence[bool]]]],
|
|
314
|
+
) -> Tuple[
|
|
315
|
+
Sequence[int],
|
|
316
|
+
List[Tuple[Sequence[Any], Optional[Sequence[bool]]]],
|
|
317
|
+
]:
|
|
318
|
+
'''Call function on given columns of data.'''
|
|
319
|
+
row_ids = array_cls(row_ids)
|
|
320
|
+
|
|
321
|
+
# Call the function with `cols` as the function parameters
|
|
322
|
+
if cols and cols[0]:
|
|
323
|
+
out = func(*[x if m else x[0] for x, m in zip(cols, masks)])
|
|
324
|
+
else:
|
|
325
|
+
out = func()
|
|
326
|
+
|
|
327
|
+
# Single masked value
|
|
328
|
+
if isinstance(out, Masked):
|
|
329
|
+
return row_ids, [tuple(out)]
|
|
330
|
+
|
|
331
|
+
# Multiple return values
|
|
332
|
+
if isinstance(out, tuple):
|
|
333
|
+
return row_ids, [build_tuple(x) for x in out]
|
|
334
|
+
|
|
335
|
+
# Single return value
|
|
336
|
+
return row_ids, [(out, None)]
|
|
337
|
+
|
|
338
|
+
return do_func
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def build_tvf_endpoint(
|
|
342
|
+
func: Callable[..., Any],
|
|
343
|
+
returns_data_format: str,
|
|
344
|
+
) -> Callable[..., Any]:
|
|
345
|
+
"""
|
|
346
|
+
Build a TVF endpoint for scalar / list types (row-based).
|
|
347
|
+
|
|
348
|
+
Parameters
|
|
349
|
+
----------
|
|
350
|
+
func : Callable
|
|
351
|
+
The function to call as the endpoint
|
|
352
|
+
returns_data_format : str
|
|
353
|
+
The format of the return values
|
|
354
|
+
|
|
355
|
+
Returns
|
|
356
|
+
-------
|
|
357
|
+
Callable
|
|
358
|
+
The function endpoint
|
|
359
|
+
|
|
360
|
+
"""
|
|
361
|
+
if returns_data_format in ['scalar', 'list']:
|
|
362
|
+
|
|
363
|
+
async def do_func(
|
|
364
|
+
row_ids: Sequence[int],
|
|
365
|
+
rows: Sequence[Sequence[Any]],
|
|
366
|
+
) -> Tuple[Sequence[int], List[Tuple[Any, ...]]]:
|
|
367
|
+
'''Call function on given rows of data.'''
|
|
368
|
+
out_ids: List[int] = []
|
|
369
|
+
out = []
|
|
370
|
+
# Call function on each row of data
|
|
371
|
+
for i, res in zip(row_ids, func_map(func, rows)):
|
|
372
|
+
out.extend(as_list_of_tuples(res))
|
|
373
|
+
out_ids.extend([row_ids[i]] * (len(out)-len(out_ids)))
|
|
374
|
+
return out_ids, out
|
|
375
|
+
|
|
376
|
+
return do_func
|
|
377
|
+
|
|
378
|
+
return build_vector_tvf_endpoint(func, returns_data_format)
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
def build_vector_tvf_endpoint(
|
|
382
|
+
func: Callable[..., Any],
|
|
383
|
+
returns_data_format: str,
|
|
384
|
+
) -> Callable[..., Any]:
|
|
385
|
+
"""
|
|
386
|
+
Build a TVF endpoint for vector formats (column-based).
|
|
387
|
+
|
|
388
|
+
Parameters
|
|
389
|
+
----------
|
|
390
|
+
func : Callable
|
|
391
|
+
The function to call as the endpoint
|
|
392
|
+
returns_data_format : str
|
|
393
|
+
The format of the return values
|
|
394
|
+
|
|
395
|
+
Returns
|
|
396
|
+
-------
|
|
397
|
+
Callable
|
|
398
|
+
The function endpoint
|
|
399
|
+
|
|
400
|
+
"""
|
|
401
|
+
masks = get_masked_params(func)
|
|
402
|
+
array_cls = get_array_class(returns_data_format)
|
|
403
|
+
|
|
404
|
+
async def do_func(
|
|
405
|
+
row_ids: Sequence[int],
|
|
406
|
+
cols: Sequence[Tuple[Sequence[Any], Optional[Sequence[bool]]]],
|
|
407
|
+
) -> Tuple[
|
|
408
|
+
Sequence[int],
|
|
409
|
+
List[Tuple[Sequence[Any], Optional[Sequence[bool]]]],
|
|
410
|
+
]:
|
|
411
|
+
'''Call function on given columns of data.'''
|
|
412
|
+
# NOTE: There is no way to determine which row ID belongs to
|
|
413
|
+
# each result row, so we just have to use the same
|
|
414
|
+
# row ID for all rows in the result.
|
|
415
|
+
|
|
416
|
+
# Call function on each column of data
|
|
417
|
+
if cols and cols[0]:
|
|
418
|
+
res = get_dataframe_columns(
|
|
419
|
+
func(*[x if m else x[0] for x, m in zip(cols, masks)]),
|
|
420
|
+
)
|
|
421
|
+
else:
|
|
422
|
+
res = get_dataframe_columns(func())
|
|
423
|
+
|
|
424
|
+
# Generate row IDs
|
|
425
|
+
if isinstance(res[0], Masked):
|
|
426
|
+
row_ids = array_cls([row_ids[0]] * len(res[0][0]))
|
|
427
|
+
else:
|
|
428
|
+
row_ids = array_cls([row_ids[0]] * len(res[0]))
|
|
429
|
+
|
|
430
|
+
return row_ids, [build_tuple(x) for x in res]
|
|
431
|
+
|
|
432
|
+
return do_func
|
|
433
|
+
|
|
434
|
+
|
|
165
435
|
def make_func(
|
|
166
436
|
name: str,
|
|
167
437
|
func: Callable[..., Any],
|
|
@@ -181,118 +451,28 @@ def make_func(
|
|
|
181
451
|
(Callable, Dict[str, Any])
|
|
182
452
|
|
|
183
453
|
"""
|
|
184
|
-
attrs = getattr(func, '_singlestoredb_attrs', {})
|
|
185
|
-
data_format = attrs.get('data_format') or 'python'
|
|
186
|
-
include_masks = attrs.get('include_masks', False)
|
|
187
|
-
function_type = attrs.get('function_type', 'udf').lower()
|
|
188
454
|
info: Dict[str, Any] = {}
|
|
189
455
|
|
|
190
|
-
|
|
191
|
-
if data_format == 'python':
|
|
192
|
-
async def do_func(
|
|
193
|
-
row_ids: Sequence[int],
|
|
194
|
-
rows: Sequence[Sequence[Any]],
|
|
195
|
-
) -> Tuple[
|
|
196
|
-
Sequence[int],
|
|
197
|
-
List[Tuple[Any]],
|
|
198
|
-
]:
|
|
199
|
-
'''Call function on given rows of data.'''
|
|
200
|
-
out_ids: List[int] = []
|
|
201
|
-
out = []
|
|
202
|
-
for i, res in zip(row_ids, func_map(func, rows)):
|
|
203
|
-
out.extend(as_list_of_tuples(res))
|
|
204
|
-
out_ids.extend([row_ids[i]] * (len(out)-len(out_ids)))
|
|
205
|
-
return out_ids, out
|
|
456
|
+
sig = get_signature(func, func_name=name)
|
|
206
457
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
row_ids: Sequence[int],
|
|
211
|
-
cols: Sequence[Tuple[Sequence[Any], Optional[Sequence[bool]]]],
|
|
212
|
-
) -> Tuple[Sequence[int], List[Tuple[Any, ...]]]:
|
|
213
|
-
'''Call function on given cols of data.'''
|
|
214
|
-
if include_masks:
|
|
215
|
-
out = func(*cols)
|
|
216
|
-
assert isinstance(out, tuple)
|
|
217
|
-
return row_ids, [out]
|
|
218
|
-
|
|
219
|
-
out = []
|
|
220
|
-
res = func(*[x[0] for x in cols])
|
|
221
|
-
rtype = str(type(res)).lower()
|
|
222
|
-
|
|
223
|
-
# Map tables / dataframes to a list of columns
|
|
224
|
-
if 'dataframe' in rtype:
|
|
225
|
-
res = [res[x] for x in res.columns]
|
|
226
|
-
elif 'table' in rtype:
|
|
227
|
-
res = res.columns
|
|
228
|
-
|
|
229
|
-
for vec in res:
|
|
230
|
-
# C extension only supports Python objects as strings
|
|
231
|
-
if data_format == 'numpy' and str(vec.dtype)[:2] in ['<U', '<S']:
|
|
232
|
-
vec = vec.astype(object)
|
|
233
|
-
out.append((vec, None))
|
|
234
|
-
|
|
235
|
-
# NOTE: There is no way to determine which row ID belongs to
|
|
236
|
-
# each result row, so we just have to use the same
|
|
237
|
-
# row ID for all rows in the result.
|
|
238
|
-
if data_format == 'polars':
|
|
239
|
-
import polars as pl
|
|
240
|
-
array_cls = pl.Series
|
|
241
|
-
elif data_format == 'arrow':
|
|
242
|
-
import pyarrow as pa
|
|
243
|
-
array_cls = pa.array
|
|
244
|
-
elif data_format == 'pandas':
|
|
245
|
-
import pandas as pd
|
|
246
|
-
array_cls = pd.Series
|
|
247
|
-
else:
|
|
248
|
-
import numpy as np
|
|
249
|
-
array_cls = np.array
|
|
250
|
-
|
|
251
|
-
return array_cls([row_ids[0]] * len(out[0][0])), out
|
|
458
|
+
function_type = sig.get('function_type', 'udf')
|
|
459
|
+
args_data_format = sig.get('args_data_format', 'scalar')
|
|
460
|
+
returns_data_format = sig.get('returns_data_format', 'scalar')
|
|
252
461
|
|
|
462
|
+
if function_type == 'tvf':
|
|
463
|
+
do_func = build_tvf_endpoint(func, returns_data_format)
|
|
253
464
|
else:
|
|
254
|
-
|
|
255
|
-
async def do_func(
|
|
256
|
-
row_ids: Sequence[int],
|
|
257
|
-
rows: Sequence[Sequence[Any]],
|
|
258
|
-
) -> Tuple[
|
|
259
|
-
Sequence[int],
|
|
260
|
-
List[Tuple[Any]],
|
|
261
|
-
]:
|
|
262
|
-
'''Call function on given rows of data.'''
|
|
263
|
-
return row_ids, [as_tuple(x) for x in zip(func_map(func, rows))]
|
|
264
|
-
|
|
265
|
-
else:
|
|
266
|
-
# Vector formats use the same function wrapper
|
|
267
|
-
async def do_func( # type: ignore
|
|
268
|
-
row_ids: Sequence[int],
|
|
269
|
-
cols: Sequence[Tuple[Sequence[Any], Optional[Sequence[bool]]]],
|
|
270
|
-
) -> Tuple[Sequence[int], List[Tuple[Any, ...]]]:
|
|
271
|
-
'''Call function on given cols of data.'''
|
|
272
|
-
if include_masks:
|
|
273
|
-
out = func(*cols)
|
|
274
|
-
assert isinstance(out, tuple)
|
|
275
|
-
return row_ids, [out]
|
|
276
|
-
|
|
277
|
-
out = func(*[x[0] for x in cols])
|
|
278
|
-
|
|
279
|
-
# Multiple return values
|
|
280
|
-
if isinstance(out, tuple):
|
|
281
|
-
return row_ids, [(x, None) for x in out]
|
|
282
|
-
|
|
283
|
-
# Single return value
|
|
284
|
-
return row_ids, [(out, None)]
|
|
465
|
+
do_func = build_udf_endpoint(func, returns_data_format)
|
|
285
466
|
|
|
286
467
|
do_func.__name__ = name
|
|
287
468
|
do_func.__doc__ = func.__doc__
|
|
288
469
|
|
|
289
|
-
sig = get_signature(func, name=name)
|
|
290
|
-
|
|
291
470
|
# Store signature for generating CREATE FUNCTION calls
|
|
292
471
|
info['signature'] = sig
|
|
293
472
|
|
|
294
473
|
# Set data format
|
|
295
|
-
info['
|
|
474
|
+
info['args_data_format'] = args_data_format
|
|
475
|
+
info['returns_data_format'] = returns_data_format
|
|
296
476
|
|
|
297
477
|
# Set function type
|
|
298
478
|
info['function_type'] = function_type
|
|
@@ -306,20 +486,13 @@ def make_func(
|
|
|
306
486
|
colspec.append((x['name'], rowdat_1_type_map[dtype]))
|
|
307
487
|
info['colspec'] = colspec
|
|
308
488
|
|
|
309
|
-
def parse_return_type(s: str) -> List[str]:
|
|
310
|
-
if s.startswith('tuple['):
|
|
311
|
-
return s[6:-1].split(',')
|
|
312
|
-
if s.startswith('array[tuple['):
|
|
313
|
-
return s[12:-2].split(',')
|
|
314
|
-
return [s]
|
|
315
|
-
|
|
316
489
|
# Setup return type
|
|
317
490
|
returns = []
|
|
318
|
-
for x in
|
|
319
|
-
dtype = x.replace('?', '')
|
|
491
|
+
for x in sig['returns']:
|
|
492
|
+
dtype = x['dtype'].replace('?', '')
|
|
320
493
|
if dtype not in rowdat_1_type_map:
|
|
321
494
|
raise TypeError(f'no data type mapping for {dtype}')
|
|
322
|
-
returns.append(rowdat_1_type_map[dtype])
|
|
495
|
+
returns.append((x['name'], rowdat_1_type_map[dtype]))
|
|
323
496
|
info['returns'] = returns
|
|
324
497
|
|
|
325
498
|
return do_func, info
|
|
@@ -371,6 +544,13 @@ class Application(object):
|
|
|
371
544
|
headers=[(b'content-type', b'text/plain')],
|
|
372
545
|
)
|
|
373
546
|
|
|
547
|
+
# Error response start
|
|
548
|
+
error_response_dict: Dict[str, Any] = dict(
|
|
549
|
+
type='http.response.start',
|
|
550
|
+
status=401,
|
|
551
|
+
headers=[(b'content-type', b'text/plain')],
|
|
552
|
+
)
|
|
553
|
+
|
|
374
554
|
# JSON response start
|
|
375
555
|
json_response_dict: Dict[str, Any] = dict(
|
|
376
556
|
type='http.response.start',
|
|
@@ -405,7 +585,12 @@ class Application(object):
|
|
|
405
585
|
|
|
406
586
|
# Data format + version handlers
|
|
407
587
|
handlers = {
|
|
408
|
-
(b'application/octet-stream', b'1.0', '
|
|
588
|
+
(b'application/octet-stream', b'1.0', 'scalar'): dict(
|
|
589
|
+
load=rowdat_1.load,
|
|
590
|
+
dump=rowdat_1.dump,
|
|
591
|
+
response=rowdat_1_response_dict,
|
|
592
|
+
),
|
|
593
|
+
(b'application/octet-stream', b'1.0', 'list'): dict(
|
|
409
594
|
load=rowdat_1.load,
|
|
410
595
|
dump=rowdat_1.dump,
|
|
411
596
|
response=rowdat_1_response_dict,
|
|
@@ -430,7 +615,12 @@ class Application(object):
|
|
|
430
615
|
dump=rowdat_1.dump_arrow,
|
|
431
616
|
response=rowdat_1_response_dict,
|
|
432
617
|
),
|
|
433
|
-
(b'application/json', b'1.0', '
|
|
618
|
+
(b'application/json', b'1.0', 'scalar'): dict(
|
|
619
|
+
load=jdata.load,
|
|
620
|
+
dump=jdata.dump,
|
|
621
|
+
response=json_response_dict,
|
|
622
|
+
),
|
|
623
|
+
(b'application/json', b'1.0', 'list'): dict(
|
|
434
624
|
load=jdata.load,
|
|
435
625
|
dump=jdata.dump,
|
|
436
626
|
response=json_response_dict,
|
|
@@ -455,7 +645,7 @@ class Application(object):
|
|
|
455
645
|
dump=jdata.dump_arrow,
|
|
456
646
|
response=json_response_dict,
|
|
457
647
|
),
|
|
458
|
-
(b'application/vnd.apache.arrow.file', b'1.0', '
|
|
648
|
+
(b'application/vnd.apache.arrow.file', b'1.0', 'scalar'): dict(
|
|
459
649
|
load=arrow.load,
|
|
460
650
|
dump=arrow.dump,
|
|
461
651
|
response=arrow_response_dict,
|
|
@@ -485,6 +675,7 @@ class Application(object):
|
|
|
485
675
|
# Valid URL paths
|
|
486
676
|
invoke_path = ('invoke',)
|
|
487
677
|
show_create_function_path = ('show', 'create_function')
|
|
678
|
+
show_function_info_path = ('show', 'function_info')
|
|
488
679
|
|
|
489
680
|
def __init__(
|
|
490
681
|
self,
|
|
@@ -505,6 +696,8 @@ class Application(object):
|
|
|
505
696
|
link_name: Optional[str] = get_option('external_function.link_name'),
|
|
506
697
|
link_config: Optional[Dict[str, Any]] = None,
|
|
507
698
|
link_credentials: Optional[Dict[str, Any]] = None,
|
|
699
|
+
name_prefix: str = get_option('external_function.name_prefix'),
|
|
700
|
+
name_suffix: str = get_option('external_function.name_suffix'),
|
|
508
701
|
) -> None:
|
|
509
702
|
if link_name and (link_config or link_credentials):
|
|
510
703
|
raise ValueError(
|
|
@@ -561,6 +754,7 @@ class Application(object):
|
|
|
561
754
|
if not hasattr(x, '_singlestoredb_attrs'):
|
|
562
755
|
continue
|
|
563
756
|
name = x._singlestoredb_attrs.get('name', x.__name__)
|
|
757
|
+
name = f'{name_prefix}{name}{name_suffix}'
|
|
564
758
|
external_functions[x.__name__] = x
|
|
565
759
|
func, info = make_func(name, x)
|
|
566
760
|
endpoints[name.encode('utf-8')] = func, info
|
|
@@ -576,6 +770,7 @@ class Application(object):
|
|
|
576
770
|
# Add endpoint for each exported function
|
|
577
771
|
for name, alias in get_func_names(func_names):
|
|
578
772
|
item = getattr(pkg, name)
|
|
773
|
+
alias = f'{name_prefix}{name}{name_suffix}'
|
|
579
774
|
external_functions[name] = item
|
|
580
775
|
func, info = make_func(alias, item)
|
|
581
776
|
endpoints[alias.encode('utf-8')] = func, info
|
|
@@ -588,6 +783,7 @@ class Application(object):
|
|
|
588
783
|
if not hasattr(x, '_singlestoredb_attrs'):
|
|
589
784
|
continue
|
|
590
785
|
name = x._singlestoredb_attrs.get('name', x.__name__)
|
|
786
|
+
name = f'{name_prefix}{name}{name_suffix}'
|
|
591
787
|
external_functions[x.__name__] = x
|
|
592
788
|
func, info = make_func(name, x)
|
|
593
789
|
endpoints[name.encode('utf-8')] = func, info
|
|
@@ -595,6 +791,7 @@ class Application(object):
|
|
|
595
791
|
else:
|
|
596
792
|
alias = funcs.__name__
|
|
597
793
|
external_functions[funcs.__name__] = funcs
|
|
794
|
+
alias = f'{name_prefix}{alias}{name_suffix}'
|
|
598
795
|
func, info = make_func(alias, funcs)
|
|
599
796
|
endpoints[alias.encode('utf-8')] = func, info
|
|
600
797
|
|
|
@@ -648,7 +845,8 @@ class Application(object):
|
|
|
648
845
|
|
|
649
846
|
# Call the endpoint
|
|
650
847
|
if method == 'POST' and func is not None and path == self.invoke_path:
|
|
651
|
-
|
|
848
|
+
args_data_format = func_info['args_data_format']
|
|
849
|
+
returns_data_format = func_info['returns_data_format']
|
|
652
850
|
data = []
|
|
653
851
|
more_body = True
|
|
654
852
|
while more_body:
|
|
@@ -657,17 +855,24 @@ class Application(object):
|
|
|
657
855
|
more_body = request.get('more_body', False)
|
|
658
856
|
|
|
659
857
|
data_version = headers.get(b's2-ef-version', b'')
|
|
660
|
-
input_handler = self.handlers[(content_type, data_version,
|
|
661
|
-
output_handler = self.handlers[(accepts, data_version,
|
|
858
|
+
input_handler = self.handlers[(content_type, data_version, args_data_format)]
|
|
859
|
+
output_handler = self.handlers[(accepts, data_version, returns_data_format)]
|
|
662
860
|
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
861
|
+
try:
|
|
862
|
+
out = await func(
|
|
863
|
+
*input_handler['load']( # type: ignore
|
|
864
|
+
func_info['colspec'], b''.join(data),
|
|
865
|
+
),
|
|
866
|
+
)
|
|
867
|
+
body = output_handler['dump'](
|
|
868
|
+
[x[1] for x in func_info['returns']], *out, # type: ignore
|
|
869
|
+
)
|
|
870
|
+
await send(output_handler['response'])
|
|
669
871
|
|
|
670
|
-
|
|
872
|
+
except Exception as e:
|
|
873
|
+
logging.exception('Error in function call')
|
|
874
|
+
body = f'[{type(e).__name__}] {str(e).strip()}'.encode('utf-8')
|
|
875
|
+
await send(self.error_response_dict)
|
|
671
876
|
|
|
672
877
|
# Handle api reflection
|
|
673
878
|
elif method == 'GET' and path == self.show_create_function_path:
|
|
@@ -688,6 +893,12 @@ class Application(object):
|
|
|
688
893
|
|
|
689
894
|
await send(self.text_response_dict)
|
|
690
895
|
|
|
896
|
+
# Return function info
|
|
897
|
+
elif method == 'GET' and (path == self.show_function_info_path or not path):
|
|
898
|
+
functions = self.get_function_info()
|
|
899
|
+
body = json.dumps(dict(functions=functions)).encode('utf-8')
|
|
900
|
+
await send(self.text_response_dict)
|
|
901
|
+
|
|
691
902
|
# Path not found
|
|
692
903
|
else:
|
|
693
904
|
body = b''
|
|
@@ -725,21 +936,78 @@ class Application(object):
|
|
|
725
936
|
"""Locate all current functions and links belonging to this app."""
|
|
726
937
|
funcs, links = set(), set()
|
|
727
938
|
cur.execute('SHOW FUNCTIONS')
|
|
728
|
-
for
|
|
939
|
+
for row in list(cur):
|
|
940
|
+
name, ftype, link = row[0], row[1], row[-1]
|
|
729
941
|
# Only look at external functions
|
|
730
942
|
if 'external' not in ftype.lower():
|
|
731
943
|
continue
|
|
732
944
|
# See if function URL matches url
|
|
733
945
|
cur.execute(f'SHOW CREATE FUNCTION `{name}`')
|
|
734
946
|
for fname, _, code, *_ in list(cur):
|
|
735
|
-
m = re.search(r" (?:\w+) SERVICE '([^']+)'", code)
|
|
947
|
+
m = re.search(r" (?:\w+) (?:SERVICE|MANAGED) '([^']+)'", code)
|
|
736
948
|
if m and m.group(1) == self.url:
|
|
737
949
|
funcs.add(fname)
|
|
738
950
|
if link and re.match(r'^py_ext_func_link_\S{14}$', link):
|
|
739
951
|
links.add(link)
|
|
740
952
|
return funcs, links
|
|
741
953
|
|
|
742
|
-
def
|
|
954
|
+
def get_function_info(
|
|
955
|
+
self,
|
|
956
|
+
func_name: Optional[str] = None,
|
|
957
|
+
) -> Dict[str, Any]:
|
|
958
|
+
"""
|
|
959
|
+
Return the functions and function signature information.
|
|
960
|
+
Returns
|
|
961
|
+
-------
|
|
962
|
+
Dict[str, Any]
|
|
963
|
+
"""
|
|
964
|
+
functions = {}
|
|
965
|
+
no_default = object()
|
|
966
|
+
|
|
967
|
+
for key, (_, info) in self.endpoints.items():
|
|
968
|
+
if not func_name or key == func_name:
|
|
969
|
+
sig = info['signature']
|
|
970
|
+
args = []
|
|
971
|
+
|
|
972
|
+
# Function arguments
|
|
973
|
+
for a in sig.get('args', []):
|
|
974
|
+
dtype = a['dtype']
|
|
975
|
+
nullable = '?' in dtype
|
|
976
|
+
args.append(
|
|
977
|
+
dict(
|
|
978
|
+
name=a['name'],
|
|
979
|
+
dtype=dtype.replace('?', ''),
|
|
980
|
+
nullable=nullable,
|
|
981
|
+
),
|
|
982
|
+
)
|
|
983
|
+
if a.get('default', no_default) is not no_default:
|
|
984
|
+
args[-1]['default'] = a['default']
|
|
985
|
+
|
|
986
|
+
# Return values
|
|
987
|
+
ret = sig.get('returns', [])
|
|
988
|
+
returns = []
|
|
989
|
+
|
|
990
|
+
for a in ret:
|
|
991
|
+
dtype = a['dtype']
|
|
992
|
+
nullable = '?' in dtype
|
|
993
|
+
returns.append(
|
|
994
|
+
dict(
|
|
995
|
+
dtype=dtype.replace('?', ''),
|
|
996
|
+
nullable=nullable,
|
|
997
|
+
),
|
|
998
|
+
)
|
|
999
|
+
if a.get('name', None):
|
|
1000
|
+
returns[-1]['name'] = a['name']
|
|
1001
|
+
if a.get('default', no_default) is not no_default:
|
|
1002
|
+
returns[-1]['default'] = a['default']
|
|
1003
|
+
|
|
1004
|
+
functions[sig['name']] = dict(
|
|
1005
|
+
args=args, returns=returns, function_type=info['function_type'],
|
|
1006
|
+
)
|
|
1007
|
+
|
|
1008
|
+
return functions
|
|
1009
|
+
|
|
1010
|
+
def get_create_functions(
|
|
743
1011
|
self,
|
|
744
1012
|
replace: bool = False,
|
|
745
1013
|
) -> List[str]:
|
|
@@ -807,7 +1075,7 @@ class Application(object):
|
|
|
807
1075
|
cur.execute(f'DROP FUNCTION IF EXISTS `{fname}`')
|
|
808
1076
|
for link in links:
|
|
809
1077
|
cur.execute(f'DROP LINK {link}')
|
|
810
|
-
for func in self.
|
|
1078
|
+
for func in self.get_create_functions(replace=replace):
|
|
811
1079
|
cur.execute(func)
|
|
812
1080
|
|
|
813
1081
|
def drop_functions(
|
|
@@ -1135,6 +1403,22 @@ def main(argv: Optional[List[str]] = None) -> None:
|
|
|
1135
1403
|
),
|
|
1136
1404
|
help='logging level',
|
|
1137
1405
|
)
|
|
1406
|
+
parser.add_argument(
|
|
1407
|
+
'--name-prefix', metavar='name_prefix',
|
|
1408
|
+
default=defaults.get(
|
|
1409
|
+
'name_prefix',
|
|
1410
|
+
get_option('external_function.name_prefix'),
|
|
1411
|
+
),
|
|
1412
|
+
help='Prefix to add to function names',
|
|
1413
|
+
)
|
|
1414
|
+
parser.add_argument(
|
|
1415
|
+
'--name-suffix', metavar='name_suffix',
|
|
1416
|
+
default=defaults.get(
|
|
1417
|
+
'name_suffix',
|
|
1418
|
+
get_option('external_function.name_suffix'),
|
|
1419
|
+
),
|
|
1420
|
+
help='Suffix to add to function names',
|
|
1421
|
+
)
|
|
1138
1422
|
parser.add_argument(
|
|
1139
1423
|
'functions', metavar='module.or.func.path', nargs='*',
|
|
1140
1424
|
help='functions or modules to export in UDF server',
|
|
@@ -1217,6 +1501,11 @@ def main(argv: Optional[List[str]] = None) -> None:
|
|
|
1217
1501
|
or defaults.get('replace_existing') \
|
|
1218
1502
|
or get_option('external_function.replace_existing')
|
|
1219
1503
|
|
|
1504
|
+
# Substitute in host / port if specified
|
|
1505
|
+
if args.host != defaults.get('host') or args.port != defaults.get('port'):
|
|
1506
|
+
u = urllib.parse.urlparse(args.url)
|
|
1507
|
+
args.url = u._replace(netloc=f'{args.host}:{args.port}').geturl()
|
|
1508
|
+
|
|
1220
1509
|
# Create application from functions / module
|
|
1221
1510
|
app = Application(
|
|
1222
1511
|
functions=args.functions,
|
|
@@ -1227,9 +1516,11 @@ def main(argv: Optional[List[str]] = None) -> None:
|
|
|
1227
1516
|
link_config=json.loads(args.link_config) or None,
|
|
1228
1517
|
link_credentials=json.loads(args.link_credentials) or None,
|
|
1229
1518
|
app_mode='remote',
|
|
1519
|
+
name_prefix=args.name_prefix,
|
|
1520
|
+
name_suffix=args.name_suffix,
|
|
1230
1521
|
)
|
|
1231
1522
|
|
|
1232
|
-
funcs = app.
|
|
1523
|
+
funcs = app.get_create_functions(replace=args.replace_existing)
|
|
1233
1524
|
if not funcs:
|
|
1234
1525
|
raise RuntimeError('no functions specified')
|
|
1235
1526
|
|
|
@@ -1249,6 +1540,7 @@ def main(argv: Optional[List[str]] = None) -> None:
|
|
|
1249
1540
|
host=args.host or None,
|
|
1250
1541
|
port=args.port or None,
|
|
1251
1542
|
log_level=args.log_level,
|
|
1543
|
+
lifespan='off',
|
|
1252
1544
|
).items() if v is not None
|
|
1253
1545
|
}
|
|
1254
1546
|
|