singlestoredb: singlestoredb-1.12.4-py3-none-any.whl → singlestoredb-1.13.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of singlestoredb might be problematic. Click here for more details.
- singlestoredb/__init__.py +1 -1
- singlestoredb/apps/__init__.py +1 -0
- singlestoredb/apps/_config.py +6 -0
- singlestoredb/apps/_connection_info.py +8 -0
- singlestoredb/apps/_python_udfs.py +85 -0
- singlestoredb/config.py +14 -2
- singlestoredb/functions/__init__.py +11 -1
- singlestoredb/functions/decorator.py +102 -252
- singlestoredb/functions/dtypes.py +545 -198
- singlestoredb/functions/ext/asgi.py +288 -90
- singlestoredb/functions/ext/json.py +29 -36
- singlestoredb/functions/ext/mmap.py +1 -1
- singlestoredb/functions/ext/rowdat_1.py +50 -70
- singlestoredb/functions/signature.py +816 -144
- singlestoredb/functions/typing.py +41 -0
- singlestoredb/functions/utils.py +342 -0
- singlestoredb/http/connection.py +3 -1
- singlestoredb/management/manager.py +6 -1
- singlestoredb/management/utils.py +2 -2
- singlestoredb/tests/ext_funcs/__init__.py +476 -237
- singlestoredb/tests/test_ext_func.py +192 -3
- singlestoredb/tests/test_udf.py +101 -131
- singlestoredb/tests/test_udf_returns.py +459 -0
- {singlestoredb-1.12.4.dist-info → singlestoredb-1.13.0.dist-info}/METADATA +2 -1
- {singlestoredb-1.12.4.dist-info → singlestoredb-1.13.0.dist-info}/RECORD +29 -25
- {singlestoredb-1.12.4.dist-info → singlestoredb-1.13.0.dist-info}/LICENSE +0 -0
- {singlestoredb-1.12.4.dist-info → singlestoredb-1.13.0.dist-info}/WHEEL +0 -0
- {singlestoredb-1.12.4.dist-info → singlestoredb-1.13.0.dist-info}/entry_points.txt +0 -0
- {singlestoredb-1.12.4.dist-info → singlestoredb-1.13.0.dist-info}/top_level.txt +0 -0
|
@@ -1,15 +1,34 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
|
-
#
|
|
2
|
+
# mypy: disable-error-code="type-arg"
|
|
3
|
+
import typing
|
|
4
|
+
from typing import List
|
|
5
|
+
from typing import NamedTuple
|
|
3
6
|
from typing import Optional
|
|
4
7
|
from typing import Tuple
|
|
5
8
|
|
|
6
|
-
|
|
9
|
+
import numpy as np
|
|
10
|
+
import numpy.typing as npt
|
|
11
|
+
import pandas as pd
|
|
12
|
+
import polars as pl
|
|
13
|
+
import pyarrow as pa
|
|
14
|
+
|
|
15
|
+
import singlestoredb.functions.dtypes as dt
|
|
16
|
+
from singlestoredb.functions import Masked
|
|
17
|
+
from singlestoredb.functions import Table
|
|
18
|
+
from singlestoredb.functions import udf
|
|
7
19
|
from singlestoredb.functions.dtypes import BIGINT
|
|
20
|
+
from singlestoredb.functions.dtypes import BLOB
|
|
21
|
+
from singlestoredb.functions.dtypes import DOUBLE
|
|
8
22
|
from singlestoredb.functions.dtypes import FLOAT
|
|
9
23
|
from singlestoredb.functions.dtypes import MEDIUMINT
|
|
10
24
|
from singlestoredb.functions.dtypes import SMALLINT
|
|
25
|
+
from singlestoredb.functions.dtypes import TEXT
|
|
11
26
|
from singlestoredb.functions.dtypes import TINYINT
|
|
12
|
-
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@udf
|
|
30
|
+
def int_mult(x: int, y: int) -> int:
|
|
31
|
+
return x * y
|
|
13
32
|
|
|
14
33
|
|
|
15
34
|
@udf
|
|
@@ -17,24 +36,36 @@ def double_mult(x: float, y: float) -> float:
|
|
|
17
36
|
return x * y
|
|
18
37
|
|
|
19
38
|
|
|
20
|
-
@udf
|
|
21
|
-
|
|
39
|
+
@udf(
|
|
40
|
+
args=[DOUBLE(nullable=False), DOUBLE(nullable=False)],
|
|
41
|
+
returns=DOUBLE(nullable=False),
|
|
42
|
+
)
|
|
43
|
+
def pandas_double_mult(x: pd.Series, y: pd.Series) -> pd.Series:
|
|
22
44
|
return x * y
|
|
23
45
|
|
|
24
46
|
|
|
25
|
-
@udf
|
|
26
|
-
def numpy_double_mult(
|
|
47
|
+
@udf
|
|
48
|
+
def numpy_double_mult(
|
|
49
|
+
x: npt.NDArray[np.float64],
|
|
50
|
+
y: npt.NDArray[np.float64],
|
|
51
|
+
) -> npt.NDArray[np.float64]:
|
|
27
52
|
return x * y
|
|
28
53
|
|
|
29
54
|
|
|
30
|
-
@udf
|
|
31
|
-
|
|
55
|
+
@udf(
|
|
56
|
+
args=[DOUBLE(nullable=False), DOUBLE(nullable=False)],
|
|
57
|
+
returns=DOUBLE(nullable=False),
|
|
58
|
+
)
|
|
59
|
+
def arrow_double_mult(x: pa.Array, y: pa.Array) -> pa.Array:
|
|
32
60
|
import pyarrow.compute as pc
|
|
33
61
|
return pc.multiply(x, y)
|
|
34
62
|
|
|
35
63
|
|
|
36
|
-
@udf
|
|
37
|
-
|
|
64
|
+
@udf(
|
|
65
|
+
args=[DOUBLE(nullable=False), DOUBLE(nullable=False)],
|
|
66
|
+
returns=DOUBLE(nullable=False),
|
|
67
|
+
)
|
|
68
|
+
def polars_double_mult(x: pl.Series, y: pl.Series) -> pl.Series:
|
|
38
69
|
return x * y
|
|
39
70
|
|
|
40
71
|
|
|
@@ -57,279 +88,315 @@ def nullable_float_mult(x: Optional[float], y: Optional[float]) -> Optional[floa
|
|
|
57
88
|
return x * y
|
|
58
89
|
|
|
59
90
|
|
|
60
|
-
|
|
91
|
+
#
|
|
92
|
+
# TINYINT
|
|
93
|
+
#
|
|
94
|
+
|
|
95
|
+
tinyint_udf = udf(
|
|
96
|
+
args=[TINYINT(nullable=False), TINYINT(nullable=False)],
|
|
97
|
+
returns=TINYINT(nullable=False),
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@tinyint_udf
|
|
102
|
+
def tinyint_mult(x: Optional[int], y: Optional[int]) -> Optional[int]:
|
|
61
103
|
if x is None or y is None:
|
|
62
104
|
return None
|
|
63
105
|
return x * y
|
|
64
106
|
|
|
65
107
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
return
|
|
108
|
+
@tinyint_udf
|
|
109
|
+
def pandas_tinyint_mult(x: pd.Series, y: pd.Series) -> pd.Series:
|
|
110
|
+
return x * y
|
|
69
111
|
|
|
70
112
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
return (x_data * y_data, x_nulls | y_nulls)
|
|
113
|
+
@tinyint_udf
|
|
114
|
+
def polars_tinyint_mult(x: pl.Series, y: pl.Series) -> pl.Series:
|
|
115
|
+
return x * y
|
|
75
116
|
|
|
76
117
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
118
|
+
@tinyint_udf
|
|
119
|
+
def numpy_tinyint_mult(x: np.ndarray, y: np.ndarray) -> np.ndarray:
|
|
120
|
+
return x * y
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
@tinyint_udf
|
|
124
|
+
def arrow_tinyint_mult(x: pa.Array, y: pa.Array) -> pa.Array:
|
|
81
125
|
import pyarrow.compute as pc
|
|
82
|
-
|
|
83
|
-
y_data, y_nulls = y
|
|
84
|
-
return (pc.multiply(x_data, y_data), pc.or_(x_nulls, y_nulls))
|
|
126
|
+
return pc.multiply(x, y)
|
|
85
127
|
|
|
128
|
+
#
|
|
129
|
+
# SMALLINT
|
|
130
|
+
#
|
|
86
131
|
|
|
87
|
-
int_mult = udf(_int_mult, name='int_mult')
|
|
88
132
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
args=[TINYINT(nullable=False), TINYINT(nullable=False)],
|
|
93
|
-
returns=TINYINT(nullable=False),
|
|
133
|
+
smallint_udf = udf(
|
|
134
|
+
args=[SMALLINT(nullable=False), SMALLINT(nullable=False)],
|
|
135
|
+
returns=SMALLINT(nullable=False),
|
|
94
136
|
)
|
|
95
137
|
|
|
96
|
-
pandas_tinyint_mult = udf.pandas(
|
|
97
|
-
_int_mult,
|
|
98
|
-
name='pandas_tinyint_mult',
|
|
99
|
-
args=[TINYINT(nullable=False), TINYINT(nullable=False)],
|
|
100
|
-
returns=TINYINT(nullable=False),
|
|
101
|
-
)
|
|
102
138
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
)
|
|
139
|
+
@smallint_udf
|
|
140
|
+
def smallint_mult(x: Optional[int], y: Optional[int]) -> Optional[int]:
|
|
141
|
+
if x is None or y is None:
|
|
142
|
+
return None
|
|
143
|
+
return x * y
|
|
109
144
|
|
|
110
|
-
numpy_tinyint_mult = udf.numpy(
|
|
111
|
-
_int_mult,
|
|
112
|
-
name='numpy_tinyint_mult',
|
|
113
|
-
args=[TINYINT(nullable=False), TINYINT(nullable=False)],
|
|
114
|
-
returns=TINYINT(nullable=False),
|
|
115
|
-
)
|
|
116
145
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
args=[TINYINT(nullable=False), TINYINT(nullable=False)],
|
|
121
|
-
returns=TINYINT(nullable=False),
|
|
122
|
-
)
|
|
146
|
+
@smallint_udf
|
|
147
|
+
def pandas_smallint_mult(x: pd.Series, y: pd.Series) -> pd.Series:
|
|
148
|
+
return x * y
|
|
123
149
|
|
|
124
|
-
smallint_mult = udf(
|
|
125
|
-
_int_mult,
|
|
126
|
-
name='smallint_mult',
|
|
127
|
-
args=[SMALLINT(nullable=False), SMALLINT(nullable=False)],
|
|
128
|
-
returns=SMALLINT(nullable=False),
|
|
129
|
-
)
|
|
130
150
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
args=[SMALLINT(nullable=False), SMALLINT(nullable=False)],
|
|
135
|
-
returns=SMALLINT(nullable=False),
|
|
136
|
-
)
|
|
151
|
+
@smallint_udf
|
|
152
|
+
def polars_smallint_mult(x: pl.Series, y: pl.Series) -> pl.Series:
|
|
153
|
+
return x * y
|
|
137
154
|
|
|
138
|
-
polars_smallint_mult = udf.polars(
|
|
139
|
-
_int_mult,
|
|
140
|
-
name='polars_smallint_mult',
|
|
141
|
-
args=[SMALLINT(nullable=False), SMALLINT(nullable=False)],
|
|
142
|
-
returns=SMALLINT(nullable=False),
|
|
143
|
-
)
|
|
144
155
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
args=[SMALLINT(nullable=False), SMALLINT(nullable=False)],
|
|
149
|
-
returns=SMALLINT(nullable=False),
|
|
150
|
-
)
|
|
156
|
+
@smallint_udf
|
|
157
|
+
def numpy_smallint_mult(x: np.ndarray, y: np.ndarray) -> np.ndarray:
|
|
158
|
+
return x * y
|
|
151
159
|
|
|
152
|
-
arrow_smallint_mult = udf.arrow(
|
|
153
|
-
_arrow_int_mult,
|
|
154
|
-
name='arrow_smallint_mult',
|
|
155
|
-
args=[SMALLINT(nullable=False), SMALLINT(nullable=False)],
|
|
156
|
-
returns=SMALLINT(nullable=False),
|
|
157
|
-
)
|
|
158
160
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
returns=MEDIUMINT(nullable=False),
|
|
164
|
-
)
|
|
161
|
+
@smallint_udf
|
|
162
|
+
def arrow_smallint_mult(x: pa.Array, y: pa.Array) -> pa.Array:
|
|
163
|
+
import pyarrow.compute as pc
|
|
164
|
+
return pc.multiply(x, y)
|
|
165
165
|
|
|
166
|
-
pandas_mediumint_mult = udf.pandas(
|
|
167
|
-
_int_mult,
|
|
168
|
-
name='pandas_mediumint_mult',
|
|
169
|
-
args=[MEDIUMINT(nullable=False), MEDIUMINT(nullable=False)],
|
|
170
|
-
returns=MEDIUMINT(nullable=False),
|
|
171
|
-
)
|
|
172
166
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
args=[MEDIUMINT(nullable=False), MEDIUMINT(nullable=False)],
|
|
177
|
-
returns=MEDIUMINT(nullable=False),
|
|
178
|
-
)
|
|
167
|
+
#
|
|
168
|
+
# MEDIUMINT
|
|
169
|
+
#
|
|
179
170
|
|
|
180
|
-
numpy_mediumint_mult = udf.numpy(
|
|
181
|
-
_int_mult,
|
|
182
|
-
name='numpy_mediumint_mult',
|
|
183
|
-
args=[MEDIUMINT(nullable=False), MEDIUMINT(nullable=False)],
|
|
184
|
-
returns=MEDIUMINT(nullable=False),
|
|
185
|
-
)
|
|
186
171
|
|
|
187
|
-
|
|
188
|
-
_arrow_int_mult,
|
|
189
|
-
name='arrow_mediumint_mult',
|
|
172
|
+
mediumint_udf = udf(
|
|
190
173
|
args=[MEDIUMINT(nullable=False), MEDIUMINT(nullable=False)],
|
|
191
174
|
returns=MEDIUMINT(nullable=False),
|
|
192
175
|
)
|
|
193
176
|
|
|
194
|
-
bigint_mult = udf(
|
|
195
|
-
_int_mult,
|
|
196
|
-
name='bigint_mult',
|
|
197
|
-
args=[BIGINT(nullable=False), BIGINT(nullable=False)],
|
|
198
|
-
returns=BIGINT(nullable=False),
|
|
199
|
-
)
|
|
200
177
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
)
|
|
178
|
+
@mediumint_udf
|
|
179
|
+
def mediumint_mult(x: Optional[int], y: Optional[int]) -> Optional[int]:
|
|
180
|
+
if x is None or y is None:
|
|
181
|
+
return None
|
|
182
|
+
return x * y
|
|
207
183
|
|
|
208
|
-
polars_bigint_mult = udf.polars(
|
|
209
|
-
_int_mult,
|
|
210
|
-
name='polars_bigint_mult',
|
|
211
|
-
args=[BIGINT(nullable=False), BIGINT(nullable=False)],
|
|
212
|
-
returns=BIGINT(nullable=False),
|
|
213
|
-
)
|
|
214
184
|
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
185
|
+
@mediumint_udf
|
|
186
|
+
def pandas_mediumint_mult(x: pd.Series, y: pd.Series) -> pd.Series:
|
|
187
|
+
return x * y
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
@mediumint_udf
|
|
191
|
+
def polars_mediumint_mult(x: pl.Series, y: pl.Series) -> pl.Series:
|
|
192
|
+
return x * y
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
@mediumint_udf
|
|
196
|
+
def numpy_mediumint_mult(x: np.ndarray, y: np.ndarray) -> np.ndarray:
|
|
197
|
+
return x * y
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
@mediumint_udf
|
|
201
|
+
def arrow_mediumint_mult(x: pa.Array, y: pa.Array) -> pa.Array:
|
|
202
|
+
import pyarrow.compute as pc
|
|
203
|
+
return pc.multiply(x, y)
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
#
|
|
207
|
+
# BIGINT
|
|
208
|
+
#
|
|
209
|
+
|
|
221
210
|
|
|
222
|
-
|
|
223
|
-
_arrow_int_mult,
|
|
224
|
-
name='arrow_bigint_mult',
|
|
211
|
+
bigint_udf = udf(
|
|
225
212
|
args=[BIGINT(nullable=False), BIGINT(nullable=False)],
|
|
226
213
|
returns=BIGINT(nullable=False),
|
|
227
214
|
)
|
|
228
215
|
|
|
229
|
-
nullable_tinyint_mult = udf(
|
|
230
|
-
_int_mult,
|
|
231
|
-
name='nullable_tinyint_mult',
|
|
232
|
-
args=[TINYINT, TINYINT],
|
|
233
|
-
returns=TINYINT,
|
|
234
|
-
)
|
|
235
216
|
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
)
|
|
217
|
+
@bigint_udf
|
|
218
|
+
def bigint_mult(x: Optional[int], y: Optional[int]) -> Optional[int]:
|
|
219
|
+
if x is None or y is None:
|
|
220
|
+
return None
|
|
221
|
+
return x * y
|
|
242
222
|
|
|
243
|
-
pandas_nullable_tinyint_mult_with_masks = udf.pandas(
|
|
244
|
-
_int_mult_with_masks,
|
|
245
|
-
name='pandas_nullable_tinyint_mult_with_masks',
|
|
246
|
-
args=[TINYINT, TINYINT],
|
|
247
|
-
returns=TINYINT,
|
|
248
|
-
include_masks=True,
|
|
249
|
-
)
|
|
250
223
|
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
args=[TINYINT, TINYINT],
|
|
255
|
-
returns=TINYINT,
|
|
256
|
-
)
|
|
224
|
+
@bigint_udf
|
|
225
|
+
def pandas_bigint_mult(x: pd.Series, y: pd.Series) -> pd.Series:
|
|
226
|
+
return x * y
|
|
257
227
|
|
|
258
|
-
polars_nullable_tinyint_mult_with_masks = udf.polars(
|
|
259
|
-
_int_mult_with_masks,
|
|
260
|
-
name='polars_nullable_tinyint_mult_with_masks',
|
|
261
|
-
args=[TINYINT, TINYINT],
|
|
262
|
-
returns=TINYINT,
|
|
263
|
-
include_masks=True,
|
|
264
|
-
)
|
|
265
228
|
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
args=[TINYINT, TINYINT],
|
|
270
|
-
returns=TINYINT,
|
|
271
|
-
)
|
|
229
|
+
@bigint_udf
|
|
230
|
+
def polars_bigint_mult(x: pl.Series, y: pl.Series) -> pl.Series:
|
|
231
|
+
return x * y
|
|
272
232
|
|
|
273
|
-
numpy_nullable_tinyint_mult_with_masks = udf.numpy(
|
|
274
|
-
_int_mult_with_masks,
|
|
275
|
-
name='numpy_nullable_tinyint_mult_with_masks',
|
|
276
|
-
args=[TINYINT, TINYINT],
|
|
277
|
-
returns=TINYINT,
|
|
278
|
-
include_masks=True,
|
|
279
|
-
)
|
|
280
233
|
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
args=[TINYINT, TINYINT],
|
|
285
|
-
returns=TINYINT,
|
|
286
|
-
)
|
|
234
|
+
@bigint_udf
|
|
235
|
+
def numpy_bigint_mult(x: np.ndarray, y: np.ndarray) -> np.ndarray:
|
|
236
|
+
return x * y
|
|
287
237
|
|
|
288
|
-
arrow_nullable_tinyint_mult_with_masks = udf.arrow(
|
|
289
|
-
_arrow_int_mult_with_masks,
|
|
290
|
-
name='arrow_nullable_tinyint_mult_with_masks',
|
|
291
|
-
args=[TINYINT, TINYINT],
|
|
292
|
-
returns=TINYINT,
|
|
293
|
-
include_masks=True,
|
|
294
|
-
)
|
|
295
238
|
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
239
|
+
@bigint_udf
|
|
240
|
+
def arrow_bigint_mult(x: pa.Array, y: pa.Array) -> pa.Array:
|
|
241
|
+
import pyarrow.compute as pc
|
|
242
|
+
return pc.multiply(x, y)
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
#
|
|
246
|
+
# NULLABLE TINYINT
|
|
247
|
+
#
|
|
248
|
+
|
|
302
249
|
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
args=[MEDIUMINT, MEDIUMINT],
|
|
307
|
-
returns=MEDIUMINT,
|
|
250
|
+
nullable_tinyint_udf = udf(
|
|
251
|
+
args=[TINYINT(nullable=True), TINYINT(nullable=True)],
|
|
252
|
+
returns=TINYINT(nullable=True),
|
|
308
253
|
)
|
|
309
254
|
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
255
|
+
|
|
256
|
+
@nullable_tinyint_udf
|
|
257
|
+
def nullable_tinyint_mult(x: Optional[int], y: Optional[int]) -> Optional[int]:
|
|
258
|
+
if x is None or y is None:
|
|
259
|
+
return None
|
|
260
|
+
return x * y
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
@nullable_tinyint_udf
|
|
264
|
+
def pandas_nullable_tinyint_mult(x: pd.Series, y: pd.Series) -> pd.Series:
|
|
265
|
+
return x * y
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
@nullable_tinyint_udf
|
|
269
|
+
def polars_nullable_tinyint_mult(x: pl.Series, y: pl.Series) -> pl.Series:
|
|
270
|
+
return x * y
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
@nullable_tinyint_udf
|
|
274
|
+
def numpy_nullable_tinyint_mult(x: np.ndarray, y: np.ndarray) -> np.ndarray:
|
|
275
|
+
return x * y
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
@nullable_tinyint_udf
|
|
279
|
+
def arrow_nullable_tinyint_mult(x: pa.Array, y: pa.Array) -> pa.Array:
|
|
280
|
+
import pyarrow.compute as pc
|
|
281
|
+
return pc.multiply(x, y)
|
|
282
|
+
|
|
283
|
+
#
|
|
284
|
+
# NULLABLE SMALLINT
|
|
285
|
+
#
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
nullable_smallint_udf = udf(
|
|
289
|
+
args=[SMALLINT(nullable=True), SMALLINT(nullable=True)],
|
|
290
|
+
returns=SMALLINT(nullable=True),
|
|
315
291
|
)
|
|
316
292
|
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
293
|
+
|
|
294
|
+
@nullable_smallint_udf
|
|
295
|
+
def nullable_smallint_mult(x: Optional[int], y: Optional[int]) -> Optional[int]:
|
|
296
|
+
if x is None or y is None:
|
|
297
|
+
return None
|
|
298
|
+
return x * y
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
@nullable_smallint_udf
|
|
302
|
+
def pandas_nullable_smallint_mult(x: pd.Series, y: pd.Series) -> pd.Series:
|
|
303
|
+
return x * y
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
@nullable_smallint_udf
|
|
307
|
+
def polars_nullable_smallint_mult(x: pl.Series, y: pl.Series) -> pl.Series:
|
|
308
|
+
return x * y
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
@nullable_smallint_udf
|
|
312
|
+
def numpy_nullable_smallint_mult(x: np.ndarray, y: np.ndarray) -> np.ndarray:
|
|
313
|
+
return x * y
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
@nullable_smallint_udf
|
|
317
|
+
def arrow_nullable_smallint_mult(x: pa.Array, y: pa.Array) -> pa.Array:
|
|
318
|
+
import pyarrow.compute as pc
|
|
319
|
+
return pc.multiply(x, y)
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
#
|
|
323
|
+
# NULLABLE MEDIUMINT
|
|
324
|
+
#
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
nullable_mediumint_udf = udf(
|
|
328
|
+
args=[MEDIUMINT(nullable=True), MEDIUMINT(nullable=True)],
|
|
329
|
+
returns=MEDIUMINT(nullable=True),
|
|
322
330
|
)
|
|
323
331
|
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
332
|
+
|
|
333
|
+
@nullable_mediumint_udf
|
|
334
|
+
def nullable_mediumint_mult(x: Optional[int], y: Optional[int]) -> Optional[int]:
|
|
335
|
+
if x is None or y is None:
|
|
336
|
+
return None
|
|
337
|
+
return x * y
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
@nullable_mediumint_udf
|
|
341
|
+
def pandas_nullable_mediumint_mult(x: pd.Series, y: pd.Series) -> pd.Series:
|
|
342
|
+
return x * y
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
@nullable_mediumint_udf
|
|
346
|
+
def polars_nullable_mediumint_mult(x: pl.Series, y: pl.Series) -> pl.Series:
|
|
347
|
+
return x * y
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
@nullable_mediumint_udf
|
|
351
|
+
def numpy_nullable_mediumint_mult(x: np.ndarray, y: np.ndarray) -> np.ndarray:
|
|
352
|
+
return x * y
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
@nullable_mediumint_udf
|
|
356
|
+
def arrow_nullable_mediumint_mult(x: pa.Array, y: pa.Array) -> pa.Array:
|
|
357
|
+
import pyarrow.compute as pc
|
|
358
|
+
return pc.multiply(x, y)
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
#
|
|
362
|
+
# NULLABLE BIGINT
|
|
363
|
+
#
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
nullable_bigint_udf = udf(
|
|
367
|
+
args=[BIGINT(nullable=True), BIGINT(nullable=True)],
|
|
368
|
+
returns=BIGINT(nullable=True),
|
|
330
369
|
)
|
|
331
370
|
|
|
332
371
|
|
|
372
|
+
@nullable_bigint_udf
|
|
373
|
+
def nullable_bigint_mult(x: Optional[int], y: Optional[int]) -> Optional[int]:
|
|
374
|
+
if x is None or y is None:
|
|
375
|
+
return None
|
|
376
|
+
return x * y
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
@nullable_bigint_udf
|
|
380
|
+
def pandas_nullable_bigint_mult(x: pd.Series, y: pd.Series) -> pd.Series:
|
|
381
|
+
return x * y
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
@nullable_bigint_udf
|
|
385
|
+
def polars_nullable_bigint_mult(x: pl.Series, y: pl.Series) -> pl.Series:
|
|
386
|
+
return x * y
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
@nullable_bigint_udf
|
|
390
|
+
def numpy_nullable_bigint_mult(x: np.ndarray, y: np.ndarray) -> np.ndarray:
|
|
391
|
+
return x * y
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
@nullable_bigint_udf
|
|
395
|
+
def arrow_nullable_bigint_mult(x: pa.Array, y: pa.Array) -> pa.Array:
|
|
396
|
+
import pyarrow.compute as pc
|
|
397
|
+
return pc.multiply(x, y)
|
|
398
|
+
|
|
399
|
+
|
|
333
400
|
@udf
|
|
334
401
|
def nullable_int_mult(x: Optional[int], y: Optional[int]) -> Optional[int]:
|
|
335
402
|
if x is None or y is None:
|
|
@@ -342,13 +409,15 @@ def string_mult(x: str, times: int) -> str:
|
|
|
342
409
|
return x * times
|
|
343
410
|
|
|
344
411
|
|
|
345
|
-
@udf
|
|
346
|
-
def pandas_string_mult(x:
|
|
412
|
+
@udf(args=[TEXT(nullable=False), BIGINT(nullable=False)], returns=TEXT(nullable=False))
|
|
413
|
+
def pandas_string_mult(x: pd.Series, times: pd.Series) -> pd.Series:
|
|
347
414
|
return x * times
|
|
348
415
|
|
|
349
416
|
|
|
350
|
-
@udf
|
|
351
|
-
def numpy_string_mult(
|
|
417
|
+
@udf
|
|
418
|
+
def numpy_string_mult(
|
|
419
|
+
x: npt.NDArray[np.str_], times: npt.NDArray[np.int_],
|
|
420
|
+
) -> npt.NDArray[np.str_]:
|
|
352
421
|
return x * times
|
|
353
422
|
|
|
354
423
|
|
|
@@ -373,13 +442,183 @@ def nullable_string_mult(x: Optional[str], times: Optional[int]) -> Optional[str
|
|
|
373
442
|
return x * times
|
|
374
443
|
|
|
375
444
|
|
|
376
|
-
@udf(
|
|
377
|
-
|
|
378
|
-
|
|
445
|
+
@udf(
|
|
446
|
+
args=[TINYINT(nullable=True), TINYINT(nullable=True)],
|
|
447
|
+
returns=TINYINT(nullable=True),
|
|
448
|
+
)
|
|
449
|
+
def pandas_nullable_tinyint_mult_with_masks(
|
|
450
|
+
x: Masked[pd.Series], y: Masked[pd.Series],
|
|
451
|
+
) -> Masked[pd.Series]:
|
|
452
|
+
x_data, x_nulls = x
|
|
453
|
+
y_data, y_nulls = y
|
|
454
|
+
return Masked(x_data * y_data, x_nulls | y_nulls)
|
|
379
455
|
|
|
380
456
|
|
|
381
|
-
@udf
|
|
382
|
-
def
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
457
|
+
@udf
|
|
458
|
+
def numpy_nullable_tinyint_mult_with_masks(
|
|
459
|
+
x: Masked[npt.NDArray[np.int8]], y: Masked[npt.NDArray[np.int8]],
|
|
460
|
+
) -> Masked[npt.NDArray[np.int8]]:
|
|
461
|
+
x_data, x_nulls = x
|
|
462
|
+
y_data, y_nulls = y
|
|
463
|
+
return Masked(x_data * y_data, x_nulls | y_nulls)
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
@udf(
|
|
467
|
+
args=[TINYINT(nullable=True), TINYINT(nullable=True)],
|
|
468
|
+
returns=TINYINT(nullable=True),
|
|
469
|
+
)
|
|
470
|
+
def polars_nullable_tinyint_mult_with_masks(
|
|
471
|
+
x: Masked[pl.Series], y: Masked[pl.Series],
|
|
472
|
+
) -> Masked[pl.Series]:
|
|
473
|
+
x_data, x_nulls = x
|
|
474
|
+
y_data, y_nulls = y
|
|
475
|
+
return Masked(x_data * y_data, x_nulls | y_nulls)
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
@udf(
|
|
479
|
+
args=[TINYINT(nullable=True), TINYINT(nullable=True)],
|
|
480
|
+
returns=TINYINT(nullable=True),
|
|
481
|
+
)
|
|
482
|
+
def arrow_nullable_tinyint_mult_with_masks(
|
|
483
|
+
x: Masked[pa.Array], y: Masked[pa.Array],
|
|
484
|
+
) -> Masked[pa.Array]:
|
|
485
|
+
import pyarrow.compute as pc
|
|
486
|
+
x_data, x_nulls = x
|
|
487
|
+
y_data, y_nulls = y
|
|
488
|
+
return Masked(pc.multiply(x_data, y_data), pc.or_(x_nulls, y_nulls))
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
@udf(returns=[TEXT(nullable=False, name='res')])
|
|
492
|
+
def numpy_fixed_strings() -> Table[npt.NDArray[np.str_]]:
|
|
493
|
+
out = np.array(
|
|
494
|
+
[
|
|
495
|
+
'hello',
|
|
496
|
+
'hi there 😜',
|
|
497
|
+
'😜 bye',
|
|
498
|
+
], dtype=np.str_,
|
|
499
|
+
)
|
|
500
|
+
assert str(out.dtype) == '<U10'
|
|
501
|
+
return Table(out)
|
|
502
|
+
|
|
503
|
+
|
|
504
|
+
@udf(returns=[TEXT(nullable=False, name='res'), TINYINT(nullable=False, name='res2')])
|
|
505
|
+
def numpy_fixed_strings_2() -> Table[npt.NDArray[np.str_], npt.NDArray[np.int8]]:
|
|
506
|
+
out = np.array(
|
|
507
|
+
[
|
|
508
|
+
'hello',
|
|
509
|
+
'hi there 😜',
|
|
510
|
+
'😜 bye',
|
|
511
|
+
], dtype=np.str_,
|
|
512
|
+
)
|
|
513
|
+
assert str(out.dtype) == '<U10'
|
|
514
|
+
return Table(out, out)
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
@udf(returns=[BLOB(nullable=False, name='res')])
|
|
518
|
+
def numpy_fixed_binary() -> Table[npt.NDArray[np.bytes_]]:
|
|
519
|
+
out = np.array(
|
|
520
|
+
[
|
|
521
|
+
'hello'.encode('utf8'),
|
|
522
|
+
'hi there 😜'.encode('utf8'),
|
|
523
|
+
'😜 bye'.encode('utf8'),
|
|
524
|
+
], dtype=np.bytes_,
|
|
525
|
+
)
|
|
526
|
+
assert str(out.dtype) == '|S13'
|
|
527
|
+
return Table(out)
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
@udf
|
|
531
|
+
def no_args_no_return_value() -> None:
|
|
532
|
+
pass
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
@udf
|
|
536
|
+
def table_function(n: int) -> Table[List[int]]:
|
|
537
|
+
return Table([10] * n)
|
|
538
|
+
|
|
539
|
+
|
|
540
|
+
@udf(
|
|
541
|
+
returns=[
|
|
542
|
+
dt.INT(name='c_int', nullable=False),
|
|
543
|
+
dt.DOUBLE(name='c_float', nullable=False),
|
|
544
|
+
dt.TEXT(name='c_str', nullable=False),
|
|
545
|
+
],
|
|
546
|
+
)
|
|
547
|
+
def table_function_tuple(n: int) -> Table[List[Tuple[int, float, str]]]:
|
|
548
|
+
return Table([(10, 10.0, 'ten')] * n)
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
class MyTable(NamedTuple):
|
|
552
|
+
c_int: int
|
|
553
|
+
c_float: float
|
|
554
|
+
c_str: str
|
|
555
|
+
|
|
556
|
+
|
|
557
|
+
@udf
|
|
558
|
+
def table_function_struct(n: int) -> Table[List[MyTable]]:
|
|
559
|
+
return Table([MyTable(10, 10.0, 'ten')] * n)
|
|
560
|
+
|
|
561
|
+
|
|
562
|
+
@udf
|
|
563
|
+
def vec_function(
|
|
564
|
+
x: npt.NDArray[np.float64], y: npt.NDArray[np.float64],
|
|
565
|
+
) -> npt.NDArray[np.float64]:
|
|
566
|
+
return x * y
|
|
567
|
+
|
|
568
|
+
|
|
569
|
+
class VecInputs(typing.NamedTuple):
|
|
570
|
+
x: np.int8
|
|
571
|
+
y: np.int8
|
|
572
|
+
|
|
573
|
+
|
|
574
|
+
class VecOutputs(typing.NamedTuple):
|
|
575
|
+
res: np.int16
|
|
576
|
+
|
|
577
|
+
|
|
578
|
+
@udf(args=VecInputs, returns=VecOutputs)
|
|
579
|
+
def vec_function_ints(
|
|
580
|
+
x: npt.NDArray[np.int_], y: npt.NDArray[np.int_],
|
|
581
|
+
) -> npt.NDArray[np.int_]:
|
|
582
|
+
return x * y
|
|
583
|
+
|
|
584
|
+
|
|
585
|
+
class DFOutputs(typing.NamedTuple):
|
|
586
|
+
res: np.int16
|
|
587
|
+
res2: np.float64
|
|
588
|
+
|
|
589
|
+
|
|
590
|
+
@udf(args=VecInputs, returns=DFOutputs)
|
|
591
|
+
def vec_function_df(
|
|
592
|
+
x: npt.NDArray[np.int_], y: npt.NDArray[np.int_],
|
|
593
|
+
) -> Table[pd.DataFrame]:
|
|
594
|
+
return pd.DataFrame(dict(res=[1, 2, 3], res2=[1.1, 2.2, 3.3]))
|
|
595
|
+
|
|
596
|
+
|
|
597
|
+
class MaskOutputs(typing.NamedTuple):
|
|
598
|
+
res: Optional[np.int16]
|
|
599
|
+
|
|
600
|
+
|
|
601
|
+
@udf(args=VecInputs, returns=MaskOutputs)
|
|
602
|
+
def vec_function_ints_masked(
|
|
603
|
+
x: Masked[npt.NDArray[np.int_]], y: Masked[npt.NDArray[np.int_]],
|
|
604
|
+
) -> Table[Masked[npt.NDArray[np.int_]]]:
|
|
605
|
+
x_data, x_nulls = x
|
|
606
|
+
y_data, y_nulls = y
|
|
607
|
+
return Table(Masked(x_data * y_data, x_nulls | y_nulls))
|
|
608
|
+
|
|
609
|
+
|
|
610
|
+
class MaskOutputs2(typing.NamedTuple):
|
|
611
|
+
res: Optional[np.int16]
|
|
612
|
+
res2: Optional[np.int16]
|
|
613
|
+
|
|
614
|
+
|
|
615
|
+
@udf(args=VecInputs, returns=MaskOutputs2)
|
|
616
|
+
def vec_function_ints_masked2(
|
|
617
|
+
x: Masked[npt.NDArray[np.int_]], y: Masked[npt.NDArray[np.int_]],
|
|
618
|
+
) -> Table[Masked[npt.NDArray[np.int_]], Masked[npt.NDArray[np.int_]]]:
|
|
619
|
+
x_data, x_nulls = x
|
|
620
|
+
y_data, y_nulls = y
|
|
621
|
+
return Table(
|
|
622
|
+
Masked(x_data * y_data, x_nulls | y_nulls),
|
|
623
|
+
Masked(x_data * y_data, x_nulls | y_nulls),
|
|
624
|
+
)
|