singlestoredb 0.3.3__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of singlestoredb might be problematic. Click here for more details.
- singlestoredb/__init__.py +33 -2
- singlestoredb/alchemy/__init__.py +90 -0
- singlestoredb/auth.py +6 -4
- singlestoredb/config.py +116 -16
- singlestoredb/connection.py +489 -523
- singlestoredb/converters.py +275 -26
- singlestoredb/exceptions.py +30 -4
- singlestoredb/functions/__init__.py +1 -0
- singlestoredb/functions/decorator.py +142 -0
- singlestoredb/functions/dtypes.py +1639 -0
- singlestoredb/functions/ext/__init__.py +2 -0
- singlestoredb/functions/ext/arrow.py +375 -0
- singlestoredb/functions/ext/asgi.py +661 -0
- singlestoredb/functions/ext/json.py +427 -0
- singlestoredb/functions/ext/mmap.py +306 -0
- singlestoredb/functions/ext/rowdat_1.py +744 -0
- singlestoredb/functions/signature.py +673 -0
- singlestoredb/fusion/__init__.py +11 -0
- singlestoredb/fusion/graphql.py +213 -0
- singlestoredb/fusion/handler.py +621 -0
- singlestoredb/fusion/handlers/__init__.py +0 -0
- singlestoredb/fusion/handlers/stage.py +257 -0
- singlestoredb/fusion/handlers/utils.py +162 -0
- singlestoredb/fusion/handlers/workspace.py +412 -0
- singlestoredb/fusion/registry.py +164 -0
- singlestoredb/fusion/result.py +399 -0
- singlestoredb/http/__init__.py +27 -0
- singlestoredb/http/connection.py +1192 -0
- singlestoredb/management/__init__.py +3 -2
- singlestoredb/management/billing_usage.py +148 -0
- singlestoredb/management/cluster.py +19 -14
- singlestoredb/management/manager.py +100 -40
- singlestoredb/management/organization.py +188 -0
- singlestoredb/management/region.py +6 -8
- singlestoredb/management/utils.py +253 -4
- singlestoredb/management/workspace.py +1153 -35
- singlestoredb/mysql/__init__.py +177 -0
- singlestoredb/mysql/_auth.py +298 -0
- singlestoredb/mysql/charset.py +214 -0
- singlestoredb/mysql/connection.py +1814 -0
- singlestoredb/mysql/constants/CLIENT.py +38 -0
- singlestoredb/mysql/constants/COMMAND.py +32 -0
- singlestoredb/mysql/constants/CR.py +78 -0
- singlestoredb/mysql/constants/ER.py +474 -0
- singlestoredb/mysql/constants/FIELD_TYPE.py +32 -0
- singlestoredb/mysql/constants/FLAG.py +15 -0
- singlestoredb/mysql/constants/SERVER_STATUS.py +10 -0
- singlestoredb/mysql/constants/__init__.py +0 -0
- singlestoredb/mysql/converters.py +271 -0
- singlestoredb/mysql/cursors.py +713 -0
- singlestoredb/mysql/err.py +92 -0
- singlestoredb/mysql/optionfile.py +20 -0
- singlestoredb/mysql/protocol.py +388 -0
- singlestoredb/mysql/tests/__init__.py +19 -0
- singlestoredb/mysql/tests/base.py +126 -0
- singlestoredb/mysql/tests/conftest.py +37 -0
- singlestoredb/mysql/tests/test_DictCursor.py +132 -0
- singlestoredb/mysql/tests/test_SSCursor.py +141 -0
- singlestoredb/mysql/tests/test_basic.py +452 -0
- singlestoredb/mysql/tests/test_connection.py +851 -0
- singlestoredb/mysql/tests/test_converters.py +58 -0
- singlestoredb/mysql/tests/test_cursor.py +141 -0
- singlestoredb/mysql/tests/test_err.py +16 -0
- singlestoredb/mysql/tests/test_issues.py +514 -0
- singlestoredb/mysql/tests/test_load_local.py +75 -0
- singlestoredb/mysql/tests/test_nextset.py +88 -0
- singlestoredb/mysql/tests/test_optionfile.py +27 -0
- singlestoredb/mysql/tests/thirdparty/__init__.py +6 -0
- singlestoredb/mysql/tests/thirdparty/test_MySQLdb/__init__.py +9 -0
- singlestoredb/mysql/tests/thirdparty/test_MySQLdb/capabilities.py +323 -0
- singlestoredb/mysql/tests/thirdparty/test_MySQLdb/dbapi20.py +865 -0
- singlestoredb/mysql/tests/thirdparty/test_MySQLdb/test_MySQLdb_capabilities.py +110 -0
- singlestoredb/mysql/tests/thirdparty/test_MySQLdb/test_MySQLdb_dbapi20.py +224 -0
- singlestoredb/mysql/tests/thirdparty/test_MySQLdb/test_MySQLdb_nonstandard.py +101 -0
- singlestoredb/mysql/times.py +23 -0
- singlestoredb/pytest.py +283 -0
- singlestoredb/tests/empty.sql +0 -0
- singlestoredb/tests/ext_funcs/__init__.py +385 -0
- singlestoredb/tests/test.sql +210 -0
- singlestoredb/tests/test2.sql +1 -0
- singlestoredb/tests/test_basics.py +482 -117
- singlestoredb/tests/test_config.py +13 -15
- singlestoredb/tests/test_connection.py +241 -289
- singlestoredb/tests/test_dbapi.py +27 -0
- singlestoredb/tests/test_exceptions.py +0 -2
- singlestoredb/tests/test_ext_func.py +1193 -0
- singlestoredb/tests/test_ext_func_data.py +1101 -0
- singlestoredb/tests/test_fusion.py +465 -0
- singlestoredb/tests/test_http.py +32 -28
- singlestoredb/tests/test_management.py +588 -10
- singlestoredb/tests/test_plugin.py +33 -0
- singlestoredb/tests/test_results.py +11 -14
- singlestoredb/tests/test_types.py +0 -2
- singlestoredb/tests/test_udf.py +687 -0
- singlestoredb/tests/test_xdict.py +0 -2
- singlestoredb/tests/utils.py +3 -4
- singlestoredb/types.py +4 -5
- singlestoredb/utils/config.py +71 -12
- singlestoredb/utils/convert_rows.py +0 -2
- singlestoredb/utils/debug.py +13 -0
- singlestoredb/utils/mogrify.py +151 -0
- singlestoredb/utils/results.py +4 -3
- singlestoredb/utils/xdict.py +12 -12
- singlestoredb-1.0.3.dist-info/METADATA +139 -0
- singlestoredb-1.0.3.dist-info/RECORD +112 -0
- {singlestoredb-0.3.3.dist-info → singlestoredb-1.0.3.dist-info}/WHEEL +1 -1
- singlestoredb-1.0.3.dist-info/entry_points.txt +2 -0
- singlestoredb/drivers/__init__.py +0 -46
- singlestoredb/drivers/base.py +0 -200
- singlestoredb/drivers/cymysql.py +0 -40
- singlestoredb/drivers/http.py +0 -49
- singlestoredb/drivers/mariadb.py +0 -42
- singlestoredb/drivers/mysqlconnector.py +0 -51
- singlestoredb/drivers/mysqldb.py +0 -62
- singlestoredb/drivers/pymysql.py +0 -39
- singlestoredb/drivers/pyodbc.py +0 -67
- singlestoredb/http.py +0 -794
- singlestoredb-0.3.3.dist-info/METADATA +0 -105
- singlestoredb-0.3.3.dist-info/RECORD +0 -46
- {singlestoredb-0.3.3.dist-info → singlestoredb-1.0.3.dist-info}/LICENSE +0 -0
- {singlestoredb-0.3.3.dist-info → singlestoredb-1.0.3.dist-info}/top_level.txt +0 -0
singlestoredb/pytest.py
ADDED
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
"""Pytest plugin"""
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import subprocess
|
|
6
|
+
import time
|
|
7
|
+
from enum import Enum
|
|
8
|
+
from typing import Iterator
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
import pytest
|
|
12
|
+
|
|
13
|
+
from . import connect
|
|
14
|
+
from .connection import Connection
|
|
15
|
+
from .connection import Cursor
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# How many times to attempt to connect to the container
|
|
22
|
+
STARTUP_CONNECT_ATTEMPTS = 10
|
|
23
|
+
# How long to wait between connection attempts
|
|
24
|
+
STARTUP_CONNECT_TIMEOUT_SECONDS = 2
|
|
25
|
+
# How many times to check if all connections are closed
|
|
26
|
+
TEARDOWN_WAIT_ATTEMPTS = 20
|
|
27
|
+
# How long to wait between checking connections
|
|
28
|
+
TEARDOWN_WAIT_SECONDS = 2
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class ExecutionMode(Enum):
    """Role of the current pytest process within a test run."""

    # No pytest-xdist variables present, or only a single worker
    SEQUENTIAL = 1
    # Worker "gw0" when several workers are running
    LEADER = 2
    # Any other worker when several workers are running
    FOLLOWER = 3
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@pytest.fixture(scope='session')
def execution_mode() -> ExecutionMode:
    """Work out which role this pytest process plays in the test run.

    Reads ``PYTEST_XDIST_WORKER`` and ``PYTEST_XDIST_WORKER_COUNT`` from the
    environment. Returns ``SEQUENTIAL`` when either is unset or the worker
    count is ``'1'``, ``LEADER`` for the worker named ``gw0`` and
    ``FOLLOWER`` for every other worker.
    """
    env = os.environ
    worker = env.get('PYTEST_XDIST_WORKER')
    worker_count = env.get('PYTEST_XDIST_WORKER_COUNT')

    # If we're not in pytest-xdist, the mode is Sequential
    if worker is None or worker_count is None:
        logger.debug('XDIST environment vars not found')
        return ExecutionMode.SEQUENTIAL

    logger.debug(f'PYTEST_XDIST_WORKER == {worker}')
    logger.debug(f'PYTEST_XDIST_WORKER_COUNT == {worker_count}')

    # If we're the only worker, then the mode is Sequential
    if worker_count == '1':
        return ExecutionMode.SEQUENTIAL

    # With multiple workers, the first one (named "gw0") is the leader
    return ExecutionMode.LEADER if worker == 'gw0' else ExecutionMode.FOLLOWER
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@pytest.fixture(scope='session')
def node_name() -> Iterator[str]:
    """Yield the name of this worker node.

    The name is the value of ``PYTEST_XDIST_WORKER``, or ``'master'`` when
    that environment variable is not set.
    """
    worker = os.environ.get('PYTEST_XDIST_WORKER')

    if worker is not None:
        logger.debug(f'PYTEST_XDIST_WORKER == {worker}')
        yield worker
        return

    logger.debug('XDIST environment vars not found')
    yield 'master'
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class _TestContainerManager():
    """Manages the setup and teardown of a SingleStoreDB Dev Container"""

    def __init__(self) -> None:
        """Set the container name, image, environment, ports and URL.

        Requires the ``SINGLESTORE_LICENSE`` environment variable to be set.
        """
        self.container_name = 'singlestoredb-test-container'
        self.dev_image_name = 'ghcr.io/singlestore-labs/singlestoredb-dev'

        assert 'SINGLESTORE_LICENSE' in os.environ, 'SINGLESTORE_LICENSE not set'

        self.root_password = 'Q8r4D7yXR8oqn'
        self.environment_vars = {
            # None: `-e KEY` is emitted without a value (see _start_command)
            'SINGLESTORE_LICENSE': None,
            # Quoted because the command is run through a shell
            'ROOT_PASSWORD': f"\"{self.root_password}\"",
            'SINGLESTORE_SET_GLOBAL_DEFAULT_PARTITIONS_PER_LEAF': '1',
        }

        self.ports = ['3306', '8080', '9000']

        self.url = f'root:{self.root_password}@127.0.0.1:3306'

    def start(self) -> None:
        """Start the container in detached mode.

        Raises
        ------
        RuntimeError
            If the ``docker run`` command fails.

        """
        command = ' '.join(self._start_command())

        logger.info(f'Starting container {self.container_name}')
        try:
            license_key = os.environ['SINGLESTORE_LICENSE']
            # NOTE(review): the child process only receives the license
            # variable, not the rest of the caller's environment.
            env = {
                'SINGLESTORE_LICENSE': license_key,
            }
            subprocess.check_call(command, shell=True, env=env)
        except Exception as e:
            logger.exception(e)
            raise RuntimeError(
                'Failed to start container. '
                'Is one already running?',
            ) from e
        logger.debug('Container started')

    def _start_command(self) -> Iterator[str]:
        """Yield the pieces of the ``docker run`` command line in order."""
        yield 'docker run -d --name'
        yield self.container_name
        for key, value in self.environment_vars.items():
            yield '-e'
            if value is None:
                yield key
            else:
                yield f'{key}={value}'

        for port in self.ports:
            yield '-p'
            yield f'{port}:{port}'

        yield self.dev_image_name

    def print_logs(self) -> None:
        """Write the output of ``docker logs`` for the container to the log."""
        logs_command = ['docker', 'logs', self.container_name]
        logger.info('Getting logs')
        logger.info(subprocess.check_output(logs_command))

    def connect(self) -> Connection:
        """Connect to the database, retrying while it is not yet available.

        Up to ``STARTUP_CONNECT_ATTEMPTS`` attempts are made, sleeping
        ``STARTUP_CONNECT_TIMEOUT_SECONDS`` after every failure but the last.

        Raises
        ------
        RuntimeError
            If the final attempt fails as well.

        """
        # Run all but one attempts trying again if they fail
        for i in range(STARTUP_CONNECT_ATTEMPTS - 1):
            try:
                return connect(self.url)
            except Exception:
                logger.debug(f'Database not available yet (attempt #{i}).')
                time.sleep(STARTUP_CONNECT_TIMEOUT_SECONDS)

        # Try one last time and report error if it fails
        try:
            return connect(self.url)
        except Exception as e:
            logger.error('Timed out while waiting to connect to database.')
            logger.exception(e)
            self.print_logs()
            raise RuntimeError('Failed to connect to database') from e

    def wait_till_connections_closed(self) -> None:
        """Block until no connection other than our own heartbeat remains.

        Polls the connection count up to ``TEARDOWN_WAIT_ATTEMPTS`` times,
        ``TEARDOWN_WAIT_SECONDS`` apart. Returns as soon as only the
        heartbeat connection is left; otherwise logs a warning and the
        container logs after the last attempt.

        Raises
        ------
        RuntimeError
            If the number of open connections cannot be determined.

        """
        heart_beat = connect(self.url)
        try:
            for i in range(TEARDOWN_WAIT_ATTEMPTS):
                connections = self.get_open_connections(heart_beat)
                if connections is None:
                    raise RuntimeError(
                        'Could not determine the number of open connections.',
                    )

                # The heartbeat connection is itself one of the connections
                others = connections - 1
                if others <= 0:
                    logger.debug('All other connections are closed')
                    return

                logger.debug(
                    f'Waiting for other connections (n={others}) '
                    f'to close (attempt #{i})',
                )
                time.sleep(TEARDOWN_WAIT_SECONDS)

            logger.warning('Timed out while waiting for other connections to close')
            self.print_logs()
        finally:
            # Do not leave the heartbeat connection open behind us
            heart_beat.close()

    def get_open_connections(self, conn: Connection) -> Optional[int]:
        """Return the ``Threads_connected`` status value read through ``conn``.

        Returns ``None`` if the status listing has no such row.
        """
        for row in conn.show.status(extended=True):
            name = row['Name']
            value = row['Value']
            logger.info(f'{name} = {value}')
            if name == 'Threads_connected':
                return int(value)

        return None

    def stop(self) -> None:
        """Stop the container and then remove it.

        Raises
        ------
        RuntimeError
            If either ``docker stop`` or ``docker rm`` fails.

        """
        logger.info('Cleaning up SingleStore DB dev container')
        logger.debug('Stopping container')
        try:
            subprocess.check_call(f'docker stop {self.container_name}', shell=True)
        except Exception as e:
            logger.exception(e)
            raise RuntimeError('Failed to stop container.') from e

        logger.debug('Removing container')
        try:
            subprocess.check_call(f'docker rm {self.container_name}', shell=True)
        except Exception as e:
            logger.exception(e)
            raise RuntimeError('Failed to remove container.') from e
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
@pytest.fixture(scope='session')
def singlestoredb_test_container(
    execution_mode: ExecutionMode,
) -> Iterator[_TestContainerManager]:
    """Sets up and tears down the test container

    Only a sequential run or the distributed leader starts and stops the
    container; followers just receive a manager to connect with.

    Raises
    ------
    TypeError
        If ``execution_mode`` is not an ``ExecutionMode``.

    """

    if not isinstance(execution_mode, ExecutionMode):
        raise TypeError(f"Invalid execution mode '{execution_mode}'")

    container_manager = _TestContainerManager()

    # In sequential operation do all the steps
    if execution_mode == ExecutionMode.SEQUENTIAL:
        logger.debug('Not distributed')
        container_manager.start()
        yield container_manager
        container_manager.stop()

    # In distributed execution as leader,
    # do the steps but wait for other workers before stopping
    elif execution_mode == ExecutionMode.LEADER:
        logger.debug('Distributed leader')
        container_manager.start()
        yield container_manager
        try:
            container_manager.wait_till_connections_closed()
        finally:
            # Clean up even if waiting failed, so the fixed-name container
            # is not left behind for the next run.
            container_manager.stop()

    # In distributed execution as a non-leader,
    # don't worry about the container lifecycle
    elif execution_mode == ExecutionMode.FOLLOWER:
        logger.debug('Distributed follower')
        yield container_manager
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
@pytest.fixture(scope='session')
def singlestoredb_connection(
    singlestoredb_test_container: _TestContainerManager,
) -> Iterator[Connection]:
    """Open a connection through the container manager; close it afterwards."""
    conn = singlestoredb_test_container.connect()
    logger.debug('Connected to database.')

    yield conn

    logger.debug('Closing connection')
    conn.close()
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
class _NameAllocator():
|
|
247
|
+
"""Generates unique names for each database"""
|
|
248
|
+
|
|
249
|
+
def __init__(self, id: str) -> None:
|
|
250
|
+
self.id = id
|
|
251
|
+
self.names = 0
|
|
252
|
+
|
|
253
|
+
def get_name(self) -> str:
|
|
254
|
+
name = f'x_db_{self.id}_{self.names}'
|
|
255
|
+
self.names += 1
|
|
256
|
+
return name
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
@pytest.fixture(scope='session')
def name_allocator(node_name: str) -> Iterator[_NameAllocator]:
    """Provide a name allocator keyed on this worker's node name."""
    allocator = _NameAllocator(node_name)
    yield allocator
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
@pytest.fixture
def singlestoredb_tempdb(
    singlestoredb_connection: Connection, name_allocator: _NameAllocator,
) -> Iterator[Cursor]:
    """Yield a cursor switched to a newly created, uniquely named database.

    The database is dropped again after the cursor has been handed back.
    """
    assert singlestoredb_connection.is_connected(), 'Database is no longer connected'
    db = name_allocator.get_name()

    with singlestoredb_connection.cursor() as cursor:
        logger.debug(f"Creating temporary DB \"{db}\"")
        for statement in (f'CREATE DATABASE {db}', f'USE {db}'):
            cursor.execute(statement)

        yield cursor

        logger.debug(f"Dropping temporary DB \"{db}\"")
        cursor.execute(f'DROP DATABASE {db}')
|
|
File without changes
|
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# type: ignore
|
|
3
|
+
from typing import Optional
|
|
4
|
+
from typing import Tuple
|
|
5
|
+
|
|
6
|
+
from singlestoredb.functions.decorator import udf
|
|
7
|
+
from singlestoredb.functions.dtypes import BIGINT
|
|
8
|
+
from singlestoredb.functions.dtypes import FLOAT
|
|
9
|
+
from singlestoredb.functions.dtypes import MEDIUMINT
|
|
10
|
+
from singlestoredb.functions.dtypes import SMALLINT
|
|
11
|
+
from singlestoredb.functions.dtypes import TINYINT
|
|
12
|
+
from singlestoredb.functions.dtypes import VARCHAR
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@udf
def double_mult(x: float, y: float) -> float:
    # Multiply the two arguments
    product = x * y
    return product
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@udf.pandas
def pandas_double_mult(x: float, y: float) -> float:
    # Multiply the two arguments (presumably whole columns in pandas form)
    product = x * y
    return product
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@udf.numpy
def numpy_double_mult(x: float, y: float) -> float:
    # Multiply the two arguments (presumably whole columns in numpy form)
    product = x * y
    return product
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@udf.arrow
def arrow_double_mult(x: float, y: float) -> float:
    # pyarrow is imported lazily so the module loads without it
    from pyarrow.compute import multiply
    return multiply(x, y)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@udf.polars
def polars_double_mult(x: float, y: float) -> float:
    # Multiply the two arguments (presumably whole columns in polars form)
    product = x * y
    return product
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@udf
def nullable_double_mult(x: Optional[float], y: Optional[float]) -> Optional[float]:
    # A missing argument on either side gives a missing result
    return None if (x is None or y is None) else x * y
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@udf(args=[FLOAT(nullable=False), FLOAT(nullable=False)], returns=FLOAT(nullable=False))
def float_mult(x: float, y: float) -> float:
    # Multiply the two arguments; the SQL types come from the decorator
    product = x * y
    return product
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@udf(args=[FLOAT(nullable=True), FLOAT(nullable=True)], returns=FLOAT(nullable=True))
def nullable_float_mult(x: Optional[float], y: Optional[float]) -> Optional[float]:
    # A missing argument on either side gives a missing result
    return None if (x is None or y is None) else x * y
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _int_mult(x: int, y: int) -> int:
|
|
61
|
+
if x is None or y is None:
|
|
62
|
+
return None
|
|
63
|
+
return x * y
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _arrow_int_mult(x: int, y: int) -> int:
    """Return the product of ``x`` and ``y`` computed by pyarrow."""
    # pyarrow is imported lazily so the module loads without it
    from pyarrow.compute import multiply
    return multiply(x, y)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _int_mult_with_masks(x: Tuple[int, bool], y: Tuple[int, bool]) -> Tuple[int, bool]:
|
|
72
|
+
x_data, x_nulls = x
|
|
73
|
+
y_data, y_nulls = y
|
|
74
|
+
return (x_data * y_data, x_nulls | y_nulls)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _arrow_int_mult_with_masks(
    x: Tuple[int, bool],
    y: Tuple[int, bool],
) -> Tuple[int, bool]:
    """Multiply two ``(data, nulls)`` pairs using pyarrow compute functions.

    The result pairs ``multiply`` of the data parts with ``or_`` of the
    nulls parts.
    """
    # pyarrow is imported lazily so the module loads without it
    import pyarrow.compute as pc
    (x_data, x_nulls), (y_data, y_nulls) = x, y
    product = pc.multiply(x_data, y_data)
    nulls = pc.or_(x_nulls, y_nulls)
    return (product, nulls)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# Integer multiply with the signature left to the decorator
int_mult = udf(_int_mult, name='int_mult')

# TINYINT multiply with nullable=False on arguments and return value,
# registered once per data format
tinyint_mult = udf(
    _int_mult,
    args=[TINYINT(nullable=False), TINYINT(nullable=False)],
    returns=TINYINT(nullable=False),
    name='tinyint_mult',
)

pandas_tinyint_mult = udf.pandas(
    _int_mult,
    args=[TINYINT(nullable=False), TINYINT(nullable=False)],
    returns=TINYINT(nullable=False),
    name='pandas_tinyint_mult',
)

polars_tinyint_mult = udf.polars(
    _int_mult,
    args=[TINYINT(nullable=False), TINYINT(nullable=False)],
    returns=TINYINT(nullable=False),
    name='polars_tinyint_mult',
)

numpy_tinyint_mult = udf.numpy(
    _int_mult,
    args=[TINYINT(nullable=False), TINYINT(nullable=False)],
    returns=TINYINT(nullable=False),
    name='numpy_tinyint_mult',
)

arrow_tinyint_mult = udf.arrow(
    _arrow_int_mult,
    args=[TINYINT(nullable=False), TINYINT(nullable=False)],
    returns=TINYINT(nullable=False),
    name='arrow_tinyint_mult',
)
|
|
123
|
+
|
|
124
|
+
# SMALLINT multiply with nullable=False on arguments and return value,
# registered once per data format
smallint_mult = udf(
    _int_mult,
    args=[SMALLINT(nullable=False), SMALLINT(nullable=False)],
    returns=SMALLINT(nullable=False),
    name='smallint_mult',
)

pandas_smallint_mult = udf.pandas(
    _int_mult,
    args=[SMALLINT(nullable=False), SMALLINT(nullable=False)],
    returns=SMALLINT(nullable=False),
    name='pandas_smallint_mult',
)

polars_smallint_mult = udf.polars(
    _int_mult,
    args=[SMALLINT(nullable=False), SMALLINT(nullable=False)],
    returns=SMALLINT(nullable=False),
    name='polars_smallint_mult',
)

numpy_smallint_mult = udf.numpy(
    _int_mult,
    args=[SMALLINT(nullable=False), SMALLINT(nullable=False)],
    returns=SMALLINT(nullable=False),
    name='numpy_smallint_mult',
)

arrow_smallint_mult = udf.arrow(
    _arrow_int_mult,
    args=[SMALLINT(nullable=False), SMALLINT(nullable=False)],
    returns=SMALLINT(nullable=False),
    name='arrow_smallint_mult',
)
|
|
158
|
+
|
|
159
|
+
# MEDIUMINT multiply with nullable=False on arguments and return value,
# registered once per data format
mediumint_mult = udf(
    _int_mult,
    args=[MEDIUMINT(nullable=False), MEDIUMINT(nullable=False)],
    returns=MEDIUMINT(nullable=False),
    name='mediumint_mult',
)

pandas_mediumint_mult = udf.pandas(
    _int_mult,
    args=[MEDIUMINT(nullable=False), MEDIUMINT(nullable=False)],
    returns=MEDIUMINT(nullable=False),
    name='pandas_mediumint_mult',
)

polars_mediumint_mult = udf.polars(
    _int_mult,
    args=[MEDIUMINT(nullable=False), MEDIUMINT(nullable=False)],
    returns=MEDIUMINT(nullable=False),
    name='polars_mediumint_mult',
)

numpy_mediumint_mult = udf.numpy(
    _int_mult,
    args=[MEDIUMINT(nullable=False), MEDIUMINT(nullable=False)],
    returns=MEDIUMINT(nullable=False),
    name='numpy_mediumint_mult',
)

arrow_mediumint_mult = udf.arrow(
    _arrow_int_mult,
    args=[MEDIUMINT(nullable=False), MEDIUMINT(nullable=False)],
    returns=MEDIUMINT(nullable=False),
    name='arrow_mediumint_mult',
)
|
|
193
|
+
|
|
194
|
+
# BIGINT multiply with nullable=False on arguments and return value,
# registered once per data format
bigint_mult = udf(
    _int_mult,
    args=[BIGINT(nullable=False), BIGINT(nullable=False)],
    returns=BIGINT(nullable=False),
    name='bigint_mult',
)

pandas_bigint_mult = udf.pandas(
    _int_mult,
    args=[BIGINT(nullable=False), BIGINT(nullable=False)],
    returns=BIGINT(nullable=False),
    name='pandas_bigint_mult',
)

polars_bigint_mult = udf.polars(
    _int_mult,
    args=[BIGINT(nullable=False), BIGINT(nullable=False)],
    returns=BIGINT(nullable=False),
    name='polars_bigint_mult',
)

numpy_bigint_mult = udf.numpy(
    _int_mult,
    args=[BIGINT(nullable=False), BIGINT(nullable=False)],
    returns=BIGINT(nullable=False),
    name='numpy_bigint_mult',
)

arrow_bigint_mult = udf.arrow(
    _arrow_int_mult,
    args=[BIGINT(nullable=False), BIGINT(nullable=False)],
    returns=BIGINT(nullable=False),
    name='arrow_bigint_mult',
)
|
|
228
|
+
|
|
229
|
+
# TINYINT multiply with the dtype passed without an explicit nullable flag,
# registered once per data format; the *_with_masks variants set
# include_masks=True and use the (data, nulls) pair implementations
nullable_tinyint_mult = udf(
    _int_mult,
    args=[TINYINT, TINYINT],
    returns=TINYINT,
    name='nullable_tinyint_mult',
)

pandas_nullable_tinyint_mult = udf.pandas(
    _int_mult,
    args=[TINYINT, TINYINT],
    returns=TINYINT,
    name='pandas_nullable_tinyint_mult',
)

pandas_nullable_tinyint_mult_with_masks = udf.pandas(
    _int_mult_with_masks,
    args=[TINYINT, TINYINT],
    returns=TINYINT,
    include_masks=True,
    name='pandas_nullable_tinyint_mult_with_masks',
)

polars_nullable_tinyint_mult = udf.polars(
    _int_mult,
    args=[TINYINT, TINYINT],
    returns=TINYINT,
    name='polars_nullable_tinyint_mult',
)

polars_nullable_tinyint_mult_with_masks = udf.polars(
    _int_mult_with_masks,
    args=[TINYINT, TINYINT],
    returns=TINYINT,
    include_masks=True,
    name='polars_nullable_tinyint_mult_with_masks',
)

numpy_nullable_tinyint_mult = udf.numpy(
    _int_mult,
    args=[TINYINT, TINYINT],
    returns=TINYINT,
    name='numpy_nullable_tinyint_mult',
)

numpy_nullable_tinyint_mult_with_masks = udf.numpy(
    _int_mult_with_masks,
    args=[TINYINT, TINYINT],
    returns=TINYINT,
    include_masks=True,
    name='numpy_nullable_tinyint_mult_with_masks',
)

arrow_nullable_tinyint_mult = udf.arrow(
    _arrow_int_mult,
    args=[TINYINT, TINYINT],
    returns=TINYINT,
    name='arrow_nullable_tinyint_mult',
)

arrow_nullable_tinyint_mult_with_masks = udf.arrow(
    _arrow_int_mult_with_masks,
    args=[TINYINT, TINYINT],
    returns=TINYINT,
    include_masks=True,
    name='arrow_nullable_tinyint_mult_with_masks',
)
|
|
295
|
+
|
|
296
|
+
# SMALLINT / MEDIUMINT / BIGINT multiply with the dtype passed without an
# explicit nullable flag
nullable_smallint_mult = udf(
    _int_mult,
    args=[SMALLINT, SMALLINT],
    returns=SMALLINT,
    name='nullable_smallint_mult',
)

nullable_mediumint_mult = udf(
    _int_mult,
    args=[MEDIUMINT, MEDIUMINT],
    returns=MEDIUMINT,
    name='nullable_mediumint_mult',
)

nullable_bigint_mult = udf(
    _int_mult,
    args=[BIGINT, BIGINT],
    returns=BIGINT,
    name='nullable_bigint_mult',
)

numpy_nullable_bigint_mult = udf.numpy(
    _int_mult,
    args=[BIGINT, BIGINT],
    returns=BIGINT,
    name='numpy_nullable_bigint_mult',
)
|
|
323
|
+
|
|
324
|
+
# BIGINT multiply in numpy form using the (data, nulls) pair implementation.
# Registered under its own name: it previously reused
# 'numpy_nullable_bigint_mult', the name of the UDF defined just before it.
numpy_nullable_bigint_mult_with_masks = udf.numpy(
    _int_mult_with_masks,
    name='numpy_nullable_bigint_mult_with_masks',
    args=[BIGINT, BIGINT],
    returns=BIGINT,
    include_masks=True,
)
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
@udf
def nullable_int_mult(x: Optional[int], y: Optional[int]) -> Optional[int]:
    # A missing argument on either side gives a missing result
    return None if (x is None or y is None) else x * y
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
@udf
def string_mult(x: str, times: int) -> str:
    # Repeat the string `times` times
    repeated = x * times
    return repeated
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
@udf.pandas
def pandas_string_mult(x: str, times: int) -> str:
    # Apply `*` to the arguments (presumably whole columns in pandas form)
    repeated = x * times
    return repeated
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
@udf.numpy
def numpy_string_mult(x: str, times: int) -> str:
    # Apply `*` to the arguments (presumably whole columns in numpy form)
    repeated = x * times
    return repeated
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
# @udf.polars
|
|
356
|
+
# def polars_string_mult(x: str, times: int) -> str:
|
|
357
|
+
# print(type(x), x, type(times), times)
|
|
358
|
+
# return x * times
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
# @udf.arrow
|
|
362
|
+
# def arrow_string_mult(x: str, times: int) -> str:
|
|
363
|
+
# print(type(x), x, type(times), times)
|
|
364
|
+
# import pyarrow.compute as pc
|
|
365
|
+
# return pc.multiply(x, times)
|
|
366
|
+
# return x * times
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
@udf
def nullable_string_mult(x: Optional[str], times: Optional[int]) -> Optional[str]:
    # A missing argument on either side gives a missing result
    return None if (x is None or times is None) else x * times
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
@udf(args=dict(x=VARCHAR(20, nullable=False)))
def varchar_mult(x: str, times: int) -> str:
    # Repeat the string `times` times; only `x` gets an explicit SQL type
    repeated = x * times
    return repeated
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
@udf(args=dict(x=VARCHAR(20, nullable=True)))
def nullable_varchar_mult(x: Optional[str], times: Optional[int]) -> Optional[str]:
    # A missing argument on either side gives a missing result
    return None if (x is None or times is None) else x * times
|