singlestoredb 1.0.3__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of singlestoredb might be problematic. Click here for more details.

Files changed (30) hide show
  1. singlestoredb/__init__.py +1 -1
  2. singlestoredb/config.py +125 -0
  3. singlestoredb/functions/dtypes.py +5 -198
  4. singlestoredb/functions/ext/__init__.py +0 -1
  5. singlestoredb/functions/ext/asgi.py +665 -153
  6. singlestoredb/functions/ext/json.py +2 -2
  7. singlestoredb/functions/ext/mmap.py +174 -67
  8. singlestoredb/functions/ext/rowdat_1.py +2 -2
  9. singlestoredb/functions/ext/utils.py +169 -0
  10. singlestoredb/fusion/handler.py +109 -9
  11. singlestoredb/fusion/handlers/stage.py +150 -0
  12. singlestoredb/fusion/handlers/workspace.py +265 -4
  13. singlestoredb/fusion/registry.py +69 -1
  14. singlestoredb/http/connection.py +40 -2
  15. singlestoredb/management/utils.py +30 -0
  16. singlestoredb/management/workspace.py +209 -35
  17. singlestoredb/mysql/connection.py +69 -0
  18. singlestoredb/mysql/cursors.py +176 -4
  19. singlestoredb/tests/test.sql +210 -0
  20. singlestoredb/tests/test_connection.py +1408 -0
  21. singlestoredb/tests/test_ext_func.py +2 -2
  22. singlestoredb/tests/test_ext_func_data.py +1 -1
  23. singlestoredb/utils/dtypes.py +205 -0
  24. singlestoredb/utils/results.py +367 -14
  25. {singlestoredb-1.0.3.dist-info → singlestoredb-1.1.0.dist-info}/METADATA +2 -1
  26. {singlestoredb-1.0.3.dist-info → singlestoredb-1.1.0.dist-info}/RECORD +30 -28
  27. {singlestoredb-1.0.3.dist-info → singlestoredb-1.1.0.dist-info}/LICENSE +0 -0
  28. {singlestoredb-1.0.3.dist-info → singlestoredb-1.1.0.dist-info}/WHEEL +0 -0
  29. {singlestoredb-1.0.3.dist-info → singlestoredb-1.1.0.dist-info}/entry_points.txt +0 -0
  30. {singlestoredb-1.0.3.dist-info → singlestoredb-1.1.0.dist-info}/top_level.txt +0 -0
@@ -212,8 +212,8 @@ def load_numpy(
212
212
  return np.asarray(row_ids, dtype=np.longlong), \
213
213
  [
214
214
  (
215
- np.asarray(data, dtype=NUMPY_TYPE_MAP[spec[1]]),
216
- np.asarray(mask, dtype=np.bool_),
215
+ np.asarray(data, dtype=NUMPY_TYPE_MAP[spec[1]]), # type: ignore
216
+ np.asarray(mask, dtype=np.bool_), # type: ignore
217
217
  )
218
218
  for (data, mask), spec in zip(cols, colspec)
219
219
  ]
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env python
2
- '''
2
+ """
3
3
  Module for creating collocated Python UDFs
4
4
 
5
5
  This module implements the collocated form of external functions for
@@ -37,7 +37,7 @@ With the functions registered, you can now run the UDFs::
37
37
  SELECT print_it(3.14, 'my string');
38
38
  SELECT print_it_pandas(3.14, 'my string');
39
39
 
40
- '''
40
+ """
41
41
  import argparse
42
42
  import array
43
43
  import asyncio
@@ -53,21 +53,24 @@ import sys
53
53
  import tempfile
54
54
  import threading
55
55
  import traceback
56
+ import urllib
57
+ import zipfile
56
58
  from typing import Any
59
+ from typing import Dict
60
+ from typing import List
61
+ from typing import Optional
57
62
 
58
63
  from . import asgi
64
+ from . import utils
65
+ from ... import manage_workspaces
66
+ from ...config import get_option
59
67
 
60
68
 
61
- logger = logging.getLogger('singlestoredb.functions.ext.mmap')
62
- handler = logging.StreamHandler()
63
- formatter = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
64
- handler.setFormatter(formatter)
65
- logger.addHandler(handler)
66
- logger.setLevel(logging.INFO)
69
+ logger = utils.get_logger('singlestoredb.functions.ext.mmap')
67
70
 
68
71
 
69
72
  def _handle_request(app: Any, connection: Any, client_address: Any) -> None:
70
- '''
73
+ """
71
74
  Handle function call request.
72
75
 
73
76
  Parameters:
@@ -78,7 +81,7 @@ def _handle_request(app: Any, connection: Any, client_address: Any) -> None:
78
81
  client_address : string
79
82
  Address of connecting client
80
83
 
81
- '''
84
+ """
82
85
  logger.info('connection from {}'.format(str(connection).split(', ')[0][-4:]))
83
86
 
84
87
  # Receive the request header. Format:
@@ -186,77 +189,162 @@ def _handle_request(app: Any, connection: Any, client_address: Any) -> None:
186
189
  connection.close()
187
190
 
188
191
 
189
- if __name__ == '__main__':
190
- parser = argparse.ArgumentParser(
191
- prog='python -m singlestoredb.functions.ext.mmap',
192
- description='Run a collacated Python UDF server',
193
- )
194
- parser.add_argument(
195
- '--max-connections', metavar='n', type=int, default=32,
196
- help='maximum number of server connections before refusing them',
197
- )
198
- parser.add_argument(
199
- '--single-thread', default=False, action='store_true',
200
- help='should the server run in single-thread mode?',
201
- )
202
- parser.add_argument(
203
- '--socket-path', metavar='file-path',
204
- default=os.path.join(tempfile.gettempdir(), secrets.token_hex(16)),
205
- help='path to communications socket',
206
- )
207
- parser.add_argument(
208
- '--db', metavar='conn-str', default='',
209
- help='connection string to use for registering functions',
210
- )
211
- parser.add_argument(
212
- '--replace-existing', action='store_true',
213
- help='should existing functions of the same name '
214
- 'in the database be replaced?',
215
- )
216
- parser.add_argument(
217
- '--log-level', metavar='[info|debug|warning|error]', default='info',
218
- help='logging level',
219
- )
220
- parser.add_argument(
221
- '--process-mode', metavar='[thread|subprocess]', default='subprocess',
222
- help='how to handle concurrent handlers',
223
- )
224
- parser.add_argument(
225
- 'functions', metavar='module.or.func.path', nargs='*',
226
- help='functions or modules to export in UDF server',
227
- )
228
- args = parser.parse_args()
192
+ def main(argv: Optional[List[str]] = None) -> None:
193
+ """
194
+ Main program for collocated Python UDFs
195
+
196
+ Parameters
197
+ ----------
198
+ argv : List[str], optional
199
+ List of command-line parameters
200
+
201
+ """
202
+ tmpdir = None
203
+ functions = []
204
+ defaults: Dict[str, Any] = {}
205
+ for i in range(2):
206
+ parser = argparse.ArgumentParser(
207
+ prog='python -m singlestoredb.functions.ext.mmap',
208
+ description='Run a collacated Python UDF server',
209
+ )
210
+ parser.add_argument(
211
+ '--max-connections', metavar='n', type=int,
212
+ default=get_option('external_function.max_connections'),
213
+ help='maximum number of server connections before refusing them',
214
+ )
215
+ parser.add_argument(
216
+ '--single-thread', action='store_true',
217
+ default=get_option('external_function.single_thread'),
218
+ help='should the server run in single-thread mode?',
219
+ )
220
+ parser.add_argument(
221
+ '--socket-path', metavar='file-path',
222
+ default=(
223
+ get_option('external_function.socket_path') or
224
+ os.path.join(tempfile.gettempdir(), secrets.token_hex(16))
225
+ ),
226
+ help='path to communications socket',
227
+ )
228
+ parser.add_argument(
229
+ '--db', metavar='conn-str',
230
+ default=os.environ.get('SINGLESTOREDB_URL', ''),
231
+ help='connection string to use for registering functions',
232
+ )
233
+ parser.add_argument(
234
+ '--replace-existing', action='store_true',
235
+ help='should existing functions of the same name '
236
+ 'in the database be replaced?',
237
+ )
238
+ parser.add_argument(
239
+ '--log-level', metavar='[info|debug|warning|error]',
240
+ default=get_option('external_function.log_level'),
241
+ help='logging level',
242
+ )
243
+ parser.add_argument(
244
+ '--process-mode', metavar='[thread|subprocess]',
245
+ default=get_option('external_function.process_mode'),
246
+ help='how to handle concurrent handlers',
247
+ )
248
+ parser.add_argument(
249
+ 'functions', metavar='module.or.func.path', nargs='*',
250
+ help='functions or modules to export in UDF server',
251
+ )
229
252
 
230
- logger.setLevel(getattr(logging, args.log_level.upper()))
253
+ args = parser.parse_args(argv)
254
+
255
+ logger.setLevel(getattr(logging, args.log_level.upper()))
256
+
257
+ if i > 0:
258
+ break
259
+
260
+ # Download Stage files as needed
261
+ for i, f in enumerate(args.functions):
262
+ if f.startswith('stage://'):
263
+ url = urllib.parse.urlparse(f)
264
+ if not url.path or url.path == '/':
265
+ raise ValueError(f'no stage path was specified: {f}')
266
+ if url.path.endswith('/'):
267
+ raise ValueError(f'an environment file must be specified: {f}')
268
+
269
+ mgr = manage_workspaces()
270
+ if url.hostname:
271
+ wsg = mgr.get_workspace_group(url.hostname)
272
+ elif os.environ.get('SINGLESTOREDB_WORKSPACE_GROUP'):
273
+ wsg = mgr.get_workspace_group(
274
+ os.environ['SINGLESTOREDB_WORKSPACE_GROUP'],
275
+ )
276
+ else:
277
+ raise ValueError(f'no workspace group specified: {f}')
278
+
279
+ if tmpdir is None:
280
+ tmpdir = tempfile.TemporaryDirectory()
281
+
282
+ local_path = os.path.join(tmpdir.name, url.path.split('/')[-1])
283
+ wsg.stage.download_file(url.path, local_path)
284
+ args.functions[i] = local_path
285
+
286
+ elif f.startswith('http://') or f.startswith('https://'):
287
+ if tmpdir is None:
288
+ tmpdir = tempfile.TemporaryDirectory()
289
+
290
+ local_path = os.path.join(tmpdir.name, f.split('/')[-1])
291
+ urllib.request.urlretrieve(f, local_path)
292
+ args.functions[i] = local_path
293
+
294
+ # See if any of the args are zip files (assume they are environment files)
295
+ modules = [(x, zipfile.is_zipfile(x)) for x in args.functions]
296
+ envs = [x[0] for x in modules if x[1]]
297
+ others = [x[0] for x in modules if not x[1]]
298
+
299
+ if envs and len(envs) > 1:
300
+ raise RuntimeError('only one environment file may be specified.')
301
+
302
+ if envs and others:
303
+ raise RuntimeError('environment files and other modules can not be mixed.')
304
+
305
+ # See if an environment file was specified. If so, use those settings
306
+ # as the defaults and reprocess command line.
307
+ if envs:
308
+ # Add zip file to the Python path
309
+ sys.path.insert(0, envs[0])
310
+ functions = [os.path.splitext(os.path.basename(envs[0]))[0]]
311
+
312
+ # Add pyproject.toml variables and redo command-line processing
313
+ defaults = utils.read_config(
314
+ envs[0],
315
+ ['tool.external_function', 'tool.external-function.collocated'],
316
+ )
317
+ if defaults:
318
+ continue
319
+
320
+ args.functions = functions or args.functions or None
321
+ args.replace_existing = args.replace_existing \
322
+ or defaults.get('replace_existing') \
323
+ or get_option('external_function.replace_existing')
231
324
 
232
325
  if os.path.exists(args.socket_path):
233
326
  try:
234
327
  os.unlink(args.socket_path)
235
328
  except (IOError, OSError):
236
- logger.error(f'could not remove existing socket path: {args.socket_path}')
237
- sys.exit(1)
329
+ raise RuntimeError(
330
+ f'could not remove existing socket path: {args.socket_path}',
331
+ )
238
332
 
239
- # Create application
333
+ # Create application from functions / module
240
334
  app = asgi.create_app(
241
- args.functions,
242
- app_mode='collocated',
243
- data_format='rowdat_1',
335
+ functions=args.functions,
244
336
  url=args.socket_path,
337
+ data_format='rowdat_1',
338
+ app_mode='collocated',
245
339
  )
246
340
 
247
- funcs = app.show_create_functions(replace=True) # type: ignore
341
+ funcs = app.show_create_functions(replace=args.replace_existing)
248
342
  if not funcs:
249
- logger.error('no functions specified')
250
- sys.exit(1)
343
+ raise RuntimeError('no functions specified')
251
344
 
252
345
  for f in funcs:
253
346
  logger.info(f'function: {f}')
254
347
 
255
- # Register functions with database
256
- if args.db:
257
- logger.info('registering functions with database')
258
- app.register_functions(args.db, replace=args.replace_existing) # type: ignore
259
-
260
348
  # Create the Unix socket server.
261
349
  server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
262
350
 
@@ -270,8 +358,13 @@ if __name__ == '__main__':
270
358
  # simple case.
271
359
  server.listen(args.max_connections)
272
360
 
273
- # Accept connections forever.
274
361
  try:
362
+ # Register functions with database
363
+ if args.db:
364
+ logger.info('registering functions with database')
365
+ app.register_functions(args.db, replace=args.replace_existing)
366
+
367
+ # Accept connections forever.
275
368
  while True:
276
369
  # Listen for the next connection on our port.
277
370
  connection, client_address = server.accept()
@@ -296,11 +389,25 @@ if __name__ == '__main__':
296
389
  t.join()
297
390
 
298
391
  except KeyboardInterrupt:
299
- sys.exit(0)
392
+ return
300
393
 
301
394
  finally:
395
+ if args.db:
396
+ logger.info('dropping functions from database')
397
+ app.drop_functions(args.db)
398
+
302
399
  # Remove the socket file before we exit.
303
400
  try:
304
401
  os.unlink(args.socket_path)
305
402
  except (IOError, OSError):
306
403
  logger.error(f'could not remove socket path: {args.socket_path}')
404
+
405
+
406
if __name__ == '__main__':
    # Run the collocated UDF server. Ctrl-C is a normal shutdown; a
    # RuntimeError raised by main() is a configuration problem and is
    # reported with a non-zero exit status.
    try:
        main()
    except KeyboardInterrupt:
        pass
    except RuntimeError as exc:
        logger.error(str(exc))
        sys.exit(1)
@@ -287,8 +287,8 @@ def _load_numpy(
287
287
  return np.asarray(row_ids, dtype=np.int64), \
288
288
  [
289
289
  (
290
- np.asarray(data, dtype=NUMPY_TYPE_MAP[dtype]),
291
- np.asarray(mask, dtype=np.bool_),
290
+ np.asarray(data, dtype=NUMPY_TYPE_MAP[dtype]), # type: ignore
291
+ np.asarray(mask, dtype=np.bool_), # type: ignore
292
292
  )
293
293
  for (data, mask), (name, dtype) in zip(cols, colspec)
294
294
  ]
@@ -0,0 +1,169 @@
1
+ #!/usr/bin/env python
2
+ import json
3
+ import logging
4
+ import re
5
+ import sys
6
+ import zipfile
7
+ from copy import copy
8
+ from typing import Any
9
+ from typing import Dict
10
+ from typing import List
11
+ from typing import Union
12
+
13
+ try:
14
+ import tomllib
15
+ except ImportError:
16
+ import tomli as tomllib # type: ignore
17
+
18
try:
    from uvicorn.logging import DefaultFormatter

except ImportError:

    class DefaultFormatter(logging.Formatter):  # type: ignore

        # Fallback used when uvicorn is not installed: mimic uvicorn's
        # DefaultFormatter by supplying a 'levelprefix' field -- the level
        # name plus ':' padded out to 8 characters.
        def formatMessage(self, record: logging.LogRecord) -> str:
            # Format a copy so the shared LogRecord is never mutated.
            clone = copy(record)
            padding = ' ' * (8 - len(clone.levelname))
            clone.__dict__['levelprefix'] = clone.levelname + ':' + padding
            return super().formatMessage(clone)
31
+
32
+
33
def get_logger(name: str) -> logging.Logger:
    """
    Return a logger configured with the package's default formatting.

    Parameters
    ----------
    name : str
        Name of the logger

    Returns
    -------
    logging.Logger

    """
    logger = logging.getLogger(name)
    # logging.getLogger returns a cached instance, so attaching a handler
    # unconditionally would duplicate every log line when this function is
    # called more than once with the same name. Configure only once.
    if not logger.handlers:
        handler = logging.StreamHandler()
        formatter = DefaultFormatter('%(levelprefix)s %(message)s')
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    logger.setLevel(logging.INFO)
    return logger
42
+
43
+
44
def read_config(
    archive: str,
    keys: Union[str, List[str]],
    config_file: str = 'pyproject.toml',
) -> Dict[str, Any]:
    """
    Read one or more keys from a Toml config file.

    Parameters
    ----------
    archive : str
        Path to an environment file
    keys : str or List[str]
        Period-separated paths to the desired keys
    config_file : str, optional
        Name of the config file in the zip file

    Returns
    -------
    Dict[str, Any]
        Option names (lower-cased, '-' replaced with '_') mapped to their
        values; empty when the archive has no config file

    """
    key_paths = [keys] if isinstance(keys, str) else list(keys)
    result: Dict[str, Any] = {}

    with zipfile.ZipFile(archive) as arc:
        try:
            parsed = tomllib.loads(arc.read(config_file).decode('utf8'))
            verify_python_version(parsed)
            for dotted in key_paths:
                # Walk the table hierarchy one component at a time; any
                # missing component yields an empty table.
                section: Any = parsed
                for part in dotted.split('.'):
                    section = section.get(part, {})
                for name, value in section.items():
                    result[name.lower().replace('-', '_')] = value
        except KeyError:
            # No config file in the archive -- return no defaults.
            pass

    return result
82
+
83
+
84
def verify_python_version(options: Dict[str, Any]) -> None:
    """
    Verify the version of Python matches the pyproject.toml requirement.

    Parameters
    ----------
    options : Dict[str, Any]
        Parsed contents of a pyproject.toml file

    Raises
    ------
    ValueError
        If the requirement string or its comparison operator is invalid
    RuntimeError
        If the running interpreter does not satisfy the requirement

    """
    project = options.get('project', {})
    # pyproject.toml spells this key 'requires-python' (PEP 621); also
    # accept the underscore form in case the caller pre-normalized keys.
    requires_python = project.get('requires-python') \
        or project.get('requires_python')
    if not requires_python:
        return

    # Capture the whole operator: a repeated group like ([<=>])+ keeps only
    # the last character matched, so '>=' would parse as '='. Also allow
    # single-component versions such as '>=3'. No trailing anchor: extra
    # specifiers after the first one (e.g. '>=3.8,<4') are ignored.
    m = re.match(r'\s*([<=>]+)\s*((?:\d+\.)*\d+)\s*', requires_python)
    if not m:
        raise ValueError(f'python version string is not valid: {requires_python}')

    operator = m.group(1)
    # Split on '.' before converting; iterating the string directly would
    # call int() on every character, including the '.' separators.
    version_info = tuple(int(x) for x in m.group(2).split('.'))

    if operator == '<=':
        if not (sys.version_info <= version_info):
            raise RuntimeError(
                'python version is not compatible: ' +
                f'{sys.version_info} > {m.group(2)}',
            )

    elif operator == '>=':
        if not (sys.version_info >= version_info):
            raise RuntimeError(
                'python version is not compatible: ' +
                f'{sys.version_info} < {m.group(2)}',
            )

    elif operator in ['==', '=']:
        # Compare only the components that were specified so '==3.10'
        # accepts any 3.10.x interpreter; the full sys.version_info
        # 5-tuple can never equal a short version tuple.
        if sys.version_info[:len(version_info)] != version_info:
            raise RuntimeError(
                'python version is not compatible: ' +
                f'{sys.version_info} != {m.group(2)}',
            )

    elif operator == '>':
        if not (sys.version_info > version_info):
            raise RuntimeError(
                'python version is not compatible: ' +
                f'{sys.version_info} <= {m.group(2)}',
            )

    elif operator == '<':
        if not (sys.version_info < version_info):
            raise RuntimeError(
                'python version is not compatible: ' +
                f'{sys.version_info} >= {m.group(2)}',
            )

    else:
        raise ValueError(f'invalid python_version operator: {operator}')
134
+
135
+
136
def to_toml(data: Dict[str, Any]) -> str:
    """
    Dump data to a pyproject.toml.

    Parameters
    ----------
    data : Dict[str, Any]
        Two-level mapping of section name to a dict of option values;
        None sections and None values are skipped

    Returns
    -------
    str

    """
    def inline_table(obj: Any) -> str:
        # Render a dict as JSON, then rewrite '"key":' as 'key =' to get a
        # TOML-style inline table.
        return re.sub(r'"([^"]+)":', r'\1 =', f' {json.dumps(obj)}')

    def scalar(obj: Any) -> str:
        # JSON-encode a single value via a one-element list with the
        # surrounding brackets stripped.
        return json.dumps([obj])[1:-1]

    lines: List[str] = []
    for section, values in data.items():
        if values is None:
            continue
        lines.append('')
        lines.append('[{}]'.format(section.replace('_', '-')))
        for key, value in values.items():
            if value is None:
                continue
            key = key.replace('-', '-').replace('_', '-')
            if isinstance(value, (tuple, list)):
                lines.append(f'{key} = [')
                entries = []
                for element in value:
                    if element is None:
                        continue
                    if isinstance(element, (tuple, list)):
                        entries.append(f' {json.dumps(element)}')
                    elif isinstance(element, dict):
                        entries.append(inline_table(element))
                    else:
                        entries.append(f' {scalar(element)}')
                lines.append(',\n'.join(entries))
                lines.append(']')
            elif isinstance(value, dict):
                # NOTE(review): dict values are emitted without their key,
                # matching the original behavior -- confirm this is intended.
                lines.append(inline_table(value))
            else:
                lines.append(f'{key} = {scalar(value)}')
    return '\n'.join(lines).strip()