seamless-database 2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
database.py
ADDED
|
@@ -0,0 +1,833 @@
|
|
|
1
|
+
from aiohttp import web
|
|
2
|
+
import asyncio
|
|
3
|
+
import contextlib
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import random
|
|
7
|
+
import signal
|
|
8
|
+
import socket
|
|
9
|
+
import sys
|
|
10
|
+
import time
|
|
11
|
+
from urllib.parse import quote
|
|
12
|
+
from peewee import DoesNotExist
|
|
13
|
+
|
|
14
|
+
from database_models import (
|
|
15
|
+
db_init,
|
|
16
|
+
db_atomic,
|
|
17
|
+
Transformation,
|
|
18
|
+
RevTransformation,
|
|
19
|
+
BufferInfo,
|
|
20
|
+
SyntacticToSemantic,
|
|
21
|
+
Expression,
|
|
22
|
+
MetaData,
|
|
23
|
+
IrreproducibleTransformation,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
STATUS_FILE_WAIT_TIMEOUT = 20.0
|
|
28
|
+
INACTIVITY_CHECK_INTERVAL = 1.0
|
|
29
|
+
|
|
30
|
+
status_tracker = None
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# from the Seamless code
|
|
34
|
+
def parse_checksum(checksum, as_bytes=False):
    """Normalize a checksum given as a hex string or 32 raw bytes.

    Returns the lowercase hex string (or the raw 32 bytes when *as_bytes*
    is true), or None when *checksum* is None. Raises TypeError for any
    other input type, ValueError for a non-hex string, and AssertionError
    when the decoded value is not exactly 32 bytes.
    """
    if checksum is None:
        return None
    # Canonicalize to a hex string first, then decode once.
    if isinstance(checksum, bytes):
        checksum = checksum.hex()
    if not isinstance(checksum, str):
        raise TypeError(type(checksum))
    raw = bytes.fromhex(checksum)
    assert len(raw) == 32, len(raw)
    return raw if as_bytes else raw.hex()
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# from the Seamless code
|
|
54
|
+
# from the Seamless code
class SeamlessBufferInfo:
    """Validated record of metadata about a Seamless buffer.

    Attributes are fixed by ``__slots__``. Setting an attribute validates it:

    - ``length`` must be a non-negative int;
    - ``is_*`` flags must be bool;
    - conversion attributes (names containing "2", e.g. ``str2text``) are
      stored as hex strings — bytes values are converted via ``.hex()``.

    Unset attributes are None. Also supports dict-style item access,
    ``update()`` from another instance, and ``as_dict()`` (which omits
    ``checksum`` and all None values).
    """

    __slots__ = (
        "checksum",
        "length",
        "is_utf8",
        "is_json",
        "json_type",
        "is_json_numeric_array",
        "is_json_numeric_scalar",
        "is_numpy",
        "dtype",
        "shape",
        "is_seamless_mixed",
        "str2text",
        "text2str",
        "binary2bytes",
        "bytes2binary",
        "binary2json",
        "json2binary",
    )

    def __init__(self, checksum, params=None):
        # BUG FIX: the original used a mutable default argument ({}).
        # It was only read, so no state leaked, but None is the safe idiom
        # and remains backward-compatible.
        if params is None:
            params = {}
        for slot in self.__slots__:
            setattr(self, slot, params.get(slot))
        if isinstance(checksum, str):
            checksum = parse_checksum(checksum)
        self.checksum = checksum

    def __setattr__(self, attr, value):
        """Validate and normalize *value* before storing it."""
        if value is not None:
            if attr == "length":
                if not isinstance(value, int):
                    raise TypeError(type(value))
                if not value >= 0:
                    raise ValueError
            if attr.startswith("is_"):
                if not isinstance(value, bool):
                    raise TypeError(type(value))
            # Conversion checksums ("x2y") are kept as hex strings.
            # (The original re-checked `value is not None` here; that was
            # redundant inside this branch.)
            if attr.find("2") > -1:
                if isinstance(value, bytes):
                    value = value.hex()
        super().__setattr__(attr, value)

    def __setitem__(self, item, value):
        return setattr(self, item, value)

    def __getitem__(self, item):
        return getattr(self, item)

    def update(self, other):
        """Copy every non-None attribute from *other* (same type) into self."""
        if not isinstance(other, SeamlessBufferInfo):
            raise TypeError
        for attr in self.__slots__:
            v = getattr(other, attr)
            if v is not None:
                setattr(self, attr, v)

    def get(self, attr, default=None):
        """Return the attribute value, or *default* when it is unset (None)."""
        value = getattr(self, attr)
        if value is None:
            return default
        return value

    def as_dict(self):
        """Return the set (non-None) attributes as a dict, minus 'checksum'."""
        result = {}
        for attr in self.__slots__:
            if attr == "checksum":
                continue
            v = getattr(self, attr)
            if v is not None:
                result[attr] = v
        return result
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def err(*args, **kwargs):
    """Print an error message ('ERROR: ' + first argument) and exit with 1."""
    message, *rest = args
    print("ERROR: " + message, *rest, **kwargs)
    exit(1)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
class DatabaseError(Exception):
    """Request-level error raised by the GET/PUT handlers.

    args[0] becomes the client-visible error text; on GET, "Unknown key"
    is mapped to HTTP 404 and anything else to 400.
    """

    pass
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def is_port_in_use(address, port):
    """Return True when a TCP connection to (address, port) succeeds."""
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    with probe:
        # connect_ex returns 0 on success instead of raising.
        return probe.connect_ex((address, port)) == 0
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def wait_for_status_file(path: str, timeout: float = STATUS_FILE_WAIT_TIMEOUT):
    """Poll *path* until it can be parsed as a JSON object, then return it.

    While the file does not exist, retry every 0.1 s up to *timeout*
    seconds. Exits the process (status 1, message on stderr) on timeout,
    on invalid JSON, or when the payload is not a JSON object.
    """
    deadline = time.monotonic() + timeout
    contents = None
    loaded = False
    while not loaded:
        try:
            with open(path, "r", encoding="utf-8") as status_stream:
                contents = json.load(status_stream)
            loaded = True
        except FileNotFoundError:
            if time.monotonic() >= deadline:
                print(
                    f"Status file '{path}' not found after {int(timeout)} seconds",
                    file=sys.stderr,
                )
                sys.exit(1)
            time.sleep(0.1)
        except json.JSONDecodeError as exc:
            print(
                f"Status file '{path}' is not valid JSON: {exc}",
                file=sys.stderr,
            )
            sys.exit(1)

    if not isinstance(contents, dict):
        print(
            f"Status file '{path}' must contain a JSON object",
            file=sys.stderr,
        )
        sys.exit(1)

    return contents
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
class StatusFileTracker:
    """Maintains a JSON status file describing this server process.

    The file is always rewritten atomically (write a sibling ``.tmp``
    file, then ``os.replace``) so readers never see a partial payload.
    ``running_written`` records whether the "running" state was ever
    published, so failure reporting can be skipped after a clean start.
    """

    def __init__(self, path: str, base_contents: dict, port: int):
        self.path = path
        # Keep a private copy so later mutation by the caller has no effect.
        self._base_contents = dict(base_contents)
        self.port = port
        self.running_written = False

    def _write(self, payload: dict):
        """Atomically replace the status file with *payload* as JSON."""
        temp_path = self.path + ".tmp"
        with open(temp_path, "w", encoding="utf-8") as status_stream:
            json.dump(payload, status_stream)
            status_stream.write("\n")
        os.replace(temp_path, self.path)

    def write_running(self):
        """Publish status 'running' plus the bound port; keep the payload
        as the new base for later writes."""
        payload = dict(self._base_contents, port=self.port, status="running")
        self._write(payload)
        self._base_contents = payload
        self.running_written = True

    def write_failed(self):
        """Publish status 'failed' without altering the stored base contents."""
        self._write(dict(self._base_contents, status="failed"))
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def raise_startup_error(exc: BaseException):
    """Record startup failure in the status file (when one is configured
    and 'running' was never published), then re-raise *exc*."""
    tracker = status_tracker
    if tracker and not tracker.running_written:
        tracker.write_failed()
    raise exc
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def pick_random_free_port(host: str, start: int, end: int) -> int:
    """Return a random port in [start, end] that can currently be bound.

    Ports are probed in random order without repetition. Raises
    RuntimeError for an invalid range or when no port in the range is
    free. Note that the returned port is only known to have been free at
    probe time (bind-and-close check).
    """
    if start < 0 or end > 65535:
        raise RuntimeError("--port-range values must be between 0 and 65535")
    if start > end:
        raise RuntimeError("--port-range START must be less than or equal to END")

    remaining = set(range(start, end + 1))
    while remaining:
        candidate = random.randint(start, end)
        if candidate not in remaining:
            continue
        remaining.discard(candidate)
        try:
            # Binding (and immediately closing) proves the port is free now.
            with socket.create_server((host, candidate), reuse_port=False):
                pass
        except OSError:
            continue
        return candidate

    raise RuntimeError(f"No free port available in range {start}-{end}")
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def build_sqlite_readonly_uri(path: str) -> str:
    """Build a SQLite URI that opens *path* read-only (``mode=ro``).

    The path is made absolute, backslashes are normalized to forward
    slashes (Windows), and special characters are percent-encoded.
    """
    normalized = os.path.abspath(path).replace("\\", "/")
    return "file:%s?mode=ro" % quote(normalized, safe="/:")
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
# Request types accepted by the GET/PUT handlers (the "type" field of the
# JSON request body). Unless marked otherwise, a type is valid for both
# GET and PUT.
types = (
    "protocol",
    "buffer_info",
    "syntactic_to_semantic",
    "semantic_to_syntactic",
    "transformation",
    "metadata",
    "expression",
    "irreproducible",  # only PUT
    "rev_expression",  # only GET
    "rev_transformations",  # only GET
)
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def format_response(response, *, none_as_404=False):
    """Convert a handler result into an HTTP (status, body) pair.

    Returns status None when the caller's default (200) should be kept.
    None results become an error (404 "Unknown key" when *none_as_404*,
    else 400 "No response"); bool/dict/list are JSON-encoded; str/bytes
    pass through; any other type is reported as a 400.
    """
    if response is None:
        if none_as_404:
            return 404, "ERROR: Unknown key"
        return 400, "ERROR: No response"
    if isinstance(response, (bool, dict, list)):
        return None, json.dumps(response)
    if isinstance(response, (str, bytes)):
        return None, response
    print("ERROR: wrong response format")
    print(type(response), response)
    print("/ERROR: wrong response format")
    return 400, "ERROR: wrong response format"
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
class DatabaseServer:
    """aiohttp server exposing the Seamless database over HTTP GET/PUT.

    Requests carry a JSON body with a "type" field (one of `types`) and,
    except for "protocol", a "checksum". PUT requests are rejected with
    405 when the server was opened read-only. An optional inactivity
    timeout stops the event loop after `timeout_seconds` without requests.
    """

    future = None  # task running _start(); doubles as a "started" flag
    PROTOCOL = ("seamless", "database", "2.0")

    def __init__(
        self,
        host,
        port,
        *,
        timeout_seconds=None,
        status_tracker=None,
        writable=True,
    ):
        self.host = host
        self.port = port
        self._timeout_seconds = timeout_seconds
        self._status_tracker = status_tracker
        self._writable = writable
        self._timeout_task = None
        self._last_request = None  # time.monotonic() of the last request
        self._runner = None
        self._site = None

    async def _start(self):
        """Bind the HTTP server, publish 'running', start the inactivity monitor."""
        if is_port_in_use(self.host, self.port):  # KLUDGE: racy pre-check
            print("ERROR: %s port %d already in use" % (self.host, self.port))
            raise Exception

        app = web.Application(client_max_size=10e9)
        app.add_routes(
            [
                web.get("/healthcheck", self._healthcheck),
                web.get("/{tail:.*}", self._handle_get),
                web.put("/{tail:.*}", self._handle_put),
            ]
        )
        runner = web.AppRunner(app)
        await runner.setup()
        site = web.TCPSite(runner, self.host, self.port)
        await site.start()
        self._runner = runner
        self._site = site
        # Only now is the server reachable, so report "running" here.
        if self._status_tracker and not self._status_tracker.running_written:
            self._status_tracker.write_running()
        if self._timeout_seconds is not None:
            self._last_request = time.monotonic()
            loop = asyncio.get_running_loop()
            self._timeout_task = loop.create_task(self._monitor_inactivity())

    def start(self):
        """Schedule _start() on the current (or a fresh) event loop. Idempotent."""
        if self.future is not None:
            return
        try:
            loop = asyncio.get_event_loop()
        except RuntimeError:
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
        else:
            if loop.is_closed():
                loop = asyncio.new_event_loop()
                asyncio.set_event_loop(loop)
        coro = self._start()
        self.future = loop.create_task(coro)

    async def stop(self):
        """Cancel the inactivity monitor and tear down the aiohttp runner."""
        if self._timeout_task:
            self._timeout_task.cancel()
            with contextlib.suppress(asyncio.CancelledError):
                await self._timeout_task
            self._timeout_task = None
        if self._site is not None:
            await self._site.stop()
            self._site = None
        if self._runner is not None:
            await self._runner.cleanup()
            self._runner = None

    async def _monitor_inactivity(self):
        """Stop the event loop once _timeout_seconds pass without a request."""
        try:
            while True:
                await asyncio.sleep(INACTIVITY_CHECK_INTERVAL)
                if self._last_request is None:
                    continue
                if time.monotonic() - self._last_request >= self._timeout_seconds:
                    loop = asyncio.get_running_loop()
                    loop.call_soon(loop.stop)
                    break
        except asyncio.CancelledError:
            raise

    def _register_activity(self):
        """Refresh the inactivity timestamp (no-op when no timeout is set)."""
        if self._timeout_seconds is not None:
            self._last_request = time.monotonic()

    async def _healthcheck(self, _):
        self._register_activity()
        return web.Response(status=200, body="OK")

    async def _handle_get(self, request):
        """Parse and validate a GET request body, dispatch to _get()."""
        self._register_activity()
        data = await request.read()
        status = 200
        type_ = None
        try:
            try:
                rq = json.loads(data)
            except Exception:
                raise DatabaseError("Malformed request") from None
            try:
                type_ = rq["type"]
                if type_ not in types:
                    raise KeyError
                if type_ != "protocol":
                    checksum = rq["checksum"]
            except KeyError:
                raise DatabaseError("Malformed request") from None

            if type_ == "protocol":
                response = list(self.PROTOCOL)
            else:
                try:
                    checksum = parse_checksum(checksum, as_bytes=False)
                except ValueError:
                    raise DatabaseError("Malformed request") from None
                response = await self._get(type_, checksum, rq)
        except DatabaseError as exc:
            status = 400
            if exc.args[0] == "Unknown key":
                status = 404
            response = "ERROR: " + exc.args[0]
        # _get may return a ready-made web.Response (e.g. custom reason text)
        if isinstance(response, web.Response):
            return response
        status2, response = format_response(response, none_as_404=True)
        if status == 200 and status2 is not None:
            status = status2
        return web.Response(status=status, body=response)

    async def _handle_put(self, request):
        """Parse and validate a PUT request body, dispatch to _put()."""
        if not self._writable:
            return web.Response(
                status=405, body="ERROR: Database server is read-only"
            )
        self._register_activity()
        data = await request.read()
        status = 200
        try:
            try:
                rq = json.loads(data)
            except Exception:
                import traceback

                traceback.print_exc()
                # BUG FIX: previously this fell through with 'rq' unbound
                # (the raise was commented out), turning malformed JSON
                # into a NameError / HTTP 500 instead of a clean 400.
                raise DatabaseError("Malformed request") from None
            if not isinstance(rq, dict):
                raise DatabaseError("Malformed request")

            try:
                type_ = rq["type"]
                if type_ not in types:
                    raise KeyError
                checksum = rq["checksum"]
            except KeyError:
                raise DatabaseError("Malformed request") from None

            try:
                checksum = parse_checksum(checksum, as_bytes=False)
            except ValueError:
                raise DatabaseError("Malformed request") from None

            response = await self._put(type_, checksum, rq)
        except DatabaseError as exc:
            status = 400
            response = "ERROR: " + exc.args[0]
        status2, response = format_response(response)
        if status == 200 and status2 is not None:
            status = status2
        return web.Response(status=status, body=response)

    async def _get(self, type_, checksum, request):
        """Look up *checksum* for the given request type.

        Returns a JSON-serializable value, None (meaning "no entry" for
        the types where that is a valid answer), or raises DatabaseError.
        """
        if type_ == "buffer_info":
            try:
                return json.loads(BufferInfo[checksum].buffer_info)
            except DoesNotExist:
                raise DatabaseError("Unknown key") from None

        elif type_ == "semantic_to_syntactic":
            try:
                celltype, subcelltype = request["celltype"], request["subcelltype"]
            except KeyError:
                raise DatabaseError(
                    "Malformed semantic-to-syntactic request"
                ) from None
            results = (
                SyntacticToSemantic.select()
                .where(
                    SyntacticToSemantic.semantic == checksum,
                    SyntacticToSemantic.celltype == celltype,
                    SyntacticToSemantic.subcelltype == subcelltype,
                )
                .execute()
            )
            if results:
                return [parse_checksum(result.syntactic) for result in results]
            raise DatabaseError("Unknown key")

        elif type_ == "syntactic_to_semantic":
            try:
                celltype, subcelltype = request["celltype"], request["subcelltype"]
            except KeyError:
                raise DatabaseError(
                    "Malformed syntactic-to-semantic request"
                ) from None
            results = (
                SyntacticToSemantic.select()
                .where(
                    SyntacticToSemantic.syntactic == checksum,
                    SyntacticToSemantic.celltype == celltype,
                    SyntacticToSemantic.subcelltype == subcelltype,
                )
                .execute()
            )
            if results:
                return [parse_checksum(result.semantic) for result in results]
            raise DatabaseError("Unknown key")

        elif type_ == "transformation":
            try:
                return parse_checksum(Transformation[checksum].result)
            except DoesNotExist:
                return None  # None is also a valid response

        elif type_ == "metadata":
            try:
                return MetaData[checksum].metadata
            except DoesNotExist:
                return None  # None is also a valid response

        elif type_ == "expression":
            try:
                celltype = request["celltype"]
                path = json.dumps(request["path"])
                hash_pattern = json.dumps(request.get("hash_pattern", ""))
                target_celltype = request["target_celltype"]
                target_hash_pattern = json.dumps(
                    request.get("target_hash_pattern", "")
                )
            except KeyError:
                raise DatabaseError("Malformed expression request") from None
            result = (
                Expression.select()
                .where(
                    Expression.input_checksum == checksum,
                    Expression.path == path,
                    Expression.celltype == celltype,
                    Expression.hash_pattern == hash_pattern,
                    Expression.target_celltype == target_celltype,
                    Expression.target_hash_pattern == target_hash_pattern,
                )
                .execute()
            )
            if not result:
                return None
            return parse_checksum(result[0].result)

        elif type_ == "rev_expression":
            # Reverse lookup: all expressions whose result is *checksum*.
            expressions = (
                Expression.select()
                .where(
                    Expression.result == checksum,
                )
                .execute()
            )
            if not expressions:
                return None
            result = []
            for expression in expressions:
                expr = {
                    "checksum": expression.input_checksum,
                    "path": json.loads(expression.path),
                    "celltype": expression.celltype,
                    "hash_pattern": json.loads(expression.hash_pattern),
                    "target_celltype": expression.target_celltype,
                    "target_hash_pattern": json.loads(
                        expression.target_hash_pattern
                    ),
                    "result": checksum,
                }
                result.append(expr)
            return result

        elif type_ == "rev_transformations":
            # Reverse lookup: all transformations that produced *checksum*.
            transformations = (
                RevTransformation.select()
                .where(
                    RevTransformation.result == checksum,
                )
                .execute()
            )
            if not transformations:
                return None
            result = [transformation.checksum for transformation in transformations]
            return result

        else:
            raise DatabaseError("Unknown request type")

    async def _put(self, type_, checksum, request):
        """Store a value under *checksum* for the given request type.

        Returns "OK" (or a ready-made web.Response for special failures);
        raises DatabaseError for malformed requests.
        """
        if type_ == "buffer_info":
            try:
                value = request["value"]
                if not isinstance(value, dict):
                    raise TypeError
                # Constructing SeamlessBufferInfo validates the fields.
                SeamlessBufferInfo(checksum, value)
                try:
                    # Merge with any existing entry rather than replacing it.
                    existing = json.loads(BufferInfo[checksum].buffer_info)
                    existing.update(value)
                    value = existing
                except DoesNotExist:
                    pass
                value = json.dumps(value, sort_keys=True, indent=2)
            except Exception:
                raise DatabaseError("Malformed PUT buffer info request") from None
            BufferInfo.create(checksum=checksum, buffer_info=value)

        elif type_ == "semantic_to_syntactic":
            try:
                value = request["value"]
                assert isinstance(value, list)
            except Exception:
                raise DatabaseError(
                    "Malformed PUT semantic-to-syntactic request"
                ) from None
            try:
                celltype, subcelltype = request["celltype"], request["subcelltype"]
            except KeyError:
                raise DatabaseError(
                    "Malformed PUT semantic-to-syntactic request"
                ) from None
            for syntactic_checksum0 in value:
                syntactic_checksum = parse_checksum(
                    syntactic_checksum0, as_bytes=False
                )
                with db_atomic():
                    SyntacticToSemantic.create(
                        semantic=checksum,
                        celltype=celltype,
                        subcelltype=subcelltype,
                        syntactic=syntactic_checksum,
                    )

        elif type_ == "transformation":
            try:
                value = parse_checksum(request["value"], as_bytes=False)
            except (KeyError, ValueError):
                raise DatabaseError(
                    "Malformed PUT transformation result request: value must be a checksum"
                ) from None
            # Store both directions: forward (checksum -> result) and
            # reverse (result -> checksum) for rev_transformations lookups.
            Transformation.create(checksum=checksum, result=value)
            RevTransformation.create(checksum=checksum, result=value)

        elif type_ == "expression":
            try:
                value = parse_checksum(request["value"], as_bytes=False)
                celltype = request["celltype"]
                path = json.dumps(request["path"])
                hash_pattern = json.dumps(request.get("hash_pattern", ""))
                target_celltype = request["target_celltype"]
                target_hash_pattern = json.dumps(
                    request.get("target_hash_pattern", "")
                )
            except KeyError:
                raise DatabaseError("Malformed expression request") from None
            try:
                # assert celltype in celltypes TODO? also for target_celltype
                assert len(path) <= 100
                if len(request["path"]):
                    assert celltype in ("mixed", "plain", "binary")
                assert len(celltype) <= 20
                assert len(hash_pattern) <= 20
                assert len(target_celltype) <= 20
                assert len(target_hash_pattern) <= 20
            except AssertionError:
                raise DatabaseError(
                    "Malformed expression request (constraint violation)"
                ) from None
            Expression.create(
                input_checksum=checksum,
                path=path,
                celltype=celltype,
                hash_pattern=hash_pattern,
                target_celltype=target_celltype,
                target_hash_pattern=target_hash_pattern,
                result=value,
            )

        elif type_ == "metadata":
            try:
                value = request["value"]
                value = json.loads(value)
            except (KeyError, ValueError):
                raise DatabaseError("Malformed PUT metadata request") from None
            MetaData.create(checksum=checksum, metadata=value)

        elif type_ == "irreproducible":
            try:
                result = parse_checksum(request["result"], as_bytes=False)
            except (KeyError, ValueError):
                raise DatabaseError("Malformed 'irreproducible' request") from None
            in_transformations = False
            try:
                tf = Transformation[checksum]
                tf_result = parse_checksum(tf.result, as_bytes=False)
                in_transformations = True
            except DoesNotExist:
                pass
            if in_transformations:
                if tf_result != result:
                    return web.Response(
                        status=404,
                        reason="Transformation does not have the irreproducible result",
                    )
            try:
                # Keep the fetched instance so it can be deleted below
                # without a second lookup (the original re-fetched it).
                md = MetaData[checksum]
                metadata = md.metadata
                in_metadata = True
            except DoesNotExist:
                md = None
                metadata = ""
                in_metadata = False
            # Move the transformation (and its metadata) into the
            # irreproducible table.
            IrreproducibleTransformation.create(
                checksum=checksum, result=result, metadata=metadata
            )
            if in_transformations:
                tf.delete_instance()
            if in_metadata:
                md.delete_instance()
        else:
            raise DatabaseError("Unknown request type")
        return "OK"
|
|
718
|
+
|
|
719
|
+
|
|
720
|
+
def main():
    """Entry point: parse arguments, open the database, run the server.

    Startup order matters: the database is opened first (read/write with
    --writable, else read-only via a SQLite URI), then the port is chosen,
    the status tracker is prepared, signal handlers are installed, and
    finally the server is scheduled and the event loop runs forever.
    Startup failures are reported through raise_startup_error() so the
    status file (if any) records "failed".
    """
    import argparse

    p = argparse.ArgumentParser()
    p.add_argument(
        "database_file",
        help="""File where the database is stored.
The database contents are stored as a SQLite file.
If it doesn't exist, a new file is created.""",
    )
    port_group = p.add_mutually_exclusive_group()
    port_group.add_argument("--port", type=int, help="Network port")
    port_group.add_argument(
        "--port-range",
        type=int,
        nargs=2,
        metavar=("START", "END"),
        help="Inclusive port range to select a random free port from",
    )
    p.add_argument("--host", default="0.0.0.0")
    p.add_argument(
        "--writable",
        action="store_true",
        help="Allow HTTP PUT requests (opens database read/write)",
    )
    p.add_argument(
        "--status-file",
        type=str,
        help="JSON file used to report server status",
    )
    p.add_argument(
        "--timeout",
        type=float,
        help="Stop the server after this many seconds of inactivity",
    )
    args = p.parse_args()

    global status_tracker
    database_file = args.database_file
    print("DATABASE FILE", database_file)
    writable = args.writable
    if writable:
        db_init(database_file)
    else:
        # Read-only mode requires an existing file: SQLite's mode=ro URI
        # cannot create one.
        if not os.path.exists(database_file):
            raise_startup_error(
                FileNotFoundError(
                    f"Database file '{database_file}' must exist when --writable is not set"
                )
            )
        readonly_uri = build_sqlite_readonly_uri(database_file)
        db_init(
            readonly_uri,
            init_parameters={"uri": True},
            connection_parameters={"uri": True},
            create_tables=False,
        )

    selected_port = args.port if args.port is not None else 5522
    status_file_path = args.status_file
    status_tracker = None
    if status_file_path:
        status_file_contents = wait_for_status_file(status_file_path)
        # BUG FIX: the tracker was constructed with args.port, which is
        # None when --port is not given, so the status file could report
        # "port": null while the server listened on the default 5522.
        status_tracker = StatusFileTracker(
            status_file_path, status_file_contents, selected_port
        )

    if args.port_range:
        start, end = args.port_range
        try:
            selected_port = pick_random_free_port(args.host, start, end)
        except BaseException as exc:
            raise_startup_error(exc)
        if status_tracker:
            status_tracker.port = selected_port

    timeout_seconds = args.timeout
    if timeout_seconds is not None and timeout_seconds <= 0:
        raise_startup_error(RuntimeError("--timeout must be a positive number"))

    def raise_system_exit(*args, **kwargs):
        # Convert termination signals into SystemExit so the finally
        # block below gets a chance to shut the server down cleanly.
        raise SystemExit

    signal.signal(signal.SIGTERM, raise_system_exit)
    signal.signal(signal.SIGHUP, raise_system_exit)
    signal.signal(signal.SIGINT, raise_system_exit)

    database_server = DatabaseServer(
        args.host,
        selected_port,
        timeout_seconds=timeout_seconds,
        status_tracker=status_tracker,
        writable=writable,
    )
    database_server.start()

    """
    import logging
    logging.basicConfig()
    logging.getLogger("database").setLevel(logging.DEBUG)
    """

    loop = asyncio.get_event_loop()
    try:
        print("Press Ctrl+C to end")
        loop.run_forever()
    except KeyboardInterrupt:
        pass
    except BaseException:
        # Any other exit before 'running' was published counts as a
        # startup failure for the status file.
        if status_tracker and not status_tracker.running_written:
            status_tracker.write_failed()
        raise
    finally:
        loop.run_until_complete(database_server.stop())
|
database_models.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
from peewee import (
|
|
2
|
+
SqliteDatabase,
|
|
3
|
+
Model,
|
|
4
|
+
CharField,
|
|
5
|
+
TextField,
|
|
6
|
+
FixedCharField,
|
|
7
|
+
CompositeKey,
|
|
8
|
+
IntegrityError,
|
|
9
|
+
)
|
|
10
|
+
from playhouse.sqlite_ext import JSONField
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def ChecksumField(*args, **kwargs):
    """Field factory: a 64-character fixed-width column for hex checksums."""
    return FixedCharField(*args, max_length=64, **kwargs)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Module-level SQLite handle. The filename is deliberately None here:
# SqliteDatabase(None) defers initialization, presumably until db_init()
# (defined elsewhere in this module) supplies the real path/URI.
# NOTE: synchronous=0 is SQLite's OFF mode — faster writes at the cost of
# durability on power loss.
_db = SqliteDatabase(
    None,
    pragmas={
        "cache_size": -1 * 64000,  # 64MB
        "foreign_keys": 1,
        "ignore_check_constraints": 0,
        "synchronous": 0,
    },
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class BaseModel(Model):
    """Base for all models; gives create() upsert-like semantics.

    For model classes registered in the module-level ``_primary`` mapping
    (defined elsewhere in this module), a duplicate-key IntegrityError on
    create() falls back to updating the existing row in place.
    """

    class Meta:
        database = _db
        legacy_table_names = False

    @classmethod
    def create(cls, **kwargs):
        if cls not in _primary:
            return super().create(**kwargs)
        try:
            return super().create(**kwargs)
        except IntegrityError as exc:
            prim = _primary[cls]
            # An implicit auto "id" key that wasn't supplied cannot be used
            # to locate the conflicting row: re-raise the original error.
            if prim == "id" and prim not in kwargs:
                raise exc from None
            instance = cls.get(**{prim: kwargs[prim]})
            for k, v in kwargs.items():
                setattr(instance, k, v)
            instance.save()
            # BUG FIX: the update path previously fell off the end and
            # returned None; return the updated instance so create() has a
            # consistent return type on both paths.
            return instance
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class Transformation(BaseModel):
    """Forward mapping: transformation checksum -> result checksum."""

    checksum = ChecksumField(primary_key=True)
    # Indexed (non-unique) so results can also be searched.
    result = ChecksumField(index=True, unique=False)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class RevTransformation(BaseModel):
    """Reverse lookup: which transformation checksums produced a result."""

    # Result checksum being looked up.
    result = ChecksumField(index=True, unique=False)
    # A transformation checksum that produced this result (not unique:
    # many transformations can map to the same result and vice versa).
    checksum = ChecksumField(unique=False)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class BufferInfo(BaseModel):
    """Per-buffer metadata keyed by buffer checksum."""

    # store SeamlessBufferInfo as JSON
    checksum = ChecksumField(primary_key=True)
    # JSON-serialized SeamlessBufferInfo (stored as text, not JSONField).
    buffer_info = TextField()
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class SyntacticToSemantic(BaseModel):
    """Mapping between syntactic and semantic checksums per (sub)celltype.

    The full four-column tuple is the composite primary key, so every row
    is entirely determined by its key: a duplicate insert carries no new
    information.
    """

    syntactic = ChecksumField(index=True)
    celltype = TextField()
    subcelltype = TextField()
    semantic = ChecksumField(index=True)

    class Meta:
        database = _db
        legacy_table_names = False
        primary_key = CompositeKey(
            "syntactic",
            "celltype",
            "subcelltype",
            "semantic",
        )

    @classmethod
    def create(cls, **kwargs):
        """Create a mapping row; duplicates are tolerated.

        Returns:
            The created row, or the identical pre-existing row on a
            UNIQUE-constraint collision.

        Raises:
            IntegrityError: for any non-UNIQUE integrity violation.
        """
        try:
            return super().create(**kwargs)
        except IntegrityError as exc:
            # Only swallow UNIQUE violations (duplicate mapping); anything
            # else (e.g. NOT NULL) is a genuine error.
            if exc.args[0].split()[0] != "UNIQUE":
                raise exc from None
            # BUGFIX: return the existing identical row instead of None so
            # create() has a consistent return type on all paths. All four
            # columns form the key, so the stored row equals the request.
            key = ("syntactic", "celltype", "subcelltype", "semantic")
            return cls.get(**{k: kwargs[k] for k in key})
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class Expression(BaseModel):
    """Cache of expression evaluations.

    Keyed by (input_checksum, path, celltype, target_celltype); stores the
    result checksum plus optional validator metadata.
    """

    input_checksum = ChecksumField()
    path = CharField(max_length=100)
    celltype = CharField(max_length=20)
    target_celltype = CharField(max_length=20)
    # Optional validator checksum/language; not part of the primary key.
    validator = ChecksumField(null=True)
    validator_language = CharField(max_length=20, null=True)
    result = ChecksumField(index=True, unique=False)

    class Meta:
        database = _db
        legacy_table_names = False
        primary_key = CompositeKey(
            "input_checksum",
            "path",
            "celltype",
            "target_celltype",
        )

    @classmethod
    def create(cls, **kwargs):
        """Create an expression row; on a key collision, overwrite the
        stored ``result`` of the existing row (upsert).

        Returns:
            The created or updated model instance.
        """
        try:
            return super().create(**kwargs)
        except IntegrityError:
            key = (
                "input_checksum",
                "path",
                "celltype",
                "target_celltype",
            )
            instance = cls.get(**{k: kwargs[k] for k in key})
            instance.result = kwargs["result"]
            instance.save()
            # BUGFIX: return the updated instance; previously this path
            # returned None, unlike the success path.
            return instance
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class MetaData(BaseModel):
    """Execution metadata for a transformation, keyed by its checksum."""

    # store meta-data for transformations:
    # - executor name (seamless-internal, SLURM, ...)
    # - Seamless version (including Docker/Singularity/conda version)
    # - exact environment conda packages (as environment checksum)
    # - hardware (GPU, memory)
    # - execution time (also if failed)
    # - last recorded progress (if failed)
    checksum = ChecksumField(primary_key=True)
    # Free-form JSON document with the fields listed above.
    metadata = JSONField()
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
class IrreproducibleTransformation(BaseModel):
    """Records transformations whose results are not reproducible."""

    # Result checksum that was observed.
    result = ChecksumField(index=True, unique=False)
    # Transformation checksum; not unique, since re-runs may add rows.
    checksum = ChecksumField(index=True, unique=False)
    # JSON metadata associated with this (transformation, result) record.
    metadata = JSONField()
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
# All tables managed by this module; db_init() creates each of these
# (with CREATE TABLE IF NOT EXISTS) when create_tables=True.
_model_classes = [
    Transformation,
    RevTransformation,
    BufferInfo,
    SyntacticToSemantic,
    Expression,
    MetaData,
    IrreproducibleTransformation,
]
|
|
155
|
+
# Map each model class to the name of its single-column primary key.
# BaseModel.create() consults this mapping to implement upsert semantics.
# Excluded models either define their own create() (Expression and
# SyntacticToSemantic use composite keys) or deliberately allow duplicate
# inserts (RevTransformation).
_primary = {}
for model_class in _model_classes:
    if (
        model_class is Expression
        or model_class is SyntacticToSemantic
        or model_class is RevTransformation
    ):
        continue
    for fieldname, field in model_class._meta.fields.items():
        if field.primary_key:
            _primary[model_class] = fieldname
            break
    else:
        # BUGFIX: the original raised a bare `raise Exception` with no
        # message, making the failure undiagnosable.
        raise Exception(
            f"Model {model_class.__name__} has no single-column primary key"
        )
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def db_init(
    filename,
    init_parameters: dict | None = None,
    connection_parameters: dict | None = None,
    *,
    create_tables: bool = True,
):
    """Initialize and connect the module-level SQLite database.

    Args:
        filename: Path to the SQLite database file.
        init_parameters: Extra keyword arguments forwarded to
            ``SqliteDatabase.init()``. Defaults to none.
        connection_parameters: Extra keyword arguments forwarded to
            ``Database.connect()``. Defaults to none.
        create_tables: When True (default), create any missing tables
            (peewee's safe mode, i.e. CREATE TABLE IF NOT EXISTS).
    """
    # FIX: the parameters were annotated `dict = None`; `dict | None`
    # reflects the actual accepted values.
    if init_parameters is None:
        init_parameters = {}
    if connection_parameters is None:
        connection_parameters = {}
    _db.init(filename, **init_parameters)
    _db.connect(**connection_parameters)
    if create_tables:
        _db.create_tables(_model_classes, safe=True)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
# Public alias for peewee's transaction helper, usable as a context manager
# or decorator (imported by database.py as `db_atomic`).
db_atomic = _db.atomic
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: seamless-database
|
|
3
|
+
Version: 2.0
|
|
4
|
+
Summary: SQLite-backed metadata database service for Seamless
|
|
5
|
+
Author: Sjoerd de Vries
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/sjdv1982/seamless
|
|
8
|
+
Project-URL: Repository, https://github.com/sjdv1982/seamless
|
|
9
|
+
Project-URL: Issues, https://github.com/sjdv1982/seamless/issues
|
|
10
|
+
Keywords: seamless,database,sqlite,service
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Operating System :: OS Independent
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
20
|
+
Requires-Python: >=3.12
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
Requires-Dist: aiohttp
|
|
23
|
+
Requires-Dist: peewee
|
|
24
|
+
|
|
25
|
+
# seamless-database
|
|
26
|
+
|
|
27
|
+
`seamless-database` is the checksum-based metadata and caching service for the [Seamless](https://github.com/sjdv1982/seamless) framework. It acts as the distributed computation cache that allows Seamless workflows to avoid recomputing identical transformations, both within a single session and across the entire cluster.
|
|
28
|
+
|
|
29
|
+
## How it works
|
|
30
|
+
|
|
31
|
+
Seamless uses content-addressed storage: every piece of data (buffers, code, parameters) is identified by its checksum. When a transformation (computation) is submitted, its inputs are hashed into a transformation checksum. Before executing the computation, Seamless components (such as `seamless-dask`) query the database: *"has this transformation been computed before?"* If a cached result is found, the result checksum is returned immediately, skipping the computation entirely.
|
|
32
|
+
|
|
33
|
+
The database stores the following kinds of records:
|
|
34
|
+
|
|
35
|
+
| Table | Purpose |
|
|
36
|
+
|-------|---------|
|
|
37
|
+
| **Transformation** | Maps a transformation checksum to its result checksum |
|
|
38
|
+
| **RevTransformation** | Reverse lookup: finds which transformations produced a given result |
|
|
39
|
+
| **BufferInfo** | Stores buffer metadata (length, dtype, encoding, etc.) for a checksum |
|
|
40
|
+
| **SyntacticToSemantic** | Maps between syntactic and semantic checksums per celltype |
|
|
41
|
+
| **Expression** | Caches expression evaluation results (input checksum + path + celltype → result checksum) |
|
|
42
|
+
| **MetaData** | Stores execution metadata for transformations (executor, environment, timing) |
|
|
43
|
+
| **IrreproducibleTransformation** | Records transformations whose results are not reproducible |
|
|
44
|
+
|
|
45
|
+
All data is persisted in a single SQLite file (typically `seamless.db`).
|
|
46
|
+
|
|
47
|
+
## Role in the Seamless ecosystem
|
|
48
|
+
|
|
49
|
+
Other Seamless components interact with the database over HTTP:
|
|
50
|
+
|
|
51
|
+
- **seamless-dask** checks the database cache before scheduling a transformation on the Dask cluster, and writes results back after computation.
|
|
52
|
+
- **seamless-remote** provides the `DatabaseClient` / `DatabaseLaunchedClient` classes that other components use to communicate with the database server.
|
|
53
|
+
- **seamless-config** defines the launch template for the database server (port range, host, timeout, read/write mode).
|
|
54
|
+
|
|
55
|
+
The server exposes a JSON-over-HTTP protocol: clients send `{"type": "<record_type>", "checksum": "<hex>", ...}` via GET (read) or PUT (write) requests.
|
|
56
|
+
|
|
57
|
+
## Installation
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
pip install seamless-database
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Usage
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
# Start a writable database server on a random port
|
|
67
|
+
seamless-database seamless.db --port-range 5520 5530 --writable
|
|
68
|
+
|
|
69
|
+
# Start a read-only server on a fixed port
|
|
70
|
+
seamless-database seamless.db --port 5522
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### CLI options
|
|
74
|
+
|
|
75
|
+
| Option | Description |
|
|
76
|
+
|--------|-------------|
|
|
77
|
+
| `database_file` | Path to the SQLite file (created if it doesn't exist and `--writable` is set) |
|
|
78
|
+
| `--port PORT` | Fixed network port |
|
|
79
|
+
| `--port-range START END` | Pick a random free port from an inclusive range |
|
|
80
|
+
| `--host HOST` | Bind address (default: `0.0.0.0`) |
|
|
81
|
+
| `--writable` | Allow PUT requests; opens the database in read/write mode |
|
|
82
|
+
| `--status-file FILE` | JSON file used to report server status (for process managers) |
|
|
83
|
+
| `--timeout SECONDS` | Stop the server after this many seconds of inactivity |
|
|
84
|
+
|
|
85
|
+
## CLI scripts
|
|
86
|
+
|
|
87
|
+
Installing `seamless-database` also provides:
|
|
88
|
+
|
|
89
|
+
- `seamless-database`
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
database.py,sha256=1mlFlRs2Nwpn7zRqQrv97-KPx-Tp9zxtfmAZLMQd1Rg,28202
|
|
2
|
+
database_models.py,sha256=2oTYNfjNNkUWqhmb3minD53gsKMSkB6F9vXDU8tpwyY,4839
|
|
3
|
+
seamless_database-2.0.dist-info/METADATA,sha256=4DsvMno9hfm1mRTB_jCjVOPIOOtAGnLG4h3uHkAxKmE,4255
|
|
4
|
+
seamless_database-2.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
5
|
+
seamless_database-2.0.dist-info/entry_points.txt,sha256=X89fpS83S9MrTQBWytb-pw8vLOCPeN6U_-Qbdj9HX-4,52
|
|
6
|
+
seamless_database-2.0.dist-info/top_level.txt,sha256=yCIIelHORCWQ5jOknatpX4ip8CYo5r_D1GDYUgowSPc,25
|
|
7
|
+
seamless_database-2.0.dist-info/RECORD,,
|