pybiolib 1.1.1881__py3-none-any.whl → 1.2.7.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biolib/__init__.py +11 -4
- biolib/_data_record/data_record.py +278 -0
- biolib/_internal/data_record/__init__.py +1 -1
- biolib/_internal/data_record/data_record.py +97 -151
- biolib/_internal/data_record/remote_storage_endpoint.py +18 -7
- biolib/_internal/file_utils.py +77 -0
- biolib/_internal/fuse_mount/__init__.py +1 -0
- biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
- biolib/_internal/http_client.py +31 -9
- biolib/_internal/lfs/__init__.py +1 -0
- biolib/_internal/libs/__init__.py +1 -0
- biolib/_internal/libs/fusepy/__init__.py +1257 -0
- biolib/_internal/push_application.py +6 -1
- biolib/_internal/runtime.py +3 -56
- biolib/_internal/types/__init__.py +4 -0
- biolib/_internal/types/app.py +9 -0
- biolib/_internal/types/data_record.py +40 -0
- biolib/_internal/types/experiment.py +10 -0
- biolib/_internal/types/resource.py +14 -0
- biolib/_internal/types/typing.py +7 -0
- biolib/_internal/utils/multinode.py +264 -0
- biolib/_runtime/runtime.py +84 -0
- biolib/api/__init__.py +1 -0
- biolib/api/client.py +39 -17
- biolib/app/app.py +34 -71
- biolib/biolib_api_client/api_client.py +9 -2
- biolib/biolib_api_client/app_types.py +3 -2
- biolib/biolib_api_client/biolib_job_api.py +6 -0
- biolib/biolib_api_client/job_types.py +4 -4
- biolib/biolib_api_client/lfs_types.py +8 -2
- biolib/biolib_binary_format/remote_endpoints.py +12 -10
- biolib/biolib_binary_format/utils.py +23 -3
- biolib/cli/auth.py +1 -1
- biolib/cli/data_record.py +45 -6
- biolib/cli/lfs.py +10 -6
- biolib/compute_node/cloud_utils/cloud_utils.py +13 -16
- biolib/compute_node/job_worker/executors/docker_executor.py +127 -108
- biolib/compute_node/job_worker/job_storage.py +17 -5
- biolib/compute_node/job_worker/job_worker.py +25 -15
- biolib/compute_node/remote_host_proxy.py +72 -84
- biolib/compute_node/webserver/webserver_types.py +0 -1
- biolib/compute_node/webserver/worker_thread.py +42 -39
- biolib/experiments/experiment.py +75 -44
- biolib/jobs/job.py +98 -19
- biolib/jobs/job_result.py +46 -21
- biolib/jobs/types.py +1 -1
- biolib/runtime/__init__.py +2 -1
- biolib/sdk/__init__.py +18 -7
- biolib/typing_utils.py +2 -7
- biolib/user/sign_in.py +2 -2
- biolib/utils/seq_util.py +38 -35
- {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/METADATA +1 -1
- {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/RECORD +57 -45
- biolib/experiments/types.py +0 -9
- biolib/lfs/__init__.py +0 -4
- biolib/lfs/utils.py +0 -153
- /biolib/{lfs → _internal/lfs}/cache.py +0 -0
- {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/LICENSE +0 -0
- {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/WHEEL +0 -0
- {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,77 @@
|
|
1
|
+
import io
|
2
|
+
import os
|
3
|
+
import zipfile as zf
|
4
|
+
from pathlib import Path
|
5
|
+
|
6
|
+
from biolib.typing_utils import Iterator, List, Tuple
|
7
|
+
|
8
|
+
|
9
|
+
def get_files_and_size_of_directory(directory: str) -> Tuple[List[str], int]:
    """Recursively list the files in `directory` and compute their total size.

    Symlinks are skipped (not followed, not counted) to avoid double counting
    and cycles.

    Args:
        directory: Path of the directory to scan.

    Returns:
        Tuple of (file paths relative to `directory`, total size in bytes).
    """
    data_size = 0
    file_list: List[str] = []

    for path, _, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(path, file)
            if os.path.islink(file_path):
                continue  # skip symlinks

            # os.path.relpath is robust to `directory` being passed with or
            # without a trailing separator (plain slicing by len(directory) + 1 is not)
            file_list.append(os.path.relpath(file_path, directory))
            data_size += os.path.getsize(file_path)

    return file_list, data_size
|
24
|
+
|
25
|
+
|
26
|
+
def get_iterable_zip_stream(files: List[str], chunk_size: int) -> Iterator[bytes]:
    """Yield a zip archive of `files` as a stream of byte chunks.

    The archive is produced incrementally, so the full zip never has to be
    held in memory or written to disk.
    """

    class _StreamBuffer(io.RawIOBase):
        """Write sink that accumulates bytes and hands them back out in chunks."""

        def __init__(self, chunk_size: int):
            super().__init__()
            self.chunk_size = chunk_size
            self.tmp_data = bytearray()

        def get_buffer_size(self):
            return len(self.tmp_data)

        def read_chunk(self):
            chunk = bytes(self.tmp_data[: self.chunk_size])
            self.tmp_data = self.tmp_data[self.chunk_size :]
            return chunk

        def write(self, data):
            self.tmp_data += data
            return len(data)

    # The zip writer emits its output into this in-memory buffer,
    # which we periodically drain and yield to the caller.
    stream_buffer = _StreamBuffer(chunk_size)
    zip_writer = zf.ZipFile(stream_buffer, mode='w')  # type: ignore

    for file_path in files:
        entry_info = zf.ZipInfo.from_file(file_path)
        with zip_writer.open(entry_info, mode='w') as entry_writer:
            if Path(file_path).is_file():
                with open(file_path, 'rb') as file_pointer:
                    # Stream the file's content into the archive chunk by chunk
                    while True:
                        file_chunk = file_pointer.read(chunk_size)
                        if not file_chunk:
                            break
                        entry_writer.write(file_chunk)
                        # Flush whenever more than one full output chunk has accumulated
                        if stream_buffer.get_buffer_size() > chunk_size:
                            yield stream_buffer.read_chunk()

    # Closing the writer appends the archive's central directory / metadata;
    # drain whatever remains in the buffer.
    zip_writer.close()
    while True:
        remaining_chunk = stream_buffer.read_chunk()
        if not remaining_chunk:
            break
        yield remaining_chunk
|
@@ -0,0 +1 @@
|
|
1
|
+
from .experiment_fuse_mount import ExperimentFuseMount
|
@@ -0,0 +1,209 @@
|
|
1
|
+
import errno
|
2
|
+
import os
|
3
|
+
import stat
|
4
|
+
from datetime import datetime, timezone
|
5
|
+
from time import time
|
6
|
+
|
7
|
+
from biolib._internal.libs.fusepy import FUSE, FuseOSError, Operations
|
8
|
+
from biolib.biolib_errors import BioLibError
|
9
|
+
from biolib.jobs import Job
|
10
|
+
from biolib.typing_utils import Dict, List, Optional, Tuple, TypedDict
|
11
|
+
|
12
|
+
|
13
|
+
class _AttributeDict(TypedDict):
    """File attributes in the shape expected by FUSE's getattr callback.

    Field names and semantics mirror the POSIX `struct stat` (see `man 2 stat`).
    """

    st_atime: int  # last access time, seconds since the epoch
    st_ctime: int  # inode change time, seconds since the epoch
    st_gid: int  # group id of the owner
    st_mode: int  # file type and permission bits
    st_mtime: int  # last modification time, seconds since the epoch
    st_nlink: int  # number of hard links
    st_size: int  # size in bytes
    st_uid: int  # user id of the owner
|
22
|
+
|
23
|
+
|
24
|
+
# Return value for FUSE callbacks that complete successfully (POSIX convention: 0 means success).
_SUCCESS_CODE = 0
|
25
|
+
|
26
|
+
|
27
|
+
class ExperimentFuseMount(Operations):
    """Read-only FUSE filesystem exposing the output files of an experiment's jobs.

    The mount's root directory contains one subdirectory per completed job
    (named by the job's name); each job directory exposes that job's output
    files, fetched lazily through the Job API.
    """

    def __init__(self, experiment):
        # Experiment whose completed jobs are exposed as top-level directories.
        self._experiment = experiment
        # Cache of job name -> Job, populated lazily by _get_job_names_map().
        self._job_names_map: Optional[Dict[str, Job]] = None
        # Epoch timestamp of the last job fetch; throttles cache refreshes.
        self._jobs_last_fetched_at: float = 0.0
        # Used as the timestamp for the mount root's directory attributes.
        self._mounted_at_epoch_seconds: int = int(time())

    @staticmethod
    def mount_experiment(experiment, mount_path: str) -> None:
        """Mount `experiment` at `mount_path` and block until unmounted.

        Runs single-threaded in the foreground; the mount is only accessible
        to the mounting user (allow_other=False).
        """
        FUSE(
            operations=ExperimentFuseMount(experiment),
            mountpoint=mount_path,
            nothreads=True,
            foreground=True,
            allow_other=False,
        )

    def getattr(self, path: str, fh=None) -> _AttributeDict:
        """Return stat-like attributes for `path`; raise ENOENT if it does not exist."""
        if path == '/':
            return self._get_directory_attributes(timestamp_epoch_seconds=self._mounted_at_epoch_seconds)

        job, path_in_job = self._parse_path(path)
        # 'finished_at' is treated as an ISO-8601 timestamp with a trailing 'Z' (UTC).
        job_finished_at_epoch_seconds: int = int(
            datetime.fromisoformat(job.to_dict()['finished_at'].rstrip('Z')).replace(tzinfo=timezone.utc).timestamp()
        )

        if path_in_job == '/':
            return self._get_directory_attributes(timestamp_epoch_seconds=job_finished_at_epoch_seconds)

        try:
            file = job.get_output_file(path_in_job)
            return self._get_file_attributes(
                timestamp_epoch_seconds=job_finished_at_epoch_seconds,
                size_in_bytes=file.length,
            )
        except BioLibError:
            # file not found
            pass

        # Not a file: treat the path as a directory if any output file lives under it.
        file_paths_in_job = [file.path for file in job.list_output_files()]

        for file_path_in_job in file_paths_in_job:
            # NOTE(review): plain prefix match (no trailing '/') also matches sibling
            # names, e.g. path '/foo' matches a file '/foobar' — confirm intended.
            if file_path_in_job.startswith(path_in_job):
                return self._get_directory_attributes(timestamp_epoch_seconds=job_finished_at_epoch_seconds)

        raise FuseOSError(errno.ENOENT) from None  # No such file or directory

    def readdir(self, path: str, fh: int) -> List[str]:
        """List the entries of the directory at `path` ('.' and '..' always included)."""
        directory_entries = ['.', '..']

        if path == '/':
            # The root lists completed jobs by name; force a (throttled) cache refresh.
            directory_entries.extend(self._get_job_names_map(refresh_jobs=True).keys())
        else:
            job, path_in_job = self._parse_path(path)
            dir_path_in_job = '/' if path_in_job == '/' else path_in_job + '/'
            # Depth of the requested directory selects which path component to list;
            # the set() deduplicates subdirectory names shared by multiple files.
            depth = dir_path_in_job.count('/')
            directory_entries.extend(
                set(
                    [
                        file.path.split('/')[depth]
                        for file in job.list_output_files()
                        if file.path.startswith(dir_path_in_job)
                    ]
                )
            )

        return directory_entries

    def open(self, path: str, flags: int) -> int:
        """Check that `path` is an existing output file; raise ENOENT otherwise."""
        job, path_in_job = self._parse_path(path)
        try:
            job.get_output_file(path_in_job)
        except BioLibError:
            # file not found
            raise FuseOSError(errno.ENOENT) from None

        return 1234  # dummy file handle

    def read(self, path: str, size: int, offset: int, fh: int) -> bytes:
        """Read `size` bytes starting at `offset` from the output file at `path`."""
        job, path_in_job = self._parse_path(path)
        try:
            file = job.get_output_file(path_in_job)
        except BioLibError:
            raise FuseOSError(errno.ENOENT) from None  # No such file or directory

        return file.get_data(start=offset, length=size)

    def release(self, path: str, fh: int) -> int:
        # Nothing to clean up: file handles are dummies (see open()).
        return _SUCCESS_CODE

    def releasedir(self, path: str, fh: int) -> int:
        # Nothing to clean up for directories either.
        return _SUCCESS_CODE

    def flush(self, path: str, fh: int) -> int:
        # Read-only filesystem: there is never pending data to flush.
        return _SUCCESS_CODE

    @staticmethod
    def _get_directory_attributes(timestamp_epoch_seconds: int) -> _AttributeDict:
        """Build stat attributes for a (virtual) directory with the given timestamp."""
        return _AttributeDict(
            st_atime=timestamp_epoch_seconds,
            st_ctime=timestamp_epoch_seconds,
            st_gid=os.getgid(),
            st_mode=stat.S_IFDIR | 0o555,  # Directory that is readable and executable by owner, group, and others.
            st_mtime=timestamp_epoch_seconds,
            st_nlink=1,
            st_size=1,
            st_uid=os.getuid(),
        )

    @staticmethod
    def _get_file_attributes(timestamp_epoch_seconds: int, size_in_bytes: int) -> _AttributeDict:
        """Build stat attributes for a read-only regular file of `size_in_bytes`."""
        return _AttributeDict(
            st_atime=timestamp_epoch_seconds,
            st_ctime=timestamp_epoch_seconds,
            st_gid=os.getgid(),
            st_mode=stat.S_IFREG | 0o444,  # Regular file with read permissions for owner, group, and others.
            st_mtime=timestamp_epoch_seconds,
            st_nlink=1,
            st_size=size_in_bytes,
            st_uid=os.getuid(),
        )

    def _get_job_names_map(self, refresh_jobs=False) -> Dict[str, Job]:
        """Return the cached name -> Job map of the experiment's completed jobs.

        Fetched on first use; when `refresh_jobs` is true, refetched at most
        once per second.
        """
        current_time = time()
        if not self._job_names_map or (current_time - self._jobs_last_fetched_at > 1 and refresh_jobs):
            self._jobs_last_fetched_at = current_time
            self._job_names_map = {job.get_name(): job for job in self._experiment.get_jobs(status='completed')}

        return self._job_names_map

    def _parse_path(self, path: str) -> Tuple[Job, str]:
        """Split an absolute mount path into (job, absolute path inside the job's output).

        Raises FuseOSError(ENOENT) when the first path component is not a known job name.
        """
        path_splitted = path.split('/')
        job_name = path_splitted[1]
        path_in_job = '/' + '/'.join(path_splitted[2:])
        job = self._get_job_names_map().get(job_name)
        if not job:
            raise FuseOSError(errno.ENOENT)  # No such file or directory

        return job, path_in_job

    # ----------------------------------- File system methods not implemented below -----------------------------------

    def chmod(self, path, mode):
        raise FuseOSError(errno.EACCES)

    def chown(self, path, uid, gid):
        raise FuseOSError(errno.EACCES)

    def mknod(self, path, mode, dev):
        raise FuseOSError(errno.EACCES)

    def rmdir(self, path):
        raise FuseOSError(errno.EACCES)

    def mkdir(self, path, mode):
        raise FuseOSError(errno.EACCES)

    def unlink(self, path):
        raise FuseOSError(errno.EACCES)

    def symlink(self, target, source):
        raise FuseOSError(errno.EACCES)

    def rename(self, old, new):
        raise FuseOSError(errno.EACCES)

    def link(self, target, source):
        raise FuseOSError(errno.EACCES)

    def utimens(self, path, times=None):
        raise FuseOSError(errno.EACCES)

    def create(self, path, mode, fi=None):
        raise FuseOSError(errno.EACCES)

    def write(self, path, data, offset, fh):
        raise FuseOSError(errno.EACCES)

    def truncate(self, path, length, fh=None):
        raise FuseOSError(errno.EACCES)

    def fsync(self, path, datasync, fh):
        raise FuseOSError(errno.EACCES)
|
biolib/_internal/http_client.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
import json
|
2
2
|
import platform
|
3
|
+
import shutil
|
3
4
|
import socket
|
4
5
|
import ssl
|
5
6
|
import subprocess
|
@@ -41,15 +42,24 @@ class HttpError(urllib.error.HTTPError):
|
|
41
42
|
|
42
43
|
|
43
44
|
class HttpResponse:
|
44
|
-
def __init__(self, response) -> None:
|
45
|
+
def __init__(self, response, response_path) -> None:
|
45
46
|
self.headers: Dict[str, str] = dict(response.headers)
|
46
47
|
self.status_code: int = int(response.status)
|
47
|
-
self.
|
48
|
+
self.response_path = response_path
|
49
|
+
if self.response_path:
|
50
|
+
with open(self.response_path, 'wb') as out_file:
|
51
|
+
shutil.copyfileobj(response, out_file)
|
52
|
+
else:
|
53
|
+
self.content: bytes = response.read()
|
48
54
|
self.url: str = response.geturl()
|
49
55
|
|
50
56
|
@property
|
51
57
|
def text(self) -> str:
|
52
|
-
|
58
|
+
if self.response_path:
|
59
|
+
with open(self.response_path, 'rb') as fp:
|
60
|
+
return cast(str, fp.read().decode('utf-8'))
|
61
|
+
else:
|
62
|
+
return cast(str, self.content.decode('utf-8'))
|
53
63
|
|
54
64
|
def json(self):
|
55
65
|
return json.loads(self.text)
|
@@ -66,6 +76,7 @@ class HttpClient:
|
|
66
76
|
headers: Optional[Dict[str, str]] = None,
|
67
77
|
retries: int = 5,
|
68
78
|
timeout_in_seconds: Optional[int] = None,
|
79
|
+
response_path: Optional[str] = None,
|
69
80
|
) -> HttpResponse:
|
70
81
|
if not HttpClient.ssl_context:
|
71
82
|
HttpClient.ssl_context = _create_ssl_context()
|
@@ -83,7 +94,7 @@ class HttpClient:
|
|
83
94
|
if timeout_in_seconds is None:
|
84
95
|
timeout_in_seconds = 60 if isinstance(data, dict) else 180 # TODO: Calculate timeout based on data size
|
85
96
|
|
86
|
-
last_error: Optional[
|
97
|
+
last_error: Optional[Exception] = None
|
87
98
|
for retry_count in range(retries + 1):
|
88
99
|
if retry_count > 0:
|
89
100
|
time.sleep(5 * retry_count)
|
@@ -94,23 +105,34 @@ class HttpClient:
|
|
94
105
|
context=HttpClient.ssl_context,
|
95
106
|
timeout=timeout_in_seconds,
|
96
107
|
) as response:
|
97
|
-
return HttpResponse(response)
|
108
|
+
return HttpResponse(response, response_path)
|
98
109
|
|
99
110
|
except urllib.error.HTTPError as error:
|
100
|
-
if error.code ==
|
101
|
-
logger_no_user_data.
|
111
|
+
if error.code == 429:
|
112
|
+
logger_no_user_data.warning(f'HTTP {method} request failed with status 429 for "{url}"')
|
113
|
+
last_error = error
|
114
|
+
elif error.code == 502:
|
115
|
+
logger_no_user_data.warning(f'HTTP {method} request failed with status 502 for "{url}"')
|
102
116
|
last_error = error
|
103
117
|
elif error.code == 503:
|
104
|
-
logger_no_user_data.
|
118
|
+
logger_no_user_data.warning(f'HTTP {method} request failed with status 503 for "{url}"')
|
119
|
+
last_error = error
|
120
|
+
elif error.code == 504:
|
121
|
+
logger_no_user_data.warning(f'HTTP {method} request failed with status 504 for "{url}"')
|
105
122
|
last_error = error
|
106
123
|
else:
|
107
124
|
raise HttpError(error) from None
|
108
125
|
|
109
126
|
except urllib.error.URLError as error:
|
110
127
|
if isinstance(error.reason, socket.timeout):
|
111
|
-
|
128
|
+
if retry_count > 0:
|
129
|
+
logger_no_user_data.warning(f'HTTP {method} request failed with read timeout for "{url}"')
|
112
130
|
last_error = error
|
113
131
|
else:
|
114
132
|
raise error
|
133
|
+
except socket.timeout as error:
|
134
|
+
if retry_count > 0:
|
135
|
+
logger_no_user_data.warning(f'HTTP {method} request failed with read timeout for "{url}"')
|
136
|
+
last_error = error
|
115
137
|
|
116
138
|
raise last_error or Exception(f'HTTP {method} request failed after {retries} retries for "{url}"')
|
@@ -0,0 +1 @@
|
|
1
|
+
from .cache import prune_lfs_cache
|
@@ -0,0 +1 @@
|
|
1
|
+
# Note: this directory is purely for libraries to be directly included instead of as dependencies
|