pybiolib 1.1.1747__py3-none-any.whl → 1.1.2193__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biolib/__init__.py +18 -5
- biolib/_data_record/data_record.py +278 -0
- biolib/_internal/data_record/__init__.py +1 -0
- biolib/_internal/data_record/data_record.py +97 -0
- biolib/_internal/data_record/remote_storage_endpoint.py +38 -0
- biolib/_internal/file_utils.py +77 -0
- biolib/_internal/fuse_mount/__init__.py +1 -0
- biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
- biolib/_internal/http_client.py +42 -23
- biolib/_internal/lfs/__init__.py +1 -0
- biolib/_internal/libs/__init__.py +1 -0
- biolib/_internal/libs/fusepy/__init__.py +1257 -0
- biolib/_internal/push_application.py +22 -37
- biolib/_internal/runtime.py +19 -0
- biolib/_internal/types/__init__.py +4 -0
- biolib/_internal/types/app.py +9 -0
- biolib/_internal/types/data_record.py +40 -0
- biolib/_internal/types/experiment.py +10 -0
- biolib/_internal/types/resource.py +14 -0
- biolib/_internal/types/typing.py +7 -0
- biolib/_internal/utils/__init__.py +18 -0
- biolib/_runtime/runtime.py +80 -0
- biolib/api/__init__.py +1 -0
- biolib/api/client.py +39 -17
- biolib/app/app.py +40 -72
- biolib/app/search_apps.py +8 -12
- biolib/biolib_api_client/api_client.py +22 -10
- biolib/biolib_api_client/app_types.py +2 -1
- biolib/biolib_api_client/biolib_app_api.py +1 -1
- biolib/biolib_api_client/biolib_job_api.py +6 -0
- biolib/biolib_api_client/job_types.py +4 -4
- biolib/biolib_api_client/lfs_types.py +8 -2
- biolib/biolib_binary_format/remote_endpoints.py +12 -10
- biolib/biolib_binary_format/utils.py +41 -4
- biolib/cli/__init__.py +6 -2
- biolib/cli/auth.py +58 -0
- biolib/cli/data_record.py +80 -0
- biolib/cli/download_container.py +3 -1
- biolib/cli/init.py +1 -0
- biolib/cli/lfs.py +45 -11
- biolib/cli/push.py +1 -1
- biolib/cli/run.py +3 -2
- biolib/cli/start.py +1 -0
- biolib/compute_node/cloud_utils/cloud_utils.py +15 -18
- biolib/compute_node/job_worker/cache_state.py +1 -1
- biolib/compute_node/job_worker/executors/docker_executor.py +134 -114
- biolib/compute_node/job_worker/job_storage.py +3 -4
- biolib/compute_node/job_worker/job_worker.py +31 -15
- biolib/compute_node/remote_host_proxy.py +75 -70
- biolib/compute_node/webserver/webserver_types.py +0 -1
- biolib/experiments/experiment.py +75 -44
- biolib/jobs/job.py +125 -47
- biolib/jobs/job_result.py +46 -21
- biolib/jobs/types.py +1 -1
- biolib/runtime/__init__.py +14 -1
- biolib/sdk/__init__.py +29 -5
- biolib/typing_utils.py +2 -7
- biolib/user/sign_in.py +10 -14
- biolib/utils/__init__.py +1 -1
- biolib/utils/app_uri.py +11 -4
- biolib/utils/cache_state.py +2 -2
- biolib/utils/seq_util.py +38 -30
- {pybiolib-1.1.1747.dist-info → pybiolib-1.1.2193.dist-info}/METADATA +1 -1
- pybiolib-1.1.2193.dist-info/RECORD +123 -0
- {pybiolib-1.1.1747.dist-info → pybiolib-1.1.2193.dist-info}/WHEEL +1 -1
- biolib/biolib_api_client/biolib_account_api.py +0 -8
- biolib/biolib_api_client/biolib_large_file_system_api.py +0 -34
- biolib/experiments/types.py +0 -9
- biolib/lfs/__init__.py +0 -6
- biolib/lfs/utils.py +0 -237
- biolib/runtime/results.py +0 -20
- pybiolib-1.1.1747.dist-info/RECORD +0 -108
- /biolib/{lfs → _internal/lfs}/cache.py +0 -0
- {pybiolib-1.1.1747.dist-info → pybiolib-1.1.2193.dist-info}/LICENSE +0 -0
- {pybiolib-1.1.1747.dist-info → pybiolib-1.1.2193.dist-info}/entry_points.txt +0 -0
biolib/cli/run.py
CHANGED
```diff
@@ -1,4 +1,5 @@
 import sys
+
 import click
 
 from biolib import biolib_errors, utils
@@ -27,13 +28,13 @@ def run(local: bool, non_blocking: bool, uri: str, args: Tuple[str]) -> None:
         stdin = sys.stdin.read()
         return stdin
 
-    blocking =
+    blocking = not non_blocking
     job = app.cli(
         args=list(args),
         stdin=_get_stdin(),
         files=None,
         machine=('local' if local else ''),
-        blocking=blocking
+        blocking=blocking,
     )
 
     if blocking:
```
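The run.py hunk above derives `blocking` by negating the CLI's `--non-blocking` flag once, then threads that single boolean through the call site and the wait logic. A minimal standalone sketch of the pattern, using click as the diff does (the option help text and echoed output are illustrative, not from pybiolib):

```python
import click


@click.command()
@click.option('--non-blocking', is_flag=True, default=False, help='Return immediately instead of waiting.')
def run(non_blocking: bool) -> None:
    # Negate once; every downstream decision branches on `blocking`.
    blocking = not non_blocking
    click.echo(f'blocking={blocking}')
    if blocking:
        click.echo('would wait for the job result here')


if __name__ == '__main__':
    run()
```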
biolib/compute_node/cloud_utils/cloud_utils.py
CHANGED
```diff
@@ -7,11 +7,11 @@ import time
 from datetime import datetime
 from socket import gethostbyname, gethostname
 
-from biolib import
-from biolib.biolib_logging import logger_no_user_data
-from biolib.typing_utils import Optional, List, Dict, cast
+from biolib import api, utils
 from biolib.biolib_api_client import BiolibApiClient
-from biolib.
+from biolib.biolib_logging import logger_no_user_data
+from biolib.compute_node.webserver.webserver_types import ComputeNodeInfo, ShutdownTimes, WebserverConfig
+from biolib.typing_utils import Dict, List, Optional, cast
 
 
 def trust_ceritificates(certs_data: List[str]) -> None:
@@ -54,15 +54,12 @@ class CloudUtils:
                 pybiolib_version=utils.BIOLIB_PACKAGE_VERSION,
             ),
             base_url=CloudUtils._get_environment_variable_or_fail('BIOLIB_BASE_URL'),
-            s3_general_storage_bucket_name=CloudUtils._get_environment_variable_or_fail(
-                'BIOLIB_S3_GENERAL_STORAGE_BUCKET_NAME',
-            ),
             is_dev=os.environ.get('BIOLIB_DEV') == 'TRUE',
             shutdown_times=ShutdownTimes(
                 auto_shutdown_time_in_seconds=CloudUtils._get_environment_variable_as_int(
                     'BIOLIB_CLOUD_AUTO_SHUTDOWN_TIME_IN_SECONDS'
                 ),
-            )
+            ),
         )
 
         return CloudUtils._webserver_config
@@ -81,10 +78,10 @@ class CloudUtils:
                     'error': error,
                 },
            )
-        except BaseException as
-            logger_no_user_data.error(f'Failed to deregister got error: {
+        except BaseException as error_object:
+            logger_no_user_data.error(f'Failed to deregister got error: {error_object}')
         else:
-            logger_no_user_data.error(
+            logger_no_user_data.error('Not deregistering as environment is not cloud')
 
     @staticmethod
     def shutdown() -> None:
@@ -98,7 +95,7 @@ class CloudUtils:
             except Exception as error:  # pylint: disable=broad-except
                 logger_no_user_data.error(f'Failed to shutdown got error: {error}')
         else:
-            logger_no_user_data.error(
+            logger_no_user_data.error('Not running shutdown as environment is not cloud')
 
     @staticmethod
     def deregister_and_shutdown() -> None:
@@ -131,7 +128,7 @@ class CloudUtils:
                     'auth_token': config['compute_node_info']['auth_token'],
                     'cloud_job_id': cloud_job_id,
                     'system_exception_code': system_exception_code,
-                    'exit_code': exit_code
+                    'exit_code': exit_code,
                 },
             )
         except BaseException as error:
@@ -152,14 +149,14 @@ class CloudUtils:
             data=cast(Dict[str, str], compute_node_info),
         )
         if response.status_code != 201:
-            raise Exception(
+            raise Exception('Non 201 error code')
         else:
-            logger_no_user_data.info(
+            logger_no_user_data.info('Compute node registered!')
             response_data = response.json()
-            logger_no_user_data.info(f
+            logger_no_user_data.info(f'Got data on register: {json.dumps(response_data)}')
             certs = []
-            for federation in response_data[
-                for cert_b64 in federation[
+            for federation in response_data['federation']:
+                for cert_b64 in federation['certs_b64']:
                     certs.append(base64.b64decode(cert_b64).decode())
             trust_ceritificates(certs)
 
```
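The config hunk above drops the S3 bucket setting and keeps reading required values through `CloudUtils._get_environment_variable_or_fail`. A plausible sketch of such a helper, assuming it simply fails fast on a missing variable (the actual biolib implementation is not shown in this diff):

```python
import os


def get_environment_variable_or_fail(key: str) -> str:
    # Required configuration: a missing variable should abort startup
    # rather than let None propagate into the webserver config.
    value = os.environ.get(key)
    if value is None:
        raise Exception(f'Missing environment variable "{key}"')
    return value


# Usage mirroring the hunk above:
# base_url = get_environment_variable_or_fail('BIOLIB_BASE_URL')
```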
biolib/compute_node/job_worker/executors/docker_executor.py
CHANGED

```diff
@@ -1,43 +1,42 @@
+import io
 import json
+import os
+import re
+import shlex
 import subprocess
+import tarfile
 import tempfile
 import time
-import tarfile
 import zipfile
-import os
-import io
-import re
-import shlex
 from copy import copy
 from datetime import datetime
 
 import docker  # type: ignore
 import docker.types  # type: ignore
-
-from docker.errors import ImageNotFound, APIError  # type: ignore
+from docker.errors import APIError, ImageNotFound  # type: ignore
 from docker.models.containers import Container  # type: ignore
 
 from biolib import utils
+from biolib._internal.runtime import RuntimeJobDataDict
 from biolib.biolib_binary_format import ModuleInput, ModuleOutputV2
+from biolib.biolib_binary_format.file_in_container import FileInContainer
 from biolib.biolib_docker_client import BiolibDockerClient
-from biolib.biolib_errors import
+from biolib.biolib_errors import BioLibError, DockerContainerNotFoundDuringExecutionException
 from biolib.biolib_logging import logger, logger_no_user_data
 from biolib.compute_node import utils as compute_node_utils
 from biolib.compute_node.cloud_utils import CloudUtils
 from biolib.compute_node.job_worker.docker_image_cache import DockerImageCache
-from biolib.biolib_binary_format.file_in_container import FileInContainer
 from biolib.compute_node.job_worker.executors.docker_types import DockerDiffKind
-from biolib.compute_node.job_worker.executors.types import
+from biolib.compute_node.job_worker.executors.types import LocalExecutorOptions, StatusUpdate
 from biolib.compute_node.job_worker.mappings import Mappings, path_without_first_folder
-from biolib.compute_node.job_worker.utils import ComputeProcessException
 from biolib.compute_node.job_worker.utilization_reporter_thread import UtilizationReporterThread
+from biolib.compute_node.job_worker.utils import ComputeProcessException
 from biolib.compute_node.utils import SystemExceptionCodes
-from biolib.typing_utils import
+from biolib.typing_utils import Dict, List, Optional
 
 
 class DockerExecutor:
-
-    def __init__(self, options: LocalExecutorOptions):
+    def __init__(self, options: LocalExecutorOptions) -> None:
         self._options: LocalExecutorOptions = options
         self._is_cleaning_up = False
 
@@ -81,11 +80,12 @@ class DockerExecutor:
             raise Exception('Docker container was None')
         return self._docker_container
 
-    def execute_module(self,
+    def execute_module(self, module_input_path: str, module_output_path: str) -> None:
         try:
             job_uuid = self._options['job']['public_id']
             send_status_update = self._options['send_status_update']
-
+            with open(module_input_path, 'rb') as fp:
+                module_input = ModuleInput(fp.read()).deserialize()
 
             send_status_update(StatusUpdate(progress=55, log_message='Pulling images...'))
 
@@ -113,35 +113,49 @@ class DockerExecutor:
         except Exception:  # pylint: disable=broad-except
             logger_no_user_data.error('DockerExecutor failed to clean up container')
 
-    def _pull(self):
-
-
-
-
-                image_uri=self._absolute_image_uri,
-                estimated_image_size_bytes=self._options['module']['estimated_image_size_bytes'],
-                job_id=self._options['job']['public_id'],
-            )
-        else:
-            docker_client = BiolibDockerClient.get_docker_client()
-            try:
-                docker_client.images.get(self._absolute_image_uri)
-            except ImageNotFound:
-                job_uuid = self._options['job'].get('federated_job_uuid') or self._options['job']['public_id']
-                docker_client.images.pull(
-                    self._absolute_image_uri,
-                    auth_config={'username': 'biolib', 'password': f',{job_uuid}'},
-                )
+    def _pull(self) -> None:
+        retries = 3
+        last_error: Optional[Exception] = None
+        estimated_image_size_bytes = self._options['module']['estimated_image_size_bytes']
+        assert estimated_image_size_bytes is not None, 'No estimated image size'
 
-
+        for retry_count in range(retries + 1):
+            if retry_count > 0:
+                logger_no_user_data.debug(f'Retrying Docker image pull of "{self._absolute_image_uri}"')
+                time.sleep(5 * retry_count)
+            try:
+                start_time = time.time()
+                if utils.IS_RUNNING_IN_CLOUD and not self._options['job'].get('federated_job_uuid'):
+                    DockerImageCache().get(
+                        image_uri=self._absolute_image_uri,
+                        estimated_image_size_bytes=estimated_image_size_bytes,
+                        job_id=self._options['job']['public_id'],
+                    )
+                else:
+                    docker_client = BiolibDockerClient.get_docker_client()
+                    try:
+                        docker_client.images.get(self._absolute_image_uri)
+                    except ImageNotFound:
+                        job_uuid = self._options['job'].get('federated_job_uuid') or self._options['job']['public_id']
+                        docker_client.images.pull(
+                            self._absolute_image_uri,
+                            auth_config={'username': 'biolib', 'password': f',{job_uuid}'},
+                        )
+
+                logger_no_user_data.debug(f'Pulled image in: {time.time() - start_time}')
+                return
+            except Exception as error:
+                logger_no_user_data.warning(
+                    f'Pull of Docker image "{self._absolute_image_uri}" returned error: {error}'
+                )
+                last_error = error
 
-
-
-
-
-
-
-            ) from exception
+        raise ComputeProcessException(
+            last_error or Exception('Retries exceeded: failed to pull Docker image'),
+            SystemExceptionCodes.FAILED_TO_PULL_DOCKER_IMAGE.value,
+            self._send_system_exception,
+            may_contain_user_data=False,
+        )
 
     def _execute_helper(self, module_input, module_output_path: str) -> None:
         job_uuid = self._options['job']['public_id']
@@ -152,10 +166,10 @@ class DockerExecutor:
             logger_no_user_data.debug(f'Job "{job_uuid}" starting utilization metrics reporter thread...')
             config = CloudUtils.get_webserver_config()
             node_auth_token = config['compute_node_info']['auth_token']  # pylint: disable=unsubscriptable-object
-            cloud_job = self._options[
+            cloud_job = self._options['cloud_job']
             include_gpu_stats = False
             if cloud_job:
-                include_gpu_stats = cloud_job.get(
+                include_gpu_stats = cloud_job.get('reserved_gpu_count', 0) > 0
             UtilizationReporterThread(
                 container=self._container,
                 job_uuid=job_uuid,
@@ -210,7 +224,7 @@ class DockerExecutor:
                 raise ComputeProcessException(
                     MemoryError(),
                     SystemExceptionCodes.OUT_OF_MEMORY.value,
-                    self._send_system_exception
+                    self._send_system_exception,
                 )
 
             logger_no_user_data.debug(f'Docker container exited with code {exit_code} for {job_uuid}')
@@ -241,24 +255,23 @@ class DockerExecutor:
         for path_to_delete in [self._input_tar_path, self._runtime_tar_path]:
             if os.path.exists(path_to_delete):
                 os.remove(path_to_delete)
-        logger_no_user_data.debug(f
+        logger_no_user_data.debug(f'Deleted tars in: {time.time() - tar_time}')
 
         container_time = time.time()
         if self._docker_container:
             self._docker_container.remove(force=True)
 
         if utils.IS_RUNNING_IN_CLOUD:
-            DockerImageCache().detach_job(
-                image_uri=self._absolute_image_uri,
-                job_id=self._options['job']['public_id']
-            )
+            DockerImageCache().detach_job(image_uri=self._absolute_image_uri, job_id=self._options['job']['public_id'])
 
-        logger_no_user_data.debug(f
+        logger_no_user_data.debug(f'Deleted compute container in: {time.time() - container_time}')
         self._tmp_secrets_dir.cleanup()
 
     # TODO: type this method
     def _initialize_docker_container(self, module_input):
         try:
+            job_uuid = self._options['job']['public_id']
+            logger_no_user_data.debug(f'Job "{job_uuid}" initializing Docker container...')
             module = self._options['module']
             logger.debug(f"Initializing docker container with command: {module['command']}")
 
@@ -267,12 +280,13 @@ class DockerExecutor:
             internal_network = self._options['internal_network']
             extra_hosts: Dict[str, str] = {}
 
-            biolib_system_secret =
-
-
-
-
-
+            biolib_system_secret = RuntimeJobDataDict(
+                version='1.0.0',
+                job_requested_machine=self._options['job']['requested_machine'],
+                job_uuid=self._options['job']['public_id'],
+                job_auth_token=self._options['job']['auth_token'],
+                app_uri=self._options['job']['app_uri'],
+            )
             secrets: Dict[str, str] = dict(
                 **module.get('secrets', {}),
                 biolib_system_secret=json.dumps(biolib_system_secret, indent=4),
@@ -288,32 +302,40 @@ class DockerExecutor:
             )
             if app_version_created_at < datetime(2022, 11, 30, 0, 0):
                 environment_vars = module.get('secrets', {})
-                environment_vars.update(
-
-
-
+                environment_vars.update(
+                    {
+                        'BIOLIB_JOB_UUID': self._options['job']['public_id'],
+                        'BIOLIB_JOB_AUTH_TOKEN': self._options['job']['auth_token'],
+                    }
+                )
 
             if utils.IS_RUNNING_IN_CLOUD and self._options['cloud_job']:
-                environment_vars.update(
-
-
+                environment_vars.update(
+                    {
+                        'BIOLIB_JOB_MAX_RUNTIME_IN_SECONDS': self._options['cloud_job']['max_runtime_in_seconds'],
+                    }
+                )
 
+            logger_no_user_data.debug(f'Job "{job_uuid}" initializing Docker container. Getting IPs for proxies...')
             for proxy in self._options['remote_host_proxies']:
                 proxy_ip = proxy.get_ip_address_on_network(internal_network)
                 if proxy.is_app_caller_proxy:
                     logger_no_user_data.debug('Found app caller proxy, setting both base URLs in compute container')
-                    environment_vars.update(
-
-
-
-
-
-
-
+                    environment_vars.update(
+                        {
+                            'BIOLIB_BASE_URL': f'http://{proxy_ip}',
+                            'BIOLIB_CLOUD_BASE_URL': f'http://{proxy_ip}',
+                            # This should be removed eventually, but will break apps calling apps on older versions
+                            'BIOLIB_CLOUD_RESULTS_BASE_URL': f'http://{proxy_ip}',
+                            'BIOLIB_CLOUD_JOB_STORAGE_BASE_URL': f'http://{proxy_ip}',
+                            # Inform container if we are targeting public biolib as we change the BIOLIB_BASE_URL
+                            'BIOLIB_ENVIRONMENT_IS_PUBLIC_BIOLIB': bool(utils.BASE_URL_IS_PUBLIC_BIOLIB),
+                        }
+                    )
                 else:
                     extra_hosts[proxy.hostname] = proxy_ip
 
+            logger_no_user_data.debug(f'Job "{job_uuid}" initializing Docker container. Constructing container args...')
             create_container_args = {
                 'environment': environment_vars,
                 'extra_hosts': extra_hosts,
@@ -325,45 +347,38 @@ class DockerExecutor:
 
             if self._options['job'].get('arguments_override_command'):
                 # In this case, arguments contains a user specified command to run in the app
-                create_container_args.update({
-                    'command': module_input['arguments'],
-                    'entrypoint': ''
-                })
+                create_container_args.update({'command': module_input['arguments'], 'entrypoint': ''})
 
             else:
-                create_container_args.update({
-                    'command': shlex.split(module['command']) + module_input['arguments']
-                })
+                create_container_args.update({'command': shlex.split(module['command']) + module_input['arguments']})
 
             app_version = self._options['job']['app_version']
             if app_version.get('main_output_file') or app_version.get('stdout_render_type') == 'text':
                 create_container_args['tty'] = True
 
             if utils.IS_RUNNING_IN_CLOUD:
-                cloud_job = self._options[
+                cloud_job = self._options['cloud_job']
                 create_container_args['mem_limit'] = f'{cloud_job["reserved_memory_in_bytes"]}b'
-                create_container_args['nano_cpus'] = cloud_job[
+                create_container_args['nano_cpus'] = cloud_job['reserved_cpu_in_nano_shares']
+                create_container_args['pids_limit'] = 10_000
 
                 biolib_identity_user_email: Optional[str] = cloud_job.get('biolib_identity_user_email')
                 if biolib_identity_user_email:
-                    create_container_args['environment'].update(
-                        'BIOLIB_IDENTITY_USER_EMAIL': biolib_identity_user_email
-
+                    create_container_args['environment'].update(
+                        {'BIOLIB_IDENTITY_USER_EMAIL': biolib_identity_user_email}
+                    )
 
             docker_runtime = os.getenv('BIOLIB_DOCKER_RUNTIME')
             if docker_runtime is not None:
                 create_container_args['runtime'] = docker_runtime
 
-
-
-            )
-
-            logger_no_user_data.debug('Finished initializing docker container')
+            docker_client = BiolibDockerClient.get_docker_client()
+            logger_no_user_data.debug(f'Job "{job_uuid}" initializing Docker container. Creating container...')
+            self._docker_container = docker_client.containers.create(**create_container_args)
+            logger_no_user_data.debug(f'Job "{job_uuid}" finished initializing Docker container.')
         except Exception as exception:
             raise ComputeProcessException(
-                exception,
-                SystemExceptionCodes.FAILED_TO_START_COMPUTE_CONTAINER.value,
-                self._send_system_exception
+                exception, SystemExceptionCodes.FAILED_TO_START_COMPUTE_CONTAINER.value, self._send_system_exception
             ) from exception
 
     def _add_file_to_tar(self, tar, current_path, mapped_path, data):
@@ -430,7 +445,7 @@ class DockerExecutor:
             raise ComputeProcessException(
                 exception,
                 SystemExceptionCodes.FAILED_TO_COPY_INPUT_FILES_TO_COMPUTE_CONTAINER.value,
-                self._send_system_exception
+                self._send_system_exception,
             ) from exception
 
     def _map_and_copy_runtime_files_to_container(self, runtime_zip_data, arguments: List[str], remove_root_folder=True):
@@ -445,17 +460,17 @@ class DockerExecutor:
             raise ComputeProcessException(
                 exception,
                 SystemExceptionCodes.FAILED_TO_COPY_RUNTIME_FILES_TO_COMPUTE_CONTAINER.value,
-                self._send_system_exception
+                self._send_system_exception,
             ) from exception
 
     def _write_module_output_to_file(
-
-
-
-
-
-
-
+        self,
+        arguments: List[str],
+        exit_code: int,
+        module_output_path: str,
+        stderr: bytes,
+        stdout: bytes,
+        pre_start_diff: List[Dict],
     ) -> None:
         mapped_files: List[FileInContainer] = []
         try:
@@ -500,9 +515,11 @@ class DockerExecutor:
         result = subprocess.run(
             args=[
                 'ctr',
-                '--namespace',
+                '--namespace',
+                'moby',
                 'snapshots',
-                '--snapshotter',
+                '--snapshotter',
+                'nydus',
                 'mounts',
                 '/some_arbitrary_path',
                 str(self._container.id),
@@ -524,9 +541,10 @@ class DockerExecutor:
         pre_start_diff_paths = [obj['Path'] for obj in pre_start_diff]
         post_run_diff = self._container.diff()
         run_diff_paths: List[str] = [
-            obj['Path']
-            obj
-            obj['
+            obj['Path']
+            for obj in post_run_diff
+            if obj['Kind'] in (DockerDiffKind.CHANGED.value, DockerDiffKind.ADDED.value)
+            and obj['Path'] not in pre_start_diff_paths
         ]
 
         known_directories = set()
@@ -536,7 +554,7 @@ class DockerExecutor:
             if idx == 0:
                 continue  # always skip root
 
-            folder = '/' + '/'.join(parent_folders[1:idx + 1])
+            folder = '/' + '/'.join(parent_folders[1 : idx + 1])
             known_directories.add(folder)
 
         def path_is_included_in_from_mappings(path: str) -> bool:
@@ -556,11 +574,13 @@ class DockerExecutor:
         files_and_empty_dirs: List[FileInContainer] = []
         for path in run_diff_paths:
             if path not in known_directories and path_is_included_in_from_mappings(path):
-                files_and_empty_dirs.append(
-
-
-
-
+                files_and_empty_dirs.append(
+                    FileInContainer(
+                        container=self._container,
+                        overlay_upper_dir_path=overlay_upper_dir_path,
+                        path_in_container=path,
+                    )
+                )
 
         return files_and_empty_dirs
 
```
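The `_pull` rewrite above turns a single pull attempt into a retry loop: up to three extra attempts, a linearly growing sleep before each retry, and the last error wrapped in a `ComputeProcessException` if every attempt fails. A self-contained sketch of that control flow (`pull_image` is a stand-in callable, not the biolib API):

```python
import time
from typing import Callable, Optional


def pull_with_retries(pull_image: Callable[[], None], retries: int = 3) -> None:
    last_error: Optional[Exception] = None
    for retry_count in range(retries + 1):
        if retry_count > 0:
            time.sleep(5 * retry_count)  # back off 5s, 10s, 15s before retries
        try:
            pull_image()
            return  # success: stop retrying
        except Exception as error:  # pylint: disable=broad-except
            last_error = error
    raise last_error or Exception('Retries exceeded: failed to pull Docker image')
```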
biolib/compute_node/job_worker/job_storage.py
CHANGED

```diff
@@ -10,6 +10,7 @@ from biolib.utils.multipart_uploader import get_chunk_iterator_from_file_object
 
 
 class JobStorage:
+    module_input_file_name = 'input-output.bbf'
     module_output_file_name = 'module-output.bbf'
 
     @staticmethod
@@ -81,7 +82,7 @@ class JobStorage:
         )
 
     @staticmethod
-    def
+    def download_module_input(job: CreatedJobDict, path: str):
         job_uuid = job['public_id']
         logger_no_user_data.debug(f'Job "{job_uuid}" downloading module input...')
         presigned_download_url = BiolibJobApi.get_job_storage_download_url(
@@ -89,7 +90,5 @@ class JobStorage:
             job_auth_token=job['auth_token'],
             storage_type='input',
         )
-
-        data: bytes = response.content
+        HttpClient.request(url=presigned_download_url, response_path=path)
         logger_no_user_data.debug(f'Job "{job_uuid}" module input downloaded')
-        return data
```