pybiolib 1.1.1881__py3-none-any.whl → 1.1.2193__py3-none-any.whl
This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in the public registry.
- biolib/__init__.py +11 -4
- biolib/_data_record/data_record.py +278 -0
- biolib/_internal/data_record/__init__.py +1 -1
- biolib/_internal/data_record/data_record.py +95 -151
- biolib/_internal/data_record/remote_storage_endpoint.py +18 -7
- biolib/_internal/file_utils.py +77 -0
- biolib/_internal/fuse_mount/__init__.py +1 -0
- biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
- biolib/_internal/http_client.py +29 -9
- biolib/_internal/lfs/__init__.py +1 -0
- biolib/_internal/libs/__init__.py +1 -0
- biolib/_internal/libs/fusepy/__init__.py +1257 -0
- biolib/_internal/push_application.py +1 -1
- biolib/_internal/runtime.py +2 -56
- biolib/_internal/types/__init__.py +4 -0
- biolib/_internal/types/app.py +9 -0
- biolib/_internal/types/data_record.py +40 -0
- biolib/_internal/types/experiment.py +10 -0
- biolib/_internal/types/resource.py +14 -0
- biolib/_internal/types/typing.py +7 -0
- biolib/_runtime/runtime.py +80 -0
- biolib/api/__init__.py +1 -0
- biolib/api/client.py +39 -17
- biolib/app/app.py +34 -71
- biolib/biolib_api_client/api_client.py +9 -2
- biolib/biolib_api_client/app_types.py +2 -2
- biolib/biolib_api_client/biolib_job_api.py +6 -0
- biolib/biolib_api_client/job_types.py +4 -4
- biolib/biolib_api_client/lfs_types.py +8 -2
- biolib/biolib_binary_format/remote_endpoints.py +12 -10
- biolib/biolib_binary_format/utils.py +23 -3
- biolib/cli/auth.py +1 -1
- biolib/cli/data_record.py +43 -6
- biolib/cli/lfs.py +10 -6
- biolib/compute_node/cloud_utils/cloud_utils.py +13 -16
- biolib/compute_node/job_worker/executors/docker_executor.py +126 -108
- biolib/compute_node/job_worker/job_storage.py +3 -4
- biolib/compute_node/job_worker/job_worker.py +25 -15
- biolib/compute_node/remote_host_proxy.py +61 -84
- biolib/compute_node/webserver/webserver_types.py +0 -1
- biolib/experiments/experiment.py +75 -44
- biolib/jobs/job.py +98 -19
- biolib/jobs/job_result.py +46 -21
- biolib/jobs/types.py +1 -1
- biolib/runtime/__init__.py +2 -1
- biolib/sdk/__init__.py +18 -7
- biolib/typing_utils.py +2 -7
- biolib/user/sign_in.py +2 -2
- biolib/utils/seq_util.py +38 -35
- {pybiolib-1.1.1881.dist-info → pybiolib-1.1.2193.dist-info}/METADATA +1 -1
- {pybiolib-1.1.1881.dist-info → pybiolib-1.1.2193.dist-info}/RECORD +55 -44
- biolib/experiments/types.py +0 -9
- biolib/lfs/__init__.py +0 -4
- biolib/lfs/utils.py +0 -153
- /biolib/{lfs → _internal/lfs}/cache.py +0 -0
- {pybiolib-1.1.1881.dist-info → pybiolib-1.1.2193.dist-info}/LICENSE +0 -0
- {pybiolib-1.1.1881.dist-info → pybiolib-1.1.2193.dist-info}/WHEEL +0 -0
- {pybiolib-1.1.1881.dist-info → pybiolib-1.1.2193.dist-info}/entry_points.txt +0 -0
biolib/compute_node/job_worker/executors/docker_executor.py:

```diff
@@ -1,44 +1,41 @@
+import io
 import json
+import os
+import re
+import shlex
 import subprocess
+import tarfile
 import tempfile
 import time
-import tarfile
 import zipfile
-import os
-import io
-import re
-import shlex
 from copy import copy
 from datetime import datetime
 
 import docker  # type: ignore
 import docker.types  # type: ignore
-
-from docker.errors import ImageNotFound, APIError  # type: ignore
+from docker.errors import APIError, ImageNotFound  # type: ignore
 from docker.models.containers import Container  # type: ignore
 
 from biolib import utils
-
 from biolib._internal.runtime import RuntimeJobDataDict
 from biolib.biolib_binary_format import ModuleInput, ModuleOutputV2
+from biolib.biolib_binary_format.file_in_container import FileInContainer
 from biolib.biolib_docker_client import BiolibDockerClient
-from biolib.biolib_errors import
+from biolib.biolib_errors import BioLibError, DockerContainerNotFoundDuringExecutionException
 from biolib.biolib_logging import logger, logger_no_user_data
 from biolib.compute_node import utils as compute_node_utils
 from biolib.compute_node.cloud_utils import CloudUtils
 from biolib.compute_node.job_worker.docker_image_cache import DockerImageCache
-from biolib.biolib_binary_format.file_in_container import FileInContainer
 from biolib.compute_node.job_worker.executors.docker_types import DockerDiffKind
-from biolib.compute_node.job_worker.executors.types import
+from biolib.compute_node.job_worker.executors.types import LocalExecutorOptions, StatusUpdate
 from biolib.compute_node.job_worker.mappings import Mappings, path_without_first_folder
-from biolib.compute_node.job_worker.utils import ComputeProcessException
 from biolib.compute_node.job_worker.utilization_reporter_thread import UtilizationReporterThread
+from biolib.compute_node.job_worker.utils import ComputeProcessException
 from biolib.compute_node.utils import SystemExceptionCodes
-from biolib.typing_utils import
+from biolib.typing_utils import Dict, List, Optional
 
 
 class DockerExecutor:
-
     def __init__(self, options: LocalExecutorOptions) -> None:
         self._options: LocalExecutorOptions = options
         self._is_cleaning_up = False
```
```diff
@@ -83,11 +80,12 @@ class DockerExecutor:
             raise Exception('Docker container was None')
         return self._docker_container
 
-    def execute_module(self,
+    def execute_module(self, module_input_path: str, module_output_path: str) -> None:
         try:
             job_uuid = self._options['job']['public_id']
             send_status_update = self._options['send_status_update']
-
+            with open(module_input_path, 'rb') as fp:
+                module_input = ModuleInput(fp.read()).deserialize()
 
             send_status_update(StatusUpdate(progress=55, log_message='Pulling images...'))
 
```
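`execute_module` previously received the serialized module input as bytes; it now receives a file path and reads the payload only at the point of use. A minimal sketch of the before/after shape (the function and variable names here are illustrative, not the package's API):

```python
from typing import Any


def deserialize(data: bytes) -> Any:
    """Stand-in for ModuleInput(...).deserialize(); assumed for illustration."""
    return data


# Before: every caller had to hold the full serialized payload in memory.
def execute_module_from_bytes(module_input_serialized: bytes) -> None:
    module_input = deserialize(module_input_serialized)


# After: callers pass a path; the bytes are read (and freed) where they are needed.
def execute_module_from_path(module_input_path: str) -> None:
    with open(module_input_path, 'rb') as fp:
        module_input = deserialize(fp.read())
```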
```diff
@@ -115,35 +113,49 @@ class DockerExecutor:
         except Exception:  # pylint: disable=broad-except
             logger_no_user_data.error('DockerExecutor failed to clean up container')
 
-    def _pull(self):
-
-
-
-
-                image_uri=self._absolute_image_uri,
-                estimated_image_size_bytes=self._options['module']['estimated_image_size_bytes'],
-                job_id=self._options['job']['public_id'],
-            )
-        else:
-            docker_client = BiolibDockerClient.get_docker_client()
-            try:
-                docker_client.images.get(self._absolute_image_uri)
-            except ImageNotFound:
-                job_uuid = self._options['job'].get('federated_job_uuid') or self._options['job']['public_id']
-                docker_client.images.pull(
-                    self._absolute_image_uri,
-                    auth_config={'username': 'biolib', 'password': f',{job_uuid}'},
-                )
+    def _pull(self) -> None:
+        retries = 3
+        last_error: Optional[Exception] = None
+        estimated_image_size_bytes = self._options['module']['estimated_image_size_bytes']
+        assert estimated_image_size_bytes is not None, 'No estimated image size'
 
-
+        for retry_count in range(retries + 1):
+            if retry_count > 0:
+                logger_no_user_data.debug(f'Retrying Docker image pull of "{self._absolute_image_uri}"')
+                time.sleep(5 * retry_count)
+            try:
+                start_time = time.time()
+                if utils.IS_RUNNING_IN_CLOUD and not self._options['job'].get('federated_job_uuid'):
+                    DockerImageCache().get(
+                        image_uri=self._absolute_image_uri,
+                        estimated_image_size_bytes=estimated_image_size_bytes,
+                        job_id=self._options['job']['public_id'],
+                    )
+                else:
+                    docker_client = BiolibDockerClient.get_docker_client()
+                    try:
+                        docker_client.images.get(self._absolute_image_uri)
+                    except ImageNotFound:
+                        job_uuid = self._options['job'].get('federated_job_uuid') or self._options['job']['public_id']
+                        docker_client.images.pull(
+                            self._absolute_image_uri,
+                            auth_config={'username': 'biolib', 'password': f',{job_uuid}'},
+                        )
+
+                logger_no_user_data.debug(f'Pulled image in: {time.time() - start_time}')
+                return
+            except Exception as error:
+                logger_no_user_data.warning(
+                    f'Pull of Docker image "{self._absolute_image_uri}" returned error: {error}'
+                )
+                last_error = error
 
-
-
-
-
-
-        ) from exception
+        raise ComputeProcessException(
+            last_error or Exception('Retries exceeded: failed to pull Docker image'),
+            SystemExceptionCodes.FAILED_TO_PULL_DOCKER_IMAGE.value,
+            self._send_system_exception,
+            may_contain_user_data=False,
+        )
 
     def _execute_helper(self, module_input, module_output_path: str) -> None:
         job_uuid = self._options['job']['public_id']
```
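The rewritten `_pull` retries up to three times with a linearly growing sleep (5 s, 10 s, 15 s), remembers the last error, and only raises `ComputeProcessException` once every attempt has failed. The same control flow, reduced to a generic helper (a sketch under the assumption that the retried action is idempotent; not part of the package):

```python
import time
from typing import Callable, Optional, TypeVar

T = TypeVar('T')


def retry_with_linear_backoff(action: Callable[[], T], retries: int = 3, base_delay_seconds: int = 5) -> T:
    last_error: Optional[Exception] = None
    for retry_count in range(retries + 1):
        if retry_count > 0:
            time.sleep(base_delay_seconds * retry_count)  # 5s, 10s, 15s for retries=3
        try:
            return action()
        except Exception as error:  # pylint: disable=broad-except
            last_error = error
    raise last_error or Exception('Retries exceeded')
```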
```diff
@@ -154,10 +166,10 @@ class DockerExecutor:
             logger_no_user_data.debug(f'Job "{job_uuid}" starting utilization metrics reporter thread...')
             config = CloudUtils.get_webserver_config()
             node_auth_token = config['compute_node_info']['auth_token']  # pylint: disable=unsubscriptable-object
-            cloud_job = self._options[
+            cloud_job = self._options['cloud_job']
             include_gpu_stats = False
             if cloud_job:
-                include_gpu_stats = cloud_job.get(
+                include_gpu_stats = cloud_job.get('reserved_gpu_count', 0) > 0
             UtilizationReporterThread(
                 container=self._container,
                 job_uuid=job_uuid,
@@ -212,7 +224,7 @@ class DockerExecutor:
             raise ComputeProcessException(
                 MemoryError(),
                 SystemExceptionCodes.OUT_OF_MEMORY.value,
-                self._send_system_exception
+                self._send_system_exception,
             )
 
         logger_no_user_data.debug(f'Docker container exited with code {exit_code} for {job_uuid}')
@@ -243,24 +255,23 @@ class DockerExecutor:
         for path_to_delete in [self._input_tar_path, self._runtime_tar_path]:
             if os.path.exists(path_to_delete):
                 os.remove(path_to_delete)
-        logger_no_user_data.debug(f
+        logger_no_user_data.debug(f'Deleted tars in: {time.time() - tar_time}')
 
         container_time = time.time()
         if self._docker_container:
             self._docker_container.remove(force=True)
 
         if utils.IS_RUNNING_IN_CLOUD:
-            DockerImageCache().detach_job(
-                image_uri=self._absolute_image_uri,
-                job_id=self._options['job']['public_id']
-            )
+            DockerImageCache().detach_job(image_uri=self._absolute_image_uri, job_id=self._options['job']['public_id'])
 
-        logger_no_user_data.debug(f
+        logger_no_user_data.debug(f'Deleted compute container in: {time.time() - container_time}')
         self._tmp_secrets_dir.cleanup()
 
     # TODO: type this method
     def _initialize_docker_container(self, module_input):
         try:
+            job_uuid = self._options['job']['public_id']
+            logger_no_user_data.debug(f'Job "{job_uuid}" initializing Docker container...')
             module = self._options['module']
             logger.debug(f"Initializing docker container with command: {module['command']}")
 
@@ -274,6 +285,7 @@ class DockerExecutor:
                 job_requested_machine=self._options['job']['requested_machine'],
                 job_uuid=self._options['job']['public_id'],
                 job_auth_token=self._options['job']['auth_token'],
+                app_uri=self._options['job']['app_uri'],
             )
             secrets: Dict[str, str] = dict(
                 **module.get('secrets', {}),
@@ -290,32 +302,40 @@ class DockerExecutor:
             )
             if app_version_created_at < datetime(2022, 11, 30, 0, 0):
                 environment_vars = module.get('secrets', {})
-                environment_vars.update(
-
-
-
+                environment_vars.update(
+                    {
+                        'BIOLIB_JOB_UUID': self._options['job']['public_id'],
+                        'BIOLIB_JOB_AUTH_TOKEN': self._options['job']['auth_token'],
+                    }
+                )
 
             if utils.IS_RUNNING_IN_CLOUD and self._options['cloud_job']:
-                environment_vars.update(
-
-
+                environment_vars.update(
+                    {
+                        'BIOLIB_JOB_MAX_RUNTIME_IN_SECONDS': self._options['cloud_job']['max_runtime_in_seconds'],
+                    }
+                )
 
+            logger_no_user_data.debug(f'Job "{job_uuid}" initializing Docker container. Getting IPs for proxies...')
             for proxy in self._options['remote_host_proxies']:
                 proxy_ip = proxy.get_ip_address_on_network(internal_network)
                 if proxy.is_app_caller_proxy:
                     logger_no_user_data.debug('Found app caller proxy, setting both base URLs in compute container')
-                    environment_vars.update(
-
-
-
-
-
-
-
-
+                    environment_vars.update(
+                        {
+                            'BIOLIB_BASE_URL': f'http://{proxy_ip}',
+                            'BIOLIB_CLOUD_BASE_URL': f'http://{proxy_ip}',
+                            # This should be removed eventually, but will break apps calling apps on older versions
+                            'BIOLIB_CLOUD_RESULTS_BASE_URL': f'http://{proxy_ip}',
+                            'BIOLIB_CLOUD_JOB_STORAGE_BASE_URL': f'http://{proxy_ip}',
+                            # Inform container if we are targeting public biolib as we change the BIOLIB_BASE_URL
+                            'BIOLIB_ENVIRONMENT_IS_PUBLIC_BIOLIB': bool(utils.BASE_URL_IS_PUBLIC_BIOLIB),
+                        }
+                    )
                 else:
                     extra_hosts[proxy.hostname] = proxy_ip
 
+            logger_no_user_data.debug(f'Job "{job_uuid}" initializing Docker container. Constructing container args...')
             create_container_args = {
                 'environment': environment_vars,
                 'extra_hosts': extra_hosts,
```
|
|
327
347
|
|
328
348
|
if self._options['job'].get('arguments_override_command'):
|
329
349
|
# In this case, arguments contains a user specified command to run in the app
|
330
|
-
create_container_args.update({
|
331
|
-
'command': module_input['arguments'],
|
332
|
-
'entrypoint': ''
|
333
|
-
})
|
350
|
+
create_container_args.update({'command': module_input['arguments'], 'entrypoint': ''})
|
334
351
|
|
335
352
|
else:
|
336
|
-
create_container_args.update({
|
337
|
-
'command': shlex.split(module['command']) + module_input['arguments']
|
338
|
-
})
|
353
|
+
create_container_args.update({'command': shlex.split(module['command']) + module_input['arguments']})
|
339
354
|
|
340
355
|
app_version = self._options['job']['app_version']
|
341
356
|
if app_version.get('main_output_file') or app_version.get('stdout_render_type') == 'text':
|
342
357
|
create_container_args['tty'] = True
|
343
358
|
|
344
359
|
if utils.IS_RUNNING_IN_CLOUD:
|
345
|
-
cloud_job = self._options[
|
360
|
+
cloud_job = self._options['cloud_job']
|
346
361
|
create_container_args['mem_limit'] = f'{cloud_job["reserved_memory_in_bytes"]}b'
|
347
|
-
create_container_args['nano_cpus'] = cloud_job[
|
362
|
+
create_container_args['nano_cpus'] = cloud_job['reserved_cpu_in_nano_shares']
|
363
|
+
create_container_args['pids_limit'] = 10_000
|
348
364
|
|
349
365
|
biolib_identity_user_email: Optional[str] = cloud_job.get('biolib_identity_user_email')
|
350
366
|
if biolib_identity_user_email:
|
351
|
-
create_container_args['environment'].update(
|
352
|
-
'BIOLIB_IDENTITY_USER_EMAIL': biolib_identity_user_email
|
353
|
-
|
367
|
+
create_container_args['environment'].update(
|
368
|
+
{'BIOLIB_IDENTITY_USER_EMAIL': biolib_identity_user_email}
|
369
|
+
)
|
354
370
|
|
355
371
|
docker_runtime = os.getenv('BIOLIB_DOCKER_RUNTIME')
|
356
372
|
if docker_runtime is not None:
|
357
373
|
create_container_args['runtime'] = docker_runtime
|
358
374
|
|
359
|
-
|
360
|
-
|
361
|
-
)
|
362
|
-
|
363
|
-
logger_no_user_data.debug('Finished initializing docker container')
|
375
|
+
docker_client = BiolibDockerClient.get_docker_client()
|
376
|
+
logger_no_user_data.debug(f'Job "{job_uuid}" initializing Docker container. Creating container...')
|
377
|
+
self._docker_container = docker_client.containers.create(**create_container_args)
|
378
|
+
logger_no_user_data.debug(f'Job "{job_uuid}" finished initializing Docker container.')
|
364
379
|
except Exception as exception:
|
365
380
|
raise ComputeProcessException(
|
366
|
-
exception,
|
367
|
-
SystemExceptionCodes.FAILED_TO_START_COMPUTE_CONTAINER.value,
|
368
|
-
self._send_system_exception
|
381
|
+
exception, SystemExceptionCodes.FAILED_TO_START_COMPUTE_CONTAINER.value, self._send_system_exception
|
369
382
|
) from exception
|
370
383
|
|
371
384
|
def _add_file_to_tar(self, tar, current_path, mapped_path, data):
|
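The cloud branch now sets `pids_limit = 10_000` alongside the existing memory and CPU caps, and container creation collapses into a single `docker_client.containers.create(**create_container_args)` call. With plain docker-py the same resource limits look like this (the image, command, and numbers are placeholders, not values from the package):

```python
import docker

client = docker.from_env()
container = client.containers.create(
    image='alpine:3.19',            # placeholder image
    command=['echo', 'hello'],
    mem_limit=f'{2 * 1024**3}b',    # reserved bytes, with the 'b' suffix used in the diff
    nano_cpus=2_000_000_000,        # 2 CPUs expressed in nano-shares
    pids_limit=10_000,              # bounds runaway process creation (e.g. fork bombs)
)
```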
```diff
@@ -432,7 +445,7 @@ class DockerExecutor:
             raise ComputeProcessException(
                 exception,
                 SystemExceptionCodes.FAILED_TO_COPY_INPUT_FILES_TO_COMPUTE_CONTAINER.value,
-                self._send_system_exception
+                self._send_system_exception,
             ) from exception
 
     def _map_and_copy_runtime_files_to_container(self, runtime_zip_data, arguments: List[str], remove_root_folder=True):
@@ -447,17 +460,17 @@ class DockerExecutor:
             raise ComputeProcessException(
                 exception,
                 SystemExceptionCodes.FAILED_TO_COPY_RUNTIME_FILES_TO_COMPUTE_CONTAINER.value,
-                self._send_system_exception
+                self._send_system_exception,
             ) from exception
 
     def _write_module_output_to_file(
-
-
-
-
-
-
-
+        self,
+        arguments: List[str],
+        exit_code: int,
+        module_output_path: str,
+        stderr: bytes,
+        stdout: bytes,
+        pre_start_diff: List[Dict],
     ) -> None:
         mapped_files: List[FileInContainer] = []
         try:
@@ -502,9 +515,11 @@ class DockerExecutor:
         result = subprocess.run(
             args=[
                 'ctr',
-                '--namespace',
+                '--namespace',
+                'moby',
                 'snapshots',
-                '--snapshotter',
+                '--snapshotter',
+                'nydus',
                 'mounts',
                 '/some_arbitrary_path',
                 str(self._container.id),
@@ -526,9 +541,10 @@ class DockerExecutor:
         pre_start_diff_paths = [obj['Path'] for obj in pre_start_diff]
         post_run_diff = self._container.diff()
         run_diff_paths: List[str] = [
-            obj['Path']
-            obj
-            obj['
+            obj['Path']
+            for obj in post_run_diff
+            if obj['Kind'] in (DockerDiffKind.CHANGED.value, DockerDiffKind.ADDED.value)
+            and obj['Path'] not in pre_start_diff_paths
         ]
 
         known_directories = set()
```
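In docker-py, `Container.diff()` returns a list of `{'Path': str, 'Kind': int}` entries, where `Kind` follows the Docker changes API. The rebuilt comprehension keeps changed and added paths that were not already dirty before the module started. A self-contained version of that filter (the enum mirrors the package's `DockerDiffKind`; its values are an assumption based on the Docker API):

```python
from enum import Enum
from typing import Dict, List


class DockerDiffKind(Enum):
    # Assumed to mirror the package's enum; values follow the Docker changes API.
    CHANGED = 0
    ADDED = 1
    DELETED = 2


def paths_written_during_run(post_run_diff: List[Dict], pre_start_diff_paths: List[str]) -> List[str]:
    # Keep only paths the module itself changed or created.
    return [
        obj['Path']
        for obj in post_run_diff
        if obj['Kind'] in (DockerDiffKind.CHANGED.value, DockerDiffKind.ADDED.value)
        and obj['Path'] not in pre_start_diff_paths
    ]
```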
```diff
@@ -538,7 +554,7 @@ class DockerExecutor:
             if idx == 0:
                 continue  # always skip root
 
-            folder = '/' + '/'.join(parent_folders[1:idx + 1])
+            folder = '/' + '/'.join(parent_folders[1 : idx + 1])
             known_directories.add(folder)
 
         def path_is_included_in_from_mappings(path: str) -> bool:
@@ -558,11 +574,13 @@ class DockerExecutor:
         files_and_empty_dirs: List[FileInContainer] = []
         for path in run_diff_paths:
             if path not in known_directories and path_is_included_in_from_mappings(path):
-                files_and_empty_dirs.append(
-
-
-
-
+                files_and_empty_dirs.append(
+                    FileInContainer(
+                        container=self._container,
+                        overlay_upper_dir_path=overlay_upper_dir_path,
+                        path_in_container=path,
+                    )
+                )
 
         return files_and_empty_dirs
 
```
biolib/compute_node/job_worker/job_storage.py:

```diff
@@ -10,6 +10,7 @@ from biolib.utils.multipart_uploader import get_chunk_iterator_from_file_object
 
 
 class JobStorage:
+    module_input_file_name = 'input-output.bbf'
     module_output_file_name = 'module-output.bbf'
 
     @staticmethod
@@ -81,7 +82,7 @@ class JobStorage:
         )
 
     @staticmethod
-    def
+    def download_module_input(job: CreatedJobDict, path: str):
         job_uuid = job['public_id']
         logger_no_user_data.debug(f'Job "{job_uuid}" downloading module input...')
         presigned_download_url = BiolibJobApi.get_job_storage_download_url(
@@ -89,7 +90,5 @@ class JobStorage:
             job_auth_token=job['auth_token'],
             storage_type='input',
         )
-
-        data: bytes = response.content
+        HttpClient.request(url=presigned_download_url, response_path=path)
         logger_no_user_data.debug(f'Job "{job_uuid}" module input downloaded')
-        return data
```
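`download_module_input` no longer buffers the response and returns `bytes`; it hands `HttpClient.request` a `response_path` so the body is written directly to disk. Using only the standard library, an equivalent streaming download would look like this (a sketch; the chunk size is an assumption):

```python
import shutil
import urllib.request


def download_to_path(presigned_download_url: str, path: str) -> None:
    # Stream the HTTP response to disk instead of materializing it in memory.
    with urllib.request.urlopen(presigned_download_url) as response, open(path, 'wb') as fp:
        shutil.copyfileobj(response, fp, length=1024 * 1024)  # 1 MiB chunks
```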
biolib/compute_node/job_worker/job_worker.py:

```diff
@@ -133,7 +133,8 @@ class JobWorker:
             ).start()
 
         try:
-
+            module_input_path = os.path.join(self.job_temporary_dir, JobStorage.module_input_file_name)
+            JobStorage.download_module_input(job=job, path=module_input_path)
         except StorageDownloadFailed:
             # Expect module input to be handled in a separate ModuleInput package
             self._legacy_input_wait_timeout_thread = JobLegacyInputWaitTimeout(
@@ -147,7 +148,7 @@ class JobWorker:
             raise error
 
         try:
-            self._run_root_job(
+            self._run_root_job(module_input_path)
 
         # This error occurs when trying to access the container after the job worker has cleaned it up.
         # In that case stop the computation.
@@ -165,7 +166,9 @@ class JobWorker:
             self._legacy_input_wait_timeout_thread.stop()
 
         try:
-            self.
+            module_input_path = os.path.join(self.job_temporary_dir, JobStorage.module_input_file_name)
+            open(module_input_path, 'wb').write(package)
+            self._run_root_job(module_input_path)
 
         # This error occurs when trying to access the container after the job worker has cleaned it up.
         # In that case stop the computation.
@@ -250,7 +253,6 @@ class JobWorker:
 
     def _start_network_and_remote_host_proxies(self, job: CreatedJobDict) -> None:
         app_version = job['app_version']
-        app = app_version.get('app', {})
         job_id = job['public_id']
         remote_hosts = app_version['remote_hosts']
         if utils.IS_RUNNING_IN_CLOUD:
@@ -313,7 +315,6 @@ class JobWorker:
                     self._internal_network,
                     job_id,
                     ports,
-                    can_push_data_record_for_user=app.get('can_push_data_record_for_user', False),
                 )
                 remote_host_proxy.start()
                 self._remote_host_proxies.append(remote_host_proxy)
@@ -331,15 +332,15 @@ class JobWorker:
     def _run_app_version(
         self,
         app_version_id: str,
-
+        module_input_path: str,
         caller_job: CreatedJobDict,
         main_module_output_path: str,
     ) -> None:
         job: CreatedJobDict = BiolibJobApi.create(app_version_id, caller_job=caller_job['public_id'])
         self._jobs[job['public_id']] = job
-        self._run_job(job,
+        self._run_job(job, module_input_path, main_module_output_path)
 
-    def _run_job(self, job: CreatedJobDict,
+    def _run_job(self, job: CreatedJobDict, module_input_path: str, main_module_output_path: str) -> None:
         job_uuid = job['public_id']
         logger_no_user_data.info(f'Job "{job_uuid}" running...')
         if self._root_job_wrapper is None:
@@ -406,7 +407,7 @@ class JobWorker:
                 send_system_exception=self.send_system_exception,
                 send_stdout_and_stderr=self.send_stdout_and_stderr,
             ),
-
+            module_input_path,
             main_module_output_path,
         )
 
@@ -417,15 +418,20 @@ class JobWorker:
     def _run_module(
         self,
         options: LocalExecutorOptions,
-
+        module_input_path: str,
         module_output_path: str,
     ) -> None:
         module = options['module']
         job_id = options['job']['public_id']
         logger_no_user_data.debug(f'Job "{job_id}" running module "{module["name"]}"...')
+
         executor_instance: DockerExecutor
         if module['environment'] == ModuleEnvironment.BIOLIB_APP.value:
+            if not self.job_temporary_dir:
+                raise BioLibError('Undefined job_temporary_dir')
             logger_no_user_data.debug(f'Job "{job_id}" starting child job...')
+            with open(module_input_path,'rb') as fp:
+                module_input_serialized = fp.read()
             module_input = ModuleInput(module_input_serialized).deserialize()
             module_input_with_runtime_zip = self._add_runtime_zip_and_command_to_module_input(options, module_input)
             module_input_with_runtime_zip_serialized = ModuleInput().serialize(
@@ -433,9 +439,11 @@ class JobWorker:
                 arguments=module_input_with_runtime_zip['arguments'],
                 files=module_input_with_runtime_zip['files'],
             )
+            module_input_path_new = os.path.join(self.job_temporary_dir, "runtime." + JobStorage.module_input_file_name)
+            open(module_input_path_new, 'wb').write(module_input_with_runtime_zip_serialized)
             return self._run_app_version(
                 module['image_uri'],
-
+                module_input_path_new,
                 options['job'],
                 module_output_path,
             )
@@ -461,7 +469,7 @@ class JobWorker:
         # Log memory and disk before pulling and executing module
         log_disk_and_memory_usage_info()
 
-        executor_instance.execute_module(
+        executor_instance.execute_module(module_input_path, module_output_path)
 
     def _connect_to_parent(self):
         try:
@@ -587,7 +595,7 @@ class JobWorker:
                 may_contain_user_data=False
             ) from exception
 
-    def _run_root_job(self,
+    def _run_root_job(self, module_input_path: str) -> str:
         # Make typechecker happy
         if not self._root_job_wrapper or not self.job_temporary_dir:
             raise BioLibError('Undefined job_wrapper or job_temporary_dir')
@@ -595,7 +603,7 @@ class JobWorker:
         main_module_output_path = os.path.join(self.job_temporary_dir, JobStorage.module_output_file_name)
         self._run_job(
             job=self._root_job_wrapper['job'],
-
+            module_input_path=module_input_path,
             main_module_output_path=main_module_output_path,
         )
         self._send_status_update(StatusUpdate(progress=94, log_message='Computation finished'))
@@ -614,7 +622,9 @@ class JobWorker:
             job_temporary_dir=job_temporary_dir,
         )
         self._start_network_and_remote_host_proxies(job_dict)
-
+        module_input_path = os.path.join(self.job_temporary_dir, JobStorage.module_input_file_name)
+        open(module_input_path, 'wb').write(module_input_serialized)
+        module_output_path = self._run_root_job(module_input_path)
        with open(module_output_path, mode='rb') as module_output_file:
            module_output_serialized = module_output_file.read()
        return ModuleOutputV2(InMemoryIndexableBuffer(module_output_serialized))
```
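Taken together, the `JobWorker` changes move module input out of process memory and into the job's temporary directory: the payload is written (or downloaded) once as `input-output.bbf`, and every downstream consumer, including child jobs, receives a file path. A condensed sketch of the new flow (simplified names, no error handling; `run_root_job` is a stand-in for `JobWorker._run_root_job`):

```python
import os
import tempfile

MODULE_INPUT_FILE_NAME = 'input-output.bbf'  # JobStorage.module_input_file_name in the diff


def run_root_job(module_input_path: str) -> str:
    # Placeholder: execute the root module and return the path of the
    # serialized module output.
    return module_input_path


def run_job_locally(module_input_serialized: bytes) -> bytes:
    with tempfile.TemporaryDirectory() as job_temporary_dir:
        # Write the serialized input once; pass only the path from here on.
        module_input_path = os.path.join(job_temporary_dir, MODULE_INPUT_FILE_NAME)
        with open(module_input_path, 'wb') as fp:
            fp.write(module_input_serialized)

        module_output_path = run_root_job(module_input_path)
        with open(module_output_path, 'rb') as module_output_file:
            return module_output_file.read()
```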