pybiolib 1.1.1942__py3-none-any.whl → 1.1.1957__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biolib/compute_node/job_worker/executors/docker_executor.py +114 -104
- {pybiolib-1.1.1942.dist-info → pybiolib-1.1.1957.dist-info}/METADATA +1 -1
- {pybiolib-1.1.1942.dist-info → pybiolib-1.1.1957.dist-info}/RECORD +6 -6
- {pybiolib-1.1.1942.dist-info → pybiolib-1.1.1957.dist-info}/LICENSE +0 -0
- {pybiolib-1.1.1942.dist-info → pybiolib-1.1.1957.dist-info}/WHEEL +0 -0
- {pybiolib-1.1.1942.dist-info → pybiolib-1.1.1957.dist-info}/entry_points.txt +0 -0
@@ -1,44 +1,41 @@
|
|
1
|
+
import io
|
1
2
|
import json
|
3
|
+
import os
|
4
|
+
import re
|
5
|
+
import shlex
|
2
6
|
import subprocess
|
7
|
+
import tarfile
|
3
8
|
import tempfile
|
4
9
|
import time
|
5
|
-
import tarfile
|
6
10
|
import zipfile
|
7
|
-
import os
|
8
|
-
import io
|
9
|
-
import re
|
10
|
-
import shlex
|
11
11
|
from copy import copy
|
12
12
|
from datetime import datetime
|
13
13
|
|
14
14
|
import docker # type: ignore
|
15
15
|
import docker.types # type: ignore
|
16
|
-
|
17
|
-
from docker.errors import ImageNotFound, APIError # type: ignore
|
16
|
+
from docker.errors import APIError, ImageNotFound # type: ignore
|
18
17
|
from docker.models.containers import Container # type: ignore
|
19
18
|
|
20
19
|
from biolib import utils
|
21
|
-
|
22
20
|
from biolib._internal.runtime import RuntimeJobDataDict
|
23
21
|
from biolib.biolib_binary_format import ModuleInput, ModuleOutputV2
|
22
|
+
from biolib.biolib_binary_format.file_in_container import FileInContainer
|
24
23
|
from biolib.biolib_docker_client import BiolibDockerClient
|
25
|
-
from biolib.biolib_errors import
|
24
|
+
from biolib.biolib_errors import BioLibError, DockerContainerNotFoundDuringExecutionException
|
26
25
|
from biolib.biolib_logging import logger, logger_no_user_data
|
27
26
|
from biolib.compute_node import utils as compute_node_utils
|
28
27
|
from biolib.compute_node.cloud_utils import CloudUtils
|
29
28
|
from biolib.compute_node.job_worker.docker_image_cache import DockerImageCache
|
30
|
-
from biolib.biolib_binary_format.file_in_container import FileInContainer
|
31
29
|
from biolib.compute_node.job_worker.executors.docker_types import DockerDiffKind
|
32
|
-
from biolib.compute_node.job_worker.executors.types import
|
30
|
+
from biolib.compute_node.job_worker.executors.types import LocalExecutorOptions, StatusUpdate
|
33
31
|
from biolib.compute_node.job_worker.mappings import Mappings, path_without_first_folder
|
34
|
-
from biolib.compute_node.job_worker.utils import ComputeProcessException
|
35
32
|
from biolib.compute_node.job_worker.utilization_reporter_thread import UtilizationReporterThread
|
33
|
+
from biolib.compute_node.job_worker.utils import ComputeProcessException
|
36
34
|
from biolib.compute_node.utils import SystemExceptionCodes
|
37
|
-
from biolib.typing_utils import
|
35
|
+
from biolib.typing_utils import Dict, List, Optional
|
38
36
|
|
39
37
|
|
40
38
|
class DockerExecutor:
|
41
|
-
|
42
39
|
def __init__(self, options: LocalExecutorOptions) -> None:
|
43
40
|
self._options: LocalExecutorOptions = options
|
44
41
|
self._is_cleaning_up = False
|
@@ -116,35 +113,49 @@ class DockerExecutor:
|
|
116
113
|
except Exception: # pylint: disable=broad-except
|
117
114
|
logger_no_user_data.error('DockerExecutor failed to clean up container')
|
118
115
|
|
119
|
-
def _pull(self):
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
image_uri=self._absolute_image_uri,
|
125
|
-
estimated_image_size_bytes=self._options['module']['estimated_image_size_bytes'],
|
126
|
-
job_id=self._options['job']['public_id'],
|
127
|
-
)
|
128
|
-
else:
|
129
|
-
docker_client = BiolibDockerClient.get_docker_client()
|
130
|
-
try:
|
131
|
-
docker_client.images.get(self._absolute_image_uri)
|
132
|
-
except ImageNotFound:
|
133
|
-
job_uuid = self._options['job'].get('federated_job_uuid') or self._options['job']['public_id']
|
134
|
-
docker_client.images.pull(
|
135
|
-
self._absolute_image_uri,
|
136
|
-
auth_config={'username': 'biolib', 'password': f',{job_uuid}'},
|
137
|
-
)
|
116
|
+
def _pull(self) -> None:
|
117
|
+
retries = 3
|
118
|
+
last_error: Optional[Exception] = None
|
119
|
+
estimated_image_size_bytes = self._options['module']['estimated_image_size_bytes']
|
120
|
+
assert estimated_image_size_bytes is not None, 'No estimated image size'
|
138
121
|
|
139
|
-
|
122
|
+
for retry_count in range(retries + 1):
|
123
|
+
if retry_count > 0:
|
124
|
+
logger_no_user_data.debug(f'Retrying Docker image pull of "{self._absolute_image_uri}"')
|
125
|
+
time.sleep(5 * retry_count)
|
126
|
+
try:
|
127
|
+
start_time = time.time()
|
128
|
+
if utils.IS_RUNNING_IN_CLOUD and not self._options['job'].get('federated_job_uuid'):
|
129
|
+
DockerImageCache().get(
|
130
|
+
image_uri=self._absolute_image_uri,
|
131
|
+
estimated_image_size_bytes=estimated_image_size_bytes,
|
132
|
+
job_id=self._options['job']['public_id'],
|
133
|
+
)
|
134
|
+
else:
|
135
|
+
docker_client = BiolibDockerClient.get_docker_client()
|
136
|
+
try:
|
137
|
+
docker_client.images.get(self._absolute_image_uri)
|
138
|
+
except ImageNotFound:
|
139
|
+
job_uuid = self._options['job'].get('federated_job_uuid') or self._options['job']['public_id']
|
140
|
+
docker_client.images.pull(
|
141
|
+
self._absolute_image_uri,
|
142
|
+
auth_config={'username': 'biolib', 'password': f',{job_uuid}'},
|
143
|
+
)
|
144
|
+
|
145
|
+
logger_no_user_data.debug(f'Pulled image in: {time.time() - start_time}')
|
146
|
+
return
|
147
|
+
except Exception as error:
|
148
|
+
logger_no_user_data.warning(
|
149
|
+
f'Pull of Docker image "{self._absolute_image_uri}" returned error: {error}'
|
150
|
+
)
|
151
|
+
last_error = error
|
140
152
|
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
) from exception
|
153
|
+
raise ComputeProcessException(
|
154
|
+
last_error or Exception('Retries exceeded: failed to pull Docker image'),
|
155
|
+
SystemExceptionCodes.FAILED_TO_PULL_DOCKER_IMAGE.value,
|
156
|
+
self._send_system_exception,
|
157
|
+
may_contain_user_data=False,
|
158
|
+
)
|
148
159
|
|
149
160
|
def _execute_helper(self, module_input, module_output_path: str) -> None:
|
150
161
|
job_uuid = self._options['job']['public_id']
|
@@ -155,10 +166,10 @@ class DockerExecutor:
|
|
155
166
|
logger_no_user_data.debug(f'Job "{job_uuid}" starting utilization metrics reporter thread...')
|
156
167
|
config = CloudUtils.get_webserver_config()
|
157
168
|
node_auth_token = config['compute_node_info']['auth_token'] # pylint: disable=unsubscriptable-object
|
158
|
-
cloud_job = self._options[
|
169
|
+
cloud_job = self._options['cloud_job']
|
159
170
|
include_gpu_stats = False
|
160
171
|
if cloud_job:
|
161
|
-
include_gpu_stats = cloud_job.get(
|
172
|
+
include_gpu_stats = cloud_job.get('reserved_gpu_count', 0) > 0
|
162
173
|
UtilizationReporterThread(
|
163
174
|
container=self._container,
|
164
175
|
job_uuid=job_uuid,
|
@@ -213,7 +224,7 @@ class DockerExecutor:
|
|
213
224
|
raise ComputeProcessException(
|
214
225
|
MemoryError(),
|
215
226
|
SystemExceptionCodes.OUT_OF_MEMORY.value,
|
216
|
-
self._send_system_exception
|
227
|
+
self._send_system_exception,
|
217
228
|
)
|
218
229
|
|
219
230
|
logger_no_user_data.debug(f'Docker container exited with code {exit_code} for {job_uuid}')
|
@@ -244,19 +255,16 @@ class DockerExecutor:
|
|
244
255
|
for path_to_delete in [self._input_tar_path, self._runtime_tar_path]:
|
245
256
|
if os.path.exists(path_to_delete):
|
246
257
|
os.remove(path_to_delete)
|
247
|
-
logger_no_user_data.debug(f
|
258
|
+
logger_no_user_data.debug(f'Deleted tars in: {time.time() - tar_time}')
|
248
259
|
|
249
260
|
container_time = time.time()
|
250
261
|
if self._docker_container:
|
251
262
|
self._docker_container.remove(force=True)
|
252
263
|
|
253
264
|
if utils.IS_RUNNING_IN_CLOUD:
|
254
|
-
DockerImageCache().detach_job(
|
255
|
-
image_uri=self._absolute_image_uri,
|
256
|
-
job_id=self._options['job']['public_id']
|
257
|
-
)
|
265
|
+
DockerImageCache().detach_job(image_uri=self._absolute_image_uri, job_id=self._options['job']['public_id'])
|
258
266
|
|
259
|
-
logger_no_user_data.debug(f
|
267
|
+
logger_no_user_data.debug(f'Deleted compute container in: {time.time() - container_time}')
|
260
268
|
self._tmp_secrets_dir.cleanup()
|
261
269
|
|
262
270
|
# TODO: type this method
|
@@ -292,29 +300,35 @@ class DockerExecutor:
|
|
292
300
|
)
|
293
301
|
if app_version_created_at < datetime(2022, 11, 30, 0, 0):
|
294
302
|
environment_vars = module.get('secrets', {})
|
295
|
-
environment_vars.update(
|
296
|
-
|
297
|
-
|
298
|
-
|
303
|
+
environment_vars.update(
|
304
|
+
{
|
305
|
+
'BIOLIB_JOB_UUID': self._options['job']['public_id'],
|
306
|
+
'BIOLIB_JOB_AUTH_TOKEN': self._options['job']['auth_token'],
|
307
|
+
}
|
308
|
+
)
|
299
309
|
|
300
310
|
if utils.IS_RUNNING_IN_CLOUD and self._options['cloud_job']:
|
301
|
-
environment_vars.update(
|
302
|
-
|
303
|
-
|
311
|
+
environment_vars.update(
|
312
|
+
{
|
313
|
+
'BIOLIB_JOB_MAX_RUNTIME_IN_SECONDS': self._options['cloud_job']['max_runtime_in_seconds'],
|
314
|
+
}
|
315
|
+
)
|
304
316
|
|
305
317
|
for proxy in self._options['remote_host_proxies']:
|
306
318
|
proxy_ip = proxy.get_ip_address_on_network(internal_network)
|
307
319
|
if proxy.is_app_caller_proxy:
|
308
320
|
logger_no_user_data.debug('Found app caller proxy, setting both base URLs in compute container')
|
309
|
-
environment_vars.update(
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
321
|
+
environment_vars.update(
|
322
|
+
{
|
323
|
+
'BIOLIB_BASE_URL': f'http://{proxy_ip}',
|
324
|
+
'BIOLIB_CLOUD_BASE_URL': f'http://{proxy_ip}',
|
325
|
+
# This should be removed eventually, but will break apps calling apps on older versions
|
326
|
+
'BIOLIB_CLOUD_RESULTS_BASE_URL': f'http://{proxy_ip}',
|
327
|
+
'BIOLIB_CLOUD_JOB_STORAGE_BASE_URL': f'http://{proxy_ip}',
|
328
|
+
# Inform container if we are targeting public biolib as we change the BIOLIB_BASE_URL
|
329
|
+
'BIOLIB_ENVIRONMENT_IS_PUBLIC_BIOLIB': bool(utils.BASE_URL_IS_PUBLIC_BIOLIB),
|
330
|
+
}
|
331
|
+
)
|
318
332
|
else:
|
319
333
|
extra_hosts[proxy.hostname] = proxy_ip
|
320
334
|
|
@@ -329,45 +343,36 @@ class DockerExecutor:
|
|
329
343
|
|
330
344
|
if self._options['job'].get('arguments_override_command'):
|
331
345
|
# In this case, arguments contains a user specified command to run in the app
|
332
|
-
create_container_args.update({
|
333
|
-
'command': module_input['arguments'],
|
334
|
-
'entrypoint': ''
|
335
|
-
})
|
346
|
+
create_container_args.update({'command': module_input['arguments'], 'entrypoint': ''})
|
336
347
|
|
337
348
|
else:
|
338
|
-
create_container_args.update({
|
339
|
-
'command': shlex.split(module['command']) + module_input['arguments']
|
340
|
-
})
|
349
|
+
create_container_args.update({'command': shlex.split(module['command']) + module_input['arguments']})
|
341
350
|
|
342
351
|
app_version = self._options['job']['app_version']
|
343
352
|
if app_version.get('main_output_file') or app_version.get('stdout_render_type') == 'text':
|
344
353
|
create_container_args['tty'] = True
|
345
354
|
|
346
355
|
if utils.IS_RUNNING_IN_CLOUD:
|
347
|
-
cloud_job = self._options[
|
356
|
+
cloud_job = self._options['cloud_job']
|
348
357
|
create_container_args['mem_limit'] = f'{cloud_job["reserved_memory_in_bytes"]}b'
|
349
|
-
create_container_args['nano_cpus'] = cloud_job[
|
358
|
+
create_container_args['nano_cpus'] = cloud_job['reserved_cpu_in_nano_shares']
|
350
359
|
|
351
360
|
biolib_identity_user_email: Optional[str] = cloud_job.get('biolib_identity_user_email')
|
352
361
|
if biolib_identity_user_email:
|
353
|
-
create_container_args['environment'].update(
|
354
|
-
'BIOLIB_IDENTITY_USER_EMAIL': biolib_identity_user_email
|
355
|
-
|
362
|
+
create_container_args['environment'].update(
|
363
|
+
{'BIOLIB_IDENTITY_USER_EMAIL': biolib_identity_user_email}
|
364
|
+
)
|
356
365
|
|
357
366
|
docker_runtime = os.getenv('BIOLIB_DOCKER_RUNTIME')
|
358
367
|
if docker_runtime is not None:
|
359
368
|
create_container_args['runtime'] = docker_runtime
|
360
369
|
|
361
|
-
self._docker_container = BiolibDockerClient.get_docker_client().containers.create(
|
362
|
-
**create_container_args
|
363
|
-
)
|
370
|
+
self._docker_container = BiolibDockerClient.get_docker_client().containers.create(**create_container_args)
|
364
371
|
|
365
372
|
logger_no_user_data.debug('Finished initializing docker container')
|
366
373
|
except Exception as exception:
|
367
374
|
raise ComputeProcessException(
|
368
|
-
exception,
|
369
|
-
SystemExceptionCodes.FAILED_TO_START_COMPUTE_CONTAINER.value,
|
370
|
-
self._send_system_exception
|
375
|
+
exception, SystemExceptionCodes.FAILED_TO_START_COMPUTE_CONTAINER.value, self._send_system_exception
|
371
376
|
) from exception
|
372
377
|
|
373
378
|
def _add_file_to_tar(self, tar, current_path, mapped_path, data):
|
@@ -434,7 +439,7 @@ class DockerExecutor:
|
|
434
439
|
raise ComputeProcessException(
|
435
440
|
exception,
|
436
441
|
SystemExceptionCodes.FAILED_TO_COPY_INPUT_FILES_TO_COMPUTE_CONTAINER.value,
|
437
|
-
self._send_system_exception
|
442
|
+
self._send_system_exception,
|
438
443
|
) from exception
|
439
444
|
|
440
445
|
def _map_and_copy_runtime_files_to_container(self, runtime_zip_data, arguments: List[str], remove_root_folder=True):
|
@@ -449,17 +454,17 @@ class DockerExecutor:
|
|
449
454
|
raise ComputeProcessException(
|
450
455
|
exception,
|
451
456
|
SystemExceptionCodes.FAILED_TO_COPY_RUNTIME_FILES_TO_COMPUTE_CONTAINER.value,
|
452
|
-
self._send_system_exception
|
457
|
+
self._send_system_exception,
|
453
458
|
) from exception
|
454
459
|
|
455
460
|
def _write_module_output_to_file(
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
461
|
+
self,
|
462
|
+
arguments: List[str],
|
463
|
+
exit_code: int,
|
464
|
+
module_output_path: str,
|
465
|
+
stderr: bytes,
|
466
|
+
stdout: bytes,
|
467
|
+
pre_start_diff: List[Dict],
|
463
468
|
) -> None:
|
464
469
|
mapped_files: List[FileInContainer] = []
|
465
470
|
try:
|
@@ -504,9 +509,11 @@ class DockerExecutor:
|
|
504
509
|
result = subprocess.run(
|
505
510
|
args=[
|
506
511
|
'ctr',
|
507
|
-
'--namespace',
|
512
|
+
'--namespace',
|
513
|
+
'moby',
|
508
514
|
'snapshots',
|
509
|
-
'--snapshotter',
|
515
|
+
'--snapshotter',
|
516
|
+
'nydus',
|
510
517
|
'mounts',
|
511
518
|
'/some_arbitrary_path',
|
512
519
|
str(self._container.id),
|
@@ -528,9 +535,10 @@ class DockerExecutor:
|
|
528
535
|
pre_start_diff_paths = [obj['Path'] for obj in pre_start_diff]
|
529
536
|
post_run_diff = self._container.diff()
|
530
537
|
run_diff_paths: List[str] = [
|
531
|
-
obj['Path']
|
532
|
-
obj
|
533
|
-
obj['
|
538
|
+
obj['Path']
|
539
|
+
for obj in post_run_diff
|
540
|
+
if obj['Kind'] in (DockerDiffKind.CHANGED.value, DockerDiffKind.ADDED.value)
|
541
|
+
and obj['Path'] not in pre_start_diff_paths
|
534
542
|
]
|
535
543
|
|
536
544
|
known_directories = set()
|
@@ -540,7 +548,7 @@ class DockerExecutor:
|
|
540
548
|
if idx == 0:
|
541
549
|
continue # always skip root
|
542
550
|
|
543
|
-
folder = '/' + '/'.join(parent_folders[1:idx + 1])
|
551
|
+
folder = '/' + '/'.join(parent_folders[1 : idx + 1])
|
544
552
|
known_directories.add(folder)
|
545
553
|
|
546
554
|
def path_is_included_in_from_mappings(path: str) -> bool:
|
@@ -560,11 +568,13 @@ class DockerExecutor:
|
|
560
568
|
files_and_empty_dirs: List[FileInContainer] = []
|
561
569
|
for path in run_diff_paths:
|
562
570
|
if path not in known_directories and path_is_included_in_from_mappings(path):
|
563
|
-
files_and_empty_dirs.append(
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
571
|
+
files_and_empty_dirs.append(
|
572
|
+
FileInContainer(
|
573
|
+
container=self._container,
|
574
|
+
overlay_upper_dir_path=overlay_upper_dir_path,
|
575
|
+
path_in_container=path,
|
576
|
+
)
|
577
|
+
)
|
568
578
|
|
569
579
|
return files_and_empty_dirs
|
570
580
|
|
@@ -59,7 +59,7 @@ biolib/compute_node/job_worker/cache_state.py,sha256=MwjSRzcJJ_4jybqvBL4xdgnDYSI
|
|
59
59
|
biolib/compute_node/job_worker/cache_types.py,sha256=ajpLy8i09QeQS9dEqTn3T6NVNMY_YsHQkSD5nvIHccQ,818
|
60
60
|
biolib/compute_node/job_worker/docker_image_cache.py,sha256=ansHIkJIq_EMW1nZNlW-RRLVVeKWTbzNICYaOHpKiRE,7460
|
61
61
|
biolib/compute_node/job_worker/executors/__init__.py,sha256=bW6t1qi3PZTlHM4quaTLa8EI4ALTCk83cqcVJfJfJfE,145
|
62
|
-
biolib/compute_node/job_worker/executors/docker_executor.py,sha256=
|
62
|
+
biolib/compute_node/job_worker/executors/docker_executor.py,sha256=Lo7qgOm6uolNyH7QIuLqTY_6RLyPYeAlOk9HzC82U-s,27332
|
63
63
|
biolib/compute_node/job_worker/executors/docker_types.py,sha256=VhsU1DKtJjx_BbCkVmiPZPH4ROiL1ygW1Y_s1Kbpa2o,216
|
64
64
|
biolib/compute_node/job_worker/executors/tars/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
65
65
|
biolib/compute_node/job_worker/executors/types.py,sha256=yP5gG39hr-DLnw9bOE--VHi-1arDbIYiGuV1rlTbbHI,1466
|
@@ -105,8 +105,8 @@ biolib/utils/cache_state.py,sha256=u256F37QSRIVwqKlbnCyzAX4EMI-kl6Dwu6qwj-Qmag,3
|
|
105
105
|
biolib/utils/multipart_uploader.py,sha256=XvGP1I8tQuKhAH-QugPRoEsCi9qvbRk-DVBs5PNwwJo,8452
|
106
106
|
biolib/utils/seq_util.py,sha256=jC5WhH63FTD7SLFJbxQGA2hOt9NTwq9zHl_BEec1Z0c,4907
|
107
107
|
biolib/utils/zip/remote_zip.py,sha256=0wErYlxir5921agfFeV1xVjf29l9VNgGQvNlWOlj2Yc,23232
|
108
|
-
pybiolib-1.1.
|
109
|
-
pybiolib-1.1.
|
110
|
-
pybiolib-1.1.
|
111
|
-
pybiolib-1.1.
|
112
|
-
pybiolib-1.1.
|
108
|
+
pybiolib-1.1.1957.dist-info/LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
|
109
|
+
pybiolib-1.1.1957.dist-info/METADATA,sha256=dzeKnljFiNgqOp-j8BxQxOYZT_VHpj7yUabW8-HwR_k,1508
|
110
|
+
pybiolib-1.1.1957.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
111
|
+
pybiolib-1.1.1957.dist-info/entry_points.txt,sha256=p6DyaP_2kctxegTX23WBznnrDi4mz6gx04O5uKtRDXg,42
|
112
|
+
pybiolib-1.1.1957.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|