pybiolib 1.1.1747__py3-none-any.whl → 1.1.2193__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. biolib/__init__.py +18 -5
  2. biolib/_data_record/data_record.py +278 -0
  3. biolib/_internal/data_record/__init__.py +1 -0
  4. biolib/_internal/data_record/data_record.py +97 -0
  5. biolib/_internal/data_record/remote_storage_endpoint.py +38 -0
  6. biolib/_internal/file_utils.py +77 -0
  7. biolib/_internal/fuse_mount/__init__.py +1 -0
  8. biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
  9. biolib/_internal/http_client.py +42 -23
  10. biolib/_internal/lfs/__init__.py +1 -0
  11. biolib/_internal/libs/__init__.py +1 -0
  12. biolib/_internal/libs/fusepy/__init__.py +1257 -0
  13. biolib/_internal/push_application.py +22 -37
  14. biolib/_internal/runtime.py +19 -0
  15. biolib/_internal/types/__init__.py +4 -0
  16. biolib/_internal/types/app.py +9 -0
  17. biolib/_internal/types/data_record.py +40 -0
  18. biolib/_internal/types/experiment.py +10 -0
  19. biolib/_internal/types/resource.py +14 -0
  20. biolib/_internal/types/typing.py +7 -0
  21. biolib/_internal/utils/__init__.py +18 -0
  22. biolib/_runtime/runtime.py +80 -0
  23. biolib/api/__init__.py +1 -0
  24. biolib/api/client.py +39 -17
  25. biolib/app/app.py +40 -72
  26. biolib/app/search_apps.py +8 -12
  27. biolib/biolib_api_client/api_client.py +22 -10
  28. biolib/biolib_api_client/app_types.py +2 -1
  29. biolib/biolib_api_client/biolib_app_api.py +1 -1
  30. biolib/biolib_api_client/biolib_job_api.py +6 -0
  31. biolib/biolib_api_client/job_types.py +4 -4
  32. biolib/biolib_api_client/lfs_types.py +8 -2
  33. biolib/biolib_binary_format/remote_endpoints.py +12 -10
  34. biolib/biolib_binary_format/utils.py +41 -4
  35. biolib/cli/__init__.py +6 -2
  36. biolib/cli/auth.py +58 -0
  37. biolib/cli/data_record.py +80 -0
  38. biolib/cli/download_container.py +3 -1
  39. biolib/cli/init.py +1 -0
  40. biolib/cli/lfs.py +45 -11
  41. biolib/cli/push.py +1 -1
  42. biolib/cli/run.py +3 -2
  43. biolib/cli/start.py +1 -0
  44. biolib/compute_node/cloud_utils/cloud_utils.py +15 -18
  45. biolib/compute_node/job_worker/cache_state.py +1 -1
  46. biolib/compute_node/job_worker/executors/docker_executor.py +134 -114
  47. biolib/compute_node/job_worker/job_storage.py +3 -4
  48. biolib/compute_node/job_worker/job_worker.py +31 -15
  49. biolib/compute_node/remote_host_proxy.py +75 -70
  50. biolib/compute_node/webserver/webserver_types.py +0 -1
  51. biolib/experiments/experiment.py +75 -44
  52. biolib/jobs/job.py +125 -47
  53. biolib/jobs/job_result.py +46 -21
  54. biolib/jobs/types.py +1 -1
  55. biolib/runtime/__init__.py +14 -1
  56. biolib/sdk/__init__.py +29 -5
  57. biolib/typing_utils.py +2 -7
  58. biolib/user/sign_in.py +10 -14
  59. biolib/utils/__init__.py +1 -1
  60. biolib/utils/app_uri.py +11 -4
  61. biolib/utils/cache_state.py +2 -2
  62. biolib/utils/seq_util.py +38 -30
  63. {pybiolib-1.1.1747.dist-info → pybiolib-1.1.2193.dist-info}/METADATA +1 -1
  64. pybiolib-1.1.2193.dist-info/RECORD +123 -0
  65. {pybiolib-1.1.1747.dist-info → pybiolib-1.1.2193.dist-info}/WHEEL +1 -1
  66. biolib/biolib_api_client/biolib_account_api.py +0 -8
  67. biolib/biolib_api_client/biolib_large_file_system_api.py +0 -34
  68. biolib/experiments/types.py +0 -9
  69. biolib/lfs/__init__.py +0 -6
  70. biolib/lfs/utils.py +0 -237
  71. biolib/runtime/results.py +0 -20
  72. pybiolib-1.1.1747.dist-info/RECORD +0 -108
  73. /biolib/{lfs → _internal/lfs}/cache.py +0 -0
  74. {pybiolib-1.1.1747.dist-info → pybiolib-1.1.2193.dist-info}/LICENSE +0 -0
  75. {pybiolib-1.1.1747.dist-info → pybiolib-1.1.2193.dist-info}/entry_points.txt +0 -0
biolib/cli/run.py CHANGED
@@ -1,4 +1,5 @@
 import sys
+
 import click
 
 from biolib import biolib_errors, utils
@@ -27,13 +28,13 @@ def run(local: bool, non_blocking: bool, uri: str, args: Tuple[str]) -> None:
         stdin = sys.stdin.read()
         return stdin
 
-    blocking = False if non_blocking else True
+    blocking = not non_blocking
     job = app.cli(
         args=list(args),
         stdin=_get_stdin(),
         files=None,
         machine=('local' if local else ''),
-        blocking=blocking
+        blocking=blocking,
    )
 
    if blocking:
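Note on the `blocking` change above: `False if non_blocking else True` is a boolean inversion written as a conditional expression, so the new code states it directly. A minimal sketch of the equivalence (illustrative only):

    # Both forms agree for every value of the flag.
    for non_blocking in (True, False):
        assert (False if non_blocking else True) == (not non_blocking)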
biolib/cli/start.py CHANGED
@@ -24,6 +24,7 @@ def start(host: str, port: int, tls_certificate: Optional[str], tls_key: Optiona
 
     try:
         from biolib.compute_node.webserver import webserver  # pylint: disable=import-outside-toplevel
+
        webserver.start_webserver(
            host=host,
            port=port,
biolib/compute_node/cloud_utils/cloud_utils.py CHANGED
@@ -7,11 +7,11 @@ import time
 from datetime import datetime
 from socket import gethostbyname, gethostname
 
-from biolib import utils, api
-from biolib.biolib_logging import logger_no_user_data
-from biolib.typing_utils import Optional, List, Dict, cast
+from biolib import api, utils
 from biolib.biolib_api_client import BiolibApiClient
-from biolib.compute_node.webserver.webserver_types import WebserverConfig, ComputeNodeInfo, ShutdownTimes
+from biolib.biolib_logging import logger_no_user_data
+from biolib.compute_node.webserver.webserver_types import ComputeNodeInfo, ShutdownTimes, WebserverConfig
+from biolib.typing_utils import Dict, List, Optional, cast
 
 
 def trust_ceritificates(certs_data: List[str]) -> None:
@@ -54,15 +54,12 @@ class CloudUtils:
                 pybiolib_version=utils.BIOLIB_PACKAGE_VERSION,
             ),
             base_url=CloudUtils._get_environment_variable_or_fail('BIOLIB_BASE_URL'),
-            s3_general_storage_bucket_name=CloudUtils._get_environment_variable_or_fail(
-                'BIOLIB_S3_GENERAL_STORAGE_BUCKET_NAME',
-            ),
             is_dev=os.environ.get('BIOLIB_DEV') == 'TRUE',
             shutdown_times=ShutdownTimes(
                 auto_shutdown_time_in_seconds=CloudUtils._get_environment_variable_as_int(
                     'BIOLIB_CLOUD_AUTO_SHUTDOWN_TIME_IN_SECONDS'
                 ),
-            )
+            ),
        )
 
        return CloudUtils._webserver_config
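The webserver config above is assembled entirely from environment variables and fails fast when a required one is missing (this release also drops the `BIOLIB_S3_GENERAL_STORAGE_BUCKET_NAME` requirement). A minimal sketch of that pattern; the helper bodies are assumptions for illustration, only their names appear in this diff:

    import os

    def _get_environment_variable_or_fail(key: str) -> str:
        # Raise at startup instead of failing later mid-job.
        value = os.environ.get(key)
        if value is None:
            raise Exception(f'Missing environment variable "{key}"')
        return value

    def _get_environment_variable_as_int(key: str) -> int:
        return int(_get_environment_variable_or_fail(key))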
@@ -81,10 +78,10 @@ class CloudUtils:
                         'error': error,
                     },
                 )
-            except BaseException as error:
-                logger_no_user_data.error(f'Failed to deregister got error: {error}')
+            except BaseException as error_object:
+                logger_no_user_data.error(f'Failed to deregister got error: {error_object}')
         else:
-            logger_no_user_data.error("Not deregistering as environment is not cloud")
+            logger_no_user_data.error('Not deregistering as environment is not cloud')
 
     @staticmethod
     def shutdown() -> None:
@@ -98,7 +95,7 @@ class CloudUtils:
             except Exception as error:  # pylint: disable=broad-except
                 logger_no_user_data.error(f'Failed to shutdown got error: {error}')
         else:
-            logger_no_user_data.error("Not running shutdown as environment is not cloud")
+            logger_no_user_data.error('Not running shutdown as environment is not cloud')
 
     @staticmethod
     def deregister_and_shutdown() -> None:
@@ -131,7 +128,7 @@ class CloudUtils:
                     'auth_token': config['compute_node_info']['auth_token'],
                     'cloud_job_id': cloud_job_id,
                     'system_exception_code': system_exception_code,
-                    'exit_code': exit_code
+                    'exit_code': exit_code,
                 },
             )
         except BaseException as error:
@@ -152,14 +149,14 @@ class CloudUtils:
             data=cast(Dict[str, str], compute_node_info),
         )
         if response.status_code != 201:
-            raise Exception("Non 201 error code")
+            raise Exception('Non 201 error code')
         else:
-            logger_no_user_data.info("Compute node registered!")
+            logger_no_user_data.info('Compute node registered!')
             response_data = response.json()
-            logger_no_user_data.info(f"Got data on register: {json.dumps(response_data)}")
+            logger_no_user_data.info(f'Got data on register: {json.dumps(response_data)}')
             certs = []
-            for federation in response_data["federation"]:
-                for cert_b64 in federation["certs_b64"]:
+            for federation in response_data['federation']:
+                for cert_b64 in federation['certs_b64']:
                     certs.append(base64.b64decode(cert_b64).decode())
             trust_ceritificates(certs)
 
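In the registration hunk above, each federation certificate arrives base64-encoded and is decoded back to PEM text before being handed to `trust_ceritificates` (the misspelling is the source's own identifier). A self-contained sketch of the decode step, assuming `certs_b64` holds base64-encoded PEM strings:

    import base64

    # Illustrative response shape; real data comes from the register endpoint.
    pem = '-----BEGIN CERTIFICATE-----\n...\n-----END CERTIFICATE-----'
    response_data = {'federation': [{'certs_b64': [base64.b64encode(pem.encode()).decode()]}]}

    certs = []
    for federation in response_data['federation']:
        for cert_b64 in federation['certs_b64']:
            certs.append(base64.b64decode(cert_b64).decode())  # back to PEM text

    assert certs == [pem]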
biolib/compute_node/job_worker/cache_state.py CHANGED
@@ -16,7 +16,7 @@ class DockerCacheStateError(CacheStateError):
 
 class LfsCacheState(CacheState):
 
-    def __init__(self):
+    def __init__(self) -> None:
         super().__init__()
 
         self._storage_path_for_write: str = self._get_storage_path_for_write()
biolib/compute_node/job_worker/executors/docker_executor.py CHANGED
@@ -1,43 +1,42 @@
+import io
 import json
+import os
+import re
+import shlex
 import subprocess
+import tarfile
 import tempfile
 import time
-import tarfile
 import zipfile
-import os
-import io
-import re
-import shlex
 from copy import copy
 from datetime import datetime
 
 import docker  # type: ignore
 import docker.types  # type: ignore
-
-from docker.errors import ImageNotFound, APIError  # type: ignore
+from docker.errors import APIError, ImageNotFound  # type: ignore
 from docker.models.containers import Container  # type: ignore
 
 from biolib import utils
+from biolib._internal.runtime import RuntimeJobDataDict
 from biolib.biolib_binary_format import ModuleInput, ModuleOutputV2
+from biolib.biolib_binary_format.file_in_container import FileInContainer
 from biolib.biolib_docker_client import BiolibDockerClient
-from biolib.biolib_errors import DockerContainerNotFoundDuringExecutionException, BioLibError
+from biolib.biolib_errors import BioLibError, DockerContainerNotFoundDuringExecutionException
 from biolib.biolib_logging import logger, logger_no_user_data
 from biolib.compute_node import utils as compute_node_utils
 from biolib.compute_node.cloud_utils import CloudUtils
 from biolib.compute_node.job_worker.docker_image_cache import DockerImageCache
-from biolib.biolib_binary_format.file_in_container import FileInContainer
 from biolib.compute_node.job_worker.executors.docker_types import DockerDiffKind
-from biolib.compute_node.job_worker.executors.types import StatusUpdate, LocalExecutorOptions
+from biolib.compute_node.job_worker.executors.types import LocalExecutorOptions, StatusUpdate
 from biolib.compute_node.job_worker.mappings import Mappings, path_without_first_folder
-from biolib.compute_node.job_worker.utils import ComputeProcessException
 from biolib.compute_node.job_worker.utilization_reporter_thread import UtilizationReporterThread
+from biolib.compute_node.job_worker.utils import ComputeProcessException
 from biolib.compute_node.utils import SystemExceptionCodes
-from biolib.typing_utils import List, Dict, Optional
+from biolib.typing_utils import Dict, List, Optional
 
 
 class DockerExecutor:
-
-    def __init__(self, options: LocalExecutorOptions):
+    def __init__(self, options: LocalExecutorOptions) -> None:
         self._options: LocalExecutorOptions = options
         self._is_cleaning_up = False
 
@@ -81,11 +80,12 @@ class DockerExecutor:
             raise Exception('Docker container was None')
         return self._docker_container
 
-    def execute_module(self, module_input_serialized: bytes, module_output_path: str) -> None:
+    def execute_module(self, module_input_path: str, module_output_path: str) -> None:
         try:
             job_uuid = self._options['job']['public_id']
             send_status_update = self._options['send_status_update']
-            module_input = ModuleInput(module_input_serialized).deserialize()
+            with open(module_input_path, 'rb') as fp:
+                module_input = ModuleInput(fp.read()).deserialize()
 
             send_status_update(StatusUpdate(progress=55, log_message='Pulling images...'))
 
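With this change `execute_module` takes a path to the serialized module input rather than the bytes themselves, so a large input can stay on disk until the executor actually reads it. A sketch of the caller side under that assumption (`executor` and `module_input_serialized` are illustrative names, not from this diff):

    import tempfile

    # Write the serialized input once, then hand the executor a path.
    with tempfile.NamedTemporaryFile(suffix='.bbf', delete=False) as tmp_file:
        tmp_file.write(module_input_serialized)

    executor.execute_module(module_input_path=tmp_file.name, module_output_path='module-output.bbf')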
@@ -113,35 +113,49 @@
         except Exception:  # pylint: disable=broad-except
             logger_no_user_data.error('DockerExecutor failed to clean up container')
 
-    def _pull(self):
-        try:
-            start_time = time.time()
-            if utils.IS_RUNNING_IN_CLOUD and not self._options['job'].get('federated_job_uuid'):
-                DockerImageCache().get(
-                    image_uri=self._absolute_image_uri,
-                    estimated_image_size_bytes=self._options['module']['estimated_image_size_bytes'],
-                    job_id=self._options['job']['public_id'],
-                )
-            else:
-                docker_client = BiolibDockerClient.get_docker_client()
-                try:
-                    docker_client.images.get(self._absolute_image_uri)
-                except ImageNotFound:
-                    job_uuid = self._options['job'].get('federated_job_uuid') or self._options['job']['public_id']
-                    docker_client.images.pull(
-                        self._absolute_image_uri,
-                        auth_config={'username': 'biolib', 'password': f',{job_uuid}'},
-                    )
+    def _pull(self) -> None:
+        retries = 3
+        last_error: Optional[Exception] = None
+        estimated_image_size_bytes = self._options['module']['estimated_image_size_bytes']
+        assert estimated_image_size_bytes is not None, 'No estimated image size'
 
-            logger_no_user_data.debug(f'Pulled image in: {time.time() - start_time}')
+        for retry_count in range(retries + 1):
+            if retry_count > 0:
+                logger_no_user_data.debug(f'Retrying Docker image pull of "{self._absolute_image_uri}"')
+                time.sleep(5 * retry_count)
+            try:
+                start_time = time.time()
+                if utils.IS_RUNNING_IN_CLOUD and not self._options['job'].get('federated_job_uuid'):
+                    DockerImageCache().get(
+                        image_uri=self._absolute_image_uri,
+                        estimated_image_size_bytes=estimated_image_size_bytes,
+                        job_id=self._options['job']['public_id'],
+                    )
+                else:
+                    docker_client = BiolibDockerClient.get_docker_client()
+                    try:
+                        docker_client.images.get(self._absolute_image_uri)
+                    except ImageNotFound:
+                        job_uuid = self._options['job'].get('federated_job_uuid') or self._options['job']['public_id']
+                        docker_client.images.pull(
+                            self._absolute_image_uri,
+                            auth_config={'username': 'biolib', 'password': f',{job_uuid}'},
+                        )
+
+                logger_no_user_data.debug(f'Pulled image in: {time.time() - start_time}')
+                return
+            except Exception as error:
+                logger_no_user_data.warning(
+                    f'Pull of Docker image "{self._absolute_image_uri}" returned error: {error}'
+                )
+                last_error = error
 
-        except Exception as exception:
-            raise ComputeProcessException(
-                exception,
-                SystemExceptionCodes.FAILED_TO_PULL_DOCKER_IMAGE.value,
-                self._send_system_exception,
-                may_contain_user_data=False
-            ) from exception
+        raise ComputeProcessException(
+            last_error or Exception('Retries exceeded: failed to pull Docker image'),
+            SystemExceptionCodes.FAILED_TO_PULL_DOCKER_IMAGE.value,
+            self._send_system_exception,
+            may_contain_user_data=False,
+        )
 
     def _execute_helper(self, module_input, module_output_path: str) -> None:
         job_uuid = self._options['job']['public_id']
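The rewritten `_pull` makes up to four attempts (one initial try plus three retries), sleeping 5, 10, then 15 seconds between them, and only raises `ComputeProcessException` once every attempt has failed. The same control flow in isolation, as a sketch where `do_pull` stands in for the cache/registry logic:

    import time
    from typing import Callable, Optional

    def pull_with_retries(do_pull: Callable[[], None], retries: int = 3) -> None:
        last_error: Optional[Exception] = None
        for retry_count in range(retries + 1):  # initial attempt + `retries` retries
            if retry_count > 0:
                time.sleep(5 * retry_count)  # linear backoff: 5s, 10s, 15s
            try:
                do_pull()
                return  # success, stop retrying
            except Exception as error:  # pylint: disable=broad-except
                last_error = error
        raise last_error or Exception('Retries exceeded: failed to pull Docker image')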
@@ -152,10 +166,10 @@
             logger_no_user_data.debug(f'Job "{job_uuid}" starting utilization metrics reporter thread...')
             config = CloudUtils.get_webserver_config()
             node_auth_token = config['compute_node_info']['auth_token']  # pylint: disable=unsubscriptable-object
-            cloud_job = self._options["cloud_job"]
+            cloud_job = self._options['cloud_job']
             include_gpu_stats = False
             if cloud_job:
-                include_gpu_stats = cloud_job.get("reserved_gpu_count", 0) > 0
+                include_gpu_stats = cloud_job.get('reserved_gpu_count', 0) > 0
             UtilizationReporterThread(
                 container=self._container,
                 job_uuid=job_uuid,
@@ -210,7 +224,7 @@
             raise ComputeProcessException(
                 MemoryError(),
                 SystemExceptionCodes.OUT_OF_MEMORY.value,
-                self._send_system_exception
+                self._send_system_exception,
             )
 
         logger_no_user_data.debug(f'Docker container exited with code {exit_code} for {job_uuid}')
@@ -241,24 +255,23 @@
         for path_to_delete in [self._input_tar_path, self._runtime_tar_path]:
             if os.path.exists(path_to_delete):
                 os.remove(path_to_delete)
-        logger_no_user_data.debug(f"Deleted tars in: {time.time() - tar_time}")
+        logger_no_user_data.debug(f'Deleted tars in: {time.time() - tar_time}')
 
         container_time = time.time()
         if self._docker_container:
             self._docker_container.remove(force=True)
 
         if utils.IS_RUNNING_IN_CLOUD:
-            DockerImageCache().detach_job(
-                image_uri=self._absolute_image_uri,
-                job_id=self._options['job']['public_id']
-            )
+            DockerImageCache().detach_job(image_uri=self._absolute_image_uri, job_id=self._options['job']['public_id'])
 
-        logger_no_user_data.debug(f"Deleted compute container in: {time.time() - container_time}")
+        logger_no_user_data.debug(f'Deleted compute container in: {time.time() - container_time}')
         self._tmp_secrets_dir.cleanup()
 
     # TODO: type this method
     def _initialize_docker_container(self, module_input):
         try:
+            job_uuid = self._options['job']['public_id']
+            logger_no_user_data.debug(f'Job "{job_uuid}" initializing Docker container...')
             module = self._options['module']
             logger.debug(f"Initializing docker container with command: {module['command']}")
 
@@ -267,12 +280,13 @@
             internal_network = self._options['internal_network']
             extra_hosts: Dict[str, str] = {}
 
-            biolib_system_secret = {
-                'version': '1.0.0',
-                'job_requested_machine': self._options['job']['requested_machine'],
-                'job_uuid': self._options['job']['public_id'],
-                'job_auth_token': self._options['job']['auth_token'],
-            }
+            biolib_system_secret = RuntimeJobDataDict(
+                version='1.0.0',
+                job_requested_machine=self._options['job']['requested_machine'],
+                job_uuid=self._options['job']['public_id'],
+                job_auth_token=self._options['job']['auth_token'],
+                app_uri=self._options['job']['app_uri'],
+            )
             secrets: Dict[str, str] = dict(
                 **module.get('secrets', {}),
                 biolib_system_secret=json.dumps(biolib_system_secret, indent=4),
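`biolib_system_secret` is now built as a `RuntimeJobDataDict` (from the new `biolib/_internal/runtime.py`) rather than a bare dict, so type checkers can validate its keys. The class definition is not shown in this section; a plausible sketch, assuming it is a `TypedDict` with exactly the fields used above (instances are plain dicts at runtime, so `json.dumps` serializes them unchanged):

    from typing import TypedDict

    class RuntimeJobDataDict(TypedDict):
        version: str
        job_requested_machine: str
        job_uuid: str
        job_auth_token: str
        app_uri: str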
@@ -288,32 +302,40 @@
             )
             if app_version_created_at < datetime(2022, 11, 30, 0, 0):
                 environment_vars = module.get('secrets', {})
-                environment_vars.update({
-                    'BIOLIB_JOB_UUID': self._options['job']['public_id'],
-                    'BIOLIB_JOB_AUTH_TOKEN': self._options['job']['auth_token']
-                })
+                environment_vars.update(
+                    {
+                        'BIOLIB_JOB_UUID': self._options['job']['public_id'],
+                        'BIOLIB_JOB_AUTH_TOKEN': self._options['job']['auth_token'],
+                    }
+                )
 
             if utils.IS_RUNNING_IN_CLOUD and self._options['cloud_job']:
-                environment_vars.update({
-                    'BIOLIB_JOB_MAX_RUNTIME_IN_SECONDS': self._options['cloud_job']['max_runtime_in_seconds'],
-                })
+                environment_vars.update(
+                    {
+                        'BIOLIB_JOB_MAX_RUNTIME_IN_SECONDS': self._options['cloud_job']['max_runtime_in_seconds'],
+                    }
+                )
 
+            logger_no_user_data.debug(f'Job "{job_uuid}" initializing Docker container. Getting IPs for proxies...')
             for proxy in self._options['remote_host_proxies']:
                 proxy_ip = proxy.get_ip_address_on_network(internal_network)
                 if proxy.is_app_caller_proxy:
                     logger_no_user_data.debug('Found app caller proxy, setting both base URLs in compute container')
-                    environment_vars.update({
-                        'BIOLIB_BASE_URL': f'http://{proxy_ip}',
-                        'BIOLIB_CLOUD_BASE_URL': f'http://{proxy_ip}',
-                        # This should be removed eventually, but will break apps calling apps on older versions
-                        'BIOLIB_CLOUD_RESULTS_BASE_URL': f'http://{proxy_ip}',
-                        'BIOLIB_CLOUD_JOB_STORAGE_BASE_URL': f'http://{proxy_ip}',
-                        # Inform container if we are targeting public biolib as we change the BIOLIB_BASE_URL
-                        'BIOLIB_ENVIRONMENT_IS_PUBLIC_BIOLIB': bool(utils.BASE_URL_IS_PUBLIC_BIOLIB)
-                    })
+                    environment_vars.update(
+                        {
+                            'BIOLIB_BASE_URL': f'http://{proxy_ip}',
+                            'BIOLIB_CLOUD_BASE_URL': f'http://{proxy_ip}',
+                            # This should be removed eventually, but will break apps calling apps on older versions
+                            'BIOLIB_CLOUD_RESULTS_BASE_URL': f'http://{proxy_ip}',
+                            'BIOLIB_CLOUD_JOB_STORAGE_BASE_URL': f'http://{proxy_ip}',
+                            # Inform container if we are targeting public biolib as we change the BIOLIB_BASE_URL
+                            'BIOLIB_ENVIRONMENT_IS_PUBLIC_BIOLIB': bool(utils.BASE_URL_IS_PUBLIC_BIOLIB),
+                        }
+                    )
                 else:
                     extra_hosts[proxy.hostname] = proxy_ip
 
+            logger_no_user_data.debug(f'Job "{job_uuid}" initializing Docker container. Constructing container args...')
             create_container_args = {
                 'environment': environment_vars,
                 'extra_hosts': extra_hosts,
@@ -325,45 +347,38 @@
 
             if self._options['job'].get('arguments_override_command'):
                 # In this case, arguments contains a user specified command to run in the app
-                create_container_args.update({
-                    'command': module_input['arguments'],
-                    'entrypoint': ''
-                })
+                create_container_args.update({'command': module_input['arguments'], 'entrypoint': ''})
 
             else:
-                create_container_args.update({
-                    'command': shlex.split(module['command']) + module_input['arguments']
-                })
+                create_container_args.update({'command': shlex.split(module['command']) + module_input['arguments']})
 
             app_version = self._options['job']['app_version']
             if app_version.get('main_output_file') or app_version.get('stdout_render_type') == 'text':
                 create_container_args['tty'] = True
 
             if utils.IS_RUNNING_IN_CLOUD:
-                cloud_job = self._options["cloud_job"]
+                cloud_job = self._options['cloud_job']
                 create_container_args['mem_limit'] = f'{cloud_job["reserved_memory_in_bytes"]}b'
-                create_container_args['nano_cpus'] = cloud_job["reserved_cpu_in_nano_shares"]
+                create_container_args['nano_cpus'] = cloud_job['reserved_cpu_in_nano_shares']
+                create_container_args['pids_limit'] = 10_000
 
                 biolib_identity_user_email: Optional[str] = cloud_job.get('biolib_identity_user_email')
                 if biolib_identity_user_email:
-                    create_container_args['environment'].update({
-                        'BIOLIB_IDENTITY_USER_EMAIL': biolib_identity_user_email
-                    })
+                    create_container_args['environment'].update(
+                        {'BIOLIB_IDENTITY_USER_EMAIL': biolib_identity_user_email}
+                    )
 
             docker_runtime = os.getenv('BIOLIB_DOCKER_RUNTIME')
             if docker_runtime is not None:
                 create_container_args['runtime'] = docker_runtime
 
-            self._docker_container = BiolibDockerClient.get_docker_client().containers.create(
-                **create_container_args
-            )
-
-            logger_no_user_data.debug('Finished initializing docker container')
+            docker_client = BiolibDockerClient.get_docker_client()
+            logger_no_user_data.debug(f'Job "{job_uuid}" initializing Docker container. Creating container...')
+            self._docker_container = docker_client.containers.create(**create_container_args)
+            logger_no_user_data.debug(f'Job "{job_uuid}" finished initializing Docker container.')
         except Exception as exception:
             raise ComputeProcessException(
-                exception,
-                SystemExceptionCodes.FAILED_TO_START_COMPUTE_CONTAINER.value,
-                self._send_system_exception
+                exception, SystemExceptionCodes.FAILED_TO_START_COMPUTE_CONTAINER.value, self._send_system_exception
             ) from exception
 
     def _add_file_to_tar(self, tar, current_path, mapped_path, data):
@@ -430,7 +445,7 @@
             raise ComputeProcessException(
                 exception,
                 SystemExceptionCodes.FAILED_TO_COPY_INPUT_FILES_TO_COMPUTE_CONTAINER.value,
-                self._send_system_exception
+                self._send_system_exception,
             ) from exception
 
     def _map_and_copy_runtime_files_to_container(self, runtime_zip_data, arguments: List[str], remove_root_folder=True):
@@ -445,17 +460,17 @@
             raise ComputeProcessException(
                 exception,
                 SystemExceptionCodes.FAILED_TO_COPY_RUNTIME_FILES_TO_COMPUTE_CONTAINER.value,
-                self._send_system_exception
+                self._send_system_exception,
             ) from exception
 
     def _write_module_output_to_file(
-            self,
-            arguments: List[str],
-            exit_code: int,
-            module_output_path: str,
-            stderr: bytes,
-            stdout: bytes,
-            pre_start_diff: List[Dict],
+        self,
+        arguments: List[str],
+        exit_code: int,
+        module_output_path: str,
+        stderr: bytes,
+        stdout: bytes,
+        pre_start_diff: List[Dict],
     ) -> None:
         mapped_files: List[FileInContainer] = []
         try:
@@ -500,9 +515,11 @@
         result = subprocess.run(
             args=[
                 'ctr',
-                '--namespace', 'moby',
+                '--namespace',
+                'moby',
                 'snapshots',
-                '--snapshotter', 'nydus',
+                '--snapshotter',
+                'nydus',
                 'mounts',
                 '/some_arbitrary_path',
                 str(self._container.id),
@@ -524,9 +541,10 @@
         pre_start_diff_paths = [obj['Path'] for obj in pre_start_diff]
         post_run_diff = self._container.diff()
         run_diff_paths: List[str] = [
-            obj['Path'] for obj in post_run_diff if
-            obj['Kind'] in (DockerDiffKind.CHANGED.value, DockerDiffKind.ADDED.value) and
-            obj['Path'] not in pre_start_diff_paths
+            obj['Path']
+            for obj in post_run_diff
+            if obj['Kind'] in (DockerDiffKind.CHANGED.value, DockerDiffKind.ADDED.value)
+            and obj['Path'] not in pre_start_diff_paths
         ]
 
         known_directories = set()
@@ -536,7 +554,7 @@
             if idx == 0:
                 continue  # always skip root
 
-            folder = '/' + '/'.join(parent_folders[1:idx + 1])
+            folder = '/' + '/'.join(parent_folders[1 : idx + 1])
             known_directories.add(folder)
 
         def path_is_included_in_from_mappings(path: str) -> bool:
@@ -556,11 +574,13 @@
         files_and_empty_dirs: List[FileInContainer] = []
         for path in run_diff_paths:
            if path not in known_directories and path_is_included_in_from_mappings(path):
-                files_and_empty_dirs.append(FileInContainer(
-                    container=self._container,
-                    overlay_upper_dir_path=overlay_upper_dir_path,
-                    path_in_container=path,
-                ))
+                files_and_empty_dirs.append(
+                    FileInContainer(
+                        container=self._container,
+                        overlay_upper_dir_path=overlay_upper_dir_path,
+                        path_in_container=path,
+                    )
+                )
 
         return files_and_empty_dirs
 
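The last hunks above reshape how output files are detected: the container's filesystem diff after the run is filtered down to paths that were changed or added and were not already present in the pre-start diff. A condensed, self-contained sketch of that filtering (the kind constants follow Docker's diff convention of 0 = changed, 1 = added, 2 = deleted):

    CHANGED, ADDED = 0, 1

    pre_start_diff = [{'Path': '/tmp/cache', 'Kind': ADDED}]
    post_run_diff = [
        {'Path': '/tmp/cache', 'Kind': ADDED},     # existed before start: excluded
        {'Path': '/home/out.txt', 'Kind': ADDED},  # produced by the run: kept
        {'Path': '/var/log/old.log', 'Kind': 2},   # deleted: excluded by kind
    ]

    pre_start_diff_paths = [obj['Path'] for obj in pre_start_diff]
    run_diff_paths = [
        obj['Path']
        for obj in post_run_diff
        if obj['Kind'] in (CHANGED, ADDED) and obj['Path'] not in pre_start_diff_paths
    ]
    assert run_diff_paths == ['/home/out.txt']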
biolib/compute_node/job_worker/job_storage.py CHANGED
@@ -10,6 +10,7 @@ from biolib.utils.multipart_uploader import get_chunk_iterator_from_file_object
 
 
 class JobStorage:
+    module_input_file_name = 'input-output.bbf'
     module_output_file_name = 'module-output.bbf'
 
     @staticmethod
@@ -81,7 +82,7 @@
         )
 
     @staticmethod
-    def get_module_input(job: CreatedJobDict) -> bytes:
+    def download_module_input(job: CreatedJobDict, path: str):
         job_uuid = job['public_id']
         logger_no_user_data.debug(f'Job "{job_uuid}" downloading module input...')
         presigned_download_url = BiolibJobApi.get_job_storage_download_url(
@@ -89,7 +90,5 @@
             job_auth_token=job['auth_token'],
             storage_type='input',
         )
-        response = HttpClient.request(url=presigned_download_url)
-        data: bytes = response.content
+        HttpClient.request(url=presigned_download_url, response_path=path)
         logger_no_user_data.debug(f'Job "{job_uuid}" module input downloaded')
-        return data
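`get_module_input`, which returned the whole payload as `bytes`, is replaced by `download_module_input`, which writes the presigned-URL response to a file via an `HttpClient` `response_path` parameter (`biolib/_internal/http_client.py` also changed in this release; its internals are not shown here). A standard-library sketch of the same stream-to-disk idea:

    import shutil
    import urllib.request

    def download_to_path(url: str, path: str) -> None:
        # Copy the response body to disk in chunks instead of
        # materializing it in memory as one bytes object.
        with urllib.request.urlopen(url) as response, open(path, 'wb') as file:
            shutil.copyfileobj(response, file)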