pybiolib 1.1.1881__py3-none-any.whl → 1.2.7.dev0__py3-none-any.whl

This diff compares publicly available package versions as released to one of the supported registries and is provided for informational purposes only.
Files changed (60)
  1. biolib/__init__.py +11 -4
  2. biolib/_data_record/data_record.py +278 -0
  3. biolib/_internal/data_record/__init__.py +1 -1
  4. biolib/_internal/data_record/data_record.py +97 -151
  5. biolib/_internal/data_record/remote_storage_endpoint.py +18 -7
  6. biolib/_internal/file_utils.py +77 -0
  7. biolib/_internal/fuse_mount/__init__.py +1 -0
  8. biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
  9. biolib/_internal/http_client.py +31 -9
  10. biolib/_internal/lfs/__init__.py +1 -0
  11. biolib/_internal/libs/__init__.py +1 -0
  12. biolib/_internal/libs/fusepy/__init__.py +1257 -0
  13. biolib/_internal/push_application.py +6 -1
  14. biolib/_internal/runtime.py +3 -56
  15. biolib/_internal/types/__init__.py +4 -0
  16. biolib/_internal/types/app.py +9 -0
  17. biolib/_internal/types/data_record.py +40 -0
  18. biolib/_internal/types/experiment.py +10 -0
  19. biolib/_internal/types/resource.py +14 -0
  20. biolib/_internal/types/typing.py +7 -0
  21. biolib/_internal/utils/multinode.py +264 -0
  22. biolib/_runtime/runtime.py +84 -0
  23. biolib/api/__init__.py +1 -0
  24. biolib/api/client.py +39 -17
  25. biolib/app/app.py +34 -71
  26. biolib/biolib_api_client/api_client.py +9 -2
  27. biolib/biolib_api_client/app_types.py +3 -2
  28. biolib/biolib_api_client/biolib_job_api.py +6 -0
  29. biolib/biolib_api_client/job_types.py +4 -4
  30. biolib/biolib_api_client/lfs_types.py +8 -2
  31. biolib/biolib_binary_format/remote_endpoints.py +12 -10
  32. biolib/biolib_binary_format/utils.py +23 -3
  33. biolib/cli/auth.py +1 -1
  34. biolib/cli/data_record.py +45 -6
  35. biolib/cli/lfs.py +10 -6
  36. biolib/compute_node/cloud_utils/cloud_utils.py +13 -16
  37. biolib/compute_node/job_worker/executors/docker_executor.py +127 -108
  38. biolib/compute_node/job_worker/job_storage.py +17 -5
  39. biolib/compute_node/job_worker/job_worker.py +25 -15
  40. biolib/compute_node/remote_host_proxy.py +72 -84
  41. biolib/compute_node/webserver/webserver_types.py +0 -1
  42. biolib/compute_node/webserver/worker_thread.py +42 -39
  43. biolib/experiments/experiment.py +75 -44
  44. biolib/jobs/job.py +98 -19
  45. biolib/jobs/job_result.py +46 -21
  46. biolib/jobs/types.py +1 -1
  47. biolib/runtime/__init__.py +2 -1
  48. biolib/sdk/__init__.py +18 -7
  49. biolib/typing_utils.py +2 -7
  50. biolib/user/sign_in.py +2 -2
  51. biolib/utils/seq_util.py +38 -35
  52. {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/METADATA +1 -1
  53. {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/RECORD +57 -45
  54. biolib/experiments/types.py +0 -9
  55. biolib/lfs/__init__.py +0 -4
  56. biolib/lfs/utils.py +0 -153
  57. /biolib/{lfs → _internal/lfs}/cache.py +0 -0
  58. {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/LICENSE +0 -0
  59. {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/WHEEL +0 -0
  60. {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/entry_points.txt +0 -0
biolib/compute_node/job_worker/executors/docker_executor.py +127 -108
@@ -1,44 +1,41 @@
+import io
 import json
+import os
+import re
+import shlex
 import subprocess
+import tarfile
 import tempfile
 import time
-import tarfile
 import zipfile
-import os
-import io
-import re
-import shlex
 from copy import copy
 from datetime import datetime
 
 import docker  # type: ignore
 import docker.types  # type: ignore
-
-from docker.errors import ImageNotFound, APIError  # type: ignore
+from docker.errors import APIError, ImageNotFound  # type: ignore
 from docker.models.containers import Container  # type: ignore
 
 from biolib import utils
-
 from biolib._internal.runtime import RuntimeJobDataDict
 from biolib.biolib_binary_format import ModuleInput, ModuleOutputV2
+from biolib.biolib_binary_format.file_in_container import FileInContainer
 from biolib.biolib_docker_client import BiolibDockerClient
-from biolib.biolib_errors import DockerContainerNotFoundDuringExecutionException, BioLibError
+from biolib.biolib_errors import BioLibError, DockerContainerNotFoundDuringExecutionException
 from biolib.biolib_logging import logger, logger_no_user_data
 from biolib.compute_node import utils as compute_node_utils
 from biolib.compute_node.cloud_utils import CloudUtils
 from biolib.compute_node.job_worker.docker_image_cache import DockerImageCache
-from biolib.biolib_binary_format.file_in_container import FileInContainer
 from biolib.compute_node.job_worker.executors.docker_types import DockerDiffKind
-from biolib.compute_node.job_worker.executors.types import StatusUpdate, LocalExecutorOptions
+from biolib.compute_node.job_worker.executors.types import LocalExecutorOptions, StatusUpdate
 from biolib.compute_node.job_worker.mappings import Mappings, path_without_first_folder
-from biolib.compute_node.job_worker.utils import ComputeProcessException
 from biolib.compute_node.job_worker.utilization_reporter_thread import UtilizationReporterThread
+from biolib.compute_node.job_worker.utils import ComputeProcessException
 from biolib.compute_node.utils import SystemExceptionCodes
-from biolib.typing_utils import List, Dict, Optional
+from biolib.typing_utils import Dict, List, Optional
 
 
 class DockerExecutor:
-
     def __init__(self, options: LocalExecutorOptions) -> None:
         self._options: LocalExecutorOptions = options
         self._is_cleaning_up = False
@@ -83,11 +80,12 @@ class DockerExecutor:
             raise Exception('Docker container was None')
         return self._docker_container
 
-    def execute_module(self, module_input_serialized: bytes, module_output_path: str) -> None:
+    def execute_module(self, module_input_path: str, module_output_path: str) -> None:
         try:
            job_uuid = self._options['job']['public_id']
            send_status_update = self._options['send_status_update']
-           module_input = ModuleInput(module_input_serialized).deserialize()
+           with open(module_input_path, 'rb') as fp:
+               module_input = ModuleInput(fp.read()).deserialize()
 
            send_status_update(StatusUpdate(progress=55, log_message='Pulling images...'))
 
@@ -115,35 +113,49 @@ class DockerExecutor:
         except Exception:  # pylint: disable=broad-except
             logger_no_user_data.error('DockerExecutor failed to clean up container')
 
-    def _pull(self):
-        try:
-            start_time = time.time()
-            if utils.IS_RUNNING_IN_CLOUD and not self._options['job'].get('federated_job_uuid'):
-                DockerImageCache().get(
-                    image_uri=self._absolute_image_uri,
-                    estimated_image_size_bytes=self._options['module']['estimated_image_size_bytes'],
-                    job_id=self._options['job']['public_id'],
-                )
-            else:
-                docker_client = BiolibDockerClient.get_docker_client()
-                try:
-                    docker_client.images.get(self._absolute_image_uri)
-                except ImageNotFound:
-                    job_uuid = self._options['job'].get('federated_job_uuid') or self._options['job']['public_id']
-                    docker_client.images.pull(
-                        self._absolute_image_uri,
-                        auth_config={'username': 'biolib', 'password': f',{job_uuid}'},
-                    )
+    def _pull(self) -> None:
+        retries = 3
+        last_error: Optional[Exception] = None
+        estimated_image_size_bytes = self._options['module']['estimated_image_size_bytes']
+        assert estimated_image_size_bytes is not None, 'No estimated image size'
 
-            logger_no_user_data.debug(f'Pulled image in: {time.time() - start_time}')
+        for retry_count in range(retries + 1):
+            if retry_count > 0:
+                logger_no_user_data.debug(f'Retrying Docker image pull of "{self._absolute_image_uri}"')
+                time.sleep(5 * retry_count)
+            try:
+                start_time = time.time()
+                if utils.IS_RUNNING_IN_CLOUD and not self._options['job'].get('federated_job_uuid'):
+                    DockerImageCache().get(
+                        image_uri=self._absolute_image_uri,
+                        estimated_image_size_bytes=estimated_image_size_bytes,
+                        job_id=self._options['job']['public_id'],
+                    )
+                else:
+                    docker_client = BiolibDockerClient.get_docker_client()
+                    try:
+                        docker_client.images.get(self._absolute_image_uri)
+                    except ImageNotFound:
+                        job_uuid = self._options['job'].get('federated_job_uuid') or self._options['job']['public_id']
+                        docker_client.images.pull(
+                            self._absolute_image_uri,
+                            auth_config={'username': 'biolib', 'password': f',{job_uuid}'},
+                        )
+
+                logger_no_user_data.debug(f'Pulled image in: {time.time() - start_time}')
+                return
+            except Exception as error:
+                logger_no_user_data.warning(
+                    f'Pull of Docker image "{self._absolute_image_uri}" returned error: {error}'
+                )
+                last_error = error
 
-        except Exception as exception:
-            raise ComputeProcessException(
-                exception,
-                SystemExceptionCodes.FAILED_TO_PULL_DOCKER_IMAGE.value,
-                self._send_system_exception,
-                may_contain_user_data=False
-            ) from exception
+        raise ComputeProcessException(
+            last_error or Exception('Retries exceeded: failed to pull Docker image'),
+            SystemExceptionCodes.FAILED_TO_PULL_DOCKER_IMAGE.value,
+            self._send_system_exception,
+            may_contain_user_data=False,
+        )
 
     def _execute_helper(self, module_input, module_output_path: str) -> None:
         job_uuid = self._options['job']['public_id']
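Note: the rewritten _pull above makes up to four attempts with a linear back-off, so a transient registry failure is retried after 5, 10 and then 15 seconds before giving up. A minimal standalone sketch of that schedule (illustrative only, not part of pybiolib; 'action' is a hypothetical stand-in for the image-pull call):

import time

def attempt_with_linear_backoff(action, retries=3, base_delay_seconds=5):
    # Attempt 0 runs immediately; retries 1..3 sleep 5 s, 10 s and 15 s first,
    # mirroring the time.sleep(5 * retry_count) in the hunk above.
    last_error = None
    for retry_count in range(retries + 1):
        if retry_count > 0:
            time.sleep(base_delay_seconds * retry_count)
        try:
            return action()
        except Exception as error:  # broad catch so the last error can be re-raised
            last_error = error
    raise last_error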
@@ -154,10 +166,10 @@ class DockerExecutor:
             logger_no_user_data.debug(f'Job "{job_uuid}" starting utilization metrics reporter thread...')
             config = CloudUtils.get_webserver_config()
             node_auth_token = config['compute_node_info']['auth_token']  # pylint: disable=unsubscriptable-object
-            cloud_job = self._options["cloud_job"]
+            cloud_job = self._options['cloud_job']
             include_gpu_stats = False
             if cloud_job:
-                include_gpu_stats = cloud_job.get("reserved_gpu_count", 0) > 0
+                include_gpu_stats = cloud_job.get('reserved_gpu_count', 0) > 0
             UtilizationReporterThread(
                 container=self._container,
                 job_uuid=job_uuid,
@@ -212,7 +224,7 @@ class DockerExecutor:
             raise ComputeProcessException(
                 MemoryError(),
                 SystemExceptionCodes.OUT_OF_MEMORY.value,
-                self._send_system_exception
+                self._send_system_exception,
             )
 
         logger_no_user_data.debug(f'Docker container exited with code {exit_code} for {job_uuid}')
@@ -243,24 +255,23 @@ class DockerExecutor:
         for path_to_delete in [self._input_tar_path, self._runtime_tar_path]:
             if os.path.exists(path_to_delete):
                 os.remove(path_to_delete)
-        logger_no_user_data.debug(f"Deleted tars in: {time.time() - tar_time}")
+        logger_no_user_data.debug(f'Deleted tars in: {time.time() - tar_time}')
 
         container_time = time.time()
         if self._docker_container:
             self._docker_container.remove(force=True)
 
         if utils.IS_RUNNING_IN_CLOUD:
-            DockerImageCache().detach_job(
-                image_uri=self._absolute_image_uri,
-                job_id=self._options['job']['public_id']
-            )
+            DockerImageCache().detach_job(image_uri=self._absolute_image_uri, job_id=self._options['job']['public_id'])
 
-        logger_no_user_data.debug(f"Deleted compute container in: {time.time() - container_time}")
+        logger_no_user_data.debug(f'Deleted compute container in: {time.time() - container_time}')
         self._tmp_secrets_dir.cleanup()
 
     # TODO: type this method
     def _initialize_docker_container(self, module_input):
         try:
+            job_uuid = self._options['job']['public_id']
+            logger_no_user_data.debug(f'Job "{job_uuid}" initializing Docker container...')
             module = self._options['module']
             logger.debug(f"Initializing docker container with command: {module['command']}")
 
@@ -274,6 +285,8 @@ class DockerExecutor:
                 job_requested_machine=self._options['job']['requested_machine'],
                 job_uuid=self._options['job']['public_id'],
                 job_auth_token=self._options['job']['auth_token'],
+                app_uri=self._options['job']['app_uri'],
+                is_environment_biolib_cloud=bool(utils.IS_RUNNING_IN_CLOUD),
             )
             secrets: Dict[str, str] = dict(
                 **module.get('secrets', {}),
@@ -290,32 +303,40 @@ class DockerExecutor:
             )
             if app_version_created_at < datetime(2022, 11, 30, 0, 0):
                 environment_vars = module.get('secrets', {})
-                environment_vars.update({
-                    'BIOLIB_JOB_UUID': self._options['job']['public_id'],
-                    'BIOLIB_JOB_AUTH_TOKEN': self._options['job']['auth_token']
-                })
+                environment_vars.update(
+                    {
+                        'BIOLIB_JOB_UUID': self._options['job']['public_id'],
+                        'BIOLIB_JOB_AUTH_TOKEN': self._options['job']['auth_token'],
+                    }
+                )
 
             if utils.IS_RUNNING_IN_CLOUD and self._options['cloud_job']:
-                environment_vars.update({
-                    'BIOLIB_JOB_MAX_RUNTIME_IN_SECONDS': self._options['cloud_job']['max_runtime_in_seconds'],
-                })
+                environment_vars.update(
+                    {
+                        'BIOLIB_JOB_MAX_RUNTIME_IN_SECONDS': self._options['cloud_job']['max_runtime_in_seconds'],
+                    }
+                )
 
+            logger_no_user_data.debug(f'Job "{job_uuid}" initializing Docker container. Getting IPs for proxies...')
             for proxy in self._options['remote_host_proxies']:
                 proxy_ip = proxy.get_ip_address_on_network(internal_network)
                 if proxy.is_app_caller_proxy:
                     logger_no_user_data.debug('Found app caller proxy, setting both base URLs in compute container')
-                    environment_vars.update({
-                        'BIOLIB_BASE_URL': f'http://{proxy_ip}',
-                        'BIOLIB_CLOUD_BASE_URL': f'http://{proxy_ip}',
-                        # This should be removed eventually, but will break apps calling apps on older versions
-                        'BIOLIB_CLOUD_RESULTS_BASE_URL': f'http://{proxy_ip}',
-                        'BIOLIB_CLOUD_JOB_STORAGE_BASE_URL': f'http://{proxy_ip}',
-                        # Inform container if we are targeting public biolib as we change the BIOLIB_BASE_URL
-                        'BIOLIB_ENVIRONMENT_IS_PUBLIC_BIOLIB': bool(utils.BASE_URL_IS_PUBLIC_BIOLIB)
-                    })
+                    environment_vars.update(
+                        {
+                            'BIOLIB_BASE_URL': f'http://{proxy_ip}',
+                            'BIOLIB_CLOUD_BASE_URL': f'http://{proxy_ip}',
+                            # This should be removed eventually, but will break apps calling apps on older versions
+                            'BIOLIB_CLOUD_RESULTS_BASE_URL': f'http://{proxy_ip}',
+                            'BIOLIB_CLOUD_JOB_STORAGE_BASE_URL': f'http://{proxy_ip}',
+                            # Inform container if we are targeting public biolib as we change the BIOLIB_BASE_URL
+                            'BIOLIB_ENVIRONMENT_IS_PUBLIC_BIOLIB': bool(utils.BASE_URL_IS_PUBLIC_BIOLIB),
+                        }
+                    )
                 else:
                     extra_hosts[proxy.hostname] = proxy_ip
 
+            logger_no_user_data.debug(f'Job "{job_uuid}" initializing Docker container. Constructing container args...')
             create_container_args = {
                 'environment': environment_vars,
                 'extra_hosts': extra_hosts,
@@ -327,45 +348,38 @@ class DockerExecutor:
 
             if self._options['job'].get('arguments_override_command'):
                 # In this case, arguments contains a user specified command to run in the app
-                create_container_args.update({
-                    'command': module_input['arguments'],
-                    'entrypoint': ''
-                })
+                create_container_args.update({'command': module_input['arguments'], 'entrypoint': ''})
 
             else:
-                create_container_args.update({
-                    'command': shlex.split(module['command']) + module_input['arguments']
-                })
+                create_container_args.update({'command': shlex.split(module['command']) + module_input['arguments']})
 
             app_version = self._options['job']['app_version']
             if app_version.get('main_output_file') or app_version.get('stdout_render_type') == 'text':
                 create_container_args['tty'] = True
 
             if utils.IS_RUNNING_IN_CLOUD:
-                cloud_job = self._options["cloud_job"]
+                cloud_job = self._options['cloud_job']
                 create_container_args['mem_limit'] = f'{cloud_job["reserved_memory_in_bytes"]}b'
-                create_container_args['nano_cpus'] = cloud_job["reserved_cpu_in_nano_shares"]
+                create_container_args['nano_cpus'] = cloud_job['reserved_cpu_in_nano_shares']
+                create_container_args['pids_limit'] = 10_000
 
                 biolib_identity_user_email: Optional[str] = cloud_job.get('biolib_identity_user_email')
                 if biolib_identity_user_email:
-                    create_container_args['environment'].update({
-                        'BIOLIB_IDENTITY_USER_EMAIL': biolib_identity_user_email
-                    })
+                    create_container_args['environment'].update(
+                        {'BIOLIB_IDENTITY_USER_EMAIL': biolib_identity_user_email}
+                    )
 
             docker_runtime = os.getenv('BIOLIB_DOCKER_RUNTIME')
             if docker_runtime is not None:
                 create_container_args['runtime'] = docker_runtime
 
-            self._docker_container = BiolibDockerClient.get_docker_client().containers.create(
-                **create_container_args
-            )
-
-            logger_no_user_data.debug('Finished initializing docker container')
+            docker_client = BiolibDockerClient.get_docker_client()
+            logger_no_user_data.debug(f'Job "{job_uuid}" initializing Docker container. Creating container...')
+            self._docker_container = docker_client.containers.create(**create_container_args)
+            logger_no_user_data.debug(f'Job "{job_uuid}" finished initializing Docker container.')
         except Exception as exception:
             raise ComputeProcessException(
-                exception,
-                SystemExceptionCodes.FAILED_TO_START_COMPUTE_CONTAINER.value,
-                self._send_system_exception
+                exception, SystemExceptionCodes.FAILED_TO_START_COMPUTE_CONTAINER.value, self._send_system_exception
             ) from exception
 
     def _add_file_to_tar(self, tar, current_path, mapped_path, data):
@@ -432,7 +446,7 @@ class DockerExecutor:
             raise ComputeProcessException(
                 exception,
                 SystemExceptionCodes.FAILED_TO_COPY_INPUT_FILES_TO_COMPUTE_CONTAINER.value,
-                self._send_system_exception
+                self._send_system_exception,
             ) from exception
 
     def _map_and_copy_runtime_files_to_container(self, runtime_zip_data, arguments: List[str], remove_root_folder=True):
@@ -447,17 +461,17 @@ class DockerExecutor:
             raise ComputeProcessException(
                 exception,
                 SystemExceptionCodes.FAILED_TO_COPY_RUNTIME_FILES_TO_COMPUTE_CONTAINER.value,
-                self._send_system_exception
+                self._send_system_exception,
             ) from exception
 
     def _write_module_output_to_file(
-            self,
-            arguments: List[str],
-            exit_code: int,
-            module_output_path: str,
-            stderr: bytes,
-            stdout: bytes,
-            pre_start_diff: List[Dict],
+        self,
+        arguments: List[str],
+        exit_code: int,
+        module_output_path: str,
+        stderr: bytes,
+        stdout: bytes,
+        pre_start_diff: List[Dict],
     ) -> None:
         mapped_files: List[FileInContainer] = []
         try:
@@ -502,9 +516,11 @@ class DockerExecutor:
         result = subprocess.run(
             args=[
                 'ctr',
-                '--namespace', 'moby',
+                '--namespace',
+                'moby',
                 'snapshots',
-                '--snapshotter', 'nydus',
+                '--snapshotter',
+                'nydus',
                 'mounts',
                 '/some_arbitrary_path',
                 str(self._container.id),
@@ -526,9 +542,10 @@ class DockerExecutor:
         pre_start_diff_paths = [obj['Path'] for obj in pre_start_diff]
         post_run_diff = self._container.diff()
         run_diff_paths: List[str] = [
-            obj['Path'] for obj in post_run_diff if
-            obj['Kind'] in (DockerDiffKind.CHANGED.value, DockerDiffKind.ADDED.value) and
-            obj['Path'] not in pre_start_diff_paths
+            obj['Path']
+            for obj in post_run_diff
+            if obj['Kind'] in (DockerDiffKind.CHANGED.value, DockerDiffKind.ADDED.value)
+            and obj['Path'] not in pre_start_diff_paths
         ]
 
         known_directories = set()
@@ -538,7 +555,7 @@ class DockerExecutor:
                 if idx == 0:
                     continue  # always skip root
 
-                folder = '/' + '/'.join(parent_folders[1:idx + 1])
+                folder = '/' + '/'.join(parent_folders[1 : idx + 1])
                 known_directories.add(folder)
 
         def path_is_included_in_from_mappings(path: str) -> bool:
@@ -558,11 +575,13 @@ class DockerExecutor:
         files_and_empty_dirs: List[FileInContainer] = []
         for path in run_diff_paths:
             if path not in known_directories and path_is_included_in_from_mappings(path):
-                files_and_empty_dirs.append(FileInContainer(
-                    container=self._container,
-                    overlay_upper_dir_path=overlay_upper_dir_path,
-                    path_in_container=path,
-                ))
+                files_and_empty_dirs.append(
+                    FileInContainer(
+                        container=self._container,
+                        overlay_upper_dir_path=overlay_upper_dir_path,
+                        path_in_container=path,
+                    )
+                )
 
         return files_and_empty_dirs
 
biolib/compute_node/job_worker/job_storage.py +17 -5
@@ -10,6 +10,7 @@ from biolib.utils.multipart_uploader import get_chunk_iterator_from_file_object
 
 
 class JobStorage:
+    module_input_file_name = 'input-output.bbf'
     module_output_file_name = 'module-output.bbf'
 
     @staticmethod
@@ -46,8 +47,21 @@ class JobStorage:
         module_output_path = os.path.join(job_temporary_dir, JobStorage.module_output_file_name)
         module_output_size = os.path.getsize(module_output_path)
 
+        # Calculate chunk size based on max chunk count of 10_000, using 9_000 to be on the safe side
+        max_chunk_count = 9_000
+        min_chunk_size_bytes = 50_000_000
+        chunk_size_in_bytes = max(min_chunk_size_bytes, module_output_size // max_chunk_count)
+
+        logger_no_user_data.debug(
+            f'Job "{job_uuid}" uploading result of size {module_output_size} bytes '
+            f'with chunk size of {chunk_size_in_bytes} bytes...'
+        )
+
         with open(module_output_path, mode='rb') as module_output_file:
-            module_output_iterator = get_chunk_iterator_from_file_object(module_output_file)
+            module_output_iterator = get_chunk_iterator_from_file_object(
+                file_object=module_output_file,
+                chunk_size_in_bytes=chunk_size_in_bytes,
+            )
             multipart_uploader = JobStorage._get_module_output_uploader(job_uuid)
             multipart_uploader.upload(
                 payload_iterator=module_output_iterator,
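To make the new chunk-size arithmetic concrete: the part size is the larger of 50 MB and module_output_size // 9_000, which keeps the part count close to 9,000 and therefore safely under a 10,000-part multipart-upload limit. A small standalone check of the same calculation (illustrative only, not part of the package):

def compute_chunk_size(module_output_size: int) -> int:
    # Same arithmetic as the hunk above: a 50 MB floor, growing for large outputs
    # so the part count stays near 9,000.
    max_chunk_count = 9_000
    min_chunk_size_bytes = 50_000_000
    return max(min_chunk_size_bytes, module_output_size // max_chunk_count)

assert compute_chunk_size(10_000_000_000) == 50_000_000    # 10 GB output -> 50 MB parts (~200 parts)
assert compute_chunk_size(900_000_000_000) == 100_000_000  # 900 GB output -> 100 MB parts (~9,000 parts)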
@@ -81,7 +95,7 @@ class JobStorage:
         )
 
     @staticmethod
-    def get_module_input(job: CreatedJobDict) -> bytes:
+    def download_module_input(job: CreatedJobDict, path: str):
         job_uuid = job['public_id']
         logger_no_user_data.debug(f'Job "{job_uuid}" downloading module input...')
         presigned_download_url = BiolibJobApi.get_job_storage_download_url(
@@ -89,7 +103,5 @@ class JobStorage:
             job_auth_token=job['auth_token'],
             storage_type='input',
         )
-        response = HttpClient.request(url=presigned_download_url)
-        data: bytes = response.content
+        HttpClient.request(url=presigned_download_url, response_path=path)
         logger_no_user_data.debug(f'Job "{job_uuid}" module input downloaded')
-        return data
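The download_module_input change above writes the response for the presigned URL directly to a path via response_path instead of returning the bytes, so the module input no longer has to be materialized as a bytes object in the worker. A minimal sketch of the same idea using only the standard library (the actual pybiolib HttpClient is not shown here; download_to_path is a hypothetical helper):

import shutil
import urllib.request

def download_to_path(presigned_download_url: str, path: str) -> None:
    # Copy the HTTP response to disk in fixed-size chunks, keeping memory use roughly constant.
    with urllib.request.urlopen(presigned_download_url) as response, open(path, 'wb') as output_file:
        shutil.copyfileobj(response, output_file)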
biolib/compute_node/job_worker/job_worker.py +25 -15
@@ -133,7 +133,8 @@ class JobWorker:
         ).start()
 
         try:
-            module_input_serialized = JobStorage.get_module_input(job=job)
+            module_input_path = os.path.join(self.job_temporary_dir, JobStorage.module_input_file_name)
+            JobStorage.download_module_input(job=job, path=module_input_path)
         except StorageDownloadFailed:
             # Expect module input to be handled in a separate ModuleInput package
             self._legacy_input_wait_timeout_thread = JobLegacyInputWaitTimeout(
@@ -147,7 +148,7 @@ class JobWorker:
             raise error
 
         try:
-            self._run_root_job(module_input_serialized)
+            self._run_root_job(module_input_path)
 
         # This error occurs when trying to access the container after the job worker has cleaned it up.
         # In that case stop the computation.
@@ -165,7 +166,9 @@ class JobWorker:
             self._legacy_input_wait_timeout_thread.stop()
 
         try:
-            self._run_root_job(package)
+            module_input_path = os.path.join(self.job_temporary_dir, JobStorage.module_input_file_name)
+            open(module_input_path, 'wb').write(package)
+            self._run_root_job(module_input_path)
 
         # This error occurs when trying to access the container after the job worker has cleaned it up.
         # In that case stop the computation.
@@ -250,7 +253,6 @@ class JobWorker:
 
     def _start_network_and_remote_host_proxies(self, job: CreatedJobDict) -> None:
         app_version = job['app_version']
-        app = app_version.get('app', {})
         job_id = job['public_id']
         remote_hosts = app_version['remote_hosts']
         if utils.IS_RUNNING_IN_CLOUD:
@@ -313,7 +315,6 @@ class JobWorker:
                 self._internal_network,
                 job_id,
                 ports,
-                can_push_data_record_for_user=app.get('can_push_data_record_for_user', False),
             )
             remote_host_proxy.start()
             self._remote_host_proxies.append(remote_host_proxy)
@@ -331,15 +332,15 @@ class JobWorker:
     def _run_app_version(
         self,
         app_version_id: str,
-        module_input_serialized: bytes,
+        module_input_path: str,
         caller_job: CreatedJobDict,
         main_module_output_path: str,
     ) -> None:
         job: CreatedJobDict = BiolibJobApi.create(app_version_id, caller_job=caller_job['public_id'])
         self._jobs[job['public_id']] = job
-        self._run_job(job, module_input_serialized, main_module_output_path)
+        self._run_job(job, module_input_path, main_module_output_path)
 
-    def _run_job(self, job: CreatedJobDict, module_input_serialized: bytes, main_module_output_path: str) -> None:
+    def _run_job(self, job: CreatedJobDict, module_input_path: str, main_module_output_path: str) -> None:
         job_uuid = job['public_id']
         logger_no_user_data.info(f'Job "{job_uuid}" running...')
         if self._root_job_wrapper is None:
@@ -406,7 +407,7 @@ class JobWorker:
                 send_system_exception=self.send_system_exception,
                 send_stdout_and_stderr=self.send_stdout_and_stderr,
             ),
-            module_input_serialized,
+            module_input_path,
             main_module_output_path,
         )
 
@@ -417,15 +418,20 @@ class JobWorker:
     def _run_module(
         self,
         options: LocalExecutorOptions,
-        module_input_serialized: bytes,
+        module_input_path: str,
         module_output_path: str,
     ) -> None:
         module = options['module']
         job_id = options['job']['public_id']
         logger_no_user_data.debug(f'Job "{job_id}" running module "{module["name"]}"...')
+
         executor_instance: DockerExecutor
         if module['environment'] == ModuleEnvironment.BIOLIB_APP.value:
+            if not self.job_temporary_dir:
+                raise BioLibError('Undefined job_temporary_dir')
             logger_no_user_data.debug(f'Job "{job_id}" starting child job...')
+            with open(module_input_path,'rb') as fp:
+                module_input_serialized = fp.read()
             module_input = ModuleInput(module_input_serialized).deserialize()
             module_input_with_runtime_zip = self._add_runtime_zip_and_command_to_module_input(options, module_input)
             module_input_with_runtime_zip_serialized = ModuleInput().serialize(
@@ -433,9 +439,11 @@ class JobWorker:
                 arguments=module_input_with_runtime_zip['arguments'],
                 files=module_input_with_runtime_zip['files'],
             )
+            module_input_path_new = os.path.join(self.job_temporary_dir, "runtime." + JobStorage.module_input_file_name)
+            open(module_input_path_new, 'wb').write(module_input_with_runtime_zip_serialized)
             return self._run_app_version(
                 module['image_uri'],
-                module_input_with_runtime_zip_serialized,
+                module_input_path_new,
                 options['job'],
                 module_output_path,
             )
@@ -461,7 +469,7 @@ class JobWorker:
         # Log memory and disk before pulling and executing module
         log_disk_and_memory_usage_info()
 
-        executor_instance.execute_module(module_input_serialized, module_output_path)
+        executor_instance.execute_module(module_input_path, module_output_path)
 
     def _connect_to_parent(self):
         try:
@@ -587,7 +595,7 @@ class JobWorker:
                 may_contain_user_data=False
             ) from exception
 
-    def _run_root_job(self, module_input_serialized: bytes) -> str:
+    def _run_root_job(self, module_input_path: str) -> str:
         # Make typechecker happy
         if not self._root_job_wrapper or not self.job_temporary_dir:
             raise BioLibError('Undefined job_wrapper or job_temporary_dir')
@@ -595,7 +603,7 @@ class JobWorker:
         main_module_output_path = os.path.join(self.job_temporary_dir, JobStorage.module_output_file_name)
         self._run_job(
             job=self._root_job_wrapper['job'],
-            module_input_serialized=module_input_serialized,
+            module_input_path=module_input_path,
             main_module_output_path=main_module_output_path,
         )
         self._send_status_update(StatusUpdate(progress=94, log_message='Computation finished'))
@@ -614,7 +622,9 @@ class JobWorker:
             job_temporary_dir=job_temporary_dir,
         )
         self._start_network_and_remote_host_proxies(job_dict)
-        module_output_path = self._run_root_job(module_input_serialized)
+        module_input_path = os.path.join(self.job_temporary_dir, JobStorage.module_input_file_name)
+        open(module_input_path, 'wb').write(module_input_serialized)
+        module_output_path = self._run_root_job(module_input_path)
         with open(module_output_path, mode='rb') as module_output_file:
             module_output_serialized = module_output_file.read()
             return ModuleOutputV2(InMemoryIndexableBuffer(module_output_serialized))