pybiolib 1.2.1056__py3-none-any.whl → 1.2.1727__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pybiolib might be problematic.

Files changed (93)
  1. biolib/__init__.py +33 -10
  2. biolib/_data_record/data_record.py +103 -26
  3. biolib/_index/__init__.py +0 -0
  4. biolib/_index/index.py +51 -0
  5. biolib/_index/types.py +7 -0
  6. biolib/_internal/data_record/data_record.py +1 -1
  7. biolib/_internal/data_record/push_data.py +65 -16
  8. biolib/_internal/data_record/remote_storage_endpoint.py +3 -3
  9. biolib/_internal/file_utils.py +7 -4
  10. biolib/_internal/index/__init__.py +1 -0
  11. biolib/_internal/index/index.py +18 -0
  12. biolib/_internal/lfs/cache.py +4 -2
  13. biolib/_internal/push_application.py +89 -23
  14. biolib/_internal/runtime.py +2 -0
  15. biolib/_internal/templates/gui_template/App.tsx +38 -2
  16. biolib/_internal/templates/gui_template/Dockerfile +2 -0
  17. biolib/_internal/templates/gui_template/biolib-sdk.ts +37 -0
  18. biolib/_internal/templates/gui_template/dev-data/output.json +7 -0
  19. biolib/_internal/templates/gui_template/package.json +1 -0
  20. biolib/_internal/templates/gui_template/vite-plugin-dev-data.ts +49 -0
  21. biolib/_internal/templates/gui_template/vite.config.mts +2 -1
  22. biolib/_internal/templates/init_template/.github/workflows/biolib.yml +6 -1
  23. biolib/_internal/templates/init_template/Dockerfile +2 -0
  24. biolib/_internal/utils/__init__.py +40 -0
  25. biolib/_internal/utils/auth.py +46 -0
  26. biolib/_internal/utils/job_url.py +33 -0
  27. biolib/_runtime/runtime.py +9 -0
  28. biolib/_session/session.py +7 -5
  29. biolib/_shared/__init__.py +0 -0
  30. biolib/_shared/types/__init__.py +74 -0
  31. biolib/_shared/types/resource.py +37 -0
  32. biolib/_shared/types/resource_deploy_key.py +11 -0
  33. biolib/{_internal → _shared}/types/resource_version.py +8 -2
  34. biolib/_shared/utils/__init__.py +7 -0
  35. biolib/_shared/utils/resource_uri.py +75 -0
  36. biolib/api/client.py +3 -47
  37. biolib/app/app.py +57 -33
  38. biolib/biolib_api_client/api_client.py +3 -47
  39. biolib/biolib_api_client/app_types.py +1 -6
  40. biolib/biolib_api_client/biolib_app_api.py +17 -0
  41. biolib/biolib_binary_format/module_input.py +8 -0
  42. biolib/biolib_binary_format/remote_endpoints.py +3 -3
  43. biolib/biolib_binary_format/remote_stream_seeker.py +39 -25
  44. biolib/cli/__init__.py +2 -1
  45. biolib/cli/data_record.py +82 -0
  46. biolib/cli/index.py +32 -0
  47. biolib/cli/init.py +39 -1
  48. biolib/cli/lfs.py +1 -1
  49. biolib/cli/run.py +8 -5
  50. biolib/cli/start.py +14 -1
  51. biolib/compute_node/job_worker/executors/docker_executor.py +31 -9
  52. biolib/compute_node/job_worker/executors/docker_types.py +1 -1
  53. biolib/compute_node/job_worker/executors/types.py +6 -5
  54. biolib/compute_node/job_worker/job_worker.py +149 -93
  55. biolib/compute_node/job_worker/large_file_system.py +2 -6
  56. biolib/compute_node/job_worker/network_alloc.py +99 -0
  57. biolib/compute_node/job_worker/network_buffer.py +240 -0
  58. biolib/compute_node/job_worker/utilization_reporter_thread.py +2 -2
  59. biolib/compute_node/remote_host_proxy.py +139 -79
  60. biolib/compute_node/utils.py +2 -0
  61. biolib/compute_node/webserver/compute_node_results_proxy.py +188 -0
  62. biolib/compute_node/webserver/proxy_utils.py +28 -0
  63. biolib/compute_node/webserver/webserver.py +64 -19
  64. biolib/experiments/experiment.py +111 -16
  65. biolib/jobs/job.py +119 -29
  66. biolib/jobs/job_result.py +70 -33
  67. biolib/jobs/types.py +1 -0
  68. biolib/sdk/__init__.py +17 -2
  69. biolib/typing_utils.py +1 -1
  70. biolib/utils/cache_state.py +2 -2
  71. biolib/utils/multipart_uploader.py +24 -18
  72. biolib/utils/seq_util.py +1 -1
  73. pybiolib-1.2.1727.dist-info/METADATA +41 -0
  74. {pybiolib-1.2.1056.dist-info → pybiolib-1.2.1727.dist-info}/RECORD +103 -85
  75. {pybiolib-1.2.1056.dist-info → pybiolib-1.2.1727.dist-info}/WHEEL +1 -1
  76. pybiolib-1.2.1727.dist-info/entry_points.txt +2 -0
  77. biolib/_internal/types/__init__.py +0 -6
  78. biolib/_internal/types/resource.py +0 -18
  79. biolib/utils/app_uri.py +0 -57
  80. pybiolib-1.2.1056.dist-info/METADATA +0 -50
  81. pybiolib-1.2.1056.dist-info/entry_points.txt +0 -3
  82. /biolib/{_internal → _shared}/types/account.py +0 -0
  83. /biolib/{_internal → _shared}/types/account_member.py +0 -0
  84. /biolib/{_internal → _shared}/types/app.py +0 -0
  85. /biolib/{_internal → _shared}/types/data_record.py +0 -0
  86. /biolib/{_internal → _shared}/types/experiment.py +0 -0
  87. /biolib/{_internal → _shared}/types/file_node.py +0 -0
  88. /biolib/{_internal → _shared}/types/push.py +0 -0
  89. /biolib/{_internal → _shared}/types/resource_permission.py +0 -0
  90. /biolib/{_internal → _shared}/types/result.py +0 -0
  91. /biolib/{_internal → _shared}/types/typing.py +0 -0
  92. /biolib/{_internal → _shared}/types/user.py +0 -0
  93. {pybiolib-1.2.1056.dist-info → pybiolib-1.2.1727.dist-info/licenses}/LICENSE +0 -0
biolib/compute_node/job_worker/job_worker.py
@@ -1,50 +1,63 @@
+ import hashlib
  import io
  import json
- import socket
+ import multiprocessing
+ import os
  import shlex
+ import signal
+ import socket
  import sys
  import tempfile
  import zipfile
- from time import time
  from queue import Queue
- import multiprocessing
- import os
- import signal
+ from time import time
  from types import FrameType

  from docker.models.networks import Network  # type: ignore
+ from docker.types import IPAMConfig, IPAMPool  # type: ignore

- from biolib._internal.http_client import HttpClient
- from biolib.biolib_binary_format.stdout_and_stderr import StdoutAndStderr
- from biolib.compute_node.job_worker.job_legacy_input_wait_timeout_thread import JobLegacyInputWaitTimeout
- from biolib.compute_node.job_worker.job_storage import JobStorage
- from biolib.compute_node.job_worker.large_file_system import LargeFileSystem
- from biolib.biolib_errors import DockerContainerNotFoundDuringExecutionException, BioLibError, \
-     StorageDownloadFailed
- from biolib.compute_node.job_worker.job_max_runtime_timer_thread import JobMaxRuntimeTimerThread
- from biolib.compute_node.remote_host_proxy import RemoteHostProxy
- from biolib.typing_utils import Optional, List, Dict
  from biolib import utils
- from biolib.biolib_api_client import ModuleEnvironment, CreatedJobDict, JobWrapper, Module, AppVersionOnJob, \
-     BiolibApiClient, RemoteHost
+ from biolib._internal.http_client import HttpClient
+ from biolib.biolib_api_client import (
+     AppVersionOnJob,
+     BiolibApiClient,
+     CreatedJobDict,
+     JobWrapper,
+     Module,
+     ModuleEnvironment,
+ )
  from biolib.biolib_api_client.biolib_job_api import BiolibJobApi
+ from biolib.biolib_binary_format import (
+     InMemoryIndexableBuffer,
+     ModuleInput,
+     ModuleOutputV2,
+     SavedJob,
+     SystemException,
+     SystemStatusUpdate,
+ )
+ from biolib.biolib_binary_format.stdout_and_stderr import StdoutAndStderr
  from biolib.biolib_docker_client import BiolibDockerClient
+ from biolib.biolib_errors import BioLibError, DockerContainerNotFoundDuringExecutionException, StorageDownloadFailed
  from biolib.biolib_logging import logger, logger_no_user_data
  from biolib.compute_node.job_worker.executors import DockerExecutor
  from biolib.compute_node.job_worker.executors.types import LocalExecutorOptions, StatusUpdate
- from biolib.compute_node.socker_listener_thread import SocketListenerThread
- from biolib.compute_node.socket_sender_thread import SocketSenderThread
+ from biolib.compute_node.job_worker.job_legacy_input_wait_timeout_thread import JobLegacyInputWaitTimeout
+ from biolib.compute_node.job_worker.job_max_runtime_timer_thread import JobMaxRuntimeTimerThread
+ from biolib.compute_node.job_worker.job_storage import JobStorage
+ from biolib.compute_node.job_worker.large_file_system import LargeFileSystem
  from biolib.compute_node.job_worker.mappings import Mappings, path_without_first_folder
+ from biolib.compute_node.job_worker.network_buffer import NetworkBuffer
  from biolib.compute_node.job_worker.utils import ComputeProcessException, log_disk_and_memory_usage_info
- from biolib.compute_node.utils import get_package_type, SystemExceptionCodes, SystemExceptionCodeMap
- from biolib.biolib_binary_format import SavedJob, SystemStatusUpdate, ModuleInput, SystemException, \
-     ModuleOutputV2, InMemoryIndexableBuffer
+ from biolib.compute_node.remote_host_proxy import RemoteHostMapping, RemoteHostProxy, get_static_ip_from_network
+ from biolib.compute_node.socker_listener_thread import SocketListenerThread
+ from biolib.compute_node.socket_sender_thread import SocketSenderThread
+ from biolib.compute_node.utils import SystemExceptionCodeMap, SystemExceptionCodes, get_package_type
+ from biolib.typing_utils import Dict, List, Optional

  SOCKET_HOST = '127.0.0.1'


  class JobWorkerProcess(multiprocessing.Process):
-
      # note: this method is run in the parent process
      def __init__(self, socket_port: int, log_level: int):
          super().__init__()
@@ -69,6 +82,13 @@ class JobWorker:
          # handle termination signal from parent
          signal.signal(signal.SIGTERM, self._handle_exit_gracefully)

+         try:
+             docker_client = BiolibDockerClient.get_docker_client()
+             networks = docker_client.networks.list()
+             logger_no_user_data.debug(f'Docker networks at JobWorker init: {[net.name for net in networks]}')
+         except Exception as error:
+             logger_no_user_data.debug(f'Failed to list docker networks at init: {error}')
+
          self._socket_port = socket_port
          self._received_messages_queue: Queue = Queue()
          self._messages_to_send_queue: Queue = Queue()
@@ -80,9 +100,9 @@ class JobWorker:

          self._remote_host_proxies: List[RemoteHostProxy] = []
          self._internal_network: Optional[Network] = None
-         self._public_network: Optional[Network] = None
          self._executors: List[DockerExecutor] = []
          self.is_cleaning_up: bool = False
+         self._network_buffer = NetworkBuffer.get_instance()

          self.job_temporary_dir: Optional[str] = None

@@ -91,18 +111,18 @@
                  exception,
                  SystemExceptionCodes.FAILED_TO_INIT_COMPUTE_PROCESS_VARIABLES.value,
                  self.send_system_exception,
-                 may_contain_user_data=False
+                 may_contain_user_data=False,
              ) from exception

          if socket_port:
              self._connect_to_parent()

      def _handle_exit_gracefully(
-             self,
-             signum: int,
-             frame: Optional[FrameType],  # pylint: disable=unused-argument
+         self,
+         signum: int,
+         frame: Optional[FrameType],  # pylint: disable=unused-argument
      ) -> None:
-         job_id = self._root_job_wrapper["job"]["public_id"] if self._root_job_wrapper else None
+         job_id = self._root_job_wrapper['job']['public_id'] if self._root_job_wrapper else None
          logger_no_user_data.debug(
              f'_JobWorker ({job_id}) got exit signal {signal.Signals(signum).name}'  # pylint: disable=no-member
          )
@@ -187,9 +207,7 @@

          except Exception as exception:
              raise ComputeProcessException(
-                 exception,
-                 SystemExceptionCodes.UNKNOWN_COMPUTE_PROCESS_ERROR.value,
-                 self.send_system_exception
+                 exception, SystemExceptionCodes.UNKNOWN_COMPUTE_PROCESS_ERROR.value, self.send_system_exception
              ) from exception

      def _cleanup(self) -> None:
@@ -200,6 +218,8 @@
              executor.cleanup()

          proxy_count = len(self._remote_host_proxies)
+         cleaned_networks = set()
+
          if proxy_count > 0:
              logger_no_user_data.debug('Cleaning up proxies...')
              proxy_cleanup_start_time = time()
@@ -211,21 +231,37 @@
                      logger_no_user_data.error('Failed to clean up remote host proxy')
                      logger.error(exception)

+                 for network in proxy.get_remote_host_networks():
+                     try:
+                         self._cleanup_network(network)
+                         cleaned_networks.add(network.id)
+                     except Exception as exception:  # pylint: disable=broad-except
+                         logger_no_user_data.error(f'Failed to clean up network {network.name}')
+                         logger.error(exception)
+
              self._remote_host_proxies = []
              logger_no_user_data.debug(f'Cleaned up {proxy_count} proxies in {time() - proxy_cleanup_start_time}')

          logger_no_user_data.debug('Cleaning up networks...')
-         self._cleanup_network(self._internal_network)
+         if self._internal_network and self._internal_network.id not in cleaned_networks:
+             self._cleanup_network(self._internal_network)
          self._internal_network = None
-         self._cleanup_network(self._public_network)
-         self._public_network = None
+
+         try:
+             logger_no_user_data.debug('Refilling network buffer...')
+             created = self._network_buffer.fill_buffer()
+             logger_no_user_data.debug(f'Refilled buffer with {created} new networks')
+         except Exception as exception:  # pylint: disable=broad-except
+             logger_no_user_data.error('Failed to refill network buffer')
+             logger.error(exception)
+
          logger_no_user_data.debug('Cleaned up networks...')

      @staticmethod
      def _cleanup_network(network: Optional[Network]) -> None:
          if network:
              network_cleanup_start_time = time()
-             network_name = network
+             network_name = network.name
              try:
                  network.remove()
              except Exception as exception:  # pylint: disable=broad-except
@@ -237,10 +273,7 @@
      def _handle_save_job_wrapper(self, package: bytes):
          job_wrapper_json_string = SavedJob(package).deserialize()
          job_wrapper: JobWrapper = json.loads(job_wrapper_json_string)
-         BiolibApiClient.initialize(
-             base_url=job_wrapper['BASE_URL'],
-             access_token=job_wrapper['access_token']
-         )
+         BiolibApiClient.initialize(base_url=job_wrapper['BASE_URL'], access_token=job_wrapper['access_token'])
          self._root_job_wrapper = job_wrapper
          if not utils.IS_RUNNING_IN_CLOUD:
              job_wrapper['cloud_job'] = None
@@ -253,10 +286,10 @@
          app_version = job['app_version']
          modules = app_version.get('modules', [])
          for module in modules:
-             module_port_mappings = module.get('port_mappings', [])
-             if module_port_mappings:
+             module_ports = module.get('ports', [])
+             if module_ports:
                  logger_no_user_data.debug(
-                     f"Job '{job['public_id']}' module '{module['name']}' has port_mappings: {module_port_mappings}"
+                     f"Job '{job['public_id']}' module '{module['name']}' has ports: {module_ports}"
                  )

          if job['app_version'].get('modules') is not None and BiolibDockerClient.is_docker_running():
@@ -268,44 +301,33 @@
          app_version = job['app_version']
          job_id = job['public_id']
          remote_hosts = app_version['remote_hosts']
-         if utils.IS_RUNNING_IN_CLOUD:
-             remote_hosts.append(
-                 {
-                     'hostname': 'AppCallerProxy',
-                 },
-             )
-
          docker_client = BiolibDockerClient.get_docker_client()
          try:
+             name_hash = int(hashlib.sha256(job_id.encode()).hexdigest(), 16)
+             third_octet = name_hash % 256
+             internal_subnet = f'172.29.{third_octet}.0/24'
+
+             ipam_pool = IPAMPool(subnet=internal_subnet)
+             ipam_config = IPAMConfig(pool_configs=[ipam_pool])
+
              self._internal_network = docker_client.networks.create(
                  name=f'biolib-sandboxed-network-{job_id}',
                  internal=True,
                  driver='bridge',
+                 ipam=ipam_config,
              )
+             logger_no_user_data.debug(f'Created internal network for job {job_id} with subnet {internal_subnet}')
          except Exception as exception:
              raise ComputeProcessException(
                  exception,
                  SystemExceptionCodes.FAILED_TO_CREATE_DOCKER_NETWORKS.value,
                  self.send_system_exception,
-                 may_contain_user_data=False
+                 may_contain_user_data=False,
              ) from exception

          if len(remote_hosts) > 0:
-             logger_no_user_data.debug(f'Job "{job_id}" creating networks for remote host proxies...')
-             try:
-                 self._public_network = docker_client.networks.create(
-                     name=f'biolib-proxy-network-{job_id}',
-                     internal=False,
-                     driver='bridge',
-                 )
-             except Exception as exception:
-                 raise ComputeProcessException(
-                     exception,
-                     SystemExceptionCodes.FAILED_TO_CREATE_DOCKER_NETWORKS.value,
-                     self.send_system_exception,
-                     may_contain_user_data=False
-                 ) from exception
-             logger_no_user_data.debug(f'Job "{job_id}" starting proxies for remote hosts: {remote_hosts}')
+             logger_no_user_data.debug(f'Job "{job_id}" starting proxy for remote hosts: {remote_hosts}')
+             created_networks: List[Network] = []
              try:
                  hostname_to_ports: Dict[str, List[int]] = {}
                  for remote_host in remote_hosts:
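Note on the hunk above: the job's internal sandbox network is now pinned to a deterministic /24 inside 172.29.0.0/16, chosen by hashing the job id. A minimal sketch of the derivation (the job id value here is made up):

    import hashlib

    job_id = 'abc123'  # hypothetical job public_id
    name_hash = int(hashlib.sha256(job_id.encode()).hexdigest(), 16)
    third_octet = name_hash % 256  # deterministic value in 0..255
    internal_subnet = f'172.29.{third_octet}.0/24'  # the subnet handed to IPAMPool

Two distinct job ids can hash to the same octet; this hunk adds no retry logic for the internal network, whereas the new network_alloc.py helper further down retries across candidate subnets in 172.28.0.0/16.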
@@ -321,33 +343,67 @@
                      else:
                          hostname_to_ports[hostname] = [port]

-                 for hostname, ports in hostname_to_ports.items():
+                 remote_host_mappings: List[RemoteHostMapping] = []
+                 networks = self._network_buffer.allocate_networks(job_id, len(hostname_to_ports))
+                 created_networks.extend(networks)
+
+                 for (hostname, ports), network in zip(hostname_to_ports.items(), networks):
+                     static_ip = get_static_ip_from_network(network, offset=2)
+
+                     mapping = RemoteHostMapping(
+                         hostname=hostname,
+                         ports=ports,
+                         network=network,
+                         static_ip=static_ip,
+                     )
+                     remote_host_mappings.append(mapping)
+
+                 if remote_host_mappings:
                      remote_host_proxy = RemoteHostProxy(
-                         RemoteHost(hostname=hostname),
-                         self._public_network,
-                         self._internal_network,
-                         job_id,
-                         ports,
+                         remote_host_mappings=remote_host_mappings,
+                         job=job,
+                         app_caller_network=None,
                      )
                      remote_host_proxy.start()
                      self._remote_host_proxies.append(remote_host_proxy)
+                     num_hosts = len(remote_host_mappings)
+                     logger_no_user_data.debug(f'Started single proxy container for {num_hosts} remote hosts')

              except Exception as exception:
+                 for network in created_networks:
+                     self._cleanup_network(network)
+
                  raise ComputeProcessException(
                      exception,
                      SystemExceptionCodes.FAILED_TO_START_REMOTE_HOST_PROXIES.value,
                      self.send_system_exception,
-                     may_contain_user_data=False
+                     may_contain_user_data=False,
                  ) from exception

-         logger_no_user_data.debug(f'Job "{job_id}" startup of remote host proxies completed')
+         if utils.IS_RUNNING_IN_CLOUD:
+             try:
+                 app_caller_proxy = RemoteHostProxy(
+                     remote_host_mappings=[],
+                     job=job,
+                     app_caller_network=self._internal_network,
+                 )
+                 app_caller_proxy.start()
+                 self._remote_host_proxies.append(app_caller_proxy)
+                 logger_no_user_data.debug('Started app caller proxy')
+             except Exception as exception:
+                 raise ComputeProcessException(
+                     exception,
+                     SystemExceptionCodes.FAILED_TO_START_REMOTE_HOST_PROXIES.value,
+                     self.send_system_exception,
+                     may_contain_user_data=False,
+                 ) from exception

      def _run_app_version(
-             self,
-             app_version_id: str,
-             module_input_path: str,
-             caller_job: CreatedJobDict,
-             main_module_output_path: str,
+         self,
+         app_version_id: str,
+         module_input_path: str,
+         caller_job: CreatedJobDict,
+         main_module_output_path: str,
      ) -> None:
          job: CreatedJobDict = BiolibJobApi.create(app_version_id, caller_job=caller_job['public_id'])
          self._jobs[job['public_id']] = job
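The NetworkBuffer class itself (biolib/compute_node/job_worker/network_buffer.py, +240 lines) is not included in this diff excerpt. From its call sites — NetworkBuffer.get_instance() in __init__, allocate_networks(job_id, count) in the proxy setup above, and fill_buffer() in _cleanup — its interface is presumably close to the following sketch; everything beyond those three names is an assumption:

    from typing import List, Optional

    from docker.models.networks import Network


    class NetworkBuffer:
        # Assumed shape: a process-wide singleton that pre-creates Docker networks
        # so jobs can grab one without paying network-creation latency per job.
        _instance: Optional['NetworkBuffer'] = None

        @classmethod
        def get_instance(cls) -> 'NetworkBuffer':
            if cls._instance is None:
                cls._instance = cls()
            return cls._instance

        def allocate_networks(self, job_id: str, count: int) -> List[Network]:
            """Hand out `count` pre-created networks to the given job."""
            raise NotImplementedError  # real implementation not shown in this diff

        def fill_buffer(self) -> int:
            """Top the pool back up; returns the number of networks created."""
            raise NotImplementedError  # real implementation not shown in this diff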
@@ -366,17 +422,17 @@
          root_job_id = root_job['public_id']
          if job.get('arguments_override_command') and not job['app_version']['app']['allow_client_side_execution']:
              raise ComputeProcessException(
-                 Exception("Command override not allowed"),
+                 Exception('Command override not allowed'),
                  SystemExceptionCodes.COMMAND_OVERRIDE_NOT_ALLOWED.value,
-                 self.send_system_exception
+                 self.send_system_exception,
              )

          modules = job['app_version'].get('modules')
          if not modules:
              raise ComputeProcessException(
-                 Exception("No modules found on job"),
+                 Exception('No modules found on job'),
                  SystemExceptionCodes.NO_MODULES_FOUND_ON_JOB.value,
-                 self.send_system_exception
+                 self.send_system_exception,
              )

          main_module = self._get_module_from_name(modules, module_name='main')
@@ -429,8 +485,8 @@
              log_disk_and_memory_usage_info()

      def _run_module(
-             self,
-             options: LocalExecutorOptions,
+         self,
+         options: LocalExecutorOptions,
      ) -> None:
          module = options['module']
          job_id = options['job']['public_id']
@@ -443,7 +499,7 @@
              if not self.job_temporary_dir:
                  raise BioLibError('Undefined job_temporary_dir')
              logger_no_user_data.debug(f'Job "{job_id}" starting child job...')
-             with open(module_input_path,'rb') as fp:
+             with open(module_input_path, 'rb') as fp:
                  module_input_serialized = fp.read()
              module_input = ModuleInput(module_input_serialized).deserialize()
              module_input_with_runtime_zip = self._add_runtime_zip_and_command_to_module_input(options, module_input)
@@ -452,7 +508,7 @@
                  arguments=module_input_with_runtime_zip['arguments'],
                  files=module_input_with_runtime_zip['files'],
              )
-             module_input_path_new = os.path.join(self.job_temporary_dir, "runtime." + JobStorage.module_input_file_name)
+             module_input_path_new = os.path.join(self.job_temporary_dir, 'runtime.' + JobStorage.module_input_file_name)
              open(module_input_path_new, 'wb').write(module_input_with_runtime_zip_serialized)
              return self._run_app_version(
                  module['image_uri'],
@@ -469,7 +525,7 @@
                      exception,
                      SystemExceptionCodes.FAILED_TO_INITIALIZE_DOCKER_EXECUTOR.value,
                      self.send_system_exception,
-                     may_contain_user_data=False
+                     may_contain_user_data=False,
                  ) from exception
          else:
              err_string = f'Job "{job_id}" hit unsupported module environment "{module["environment"]}"'
@@ -494,7 +550,7 @@
                  exception,
                  SystemExceptionCodes.FAILED_TO_CONNECT_TO_WORKER_THREAD_SOCKET.value,
                  self.send_system_exception,
-                 may_contain_user_data=False
+                 may_contain_user_data=False,
              ) from exception

          try:
@@ -505,7 +561,7 @@
                  exception,
                  SystemExceptionCodes.FAILED_TO_START_SENDER_THREAD_OR_RECEIVER_THREAD.value,
                  self.send_system_exception,
-                 may_contain_user_data=False
+                 may_contain_user_data=False,
              ) from exception

          # TODO: move this mapping logic to the ModuleInput class
@@ -533,7 +589,7 @@
                  exception,
                  SystemExceptionCodes.FAILED_TO_CREATE_NEW_JOB.value,
                  self.send_system_exception,
-                 may_contain_user_data=False
+                 may_contain_user_data=False,
              ) from exception

          return module_input
@@ -559,7 +615,7 @@
                  exception,
                  SystemExceptionCodes.FAILED_TO_DOWNLOAD_RUNTIME_ZIP.value,
                  self.send_system_exception,
-                 may_contain_user_data=False
+                 may_contain_user_data=False,
              ) from exception
          finally:
              download_time = time() - start_time
@@ -605,7 +661,7 @@
                  exception,
                  SystemExceptionCodes.FAILED_TO_SEND_STATUS_UPDATE.value,
                  self.send_system_exception,
-                 may_contain_user_data=False
+                 may_contain_user_data=False,
              ) from exception

      def _run_root_job(self, module_input_path: str) -> str:
biolib/compute_node/job_worker/large_file_system.py
@@ -56,17 +56,13 @@ class LargeFileSystem:
          self._path_on_disk_for_write: Optional[str] = None
          self._send_status_update: Callable[[StatusUpdate], None] = send_status_update

-     @property
-     def _is_initialized(self) -> bool:
-         return self._path_on_disk is not None
-
      @property
      def uuid(self) -> str:
          return self._lfs_mapping['uuid']

      @property
      def docker_mount(self) -> docker.types.Mount:
-         if not self._is_initialized:
+         if not self._path_on_disk:
              raise LargeFileSystemError('LargeFileSystem not initialized')

          return docker.types.Mount(
@@ -77,7 +73,7 @@
          )

      def initialize(self) -> None:
-         if self._is_initialized:
+         if self._path_on_disk:
              logger_no_user_data.debug(f'LFS {self.uuid} is already initialized')
              return

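One behavioral nuance in the hunks above: the removed property tested `self._path_on_disk is not None`, while the inlined check tests truthiness, so an empty-string path now also counts as uninitialized:

    path_on_disk = ''
    print(path_on_disk is not None)  # True  -> old check: "initialized"
    print(bool(path_on_disk))        # False -> new check: "not initialized"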
biolib/compute_node/job_worker/network_alloc.py (new file)
@@ -0,0 +1,99 @@
+ import hashlib
+ import ipaddress
+ import uuid
+ from typing import Dict, Optional, cast
+
+ from docker.errors import APIError
+ from docker.models.networks import Network
+ from docker.types import IPAMConfig, IPAMPool
+
+ from biolib.biolib_errors import BioLibError
+ from biolib.biolib_logging import logger_no_user_data
+ from biolib.compute_node.remote_host_proxy import get_static_ip_from_network
+
+
+ def _iter_network_subnets(existing_network):
+     ipam_config = existing_network.attrs.get('IPAM', {}).get('Config', [])
+     for cfg in ipam_config:
+         subnet_str = cfg.get('Subnet')
+         if not subnet_str:
+             continue
+         try:
+             yield ipaddress.ip_network(subnet_str, strict=False)
+         except ValueError:
+             continue
+
+
+ def _find_overlap(candidate_network, existing_networks):
+     for existing in existing_networks:
+         for subnet in _iter_network_subnets(existing):
+             if candidate_network.overlaps(subnet):
+                 return existing, str(subnet)
+     return None
+
+
+ def _allocate_network_with_retries(
+     name_prefix: str,
+     docker_client,
+     internal: bool = True,
+     driver: str = 'bridge',
+     max_attempts: int = 10,
+     labels: Optional[Dict[str, str]] = None,
+ ) -> Network:
+     base_network = ipaddress.ip_network('172.28.0.0/16', strict=False)
+
+     suffix = uuid.uuid4().hex
+     full_name = f'{name_prefix}{suffix}'
+     name_hash = int(hashlib.sha256(full_name.encode()).hexdigest(), 16)
+     starting_offset = name_hash % 256
+
+     for attempt in range(max_attempts):
+         offset = (starting_offset + attempt) % 256
+
+         if base_network.prefixlen == 16:
+             third_octet = offset
+             candidate_subnet = f'{base_network.network_address.exploded.rsplit(".", 2)[0]}.{third_octet}.0/24'
+         else:
+             candidate_subnet = f'{base_network.network_address.exploded.rsplit(".", 1)[0]}.{offset}.0/24'
+
+         candidate_network = ipaddress.ip_network(candidate_subnet, strict=False)
+
+         existing_networks = docker_client.networks.list()
+         overlap = _find_overlap(candidate_network, existing_networks)
+         if overlap:
+             existing_network, existing_subnet = overlap
+             logger_no_user_data.debug(
+                 f'Subnet {candidate_subnet} conflicts with existing network '
+                 f'{existing_network.name} ({existing_subnet}), trying next candidate'
+             )
+             continue
+
+         ipam_pool = IPAMPool(subnet=candidate_subnet)
+         computed_ipam_config = IPAMConfig(pool_configs=[ipam_pool])
+
+         try:
+             network = cast(
+                 Network,
+                 docker_client.networks.create(
+                     name=full_name,
+                     internal=internal,
+                     driver=driver,
+                     ipam=computed_ipam_config,
+                     labels=labels or {},
+                 ),
+             )
+             static_ip = get_static_ip_from_network(network, offset=2)
+             logger_no_user_data.debug(
+                 f'Created network {full_name} with subnet {candidate_subnet} and static IP {static_ip}'
+             )
+             return network
+         except APIError as api_error:
+             logger_no_user_data.debug(
+                 f'Network creation failed with Docker API error for subnet {candidate_subnet}: {api_error}, '
+                 f'trying next candidate (attempt {attempt + 1}/{max_attempts})'
+             )
+             continue
+
+     raise BioLibError(
+         f'Failed to allocate and create network {full_name} after {max_attempts} attempts. ' f'Base CIDR: 172.28.0.0/16'
+     )