pybiolib-1.2.883-py3-none-any.whl → pybiolib-1.2.1890-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. biolib/__init__.py +33 -10
  2. biolib/_data_record/data_record.py +220 -126
  3. biolib/_index/index.py +55 -0
  4. biolib/_index/query_result.py +103 -0
  5. biolib/_internal/add_copilot_prompts.py +24 -11
  6. biolib/_internal/add_gui_files.py +81 -0
  7. biolib/_internal/data_record/__init__.py +1 -1
  8. biolib/_internal/data_record/data_record.py +1 -18
  9. biolib/_internal/data_record/push_data.py +65 -16
  10. biolib/_internal/data_record/remote_storage_endpoint.py +18 -13
  11. biolib/_internal/file_utils.py +48 -0
  12. biolib/_internal/lfs/cache.py +4 -2
  13. biolib/_internal/push_application.py +95 -24
  14. biolib/_internal/runtime.py +2 -0
  15. biolib/_internal/string_utils.py +13 -0
  16. biolib/_internal/{llm_instructions → templates/copilot_template}/.github/instructions/style-general.instructions.md +5 -0
  17. biolib/_internal/templates/copilot_template/.github/instructions/style-react-ts.instructions.md +47 -0
  18. biolib/_internal/templates/copilot_template/.github/prompts/biolib_onboard_repo.prompt.md +19 -0
  19. biolib/_internal/templates/dashboard_template/.biolib/config.yml +5 -0
  20. biolib/_internal/templates/{init_template → github_workflow_template}/.github/workflows/biolib.yml +7 -2
  21. biolib/_internal/templates/gitignore_template/.gitignore +10 -0
  22. biolib/_internal/templates/gui_template/.yarnrc.yml +1 -0
  23. biolib/_internal/templates/gui_template/App.tsx +53 -0
  24. biolib/_internal/templates/gui_template/Dockerfile +27 -0
  25. biolib/_internal/templates/gui_template/biolib-sdk.ts +82 -0
  26. biolib/_internal/templates/gui_template/dev-data/output.json +7 -0
  27. biolib/_internal/templates/gui_template/index.css +5 -0
  28. biolib/_internal/templates/gui_template/index.html +13 -0
  29. biolib/_internal/templates/gui_template/index.tsx +10 -0
  30. biolib/_internal/templates/gui_template/package.json +27 -0
  31. biolib/_internal/templates/gui_template/tsconfig.json +24 -0
  32. biolib/_internal/templates/gui_template/vite-plugin-dev-data.ts +50 -0
  33. biolib/_internal/templates/gui_template/vite.config.mts +10 -0
  34. biolib/_internal/templates/init_template/.biolib/config.yml +1 -0
  35. biolib/_internal/templates/init_template/Dockerfile +5 -1
  36. biolib/_internal/templates/init_template/run.py +6 -15
  37. biolib/_internal/templates/init_template/run.sh +1 -0
  38. biolib/_internal/templates/templates.py +21 -1
  39. biolib/_internal/utils/__init__.py +47 -0
  40. biolib/_internal/utils/auth.py +46 -0
  41. biolib/_internal/utils/job_url.py +33 -0
  42. biolib/_internal/utils/multinode.py +12 -14
  43. biolib/_runtime/runtime.py +15 -2
  44. biolib/_session/session.py +7 -5
  45. biolib/_shared/__init__.py +0 -0
  46. biolib/_shared/types/__init__.py +74 -0
  47. biolib/_shared/types/account.py +12 -0
  48. biolib/_shared/types/account_member.py +8 -0
  49. biolib/{_internal → _shared}/types/experiment.py +1 -0
  50. biolib/_shared/types/resource.py +37 -0
  51. biolib/_shared/types/resource_deploy_key.py +11 -0
  52. biolib/{_internal → _shared}/types/resource_version.py +8 -2
  53. biolib/_shared/types/user.py +19 -0
  54. biolib/_shared/utils/__init__.py +7 -0
  55. biolib/_shared/utils/resource_uri.py +75 -0
  56. biolib/api/client.py +5 -48
  57. biolib/app/app.py +97 -55
  58. biolib/biolib_api_client/api_client.py +3 -47
  59. biolib/biolib_api_client/app_types.py +1 -1
  60. biolib/biolib_api_client/biolib_app_api.py +31 -6
  61. biolib/biolib_api_client/biolib_job_api.py +1 -1
  62. biolib/biolib_api_client/user_state.py +34 -2
  63. biolib/biolib_binary_format/module_input.py +8 -0
  64. biolib/biolib_binary_format/remote_endpoints.py +3 -3
  65. biolib/biolib_binary_format/remote_stream_seeker.py +39 -25
  66. biolib/biolib_logging.py +1 -1
  67. biolib/cli/__init__.py +2 -2
  68. biolib/cli/auth.py +4 -16
  69. biolib/cli/data_record.py +82 -0
  70. biolib/cli/index.py +32 -0
  71. biolib/cli/init.py +393 -71
  72. biolib/cli/lfs.py +1 -1
  73. biolib/cli/run.py +9 -6
  74. biolib/cli/start.py +14 -1
  75. biolib/compute_node/job_worker/executors/docker_executor.py +31 -9
  76. biolib/compute_node/job_worker/executors/docker_types.py +1 -1
  77. biolib/compute_node/job_worker/executors/types.py +6 -5
  78. biolib/compute_node/job_worker/job_storage.py +2 -1
  79. biolib/compute_node/job_worker/job_worker.py +155 -90
  80. biolib/compute_node/job_worker/large_file_system.py +2 -6
  81. biolib/compute_node/job_worker/network_alloc.py +99 -0
  82. biolib/compute_node/job_worker/network_buffer.py +240 -0
  83. biolib/compute_node/job_worker/utilization_reporter_thread.py +2 -2
  84. biolib/compute_node/remote_host_proxy.py +163 -79
  85. biolib/compute_node/utils.py +2 -0
  86. biolib/compute_node/webserver/compute_node_results_proxy.py +189 -0
  87. biolib/compute_node/webserver/proxy_utils.py +28 -0
  88. biolib/compute_node/webserver/webserver.py +64 -19
  89. biolib/experiments/experiment.py +111 -16
  90. biolib/jobs/job.py +128 -31
  91. biolib/jobs/job_result.py +74 -34
  92. biolib/jobs/types.py +1 -0
  93. biolib/sdk/__init__.py +28 -3
  94. biolib/typing_utils.py +1 -1
  95. biolib/utils/cache_state.py +8 -5
  96. biolib/utils/multipart_uploader.py +24 -18
  97. biolib/utils/seq_util.py +1 -1
  98. pybiolib-1.2.1890.dist-info/METADATA +41 -0
  99. pybiolib-1.2.1890.dist-info/RECORD +177 -0
  100. {pybiolib-1.2.883.dist-info → pybiolib-1.2.1890.dist-info}/WHEEL +1 -1
  101. pybiolib-1.2.1890.dist-info/entry_points.txt +2 -0
  102. biolib/_internal/llm_instructions/.github/instructions/style-react-ts.instructions.md +0 -22
  103. biolib/_internal/templates/init_template/.gitignore +0 -2
  104. biolib/_internal/types/__init__.py +0 -6
  105. biolib/_internal/types/resource.py +0 -18
  106. biolib/biolib_download_container.py +0 -38
  107. biolib/cli/download_container.py +0 -14
  108. biolib/utils/app_uri.py +0 -57
  109. pybiolib-1.2.883.dist-info/METADATA +0 -50
  110. pybiolib-1.2.883.dist-info/RECORD +0 -148
  111. pybiolib-1.2.883.dist-info/entry_points.txt +0 -3
  112. /biolib/{_internal/llm_instructions → _index}/__init__.py +0 -0
  113. /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/instructions/general-app-knowledge.instructions.md +0 -0
  114. /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/instructions/style-python.instructions.md +0 -0
  115. /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/prompts/biolib_app_inputs.prompt.md +0 -0
  116. /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/prompts/biolib_run_apps.prompt.md +0 -0
  117. /biolib/{_internal → _shared}/types/app.py +0 -0
  118. /biolib/{_internal → _shared}/types/data_record.py +0 -0
  119. /biolib/{_internal → _shared}/types/file_node.py +0 -0
  120. /biolib/{_internal → _shared}/types/push.py +0 -0
  121. /biolib/{_internal → _shared}/types/resource_permission.py +0 -0
  122. /biolib/{_internal → _shared}/types/result.py +0 -0
  123. /biolib/{_internal → _shared}/types/typing.py +0 -0
  124. {pybiolib-1.2.883.dist-info → pybiolib-1.2.1890.dist-info/licenses}/LICENSE +0 -0
biolib/compute_node/job_worker/job_worker.py
@@ -1,50 +1,63 @@
+import hashlib
 import io
 import json
-import socket
+import multiprocessing
+import os
 import shlex
+import signal
+import socket
 import sys
 import tempfile
 import zipfile
-from time import time
 from queue import Queue
-import multiprocessing
-import os
-import signal
+from time import time
 from types import FrameType

 from docker.models.networks import Network  # type: ignore
+from docker.types import IPAMConfig, IPAMPool  # type: ignore

-from biolib._internal.http_client import HttpClient
-from biolib.biolib_binary_format.stdout_and_stderr import StdoutAndStderr
-from biolib.compute_node.job_worker.job_legacy_input_wait_timeout_thread import JobLegacyInputWaitTimeout
-from biolib.compute_node.job_worker.job_storage import JobStorage
-from biolib.compute_node.job_worker.large_file_system import LargeFileSystem
-from biolib.biolib_errors import DockerContainerNotFoundDuringExecutionException, BioLibError, \
-    StorageDownloadFailed
-from biolib.compute_node.job_worker.job_max_runtime_timer_thread import JobMaxRuntimeTimerThread
-from biolib.compute_node.remote_host_proxy import RemoteHostProxy
-from biolib.typing_utils import Optional, List, Dict
 from biolib import utils
-from biolib.biolib_api_client import ModuleEnvironment, CreatedJobDict, JobWrapper, Module, AppVersionOnJob, \
-    BiolibApiClient, RemoteHost
+from biolib._internal.http_client import HttpClient
+from biolib.biolib_api_client import (
+    AppVersionOnJob,
+    BiolibApiClient,
+    CreatedJobDict,
+    JobWrapper,
+    Module,
+    ModuleEnvironment,
+)
 from biolib.biolib_api_client.biolib_job_api import BiolibJobApi
+from biolib.biolib_binary_format import (
+    InMemoryIndexableBuffer,
+    ModuleInput,
+    ModuleOutputV2,
+    SavedJob,
+    SystemException,
+    SystemStatusUpdate,
+)
+from biolib.biolib_binary_format.stdout_and_stderr import StdoutAndStderr
 from biolib.biolib_docker_client import BiolibDockerClient
+from biolib.biolib_errors import BioLibError, DockerContainerNotFoundDuringExecutionException, StorageDownloadFailed
 from biolib.biolib_logging import logger, logger_no_user_data
 from biolib.compute_node.job_worker.executors import DockerExecutor
 from biolib.compute_node.job_worker.executors.types import LocalExecutorOptions, StatusUpdate
-from biolib.compute_node.socker_listener_thread import SocketListenerThread
-from biolib.compute_node.socket_sender_thread import SocketSenderThread
+from biolib.compute_node.job_worker.job_legacy_input_wait_timeout_thread import JobLegacyInputWaitTimeout
+from biolib.compute_node.job_worker.job_max_runtime_timer_thread import JobMaxRuntimeTimerThread
+from biolib.compute_node.job_worker.job_storage import JobStorage
+from biolib.compute_node.job_worker.large_file_system import LargeFileSystem
 from biolib.compute_node.job_worker.mappings import Mappings, path_without_first_folder
+from biolib.compute_node.job_worker.network_buffer import NetworkBuffer
 from biolib.compute_node.job_worker.utils import ComputeProcessException, log_disk_and_memory_usage_info
-from biolib.compute_node.utils import get_package_type, SystemExceptionCodes, SystemExceptionCodeMap
-from biolib.biolib_binary_format import SavedJob, SystemStatusUpdate, ModuleInput, SystemException, \
-    ModuleOutputV2, InMemoryIndexableBuffer
+from biolib.compute_node.remote_host_proxy import RemoteHostMapping, RemoteHostProxy, get_static_ip_from_network
+from biolib.compute_node.socker_listener_thread import SocketListenerThread
+from biolib.compute_node.socket_sender_thread import SocketSenderThread
+from biolib.compute_node.utils import SystemExceptionCodeMap, SystemExceptionCodes, get_package_type
+from biolib.typing_utils import Dict, List, Optional

 SOCKET_HOST = '127.0.0.1'


 class JobWorkerProcess(multiprocessing.Process):
-
     # note: this method is run in the parent process
     def __init__(self, socket_port: int, log_level: int):
         super().__init__()
@@ -69,6 +82,13 @@ class JobWorker:
         # handle termination signal from parent
         signal.signal(signal.SIGTERM, self._handle_exit_gracefully)

+        try:
+            docker_client = BiolibDockerClient.get_docker_client()
+            networks = docker_client.networks.list()
+            logger_no_user_data.debug(f'Docker networks at JobWorker init: {[net.name for net in networks]}')
+        except Exception as error:
+            logger_no_user_data.debug(f'Failed to list docker networks at init: {error}')
+
         self._socket_port = socket_port
         self._received_messages_queue: Queue = Queue()
         self._messages_to_send_queue: Queue = Queue()
@@ -80,9 +100,9 @@ class JobWorker:

         self._remote_host_proxies: List[RemoteHostProxy] = []
         self._internal_network: Optional[Network] = None
-        self._public_network: Optional[Network] = None
         self._executors: List[DockerExecutor] = []
         self.is_cleaning_up: bool = False
+        self._network_buffer = NetworkBuffer.get_instance()

         self.job_temporary_dir: Optional[str] = None

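The `NetworkBuffer` picked up here replaces the per-job `_public_network`. Its implementation lives in the new biolib/compute_node/job_worker/network_buffer.py (+240 lines), which this diff does not show. Below is a minimal sketch, not the real class, of the pattern the JobWorker call sites rely on: a process-wide singleton that pre-creates Docker bridge networks so job startup skips network-creation latency, and that is topped up again during cleanup. Only `get_instance`, `allocate_networks`, and `fill_buffer` are taken from the call sites in this diff; the class name, target size, naming scheme, and locking details are assumptions.

# Sketch only -- the real NetworkBuffer is not included in this diff.
import threading
import uuid
from typing import List

import docker  # type: ignore
from docker.models.networks import Network  # type: ignore


class NetworkBufferSketch:
    _instance = None
    _lock = threading.Lock()

    def __init__(self, target_size: int = 4) -> None:
        self._target_size = target_size
        self._free_networks: List[Network] = []

    @classmethod
    def get_instance(cls) -> 'NetworkBufferSketch':
        # Process-wide singleton, matching the NetworkBuffer.get_instance() call above
        with cls._lock:
            if cls._instance is None:
                cls._instance = cls()
            return cls._instance

    def allocate_networks(self, job_id: str, count: int) -> List[Network]:
        # Hand out pre-created networks first; create on demand if the buffer runs dry.
        # job_id is presumably used for naming/labeling in the real implementation.
        with self._lock:
            allocated = [self._free_networks.pop() for _ in range(min(count, len(self._free_networks)))]
        while len(allocated) < count:
            allocated.append(self._create_network())
        return allocated

    def fill_buffer(self) -> int:
        # Top the buffer back up to target_size; returns how many networks were created
        created = 0
        with self._lock:
            while len(self._free_networks) < self._target_size:
                self._free_networks.append(self._create_network())
                created += 1
        return created

    @staticmethod
    def _create_network() -> Network:
        client = docker.from_env()
        return client.networks.create(
            name=f'biolib-buffered-network-{uuid.uuid4().hex[:8]}',
            internal=True,
            driver='bridge',
        )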
@@ -91,18 +111,18 @@ class JobWorker:
                 exception,
                 SystemExceptionCodes.FAILED_TO_INIT_COMPUTE_PROCESS_VARIABLES.value,
                 self.send_system_exception,
-                may_contain_user_data=False
+                may_contain_user_data=False,
             ) from exception

         if socket_port:
             self._connect_to_parent()

     def _handle_exit_gracefully(
-            self,
-            signum: int,
-            frame: Optional[FrameType],  # pylint: disable=unused-argument
+        self,
+        signum: int,
+        frame: Optional[FrameType],  # pylint: disable=unused-argument
     ) -> None:
-        job_id = self._root_job_wrapper["job"]["public_id"] if self._root_job_wrapper else None
+        job_id = self._root_job_wrapper['job']['public_id'] if self._root_job_wrapper else None
         logger_no_user_data.debug(
             f'_JobWorker ({job_id}) got exit signal {signal.Signals(signum).name}'  # pylint: disable=no-member
         )
@@ -187,9 +207,7 @@ class JobWorker:

         except Exception as exception:
             raise ComputeProcessException(
-                exception,
-                SystemExceptionCodes.UNKNOWN_COMPUTE_PROCESS_ERROR.value,
-                self.send_system_exception
+                exception, SystemExceptionCodes.UNKNOWN_COMPUTE_PROCESS_ERROR.value, self.send_system_exception
             ) from exception

     def _cleanup(self) -> None:
@@ -200,6 +218,8 @@ class JobWorker:
             executor.cleanup()

         proxy_count = len(self._remote_host_proxies)
+        cleaned_networks = set()
+
         if proxy_count > 0:
             logger_no_user_data.debug('Cleaning up proxies...')
             proxy_cleanup_start_time = time()
@@ -211,21 +231,37 @@ class JobWorker:
                     logger_no_user_data.error('Failed to clean up remote host proxy')
                     logger.error(exception)

+                for network in proxy.get_remote_host_networks():
+                    try:
+                        self._cleanup_network(network)
+                        cleaned_networks.add(network.id)
+                    except Exception as exception:  # pylint: disable=broad-except
+                        logger_no_user_data.error(f'Failed to clean up network {network.name}')
+                        logger.error(exception)
+
             self._remote_host_proxies = []
             logger_no_user_data.debug(f'Cleaned up {proxy_count} proxies in {time() - proxy_cleanup_start_time}')

         logger_no_user_data.debug('Cleaning up networks...')
-        self._cleanup_network(self._internal_network)
+        if self._internal_network and self._internal_network.id not in cleaned_networks:
+            self._cleanup_network(self._internal_network)
         self._internal_network = None
-        self._cleanup_network(self._public_network)
-        self._public_network = None
+
+        try:
+            logger_no_user_data.debug('Refilling network buffer...')
+            created = self._network_buffer.fill_buffer()
+            logger_no_user_data.debug(f'Refilled buffer with {created} new networks')
+        except Exception as exception:  # pylint: disable=broad-except
+            logger_no_user_data.error('Failed to refill network buffer')
+            logger.error(exception)
+
         logger_no_user_data.debug('Cleaned up networks...')

     @staticmethod
     def _cleanup_network(network: Optional[Network]) -> None:
         if network:
             network_cleanup_start_time = time()
-            network_name = network
+            network_name = network.name
             try:
                 network.remove()
             except Exception as exception:  # pylint: disable=broad-except
@@ -237,10 +273,7 @@ class JobWorker:
     def _handle_save_job_wrapper(self, package: bytes):
         job_wrapper_json_string = SavedJob(package).deserialize()
         job_wrapper: JobWrapper = json.loads(job_wrapper_json_string)
-        BiolibApiClient.initialize(
-            base_url=job_wrapper['BASE_URL'],
-            access_token=job_wrapper['access_token']
-        )
+        BiolibApiClient.initialize(base_url=job_wrapper['BASE_URL'], access_token=job_wrapper['access_token'])
         self._root_job_wrapper = job_wrapper
         if not utils.IS_RUNNING_IN_CLOUD:
             job_wrapper['cloud_job'] = None
@@ -250,6 +283,15 @@ class JobWorker:
         job = job_wrapper['job']
         self._jobs[job['public_id']] = job

+        app_version = job['app_version']
+        modules = app_version.get('modules', [])
+        for module in modules:
+            module_ports = module.get('ports', [])
+            if module_ports:
+                logger_no_user_data.debug(
+                    f"Job '{job['public_id']}' module '{module['name']}' has ports: {module_ports}"
+                )
+
         if job['app_version'].get('modules') is not None and BiolibDockerClient.is_docker_running():
             self._start_network_and_remote_host_proxies(job)

@@ -259,44 +301,33 @@ class JobWorker:
         app_version = job['app_version']
         job_id = job['public_id']
         remote_hosts = app_version['remote_hosts']
-        if utils.IS_RUNNING_IN_CLOUD:
-            remote_hosts.append(
-                {
-                    'hostname': 'AppCallerProxy',
-                },
-            )
-
         docker_client = BiolibDockerClient.get_docker_client()
         try:
+            name_hash = int(hashlib.sha256(job_id.encode()).hexdigest(), 16)
+            third_octet = name_hash % 256
+            internal_subnet = f'172.29.{third_octet}.0/24'
+
+            ipam_pool = IPAMPool(subnet=internal_subnet)
+            ipam_config = IPAMConfig(pool_configs=[ipam_pool])
+
             self._internal_network = docker_client.networks.create(
                 name=f'biolib-sandboxed-network-{job_id}',
                 internal=True,
                 driver='bridge',
+                ipam=ipam_config,
             )
+            logger_no_user_data.debug(f'Created internal network for job {job_id} with subnet {internal_subnet}')
         except Exception as exception:
             raise ComputeProcessException(
                 exception,
                 SystemExceptionCodes.FAILED_TO_CREATE_DOCKER_NETWORKS.value,
                 self.send_system_exception,
-                may_contain_user_data=False
+                may_contain_user_data=False,
             ) from exception

         if len(remote_hosts) > 0:
-            logger_no_user_data.debug(f'Job "{job_id}" creating networks for remote host proxies...')
-            try:
-                self._public_network = docker_client.networks.create(
-                    name=f'biolib-proxy-network-{job_id}',
-                    internal=False,
-                    driver='bridge',
-                )
-            except Exception as exception:
-                raise ComputeProcessException(
-                    exception,
-                    SystemExceptionCodes.FAILED_TO_CREATE_DOCKER_NETWORKS.value,
-                    self.send_system_exception,
-                    may_contain_user_data=False
-                ) from exception
-            logger_no_user_data.debug(f'Job "{job_id}" starting proxies for remote hosts: {remote_hosts}')
+            logger_no_user_data.debug(f'Job "{job_id}" starting proxy for remote hosts: {remote_hosts}')
+            created_networks: List[Network] = []
             try:
                 hostname_to_ports: Dict[str, List[int]] = {}
                 for remote_host in remote_hosts:
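The internal network above now gets a deterministic IPAM subnet derived from the job id: the SHA-256 of the id, reduced modulo 256, picks the third octet of a /24 under 172.29.0.0/16. A worked example with a made-up job id:

# Worked example of the subnet derivation in the hunk above (job id is made up).
import hashlib

job_id = 'a3c9e2f1-0000-0000-0000-000000000000'  # hypothetical
name_hash = int(hashlib.sha256(job_id.encode()).hexdigest(), 16)
third_octet = name_hash % 256  # deterministic value in 0..255
internal_subnet = f'172.29.{third_octet}.0/24'
print(internal_subnet)  # one of 256 candidate /24 subnets, e.g. 172.29.147.0/24

Two concurrent jobs can hash to the same octet, and this path has no overlap retry, so a subnet collision would presumably surface as the FAILED_TO_CREATE_DOCKER_NETWORKS error handled just below; the buffered networks created by network_alloc.py (at the end of this diff) do retry on overlap.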
@@ -312,33 +343,67 @@ class JobWorker:
                     else:
                         hostname_to_ports[hostname] = [port]

-                for hostname, ports in hostname_to_ports.items():
+                remote_host_mappings: List[RemoteHostMapping] = []
+                networks = self._network_buffer.allocate_networks(job_id, len(hostname_to_ports))
+                created_networks.extend(networks)
+
+                for (hostname, ports), network in zip(hostname_to_ports.items(), networks):
+                    static_ip = get_static_ip_from_network(network, offset=2)
+
+                    mapping = RemoteHostMapping(
+                        hostname=hostname,
+                        ports=ports,
+                        network=network,
+                        static_ip=static_ip,
+                    )
+                    remote_host_mappings.append(mapping)
+
+                if remote_host_mappings:
                     remote_host_proxy = RemoteHostProxy(
-                        RemoteHost(hostname=hostname),
-                        self._public_network,
-                        self._internal_network,
-                        job_id,
-                        ports,
+                        remote_host_mappings=remote_host_mappings,
+                        job=job,
+                        app_caller_network=None,
                     )
                     remote_host_proxy.start()
                     self._remote_host_proxies.append(remote_host_proxy)
+                    num_hosts = len(remote_host_mappings)
+                    logger_no_user_data.debug(f'Started single proxy container for {num_hosts} remote hosts')

             except Exception as exception:
+                for network in created_networks:
+                    self._cleanup_network(network)
+
                 raise ComputeProcessException(
                     exception,
                     SystemExceptionCodes.FAILED_TO_START_REMOTE_HOST_PROXIES.value,
                     self.send_system_exception,
-                    may_contain_user_data=False
+                    may_contain_user_data=False,
                 ) from exception

-        logger_no_user_data.debug(f'Job "{job_id}" startup of remote host proxies completed')
+        if utils.IS_RUNNING_IN_CLOUD:
+            try:
+                app_caller_proxy = RemoteHostProxy(
+                    remote_host_mappings=[],
+                    job=job,
+                    app_caller_network=self._internal_network,
+                )
+                app_caller_proxy.start()
+                self._remote_host_proxies.append(app_caller_proxy)
+                logger_no_user_data.debug('Started app caller proxy')
+            except Exception as exception:
+                raise ComputeProcessException(
+                    exception,
+                    SystemExceptionCodes.FAILED_TO_START_REMOTE_HOST_PROXIES.value,
+                    self.send_system_exception,
+                    may_contain_user_data=False,
+                ) from exception

     def _run_app_version(
-            self,
-            app_version_id: str,
-            module_input_path: str,
-            caller_job: CreatedJobDict,
-            main_module_output_path: str,
+        self,
+        app_version_id: str,
+        module_input_path: str,
+        caller_job: CreatedJobDict,
+        main_module_output_path: str,
     ) -> None:
         job: CreatedJobDict = BiolibJobApi.create(app_version_id, caller_job=caller_job['public_id'])
         self._jobs[job['public_id']] = job
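`get_static_ip_from_network` is imported from `remote_host_proxy`, whose changes (+163 -79) are not shown here, so its body is unknown in this diff. A plausible sketch, assuming it reads the network's IPAM subnet and returns the address at the given offset (`offset=2` yielding x.x.x.2, since x.x.x.1 is conventionally taken by the bridge gateway):

# Hypothetical reconstruction -- the real function lives in remote_host_proxy.py.
import ipaddress

from docker.models.networks import Network  # type: ignore


def get_static_ip_from_network_sketch(network: Network, offset: int) -> str:
    # Read the first IPAM subnet off the network's attributes
    ipam_configs = network.attrs.get('IPAM', {}).get('Config', [])
    if not ipam_configs or 'Subnet' not in ipam_configs[0]:
        raise ValueError(f'Network {network.name} has no IPAM subnet configured')
    subnet = ipaddress.ip_network(ipam_configs[0]['Subnet'], strict=False)
    return str(subnet.network_address + offset)  # e.g. '172.28.57.2' for 172.28.57.0/24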
@@ -357,17 +422,17 @@ class JobWorker:
         root_job_id = root_job['public_id']
         if job.get('arguments_override_command') and not job['app_version']['app']['allow_client_side_execution']:
             raise ComputeProcessException(
-                Exception("Command override not allowed"),
+                Exception('Command override not allowed'),
                 SystemExceptionCodes.COMMAND_OVERRIDE_NOT_ALLOWED.value,
-                self.send_system_exception
+                self.send_system_exception,
             )

         modules = job['app_version'].get('modules')
         if not modules:
             raise ComputeProcessException(
-                Exception("No modules found on job"),
+                Exception('No modules found on job'),
                 SystemExceptionCodes.NO_MODULES_FOUND_ON_JOB.value,
-                self.send_system_exception
+                self.send_system_exception,
             )

         main_module = self._get_module_from_name(modules, module_name='main')
@@ -420,8 +485,8 @@ class JobWorker:
             log_disk_and_memory_usage_info()

     def _run_module(
-            self,
-            options: LocalExecutorOptions,
+        self,
+        options: LocalExecutorOptions,
     ) -> None:
         module = options['module']
         job_id = options['job']['public_id']
@@ -434,7 +499,7 @@ class JobWorker:
             if not self.job_temporary_dir:
                 raise BioLibError('Undefined job_temporary_dir')
             logger_no_user_data.debug(f'Job "{job_id}" starting child job...')
-            with open(module_input_path,'rb') as fp:
+            with open(module_input_path, 'rb') as fp:
                 module_input_serialized = fp.read()
             module_input = ModuleInput(module_input_serialized).deserialize()
             module_input_with_runtime_zip = self._add_runtime_zip_and_command_to_module_input(options, module_input)
@@ -443,7 +508,7 @@ class JobWorker:
                 arguments=module_input_with_runtime_zip['arguments'],
                 files=module_input_with_runtime_zip['files'],
             )
-            module_input_path_new = os.path.join(self.job_temporary_dir, "runtime." + JobStorage.module_input_file_name)
+            module_input_path_new = os.path.join(self.job_temporary_dir, 'runtime.' + JobStorage.module_input_file_name)
             open(module_input_path_new, 'wb').write(module_input_with_runtime_zip_serialized)
             return self._run_app_version(
                 module['image_uri'],
@@ -460,7 +525,7 @@ class JobWorker:
                     exception,
                     SystemExceptionCodes.FAILED_TO_INITIALIZE_DOCKER_EXECUTOR.value,
                     self.send_system_exception,
-                    may_contain_user_data=False
+                    may_contain_user_data=False,
                 ) from exception
         else:
             err_string = f'Job "{job_id}" hit unsupported module environment "{module["environment"]}"'
@@ -485,7 +550,7 @@ class JobWorker:
                 exception,
                 SystemExceptionCodes.FAILED_TO_CONNECT_TO_WORKER_THREAD_SOCKET.value,
                 self.send_system_exception,
-                may_contain_user_data=False
+                may_contain_user_data=False,
             ) from exception

         try:
@@ -496,7 +561,7 @@ class JobWorker:
                 exception,
                 SystemExceptionCodes.FAILED_TO_START_SENDER_THREAD_OR_RECEIVER_THREAD.value,
                 self.send_system_exception,
-                may_contain_user_data=False
+                may_contain_user_data=False,
             ) from exception

         # TODO: move this mapping logic to the ModuleInput class
@@ -524,7 +589,7 @@ class JobWorker:
                 exception,
                 SystemExceptionCodes.FAILED_TO_CREATE_NEW_JOB.value,
                 self.send_system_exception,
-                may_contain_user_data=False
+                may_contain_user_data=False,
             ) from exception

         return module_input
@@ -550,7 +615,7 @@ class JobWorker:
                 exception,
                 SystemExceptionCodes.FAILED_TO_DOWNLOAD_RUNTIME_ZIP.value,
                 self.send_system_exception,
-                may_contain_user_data=False
+                may_contain_user_data=False,
             ) from exception
         finally:
             download_time = time() - start_time
@@ -596,7 +661,7 @@ class JobWorker:
                 exception,
                 SystemExceptionCodes.FAILED_TO_SEND_STATUS_UPDATE.value,
                 self.send_system_exception,
-                may_contain_user_data=False
+                may_contain_user_data=False,
             ) from exception

     def _run_root_job(self, module_input_path: str) -> str:
biolib/compute_node/job_worker/large_file_system.py
@@ -56,17 +56,13 @@ class LargeFileSystem:
         self._path_on_disk_for_write: Optional[str] = None
         self._send_status_update: Callable[[StatusUpdate], None] = send_status_update

-    @property
-    def _is_initialized(self) -> bool:
-        return self._path_on_disk is not None
-
     @property
     def uuid(self) -> str:
         return self._lfs_mapping['uuid']

     @property
     def docker_mount(self) -> docker.types.Mount:
-        if not self._is_initialized:
+        if not self._path_on_disk:
             raise LargeFileSystemError('LargeFileSystem not initialized')

         return docker.types.Mount(
@@ -77,7 +73,7 @@ class LargeFileSystem:
         )

     def initialize(self) -> None:
-        if self._is_initialized:
+        if self._path_on_disk:
             logger_no_user_data.debug(f'LFS {self.uuid} is already initialized')
             return

biolib/compute_node/job_worker/network_alloc.py (new file)
@@ -0,0 +1,99 @@
+import hashlib
+import ipaddress
+import uuid
+from typing import Dict, Optional, cast
+
+from docker.errors import APIError
+from docker.models.networks import Network
+from docker.types import IPAMConfig, IPAMPool
+
+from biolib.biolib_errors import BioLibError
+from biolib.biolib_logging import logger_no_user_data
+from biolib.compute_node.remote_host_proxy import get_static_ip_from_network
+
+
+def _iter_network_subnets(existing_network):
+    ipam_config = existing_network.attrs.get('IPAM', {}).get('Config', [])
+    for cfg in ipam_config:
+        subnet_str = cfg.get('Subnet')
+        if not subnet_str:
+            continue
+        try:
+            yield ipaddress.ip_network(subnet_str, strict=False)
+        except ValueError:
+            continue
+
+
+def _find_overlap(candidate_network, existing_networks):
+    for existing in existing_networks:
+        for subnet in _iter_network_subnets(existing):
+            if candidate_network.overlaps(subnet):
+                return existing, str(subnet)
+    return None
+
+
+def _allocate_network_with_retries(
+    name_prefix: str,
+    docker_client,
+    internal: bool = True,
+    driver: str = 'bridge',
+    max_attempts: int = 10,
+    labels: Optional[Dict[str, str]] = None,
+) -> Network:
+    base_network = ipaddress.ip_network('172.28.0.0/16', strict=False)
+
+    suffix = uuid.uuid4().hex
+    full_name = f'{name_prefix}{suffix}'
+    name_hash = int(hashlib.sha256(full_name.encode()).hexdigest(), 16)
+    starting_offset = name_hash % 256
+
+    for attempt in range(max_attempts):
+        offset = (starting_offset + attempt) % 256
+
+        if base_network.prefixlen == 16:
+            third_octet = offset
+            candidate_subnet = f'{base_network.network_address.exploded.rsplit(".", 2)[0]}.{third_octet}.0/24'
+        else:
+            candidate_subnet = f'{base_network.network_address.exploded.rsplit(".", 1)[0]}.{offset}.0/24'
+
+        candidate_network = ipaddress.ip_network(candidate_subnet, strict=False)
+
+        existing_networks = docker_client.networks.list()
+        overlap = _find_overlap(candidate_network, existing_networks)
+        if overlap:
+            existing_network, existing_subnet = overlap
+            logger_no_user_data.debug(
+                f'Subnet {candidate_subnet} conflicts with existing network '
+                f'{existing_network.name} ({existing_subnet}), trying next candidate'
+            )
+            continue
+
+        ipam_pool = IPAMPool(subnet=candidate_subnet)
+        computed_ipam_config = IPAMConfig(pool_configs=[ipam_pool])
+
+        try:
+            network = cast(
+                Network,
+                docker_client.networks.create(
+                    name=full_name,
+                    internal=internal,
+                    driver=driver,
+                    ipam=computed_ipam_config,
+                    labels=labels or {},
+                ),
+            )
+            static_ip = get_static_ip_from_network(network, offset=2)
+            logger_no_user_data.debug(
+                f'Created network {full_name} with subnet {candidate_subnet} and static IP {static_ip}'
+            )
+            return network
+        except APIError as api_error:
+            logger_no_user_data.debug(
+                f'Network creation failed with Docker API error for subnet {candidate_subnet}: {api_error}, '
+                f'trying next candidate (attempt {attempt + 1}/{max_attempts})'
+            )
+            continue
+
+    raise BioLibError(
+        f'Failed to allocate and create network {full_name} after {max_attempts} attempts. Base CIDR: 172.28.0.0/16'
+    )
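A minimal usage sketch of the allocator above, assuming a running Docker daemon; how NetworkBuffer actually invokes it is not shown in this diff, and the label key below is made up:

# Usage sketch for _allocate_network_with_retries (assumes a local Docker daemon).
import docker  # type: ignore

client = docker.from_env()
network = _allocate_network_with_retries(
    name_prefix='biolib-buffered-network-',
    docker_client=client,
    labels={'biolib-purpose': 'network-buffer'},  # hypothetical label
)
print(network.name)  # e.g. biolib-buffered-network-<32-char-hex-suffix>
network.remove()  # clean up the example network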