pybiolib 1.2.883__py3-none-any.whl → 1.2.1890__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. biolib/__init__.py +33 -10
  2. biolib/_data_record/data_record.py +220 -126
  3. biolib/_index/index.py +55 -0
  4. biolib/_index/query_result.py +103 -0
  5. biolib/_internal/add_copilot_prompts.py +24 -11
  6. biolib/_internal/add_gui_files.py +81 -0
  7. biolib/_internal/data_record/__init__.py +1 -1
  8. biolib/_internal/data_record/data_record.py +1 -18
  9. biolib/_internal/data_record/push_data.py +65 -16
  10. biolib/_internal/data_record/remote_storage_endpoint.py +18 -13
  11. biolib/_internal/file_utils.py +48 -0
  12. biolib/_internal/lfs/cache.py +4 -2
  13. biolib/_internal/push_application.py +95 -24
  14. biolib/_internal/runtime.py +2 -0
  15. biolib/_internal/string_utils.py +13 -0
  16. biolib/_internal/{llm_instructions → templates/copilot_template}/.github/instructions/style-general.instructions.md +5 -0
  17. biolib/_internal/templates/copilot_template/.github/instructions/style-react-ts.instructions.md +47 -0
  18. biolib/_internal/templates/copilot_template/.github/prompts/biolib_onboard_repo.prompt.md +19 -0
  19. biolib/_internal/templates/dashboard_template/.biolib/config.yml +5 -0
  20. biolib/_internal/templates/{init_template → github_workflow_template}/.github/workflows/biolib.yml +7 -2
  21. biolib/_internal/templates/gitignore_template/.gitignore +10 -0
  22. biolib/_internal/templates/gui_template/.yarnrc.yml +1 -0
  23. biolib/_internal/templates/gui_template/App.tsx +53 -0
  24. biolib/_internal/templates/gui_template/Dockerfile +27 -0
  25. biolib/_internal/templates/gui_template/biolib-sdk.ts +82 -0
  26. biolib/_internal/templates/gui_template/dev-data/output.json +7 -0
  27. biolib/_internal/templates/gui_template/index.css +5 -0
  28. biolib/_internal/templates/gui_template/index.html +13 -0
  29. biolib/_internal/templates/gui_template/index.tsx +10 -0
  30. biolib/_internal/templates/gui_template/package.json +27 -0
  31. biolib/_internal/templates/gui_template/tsconfig.json +24 -0
  32. biolib/_internal/templates/gui_template/vite-plugin-dev-data.ts +50 -0
  33. biolib/_internal/templates/gui_template/vite.config.mts +10 -0
  34. biolib/_internal/templates/init_template/.biolib/config.yml +1 -0
  35. biolib/_internal/templates/init_template/Dockerfile +5 -1
  36. biolib/_internal/templates/init_template/run.py +6 -15
  37. biolib/_internal/templates/init_template/run.sh +1 -0
  38. biolib/_internal/templates/templates.py +21 -1
  39. biolib/_internal/utils/__init__.py +47 -0
  40. biolib/_internal/utils/auth.py +46 -0
  41. biolib/_internal/utils/job_url.py +33 -0
  42. biolib/_internal/utils/multinode.py +12 -14
  43. biolib/_runtime/runtime.py +15 -2
  44. biolib/_session/session.py +7 -5
  45. biolib/_shared/__init__.py +0 -0
  46. biolib/_shared/types/__init__.py +74 -0
  47. biolib/_shared/types/account.py +12 -0
  48. biolib/_shared/types/account_member.py +8 -0
  49. biolib/{_internal → _shared}/types/experiment.py +1 -0
  50. biolib/_shared/types/resource.py +37 -0
  51. biolib/_shared/types/resource_deploy_key.py +11 -0
  52. biolib/{_internal → _shared}/types/resource_version.py +8 -2
  53. biolib/_shared/types/user.py +19 -0
  54. biolib/_shared/utils/__init__.py +7 -0
  55. biolib/_shared/utils/resource_uri.py +75 -0
  56. biolib/api/client.py +5 -48
  57. biolib/app/app.py +97 -55
  58. biolib/biolib_api_client/api_client.py +3 -47
  59. biolib/biolib_api_client/app_types.py +1 -1
  60. biolib/biolib_api_client/biolib_app_api.py +31 -6
  61. biolib/biolib_api_client/biolib_job_api.py +1 -1
  62. biolib/biolib_api_client/user_state.py +34 -2
  63. biolib/biolib_binary_format/module_input.py +8 -0
  64. biolib/biolib_binary_format/remote_endpoints.py +3 -3
  65. biolib/biolib_binary_format/remote_stream_seeker.py +39 -25
  66. biolib/biolib_logging.py +1 -1
  67. biolib/cli/__init__.py +2 -2
  68. biolib/cli/auth.py +4 -16
  69. biolib/cli/data_record.py +82 -0
  70. biolib/cli/index.py +32 -0
  71. biolib/cli/init.py +393 -71
  72. biolib/cli/lfs.py +1 -1
  73. biolib/cli/run.py +9 -6
  74. biolib/cli/start.py +14 -1
  75. biolib/compute_node/job_worker/executors/docker_executor.py +31 -9
  76. biolib/compute_node/job_worker/executors/docker_types.py +1 -1
  77. biolib/compute_node/job_worker/executors/types.py +6 -5
  78. biolib/compute_node/job_worker/job_storage.py +2 -1
  79. biolib/compute_node/job_worker/job_worker.py +155 -90
  80. biolib/compute_node/job_worker/large_file_system.py +2 -6
  81. biolib/compute_node/job_worker/network_alloc.py +99 -0
  82. biolib/compute_node/job_worker/network_buffer.py +240 -0
  83. biolib/compute_node/job_worker/utilization_reporter_thread.py +2 -2
  84. biolib/compute_node/remote_host_proxy.py +163 -79
  85. biolib/compute_node/utils.py +2 -0
  86. biolib/compute_node/webserver/compute_node_results_proxy.py +189 -0
  87. biolib/compute_node/webserver/proxy_utils.py +28 -0
  88. biolib/compute_node/webserver/webserver.py +64 -19
  89. biolib/experiments/experiment.py +111 -16
  90. biolib/jobs/job.py +128 -31
  91. biolib/jobs/job_result.py +74 -34
  92. biolib/jobs/types.py +1 -0
  93. biolib/sdk/__init__.py +28 -3
  94. biolib/typing_utils.py +1 -1
  95. biolib/utils/cache_state.py +8 -5
  96. biolib/utils/multipart_uploader.py +24 -18
  97. biolib/utils/seq_util.py +1 -1
  98. pybiolib-1.2.1890.dist-info/METADATA +41 -0
  99. pybiolib-1.2.1890.dist-info/RECORD +177 -0
  100. {pybiolib-1.2.883.dist-info → pybiolib-1.2.1890.dist-info}/WHEEL +1 -1
  101. pybiolib-1.2.1890.dist-info/entry_points.txt +2 -0
  102. biolib/_internal/llm_instructions/.github/instructions/style-react-ts.instructions.md +0 -22
  103. biolib/_internal/templates/init_template/.gitignore +0 -2
  104. biolib/_internal/types/__init__.py +0 -6
  105. biolib/_internal/types/resource.py +0 -18
  106. biolib/biolib_download_container.py +0 -38
  107. biolib/cli/download_container.py +0 -14
  108. biolib/utils/app_uri.py +0 -57
  109. pybiolib-1.2.883.dist-info/METADATA +0 -50
  110. pybiolib-1.2.883.dist-info/RECORD +0 -148
  111. pybiolib-1.2.883.dist-info/entry_points.txt +0 -3
  112. /biolib/{_internal/llm_instructions → _index}/__init__.py +0 -0
  113. /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/instructions/general-app-knowledge.instructions.md +0 -0
  114. /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/instructions/style-python.instructions.md +0 -0
  115. /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/prompts/biolib_app_inputs.prompt.md +0 -0
  116. /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/prompts/biolib_run_apps.prompt.md +0 -0
  117. /biolib/{_internal → _shared}/types/app.py +0 -0
  118. /biolib/{_internal → _shared}/types/data_record.py +0 -0
  119. /biolib/{_internal → _shared}/types/file_node.py +0 -0
  120. /biolib/{_internal → _shared}/types/push.py +0 -0
  121. /biolib/{_internal → _shared}/types/resource_permission.py +0 -0
  122. /biolib/{_internal → _shared}/types/result.py +0 -0
  123. /biolib/{_internal → _shared}/types/typing.py +0 -0
  124. {pybiolib-1.2.883.dist-info → pybiolib-1.2.1890.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,240 @@
+ import contextlib
+ import json
+ import os
+ import socket
+ import time
+ from typing import List, Optional
+
+ from docker.errors import NotFound
+ from docker.models.networks import Network
+
+ from biolib import utils
+ from biolib.biolib_docker_client import BiolibDockerClient
+ from biolib.biolib_logging import logger_no_user_data
+ from biolib.compute_node.job_worker.network_alloc import _allocate_network_with_retries
+
+
+ class NetworkBuffer:
+     BUFFER_SIZE = 25
+     NETWORK_NAME_PREFIX = 'biolib-remote-host-network-'
+     NETWORK_LABEL = 'biolib-role=remote-host-network'
+
+     _BIOLIB_DIR = '/biolib' if utils.IS_RUNNING_IN_CLOUD else '/tmp/biolib'
+     _NETWORKS_FILE = os.path.join(_BIOLIB_DIR, 'remote-host-networks.json')
+     _LOCK_FILE = os.path.join(_BIOLIB_DIR, 'remote-host-networks.lock')
+     _LOCK_TIMEOUT_SECONDS = 60
+     _STALE_LOCK_THRESHOLD_SECONDS = 600
+
+     _instance: Optional['NetworkBuffer'] = None
+
+     def __init__(self):
+         os.makedirs(self._BIOLIB_DIR, exist_ok=True)
+         self._docker = BiolibDockerClient.get_docker_client()
+
+     @classmethod
+     def get_instance(cls) -> 'NetworkBuffer':
+         if cls._instance is None:
+             cls._instance = cls()
+         return cls._instance
+
+     def _acquire_lock(self) -> None:
+         start_time = time.time()
+         retry_count = 0
+
+         while time.time() - start_time < self._LOCK_TIMEOUT_SECONDS:
+             try:
+                 with open(self._LOCK_FILE, 'x') as lock_file:
+                     lock_info = {
+                         'pid': os.getpid(),
+                         'hostname': socket.gethostname(),
+                         'started_at': time.time(),
+                     }
+                     json.dump(lock_info, lock_file)
+                 return
+             except FileExistsError:
+                 if retry_count == 0:
+                     self._check_and_remove_stale_lock()
+
+                 time.sleep(0.5)
+                 retry_count += 1
+
+         raise RuntimeError(
+             f'Failed to acquire network buffer lock after {self._LOCK_TIMEOUT_SECONDS}s: {self._LOCK_FILE}'
+         )
+
+     def _check_and_remove_stale_lock(self) -> None:
+         try:
+             if not os.path.exists(self._LOCK_FILE):
+                 return
+
+             lock_mtime = os.path.getmtime(self._LOCK_FILE)
+             lock_age = time.time() - lock_mtime
+
+             if lock_age > self._STALE_LOCK_THRESHOLD_SECONDS:
+                 try:
+                     with open(self._LOCK_FILE) as f:
+                         lock_info = json.load(f)
+                     lock_pid = lock_info.get('pid')
+
+                     if lock_pid:
+                         try:
+                             os.kill(lock_pid, 0)
+                             logger_no_user_data.warning(
+                                 f'Lock file is old ({lock_age:.0f}s) but process {lock_pid} is still alive'
+                             )
+                             return
+                         except (OSError, ProcessLookupError):
+                             pass
+
+                 except (json.JSONDecodeError, KeyError, ValueError):
+                     pass
+
+                 logger_no_user_data.warning(
+                     f'Removing stale lock file (age: {lock_age:.0f}s, threshold: {self._STALE_LOCK_THRESHOLD_SECONDS}s)'
+                 )
+                 os.remove(self._LOCK_FILE)
+
+         except Exception as error:
+             logger_no_user_data.debug(f'Error checking stale lock: {error}')
+
+     def _release_lock(self) -> None:
+         with contextlib.suppress(FileNotFoundError):
+             os.remove(self._LOCK_FILE)
+
+     def _read_available_networks(self) -> List[str]:
+         if not os.path.exists(self._NETWORKS_FILE):
+             return []
+
+         try:
+             with open(self._NETWORKS_FILE) as f:
+                 network_ids = json.load(f)
+                 if not isinstance(network_ids, list):
+                     logger_no_user_data.error(
+                         f'Invalid network buffer file format (expected list, got {type(network_ids).__name__})'
+                     )
+                     self._backup_corrupted_file()
+                     return []
+                 return network_ids
+         except json.JSONDecodeError as error:
+             logger_no_user_data.error(f'Corrupted network buffer file: {error}')
+             self._backup_corrupted_file()
+             return []
+         except Exception as error:
+             logger_no_user_data.error(f'Failed to read network buffer file: {error}')
+             return []
+
+     def _write_available_networks(self, network_ids: List[str]) -> None:
+         temp_file = f'{self._NETWORKS_FILE}.tmp'
+         try:
+             with open(temp_file, 'w') as f:
+                 json.dump(network_ids, f, indent=2)
+                 f.flush()
+                 os.fsync(f.fileno())
+
+             os.replace(temp_file, self._NETWORKS_FILE)
+         except Exception as error:
+             logger_no_user_data.error(f'Failed to write network buffer file: {error}')
+             with contextlib.suppress(FileNotFoundError):
+                 os.remove(temp_file)
+             raise
+
+     def _backup_corrupted_file(self) -> None:
+         try:
+             timestamp = int(time.time())
+             backup_path = f'{self._NETWORKS_FILE}.corrupt-{timestamp}'
+             os.rename(self._NETWORKS_FILE, backup_path)
+             logger_no_user_data.error(f'Backed up corrupted file to {backup_path}')
+         except Exception as error:
+             logger_no_user_data.error(f'Failed to backup corrupted file: {error}')
+
+     def allocate_networks(self, job_id: str, count: int) -> List[Network]:
+         try:
+             self._acquire_lock()
+
+             available_ids = self._read_available_networks()
+             allocated: List[Network] = []
+
+             for _ in range(count):
+                 network = None
+
+                 while available_ids and network is None:
+                     net_id = available_ids.pop(0)
+                     try:
+                         network = self._docker.networks.get(net_id)
+                         logger_no_user_data.debug(
+                             f'Allocated network {network.id} ({network.name}) from buffer for job {job_id}'
+                         )
+                     except NotFound:
+                         logger_no_user_data.warning(
+                             f'Network {net_id} in buffer file no longer exists in Docker, skipping'
+                         )
+                         network = None
+
+                 if network is None:
+                     logger_no_user_data.debug(f'Buffer exhausted, creating network on-the-fly for job {job_id}')
+                     network = self._create_network()
+
+                 allocated.append(network)
+
+             self._write_available_networks(available_ids)
+             return allocated
+
+         except RuntimeError as error:
+             logger_no_user_data.warning(f'Lock acquisition failed: {error}. Creating networks on-the-fly.')
+             allocated = []
+             for _ in range(count):
+                 network = self._create_network()
+                 allocated.append(network)
+             return allocated
+
+         finally:
+             self._release_lock()
+
+     def fill_buffer(self) -> int:
+         try:
+             self._acquire_lock()
+
+             available_ids = self._read_available_networks()
+             current_count = len(available_ids)
+             needed = self.BUFFER_SIZE - current_count
+
+             if needed <= 0:
+                 logger_no_user_data.debug(
+                     f'Buffer already has {current_count} available networks (target: {self.BUFFER_SIZE})'
+                 )
+                 return 0
+
+             logger_no_user_data.debug(
+                 f'Filling buffer: current={current_count}, target={self.BUFFER_SIZE}, creating={needed}'
+             )
+
+             created_count = 0
+             for _ in range(needed):
+                 try:
+                     network = self._create_network()
+                     if network.id:
+                         available_ids.append(network.id)
+                         created_count += 1
+                         logger_no_user_data.debug(f'Created buffer network {network.id} ({created_count}/{needed})')
+                     else:
+                         logger_no_user_data.error('Created network has no ID, skipping')
+                 except Exception as error:
+                     logger_no_user_data.error(f'Failed to create buffer network: {error}')
+                     continue
+
+             self._write_available_networks(available_ids)
+             logger_no_user_data.debug(f'Buffer fill complete: created {created_count} networks')
+             return created_count
+
+         finally:
+             self._release_lock()
+
+     def _create_network(self) -> Network:
+         network = _allocate_network_with_retries(
+             name_prefix=self.NETWORK_NAME_PREFIX,
+             docker_client=self._docker,
+             internal=True,
+             driver='bridge',
+             labels={'biolib-role': 'remote-host-network'},
+         )
+         return network
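Taken together, the new NetworkBuffer keeps a pool of pre-created internal Docker bridge networks and hands them out per job, falling back to on-the-fly creation when the pool is empty or the lock cannot be taken. A minimal usage sketch (not part of the diff; the job UUID is made up and the fill call is assumed to run periodically on the compute node):

# Hypothetical driver code for the NetworkBuffer added above.
from biolib.compute_node.job_worker.network_buffer import NetworkBuffer

network_buffer = NetworkBuffer.get_instance()
created = network_buffer.fill_buffer()  # tops the pool up to BUFFER_SIZE (25) networks
print(f'Pre-created {created} buffer networks')

# When a job needs isolated networks for its remote hosts:
networks = network_buffer.allocate_networks(job_id='11111111-2222-3333-4444-555555555555', count=2)
for network in networks:
    print(network.id, network.name)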
@@ -1,7 +1,7 @@
  import threading
  import time
  import subprocess
- from datetime import datetime
+ from datetime import datetime, timezone

  from docker.models.containers import Container # type: ignore

@@ -173,7 +173,7 @@ class UtilizationReporterThread(threading.Thread):
      gpu_max_usage_in_percent=gpu_max_usage_in_percent,
      memory_average_usage_in_percent=memory_average_usage_in_percent,
      memory_max_usage_in_percent=memory_max_usage_in_percent,
-     recorded_at=datetime.utcnow().isoformat(),
+     recorded_at=datetime.now(timezone.utc).isoformat(),
      sampling_period_in_milliseconds=self._sampling_period_in_milliseconds * self._samples_between_writes,
  )

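The functional change above swaps the deprecated, naive datetime.utcnow() for an explicitly timezone-aware timestamp, so recorded_at now carries a UTC offset. A quick standard-library illustration of the difference:

from datetime import datetime, timezone

naive = datetime.utcnow().isoformat()           # e.g. '2025-01-01T12:00:00' (no offset)
aware = datetime.now(timezone.utc).isoformat()  # e.g. '2025-01-01T12:00:00+00:00'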
@@ -1,22 +1,37 @@
- import base64
  import io
- import subprocess
+ import ipaddress
  import tarfile
  import time
  from urllib.parse import urlparse

- from docker.errors import ImageNotFound # type: ignore
- from docker.models.containers import Container # type: ignore
- from docker.models.images import Image # type: ignore
- from docker.models.networks import Network # type: ignore
+ from docker.models.containers import Container
+ from docker.models.networks import Network
+ from docker.types import EndpointConfig

  from biolib import utils
+ from biolib._internal.utils import base64_encode_string
  from biolib.biolib_api_client import BiolibApiClient, RemoteHost
+ from biolib.biolib_api_client.job_types import CreatedJobDict
  from biolib.biolib_docker_client import BiolibDockerClient
  from biolib.biolib_errors import BioLibError
  from biolib.biolib_logging import logger_no_user_data
  from biolib.compute_node.cloud_utils import CloudUtils
- from biolib.typing_utils import List, Optional
+ from biolib.compute_node.utils import BIOLIB_PROXY_NETWORK_NAME
+ from biolib.compute_node.webserver.proxy_utils import get_biolib_nginx_proxy_image
+ from biolib.typing_utils import Dict, List, Optional
+
+
+ def get_static_ip_from_network(network: Network, offset: int = 2) -> str:
+     ipam_config = network.attrs['IPAM']['Config']
+     if not ipam_config:
+         raise BioLibError(f'Network {network.name} has no IPAM configuration')
+
+     subnet_str = ipam_config[0]['Subnet']
+     subnet = ipaddress.ip_network(subnet_str, strict=False)
+
+     static_ip = str(subnet.network_address + offset)
+
+     return static_ip


  # Prepare for remote hosts with specified port
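The new get_static_ip_from_network helper derives a deterministic address by adding a small offset to the network address of the Docker network's IPAM subnet. A quick check of that arithmetic with the standard ipaddress module (the subnet value is made up for illustration):

import ipaddress

subnet = ipaddress.ip_network('172.25.0.0/16', strict=False)
print(str(subnet.network_address + 2))  # -> '172.25.0.2', i.e. the default offset of 2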
@@ -24,78 +39,102 @@ class RemoteHostExtended(RemoteHost):
      ports: List[int]


+ class RemoteHostMapping:
+     def __init__(self, hostname: str, ports: List[int], network: Network, static_ip: str):
+         self.hostname = hostname
+         self.ports = ports
+         self.network = network
+         self.static_ip = static_ip
+
+
  class RemoteHostProxy:
      def __init__(
          self,
-         remote_host: RemoteHost,
-         public_network: Network,
-         internal_network: Optional[Network],
-         job_id: str,
-         ports: List[int],
+         remote_host_mappings: List[RemoteHostMapping],
+         job: CreatedJobDict,
+         app_caller_network: Optional[Network] = None,
      ):
-         self.is_app_caller_proxy = remote_host['hostname'] == 'AppCallerProxy'
-         self._remote_host: RemoteHostExtended = RemoteHostExtended(hostname=remote_host['hostname'], ports=ports)
-         self._public_network: Network = public_network
-         self._internal_network: Optional[Network] = internal_network
+         self._remote_host_mappings = remote_host_mappings
+         self._app_caller_network = app_caller_network
+         self.is_app_caller_proxy = app_caller_network is not None

-         if not job_id:
-             raise Exception('RemoteHostProxy missing argument "job_id"')
+         if not job:
+             raise Exception('RemoteHostProxy missing argument "job"')

-         self._name = f'biolib-remote-host-proxy-{job_id}-{self.hostname}'
-         self._job_uuid = job_id
+         self._job = job
+         suffix = '-AppCallerProxy' if app_caller_network else ''
+         self._name = f'biolib-remote-host-proxy-{self._job_uuid}{suffix}'
          self._container: Optional[Container] = None
-         self._enclave_traffic_forwarder_processes: List[subprocess.Popen] = []
          self._docker = BiolibDockerClient().get_docker_client()

      @property
-     def hostname(self) -> str:
-         return self._remote_host['hostname']
+     def _job_uuid(self) -> str:
+         return self._job['uuid']
+
+     def get_hostname_to_ip_mapping(self) -> Dict[str, str]:
+         return {mapping.hostname: mapping.static_ip for mapping in self._remote_host_mappings}
+
+     def get_remote_host_networks(self) -> List[Network]:
+         networks = [mapping.network for mapping in self._remote_host_mappings]
+         return networks

      def get_ip_address_on_network(self, network: Network) -> str:
          if not self._container:
-             raise Exception('RemoteHostProxy not yet started')
+             raise BioLibError('RemoteHostProxy not yet started')

          container_networks = self._container.attrs['NetworkSettings']['Networks']
          if network.name in container_networks:
              ip_address: str = container_networks[network.name]['IPAddress']
+             if not ip_address:
+                 raise BioLibError(f'No IP address found for network {network.name}')
              return ip_address

-         raise Exception(f'RemoteHostProxy not connected to network {network.name}')
+         raise BioLibError(f'RemoteHostProxy not connected to network {network.name}')

      def start(self) -> None:
-         # TODO: Implement nice error handling in this method
-
-         upstream_server_name = self._remote_host['hostname']
-         upstream_server_ports = self._remote_host['ports']
-
          docker = BiolibDockerClient.get_docker_client()

+         networking_config: Optional[Dict[str, EndpointConfig]] = (
+             None
+             if not self.is_app_caller_proxy
+             else {
+                 BIOLIB_PROXY_NETWORK_NAME: docker.api.create_endpoint_config(
+                     aliases=[f'biolib-app-caller-proxy-{self._job_uuid}']
+                 )
+             }
+         )
+
          for index in range(3):
              logger_no_user_data.debug(f'Attempt {index} at creating RemoteHostProxy container "{self._name}"...')
              try:
                  self._container = docker.containers.create(
                      detach=True,
-                     image=self._get_biolib_remote_host_proxy_image(),
+                     image=get_biolib_nginx_proxy_image(),
                      name=self._name,
-                     network=self._public_network.name,
+                     network=BIOLIB_PROXY_NETWORK_NAME,
+                     networking_config=networking_config,
                  )
                  break
-             except Exception as error: # pylint: disable=broad-except
+             except Exception as error:
                  logger_no_user_data.exception(f'Failed to create container "{self._name}" hit error: {error}')

                  logger_no_user_data.debug('Sleeping before re-trying container creation...')
                  time.sleep(3)

-         if not self._container:
+         if not self._container or not self._container.id:
              raise BioLibError(f'Exceeded re-try limit for creating container {self._name}')

-         self._write_nginx_config_to_container(
-             upstream_server_name,
-             upstream_server_ports,
-         )
+         for mapping in self._remote_host_mappings:
+             mapping.network.connect(self._container.id, ipv4_address=mapping.static_ip)
+             logger_no_user_data.debug(
+                 f'Connected proxy to network {mapping.network.name} with static IP {mapping.static_ip}'
+             )
+
+         if self._app_caller_network:
+             self._app_caller_network.connect(self._container.id)
+             logger_no_user_data.debug(f'Connected app caller proxy to network {self._app_caller_network.name}')

-         if self._internal_network:
-             self._internal_network.connect(self._container.id)
+         self._write_nginx_config_to_container()

          self._container.start()

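To make the new constructor shape concrete, a hedged sketch of wiring one proxy up for a single remote host follows; the hostname, ports, and job dict are illustrative stand-ins, and in practice the network comes from the NetworkBuffer and the job dict from the job-creation API:

# Hypothetical wiring of RemoteHostMapping + RemoteHostProxy (illustrative values only).
from biolib.compute_node.job_worker.network_buffer import NetworkBuffer
from biolib.compute_node.remote_host_proxy import RemoteHostMapping, RemoteHostProxy, get_static_ip_from_network

job = {'uuid': '11111111-2222-3333-4444-555555555555'}  # minimal stand-in for a CreatedJobDict
network = NetworkBuffer.get_instance().allocate_networks(job_id=job['uuid'], count=1)[0]
mapping = RemoteHostMapping(
    hostname='ftp.example.org',
    ports=[21, 443],
    network=network,
    static_ip=get_static_ip_from_network(network),
)
proxy = RemoteHostProxy(remote_host_mappings=[mapping], job=job)
proxy.start()  # assumes the compute node's biolib-proxy-network already exists
print(proxy.get_hostname_to_ip_mapping())  # {'ftp.example.org': '<static ip on that network>'}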
@@ -121,28 +160,7 @@ class RemoteHostProxy:
          if self._container:
              self._container.remove(force=True)

-         for process in self._enclave_traffic_forwarder_processes:
-             process.terminate()
-
-     def _get_biolib_remote_host_proxy_image(self) -> Image:
-         if utils.IS_RUNNING_IN_CLOUD:
-             try:
-                 logger_no_user_data.debug('Getting local Docker image for remote host proxy')
-                 return self._docker.images.get('biolib-remote-host-proxy:latest')
-             except ImageNotFound:
-                 logger_no_user_data.debug(
-                     'Local Docker image for remote host proxy not available. Falling back to public image...'
-                 )
-
-         public_image_uri = 'public.ecr.aws/h5y4b3l1/biolib-remote-host-proxy:latest'
-         try:
-             logger_no_user_data.debug('Getting public Docker image for remote host proxy')
-             return self._docker.images.get(public_image_uri)
-         except ImageNotFound:
-             logger_no_user_data.debug('Pulling public Docker image for remote host proxy')
-             return self._docker.images.pull(public_image_uri)
-
-     def _write_nginx_config_to_container(self, upstream_server_name: str, upstream_server_ports: List[int]) -> None:
+     def _write_nginx_config_to_container(self) -> None:
          if not self._container:
              raise Exception('RemoteHostProxy container not defined when attempting to write NGINX config')

@@ -161,8 +179,16 @@ class RemoteHostProxy:
              access_token = BiolibApiClient.get().access_token
              bearer_token = f'Bearer {access_token}' if access_token else ''

-             biolib_index_basic_auth = f'compute_node|admin:{compute_node_auth_token},{self._job_uuid}'
-             biolib_index_basic_auth_base64 = base64.b64encode(biolib_index_basic_auth.encode('utf-8')).decode('utf-8')
+             user_uuid = self._job.get('user_id')
+             if user_uuid:
+                 biolib_index_basic_auth = (
+                     f'biolib_user|{user_uuid.replace("-", "_")}:cloud-{compute_node_auth_token},{self._job_uuid}'
+                 )
+                 biolib_index_auth_header_value = f'Basic {base64_encode_string(biolib_index_basic_auth)}'
+                 logger_no_user_data.debug(f'Job "{self._job_uuid}" using biolib_user auth for biolib-index')
+             else:
+                 biolib_index_auth_header_value = ''
+                 logger_no_user_data.debug(f'Job "{self._job_uuid}" has no biolib-index auth configured')

              nginx_config = f"""
  events {{
@@ -273,6 +299,16 @@ http {{
              proxy_ssl_server_name on;
          }}

+         location ~ "^/api/auth/oauth-token-exchange/$" {{
+             # Note: Using $1 here as URI part from regex must be used for proxy_pass
+             proxy_pass https://$upstream_hostname/api/auth/oauth-token-exchange/$1;
+             proxy_set_header authorization "";
+             proxy_set_header compute-node-auth-token "{compute_node_auth_token}";
+             proxy_set_header job-uuid "{self._job_uuid}";
+             proxy_set_header cookie "";
+             proxy_ssl_server_name on;
+         }}
+
          location /api/lfs/ {{
              proxy_pass https://$upstream_hostname$request_uri;
              proxy_set_header authorization "";
@@ -291,6 +327,15 @@ http {{
              proxy_ssl_server_name on;
          }}

+         location /api/resource/ {{
+             proxy_pass https://$upstream_hostname$request_uri;
+             proxy_set_header authorization "";
+             proxy_set_header compute-node-auth-token "{compute_node_auth_token}";
+             proxy_set_header job-uuid "{self._job_uuid}";
+             proxy_set_header cookie "";
+             proxy_ssl_server_name on;
+         }}
+
          location /api/resources/data-records/ {{
              proxy_pass https://$upstream_hostname$request_uri;
              proxy_set_header authorization "";
@@ -318,37 +363,46 @@ http {{
              proxy_ssl_server_name on;
          }}

-         location /api/ {{
+         location /api/proxy/index/ {{
              proxy_pass https://$upstream_hostname$request_uri;
+             proxy_set_header authorization "{biolib_index_auth_header_value}";
+             proxy_set_header cookie "";
+             proxy_ssl_server_name on;
+         }}
+
+         location ~* "^/api/accounts/(?<account_id>[a-z0-9-]{{36}})/metrics/jobs/$" {{
+             proxy_pass https://$upstream_hostname/api/accounts/$account_id/metrics/jobs/$is_args$args;
              proxy_set_header authorization "";
+             proxy_set_header compute-node-auth-token "{compute_node_auth_token}";
+             proxy_set_header job-uuid "{self._job_uuid}";
              proxy_set_header cookie "";
              proxy_ssl_server_name on;
          }}

-         location /proxy/storage/job-storage/ {{
+         location /api/ {{
              proxy_pass https://$upstream_hostname$request_uri;
              proxy_set_header authorization "";
              proxy_set_header cookie "";
              proxy_ssl_server_name on;
          }}

-         location /proxy/storage/lfs/versions/ {{
+         location /proxy/storage/job-storage/ {{
              proxy_pass https://$upstream_hostname$request_uri;
              proxy_set_header authorization "";
              proxy_set_header cookie "";
              proxy_ssl_server_name on;
          }}

-         location /proxy/cloud/ {{
+         location /proxy/storage/lfs/versions/ {{
              proxy_pass https://$upstream_hostname$request_uri;
              proxy_set_header authorization "";
              proxy_set_header cookie "";
              proxy_ssl_server_name on;
          }}

-         location /proxy/index/ {{
+         location /proxy/cloud/ {{
              proxy_pass https://$upstream_hostname$request_uri;
-             proxy_set_header authorization "Basic {biolib_index_basic_auth_base64}";
+             proxy_set_header authorization "";
              proxy_set_header cookie "";
              proxy_ssl_server_name on;
          }}
@@ -357,28 +411,58 @@ http {{
              return 404 "Not found";
          }}
      }}
+
+     server {{
+         listen 1080;
+         resolver 127.0.0.11 ipv6=off valid=30s;
+
+         if ($http_biolib_result_uuid != "{self._job_uuid}") {{
+             return 403 "Invalid or missing biolib-result-uuid header";
+         }}
+
+         if ($http_biolib_result_port = "") {{
+             return 400 "Missing biolib-result-port header";
+         }}
+
+         location / {{
+             proxy_pass http://main:$http_biolib_result_port$request_uri;
+             proxy_set_header Host $http_host;
+             proxy_set_header biolib-result-uuid "";
+             proxy_set_header biolib-result-port "";
+             proxy_pass_request_headers on;
+         }}
+     }}
  }}
  """
          else:
+             port_to_mappings: Dict[int, List[RemoteHostMapping]] = {}
+             for mapping in self._remote_host_mappings:
+                 for port in mapping.ports:
+                     if port not in port_to_mappings:
+                         port_to_mappings[port] = []
+                     port_to_mappings[port].append(mapping)
+
              nginx_config = """
  events {}
  error_log /dev/stdout info;
  stream {
      resolver 127.0.0.11 valid=30s;"""
-             for idx, upstream_server_port in enumerate(upstream_server_ports):
+
+             for port, mappings in port_to_mappings.items():
                  nginx_config += f"""
-     map "" $upstream_{idx} {{
-         default {upstream_server_name}:{upstream_server_port};
-     }}
+     map $server_addr $backend_{port} {{"""
+                 for mapping in mappings:
+                     nginx_config += f'\n {mapping.static_ip} {mapping.hostname}:{port};'

+                 nginx_config += f"""
+     }}
      server {{
-         listen {self._remote_host['ports'][idx]};
-         proxy_pass $upstream_{idx};
+         listen 0.0.0.0:{port};
+         proxy_pass $backend_{port};
      }}
-
      server {{
-         listen {self._remote_host['ports'][idx]} udp;
-         proxy_pass $upstream_{idx};
+         listen 0.0.0.0:{port} udp;
+         proxy_pass $backend_{port};
      }}"""

              nginx_config += """
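For proxies that are not the app caller proxy, the else branch above now emits one map plus a TCP and a UDP server block per port, keyed on the proxy's static address on each remote-host network. Roughly what the generated stream section looks like for a single hypothetical mapping (static IP 172.25.0.2, hostname ftp.example.org, port 443; exact whitespace differs):

map $server_addr $backend_443 {
    172.25.0.2 ftp.example.org:443;
}
server {
    listen 0.0.0.0:443;
    proxy_pass $backend_443;
}
server {
    listen 0.0.0.0:443 udp;
    proxy_pass $backend_443;
}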
@@ -4,6 +4,8 @@ from enum import Enum

  from biolib.biolib_logging import logger

+ BIOLIB_PROXY_NETWORK_NAME = 'biolib-proxy-network'
+

  def get_package_type(package):
      package_type = int.from_bytes(package[1:2], 'big')