pybiolib 1.1.1629__py3-none-any.whl → 1.1.1881__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. biolib/__init__.py +11 -3
  2. biolib/_internal/data_record/__init__.py +1 -0
  3. biolib/_internal/data_record/data_record.py +153 -0
  4. biolib/_internal/data_record/remote_storage_endpoint.py +27 -0
  5. biolib/_internal/http_client.py +45 -15
  6. biolib/_internal/push_application.py +22 -37
  7. biolib/_internal/runtime.py +73 -0
  8. biolib/_internal/utils/__init__.py +18 -0
  9. biolib/api/client.py +12 -6
  10. biolib/app/app.py +6 -1
  11. biolib/app/search_apps.py +8 -12
  12. biolib/biolib_api_client/api_client.py +14 -9
  13. biolib/biolib_api_client/app_types.py +1 -0
  14. biolib/biolib_api_client/auth.py +0 -12
  15. biolib/biolib_api_client/biolib_app_api.py +53 -27
  16. biolib/biolib_api_client/biolib_job_api.py +11 -40
  17. biolib/biolib_binary_format/utils.py +19 -2
  18. biolib/cli/__init__.py +9 -3
  19. biolib/cli/auth.py +58 -0
  20. biolib/cli/data_record.py +43 -0
  21. biolib/cli/download_container.py +3 -1
  22. biolib/cli/init.py +1 -0
  23. biolib/cli/lfs.py +39 -9
  24. biolib/cli/push.py +1 -1
  25. biolib/cli/run.py +3 -2
  26. biolib/cli/start.py +1 -0
  27. biolib/compute_node/cloud_utils/cloud_utils.py +38 -65
  28. biolib/compute_node/job_worker/cache_state.py +1 -1
  29. biolib/compute_node/job_worker/executors/docker_executor.py +10 -8
  30. biolib/compute_node/job_worker/job_storage.py +9 -13
  31. biolib/compute_node/job_worker/job_worker.py +10 -4
  32. biolib/compute_node/remote_host_proxy.py +48 -11
  33. biolib/compute_node/webserver/worker_thread.py +2 -2
  34. biolib/jobs/job.py +33 -32
  35. biolib/lfs/__init__.py +0 -2
  36. biolib/lfs/utils.py +23 -115
  37. biolib/runtime/__init__.py +13 -1
  38. biolib/sdk/__init__.py +17 -4
  39. biolib/user/sign_in.py +8 -12
  40. biolib/utils/__init__.py +17 -45
  41. biolib/utils/app_uri.py +11 -4
  42. biolib/utils/cache_state.py +2 -2
  43. biolib/utils/multipart_uploader.py +42 -68
  44. biolib/utils/seq_util.py +47 -9
  45. biolib/utils/zip/remote_zip.py +9 -17
  46. {pybiolib-1.1.1629.dist-info → pybiolib-1.1.1881.dist-info}/METADATA +1 -2
  47. {pybiolib-1.1.1629.dist-info → pybiolib-1.1.1881.dist-info}/RECORD +50 -46
  48. {pybiolib-1.1.1629.dist-info → pybiolib-1.1.1881.dist-info}/WHEEL +1 -1
  49. biolib/biolib_api_client/biolib_account_api.py +0 -21
  50. biolib/biolib_api_client/biolib_large_file_system_api.py +0 -53
  51. biolib/runtime/results.py +0 -20
  52. {pybiolib-1.1.1629.dist-info → pybiolib-1.1.1881.dist-info}/LICENSE +0 -0
  53. {pybiolib-1.1.1629.dist-info → pybiolib-1.1.1881.dist-info}/entry_points.txt +0 -0
biolib/cli/run.py CHANGED
@@ -1,4 +1,5 @@
  import sys
+
  import click
  
  from biolib import biolib_errors, utils
@@ -27,13 +28,13 @@ def run(local: bool, non_blocking: bool, uri: str, args: Tuple[str]) -> None:
  stdin = sys.stdin.read()
  return stdin
  
- blocking = False if non_blocking else True
+ blocking = not non_blocking
  job = app.cli(
  args=list(args),
  stdin=_get_stdin(),
  files=None,
  machine=('local' if local else ''),
- blocking=blocking
+ blocking=blocking,
  )
  
  if blocking:
biolib/cli/start.py CHANGED
@@ -24,6 +24,7 @@ def start(host: str, port: int, tls_certificate: Optional[str], tls_key: Optiona
  
  try:
  from biolib.compute_node.webserver import webserver # pylint: disable=import-outside-toplevel
+
  webserver.start_webserver(
  host=host,
  port=port,
biolib/compute_node/cloud_utils/cloud_utils.py CHANGED
@@ -6,12 +6,10 @@ import subprocess
  import time
  from datetime import datetime
  from socket import gethostbyname, gethostname
- import requests
  
- from biolib import utils
- from biolib.biolib_errors import BioLibError
+ from biolib import utils, api
  from biolib.biolib_logging import logger_no_user_data
- from biolib.typing_utils import Optional, List
+ from biolib.typing_utils import Optional, List, Dict, cast
  from biolib.biolib_api_client import BiolibApiClient
  from biolib.compute_node.webserver.webserver_types import WebserverConfig, ComputeNodeInfo, ShutdownTimes
  
@@ -70,23 +68,21 @@ class CloudUtils:
  return CloudUtils._webserver_config
  
  @staticmethod
- def deregister() -> None:
+ def deregister(error: Optional[str] = None) -> None:
  if utils.IS_RUNNING_IN_CLOUD:
  config = CloudUtils.get_webserver_config()
  try:
- response = requests.post(url=f'{config["base_url"]}/api/jobs/deregister/', json={
- 'auth_token': config["compute_node_info"]["auth_token"],
- 'public_id': config["compute_node_info"]["public_id"],
- })
-
- if not response.ok:
- response_content = response.content.decode()
- logger_no_user_data.error(
- f'Failed to deregister got status {response.status_code} and response {response_content}'
- )
-
- except Exception as error: # pylint: disable=broad-except
- logger_no_user_data.error(f'Failed to deregister got error: {error}')
+ api.client.post(
+ authenticate=False,
+ path='/jobs/deregister/',
+ data={
+ 'auth_token': config['compute_node_info']['auth_token'],
+ 'public_id': config['compute_node_info']['public_id'],
+ 'error': error,
+ },
+ )
+ except BaseException as error_object:
+ logger_no_user_data.error(f'Failed to deregister got error: {error_object}')
  else:
  logger_no_user_data.error("Not deregistering as environment is not cloud")
  
@@ -120,34 +116,26 @@ class CloudUtils:
  )
  return
  
- config = CloudUtils.get_webserver_config()
-
- for _ in range(100):
- try:
- response = requests.post(
- url=f'{config["base_url"]}/api/jobs/cloud/finish/',
- json={
- 'auth_token': config["compute_node_info"]["auth_token"],
- 'cloud_job_id': cloud_job_id,
- 'system_exception_code': system_exception_code,
- 'exit_code': exit_code
- },
- timeout=10,
- )
-
- response.raise_for_status()
-
- opt_error_string = f' with error code {system_exception_code}' if system_exception_code else ''
- logger_no_user_data.debug(
- f'Cloud Job "{cloud_job_id}" was reported as finished' + opt_error_string
- )
- return
+ logger_no_user_data.debug(
+ f'Reporting CloudJob "{cloud_job_id}" as finished with exit code: {exit_code} '
+ f'and system exception code: {system_exception_code}'
+ )
  
- except Exception as error: # pylint: disable=broad-except
- logger_no_user_data.debug(
- f'Could not finish cloud job "{cloud_job_id}" due to {error}, retrying...'
- )
- time.sleep(10)
+ config = CloudUtils.get_webserver_config()
+ try:
+ api.client.post(
+ authenticate=False,
+ path='/jobs/cloud/finish/',
+ retries=100,
+ data={
+ 'auth_token': config['compute_node_info']['auth_token'],
+ 'cloud_job_id': cloud_job_id,
+ 'system_exception_code': system_exception_code,
+ 'exit_code': exit_code
+ },
+ )
+ except BaseException as error:
+ logger_no_user_data.debug(f'Failed to finish CloudJob "{cloud_job_id}" due to: {error}')
  
  @staticmethod
  def _report_availability() -> None:
@@ -158,26 +146,11 @@ class CloudUtils:
  logger_no_user_data.debug(
  f'Registering with {compute_node_info} to host {api_client.base_url} at {datetime.now()}'
  )
-
- response: Optional[requests.Response] = None
- max_retries = 5
- for retry_count in range(max_retries):
- try:
- response = requests.post(
- url=f'{api_client.base_url}/api/jobs/report_available/',
- json=compute_node_info,
- )
- break
- except Exception as error: # pylint: disable=broad-except
- logger_no_user_data.error(f'Self-registering failed with error: {error}')
- if retry_count < max_retries - 1:
- seconds_to_sleep = 1
- logger_no_user_data.info(f'Retrying self-registering in {seconds_to_sleep} seconds')
- time.sleep(seconds_to_sleep)
-
- if not response:
- raise BioLibError('Failed to register. Max retry limit reached')
-
+ response = api.client.post(
+ authenticate=False,
+ path='/jobs/report_available/',
+ data=cast(Dict[str, str], compute_node_info),
+ )
  if response.status_code != 201:
  raise Exception("Non 201 error code")
  else:
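Note on the cloud_utils changes above: the hand-rolled requests retry loops are replaced by calls to the shared api.client helper, which takes a relative path and an optional retries count. The sketch below only illustrates that call shape with a generic retrying JSON POST; the name post_with_retries and its parameters are hypothetical and not part of pybiolib.

# Illustrative sketch (not pybiolib code): a retrying JSON POST in the spirit of
# api.client.post(path=..., data=..., retries=...). All names here are made up.
import json
import time
import urllib.request
from typing import Any, Dict, Optional


def post_with_retries(base_url: str, path: str, data: Dict[str, Any], retries: int = 5) -> bytes:
    url = f'{base_url}/api{path}'
    payload = json.dumps(data).encode()
    last_error: Optional[Exception] = None
    for attempt in range(retries):
        try:
            request = urllib.request.Request(
                url,
                data=payload,
                headers={'Content-Type': 'application/json'},
                method='POST',
            )
            with urllib.request.urlopen(request, timeout=10) as response:
                return response.read()
        except Exception as error:  # retry on any transport or HTTP error
            last_error = error
            time.sleep(min(2 ** attempt, 10))  # capped backoff between attempts
    raise RuntimeError(f'POST {url} failed after {retries} attempts') from last_error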
biolib/compute_node/job_worker/cache_state.py CHANGED
@@ -16,7 +16,7 @@ class DockerCacheStateError(CacheStateError):
  
  class LfsCacheState(CacheState):
  
- def __init__(self):
+ def __init__(self) -> None:
  super().__init__()
  
  self._storage_path_for_write: str = self._get_storage_path_for_write()
biolib/compute_node/job_worker/executors/docker_executor.py CHANGED
@@ -18,6 +18,8 @@ from docker.errors import ImageNotFound, APIError # type: ignore
  from docker.models.containers import Container # type: ignore
  
  from biolib import utils
+
+ from biolib._internal.runtime import RuntimeJobDataDict
  from biolib.biolib_binary_format import ModuleInput, ModuleOutputV2
  from biolib.biolib_docker_client import BiolibDockerClient
  from biolib.biolib_errors import DockerContainerNotFoundDuringExecutionException, BioLibError
@@ -37,7 +39,7 @@ from biolib.typing_utils import List, Dict, Optional
  
  class DockerExecutor:
  
- def __init__(self, options: LocalExecutorOptions):
+ def __init__(self, options: LocalExecutorOptions) -> None:
  self._options: LocalExecutorOptions = options
  self._is_cleaning_up = False
  
@@ -267,12 +269,12 @@
  internal_network = self._options['internal_network']
  extra_hosts: Dict[str, str] = {}
  
- biolib_system_secret = {
- 'version': '1.0.0',
- 'job_requested_machine': self._options['job']['requested_machine'],
- 'job_uuid': self._options['job']['public_id'],
- 'job_auth_token': self._options['job']['auth_token'],
- }
+ biolib_system_secret = RuntimeJobDataDict(
+ version='1.0.0',
+ job_requested_machine=self._options['job']['requested_machine'],
+ job_uuid=self._options['job']['public_id'],
+ job_auth_token=self._options['job']['auth_token'],
+ )
  secrets: Dict[str, str] = dict(
  **module.get('secrets', {}),
  biolib_system_secret=json.dumps(biolib_system_secret, indent=4),
@@ -551,7 +553,7 @@
  'Docker UpperDir not available. Falling back to container.get_archive() for file extraction'
  )
  
- logger.debug(f'overlay_upper_dir_path={overlay_upper_dir_path}')
+ logger_no_user_data.debug(f'overlay_upper_dir_path={overlay_upper_dir_path}')
  
  files_and_empty_dirs: List[FileInContainer] = []
  for path in run_diff_paths:
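The docker_executor change above swaps a plain dict for RuntimeJobDataDict. Judging only from the keyword arguments visible in the diff, that type is presumably a TypedDict along the following lines; the real definition lives in biolib/_internal/runtime.py and may differ.

# Sketch inferred from the diff above; not the actual definition in
# biolib/_internal/runtime.py.
import json
from typing import TypedDict


class RuntimeJobDataDict(TypedDict):
    version: str
    job_requested_machine: str
    job_uuid: str
    job_auth_token: str


# Built the same way docker_executor.py now builds the biolib_system_secret payload,
# with placeholder values:
biolib_system_secret = RuntimeJobDataDict(
    version='1.0.0',
    job_requested_machine='local',
    job_uuid='00000000-0000-0000-0000-000000000000',
    job_auth_token='placeholder-token',
)
print(json.dumps(biolib_system_secret, indent=4))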
biolib/compute_node/job_worker/job_storage.py CHANGED
@@ -1,9 +1,8 @@
  import os
  
- import requests
-
  from biolib import utils
- from biolib.biolib_api_client import BiolibApiClient, CreatedJobDict
+ from biolib._internal.http_client import HttpClient
+ from biolib.biolib_api_client import CreatedJobDict
  from biolib.biolib_api_client.biolib_job_api import BiolibJobApi
  from biolib.compute_node.cloud_utils import CloudUtils
  from biolib.biolib_logging import logger_no_user_data
@@ -15,24 +14,23 @@ class JobStorage:
  
  @staticmethod
  def upload_module_input(job: CreatedJobDict, module_input_serialized: bytes) -> None:
- base_url = BiolibApiClient.get().base_url
  job_uuid = job['public_id']
  headers = {'Job-Auth-Token': job['auth_token']}
  
  multipart_uploader = utils.MultiPartUploader(
  start_multipart_upload_request=dict(
  requires_biolib_auth=False,
- url=f'{base_url}/api/jobs/{job_uuid}/storage/input/start_upload/',
+ path=f'/jobs/{job_uuid}/storage/input/start_upload/',
  headers=headers
  ),
  get_presigned_upload_url_request=dict(
  requires_biolib_auth=False,
- url=f'{base_url}/api/jobs/{job_uuid}/storage/input/presigned_upload_url/',
+ path=f'/jobs/{job_uuid}/storage/input/presigned_upload_url/',
  headers=headers
  ),
  complete_upload_request=dict(
  requires_biolib_auth=False,
- url=f'{base_url}/api/jobs/{job_uuid}/storage/input/complete_upload/',
+ path=f'/jobs/{job_uuid}/storage/input/complete_upload/',
  headers=headers
  ),
  )
@@ -60,7 +58,6 @@
  
  @staticmethod
  def _get_module_output_uploader(job_uuid: str) -> utils.MultiPartUploader:
- base_url = BiolibApiClient.get().base_url
  config = CloudUtils.get_webserver_config()
  compute_node_auth_token = config['compute_node_info']['auth_token'] # pylint: disable=unsubscriptable-object
  headers = {'Compute-Node-Auth-Token': compute_node_auth_token}
@@ -68,17 +65,17 @@
  return utils.MultiPartUploader(
  start_multipart_upload_request=dict(
  requires_biolib_auth=False,
- url=f'{base_url}/api/jobs/{job_uuid}/storage/results/start_upload/',
+ path=f'/jobs/{job_uuid}/storage/results/start_upload/',
  headers=headers,
  ),
  get_presigned_upload_url_request=dict(
  requires_biolib_auth=False,
- url=f'{base_url}/api/jobs/{job_uuid}/storage/results/presigned_upload_url/',
+ path=f'/jobs/{job_uuid}/storage/results/presigned_upload_url/',
  headers=headers,
  ),
  complete_upload_request=dict(
  requires_biolib_auth=False,
- url=f'{base_url}/api/jobs/{job_uuid}/storage/results/complete_upload/',
+ path=f'/jobs/{job_uuid}/storage/results/complete_upload/',
  headers=headers,
  ),
  )
@@ -92,8 +89,7 @@
  job_auth_token=job['auth_token'],
  storage_type='input',
  )
- response = requests.get(url=presigned_download_url)
- response.raise_for_status()
+ response = HttpClient.request(url=presigned_download_url)
  data: bytes = response.content
  logger_no_user_data.debug(f'Job "{job_uuid}" module input downloaded')
  return data
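In job_storage.py the uploader request dicts now carry a relative path instead of a full url built from BiolibApiClient.get().base_url, and the module-input download goes through HttpClient.request. The snippet below only sketches why a relative path suffices: a central client can join the base URL and the /api prefix in one place. ApiClientSketch is a made-up name, not pybiolib's client.

# Hypothetical illustration; pybiolib's real client lives in biolib/api/client.py.
class ApiClientSketch:
    def __init__(self, base_url: str) -> None:
        self._base_url = base_url.rstrip('/')

    def resolve(self, path: str) -> str:
        # One place that knows about the '/api' prefix and the configured host.
        return f'{self._base_url}/api{path}'


client = ApiClientSketch('https://biolib.com')
assert client.resolve('/jobs/123/storage/input/start_upload/') == (
    'https://biolib.com/api/jobs/123/storage/input/start_upload/'
)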
biolib/compute_node/job_worker/job_worker.py CHANGED
@@ -12,9 +12,9 @@ import os
  import signal
  from types import FrameType
  
- import requests
  from docker.models.networks import Network # type: ignore
  
+ from biolib._internal.http_client import HttpClient
  from biolib.biolib_binary_format.stdout_and_stderr import StdoutAndStderr
  from biolib.compute_node.job_worker.job_legacy_input_wait_timeout_thread import JobLegacyInputWaitTimeout
  from biolib.compute_node.job_worker.job_storage import JobStorage
@@ -97,7 +97,11 @@ class JobWorker:
  if socket_port:
  self._connect_to_parent()
  
- def _handle_exit_gracefully(self, signum: int, frame: FrameType) -> None: # pylint: disable=unused-argument
+ def _handle_exit_gracefully(
+ self,
+ signum: int,
+ frame: Optional[FrameType], # pylint: disable=unused-argument
+ ) -> None:
  job_id = self._root_job_wrapper["job"]["public_id"] if self._root_job_wrapper else None
  logger_no_user_data.debug(
  f'_JobWorker ({job_id}) got exit signal {signal.Signals(signum).name}' # pylint: disable=no-member
@@ -246,6 +250,7 @@
  
  def _start_network_and_remote_host_proxies(self, job: CreatedJobDict) -> None:
  app_version = job['app_version']
+ app = app_version.get('app', {})
  job_id = job['public_id']
  remote_hosts = app_version['remote_hosts']
  if utils.IS_RUNNING_IN_CLOUD:
@@ -307,7 +312,8 @@
  self._public_network,
  self._internal_network,
  job_id,
- ports
+ ports,
+ can_push_data_record_for_user=app.get('can_push_data_record_for_user', False),
  )
  remote_host_proxy.start()
  self._remote_host_proxies.append(remote_host_proxy)
@@ -526,7 +532,7 @@
  start_time = time()
  logger_no_user_data.debug(f'Job "{root_job_id}" downloading runtime zip...')
  try:
- runtime_zip_bytes = requests.get(runtime_zip_url).content
+ runtime_zip_bytes = HttpClient.request(url=runtime_zip_url).content
  except Exception as exception:
  raise ComputeProcessException(
  exception,
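The widened signature of _handle_exit_gracefully above matches what the standard library actually passes to signal handlers: the current frame can be None, so Optional[FrameType] is the accurate annotation. A minimal standalone example of that handler shape:

# Standalone example of the handler signature adopted in the diff above.
import signal
import types
from typing import Optional


def handle_exit_gracefully(signum: int, frame: Optional[types.FrameType]) -> None:
    print(f'Got exit signal {signal.Signals(signum).name}')


signal.signal(signal.SIGTERM, handle_exit_gracefully)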
biolib/compute_node/remote_host_proxy.py CHANGED
@@ -14,7 +14,7 @@ from biolib.compute_node.cloud_utils import CloudUtils
  from biolib.typing_utils import Optional, List
  from biolib.biolib_api_client import RemoteHost
  from biolib.biolib_docker_client import BiolibDockerClient
- from biolib.biolib_logging import logger, logger_no_user_data
+ from biolib.biolib_logging import logger_no_user_data
  from biolib.biolib_api_client import BiolibApiClient
  
  
@@ -24,8 +24,6 @@ class RemoteHostExtended(RemoteHost):
  
  
  class RemoteHostProxy:
- _DOCKER_IMAGE_URI = 'public.ecr.aws/h5y4b3l1/biolib-remote-host-proxy:latest'
- _TRAFFIC_FORWARDER_PORT_OFFSET = 10000 # Port offset relative to port of a VSOCK proxy
  
  def __init__(
  self,
@@ -33,8 +31,10 @@ class RemoteHostProxy:
  public_network: Network,
  internal_network: Optional[Network],
  job_id: str,
- ports: List[int]
+ ports: List[int],
+ can_push_data_record_for_user: bool,
  ):
+ self._can_push_data_record_for_user: bool = can_push_data_record_for_user
  self.is_app_caller_proxy = remote_host['hostname'] == 'AppCallerProxy'
  
  # Default to port 443 for now until backend serves remote_hosts with port specified
@@ -52,6 +52,7 @@
  self._job_uuid = job_id
  self._container: Optional[Container] = None
  self._enclave_traffic_forwarder_processes: List[subprocess.Popen] = []
+ self._docker = BiolibDockerClient().get_docker_client()
  
  @property
  def hostname(self) -> str:
@@ -81,7 +82,7 @@
  try:
  self._container = docker.containers.create(
  detach=True,
- image=self._get_nginx_docker_image(),
+ image=self._get_biolib_remote_host_proxy_image(),
  name=self._name,
  network=self._public_network.name,
  )
@@ -130,13 +131,23 @@
  for process in self._enclave_traffic_forwarder_processes:
  process.terminate()
  
- def _get_nginx_docker_image(self) -> Image:
- docker = BiolibDockerClient.get_docker_client()
+ def _get_biolib_remote_host_proxy_image(self) -> Image:
+ if utils.IS_RUNNING_IN_CLOUD:
+ try:
+ logger_no_user_data.debug('Getting local Docker image for remote host proxy')
+ return self._docker.images.get('biolib-remote-host-proxy:latest')
+ except ImageNotFound:
+ logger_no_user_data.debug(
+ 'Local Docker image for remote host proxy not available. Falling back to public image...'
+ )
+
+ public_image_uri = 'public.ecr.aws/h5y4b3l1/biolib-remote-host-proxy:latest'
  try:
- return docker.images.get(self._DOCKER_IMAGE_URI)
+ logger_no_user_data.debug('Getting public Docker image for remote host proxy')
+ return self._docker.images.get(public_image_uri)
  except ImageNotFound:
- logger.debug('Pulling remote host docker image...')
- return docker.images.pull(self._DOCKER_IMAGE_URI)
+ logger_no_user_data.debug('Pulling public Docker image for remote host proxy')
+ return self._docker.images.pull(public_image_uri)
  
  def _write_nginx_config_to_container(self, upstream_server_name: str, upstream_server_ports: List[int]) -> None:
  if not self._container:
@@ -156,7 +167,7 @@
  
  if utils.IS_RUNNING_IN_CLOUD:
  config = CloudUtils.get_webserver_config()
- s3_results_bucket_name = config['s3_general_storage_bucket_name'] # pylint: disable=unsubscriptable-object
+ s3_results_bucket_name = config['s3_general_storage_bucket_name']
  s3_results_base_url = f'https://{s3_results_bucket_name}.s3.amazonaws.com'
  else:
  if base_url in ('https://biolib.com', 'https://staging-elb.biolib.com'):
@@ -196,6 +207,12 @@ http {{
  default "";
  }}
  
+ map $request_method $bearer_token_on_post_and_get {{
+ POST "{bearer_token}";
+ GET "{bearer_token}";
+ default "";
+ }}
+
  server {{
  listen 80;
  resolver 127.0.0.11 valid=30s;
@@ -270,6 +287,13 @@ http {{
  proxy_ssl_server_name on;
  }}
  
+ location /api/lfs/ {{
+ proxy_pass {base_url}/api/lfs/;
+ proxy_set_header authorization {'$bearer_token_on_post_and_get' if self._can_push_data_record_for_user else '""'};
+ proxy_set_header cookie "";
+ proxy_ssl_server_name on;
+ }}
+
  location /api/ {{
  proxy_pass {base_url}/api/;
  proxy_set_header authorization "";
@@ -298,12 +322,25 @@ http {{
  proxy_ssl_server_name on;
  }}
  
+ {f"""
+ location /proxy/storage/lfs/versions/ {{
+ proxy_pass {cloud_base_url}/proxy/storage/lfs/versions/;
+ proxy_set_header authorization "";
+ proxy_set_header cookie "";
+ proxy_ssl_server_name on;
+ }}
+ """ if self._can_push_data_record_for_user else ''}
+
  location /proxy/cloud/ {{
  proxy_pass {cloud_base_url}/proxy/cloud/;
  proxy_set_header authorization "";
  proxy_set_header cookie "";
  proxy_ssl_server_name on;
  }}
+
+ location / {{
+ return 404 "Not found";
+ }}
  }}
  }}
  '''
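The nginx template above now includes some location blocks conditionally, by nesting an f-string inside the template expression and falling back to an empty string when can_push_data_record_for_user is false, plus a catch-all location that returns 404. A trimmed-down sketch of that templating pattern (not the full config from remote_host_proxy.py):

# Reduced sketch of the conditional f-string pattern used in the nginx template above.
# The generated config is intentionally minimal and not the real proxy config.
def render_config(can_push_data_record_for_user: bool, cloud_base_url: str) -> str:
    return f'''
server {{
    {f"""
    location /proxy/storage/lfs/versions/ {{
        proxy_pass {cloud_base_url}/proxy/storage/lfs/versions/;
    }}
    """ if can_push_data_record_for_user else ''}
    location / {{
        return 404 "Not found";
    }}
}}
'''


print(render_config(True, 'https://biolib.com'))
print(render_config(False, 'https://biolib.com'))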
biolib/compute_node/webserver/worker_thread.py CHANGED
@@ -199,7 +199,7 @@ class WorkerThread(threading.Thread):
  logger_no_user_data.error(f'Job {self._job_uuid} worker process did not exit within 10 seconds')
  if utils.IS_RUNNING_IN_CLOUD:
  logger_no_user_data.error('Deregistering compute node...')
- CloudUtils.deregister()
+ CloudUtils.deregister(error='job_cleanup_timed_out')
  deregistered_due_to_error = True
  
  # Delete result as error occurred
@@ -249,7 +249,7 @@
  
  if utils.IS_RUNNING_IN_CLOUD:
  if deregistered_due_to_error:
- CloudUtils.shutdown() # shutdown now
+ CloudUtils.shutdown()  # shutdown now
  else:
  webserver_utils.update_auto_shutdown_time()
  
biolib/jobs/job.py CHANGED
@@ -6,9 +6,10 @@ from pathlib import Path
  from collections import OrderedDict
  from urllib.parse import urlparse
  
- import requests
  from biolib import api, utils
  from biolib._internal.http_client import HttpClient
+ from biolib._internal.utils import open_browser_window_from_notebook
+ from biolib.biolib_api_client import BiolibApiClient
  from biolib.biolib_api_client.biolib_job_api import BiolibJobApi
  from biolib.biolib_binary_format import LazyLoadedFile, ModuleOutputV2, ModuleInput, ModuleInputDict
  from biolib.biolib_binary_format.stdout_and_stderr import StdoutAndStderr
@@ -24,12 +25,14 @@ from biolib.utils import IS_RUNNING_IN_NOTEBOOK
  
  class Job:
  # Columns to print in table when showing Job
- table_columns_to_row_map = OrderedDict({
- 'ID': {'key': 'uuid', 'params': {'width': 36}},
- 'Application': {'key': 'app_uri', 'params': {}},
- 'Status': {'key': 'state', 'params': {}},
- 'Started At': {'key': 'started_at', 'params': {}},
- })
+ table_columns_to_row_map = OrderedDict(
+ {
+ 'ID': {'key': 'uuid', 'params': {'width': 36}},
+ 'Application': {'key': 'app_uri', 'params': {}},
+ 'Status': {'key': 'state', 'params': {}},
+ 'Started At': {'key': 'started_at', 'params': {}},
+ }
+ )
  
  def __init__(self, job_dict: JobDict):
  self._uuid: str = job_dict['uuid']
@@ -127,8 +130,7 @@
  job_auth_token=self._job_dict['auth_token'],
  storage_type='input',
  )
- response = requests.get(url=presigned_download_url)
- response.raise_for_status()
+ response = HttpClient.request(url=presigned_download_url)
  module_input_serialized: bytes = response.content
  return ModuleInput(module_input_serialized).deserialize()
  
@@ -174,6 +176,17 @@
  time.sleep(2)
  logger.info(f'Job {self.id} has finished.')
  
+ def open_browser(self) -> None:
+ api_client = BiolibApiClient.get()
+ results_url_to_open = f'{api_client.base_url}/results/{self.id}/?token={self._auth_token}'
+ if IS_RUNNING_IN_NOTEBOOK:
+ print(f'Opening results page at: {results_url_to_open}')
+ print('If your browser does not open automatically, click on the link above.')
+ open_browser_window_from_notebook(results_url_to_open)
+ else:
+ print('Please copy and paste the following link into your browser:')
+ print(results_url_to_open)
+
  def _get_cloud_job(self) -> CloudJobDict:
  self._refetch_job_dict(force_refetch=True)
  if self._job_dict['cloud_job'] is None:
@@ -192,20 +205,11 @@
  @staticmethod
  def show_jobs(count: int = 25) -> None:
  job_dicts = Job._get_job_dicts(count)
- BioLibTable(
- columns_to_row_map=Job.table_columns_to_row_map,
- rows=job_dicts,
- title='Jobs'
- ).print_table()
+ BioLibTable(columns_to_row_map=Job.table_columns_to_row_map, rows=job_dicts, title='Jobs').print_table()
  
  @staticmethod
  def _get_job_dicts(count: int) -> List['JobDict']:
- job_dicts: List['JobDict'] = api.client.get(
- path='/jobs/',
- params={
- 'page_size': str(count)
- }
- ).json()['results']
+ job_dicts: List['JobDict'] = api.client.get(path='/jobs/', params={'page_size': str(count)}).json()['results']
  return job_dicts
  
  @staticmethod
@@ -237,9 +241,7 @@
  def show(self) -> None:
  self._refetch_job_dict()
  BioLibTable(
- columns_to_row_map=Job.table_columns_to_row_map,
- rows=[self._job_dict],
- title=f'Job: {self._uuid}'
+ columns_to_row_map=Job.table_columns_to_row_map, rows=[self._job_dict], title=f'Job: {self._uuid}'
  ).print_table()
  
  def stream_logs(self) -> None:
@@ -274,6 +276,10 @@
  while True:
  time.sleep(2)
  status_json = self._get_job_status_from_compute_node(compute_node_url)
+ if not status_json:
+ # this can happen if the job is finished but already removed from the compute node
+ logger.warning("WARN: We were unable to retrieve the full log of the job, please try again")
+ break
  job_is_completed = status_json['is_completed']
  for status_update in status_json['status_updates']:
  # If the job is completed, print the log messages after all stdout and stderr has been written
@@ -303,12 +309,10 @@
  
  def _print_full_logs(self, node_url: str) -> None:
  try:
- response_json = HttpClient.request(
- url=f'{node_url}/v1/job/{self._uuid}/status/?logs=full'
- ).json()
+ response_json = HttpClient.request(url=f'{node_url}/v1/job/{self._uuid}/status/?logs=full').json()
  except Exception as error:
  logger.error(f'Could not get full streamed logs due to: {error}')
- raise BioLibError from error
+ raise BioLibError('Could not get full streamed logs') from error
  
  for status_update in response_json.get('previous_status_updates', []):
  logger.info(f'Cloud: {status_update["log_message"]}')
@@ -334,9 +338,7 @@
  def _get_job_status_from_compute_node(self, compute_node_url):
  for _ in range(15):
  try:
- return HttpClient.request(
- url=f'{compute_node_url}/v1/job/{self._uuid}/status/'
- ).json()
+ return HttpClient.request(url=f'{compute_node_url}/v1/job/{self._uuid}/status/').json()
  except Exception: # pylint: disable=broad-except
  cloud_job = self._get_cloud_job()
  logger.debug("Failed to get status from compute node, retrying...")
@@ -344,8 +346,7 @@
  logger.debug("Job no longer exists on compute node, checking for error...")
  if cloud_job['error_code'] != SystemExceptionCodes.COMPLETED_SUCCESSFULLY.value:
  error_message = SystemExceptionCodeMap.get(
- cloud_job['error_code'],
- f'Unknown error code {cloud_job["error_code"]}'
+ cloud_job['error_code'], f'Unknown error code {cloud_job["error_code"]}'
  )
  raise BioLibError(f'Cloud: {error_message}') from None
  else:
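The new Job.open_browser() added above either opens the results page directly (in a notebook, via open_browser_window_from_notebook) or prints the tokenized results URL. A hedged usage sketch, assuming the usual pybiolib entry points and a placeholder app URI:

# Usage sketch only; 'author_name/app_name' is a placeholder URI.
import biolib

app = biolib.load('author_name/app_name')
job = app.cli(args=['--help'], blocking=False)  # returns a Job without waiting for completion
job.open_browser()  # opens or prints the tokenized results URL for this job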
biolib/lfs/__init__.py CHANGED
@@ -1,6 +1,4 @@
  from .cache import prune_lfs_cache
  from .utils import \
- describe_large_file_system, \
- get_file_data_from_large_file_system, \
  push_large_file_system, \
  create_large_file_system