pybiolib 1.1.1881__py3-none-any.whl → 1.2.7.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. biolib/__init__.py +11 -4
  2. biolib/_data_record/data_record.py +278 -0
  3. biolib/_internal/data_record/__init__.py +1 -1
  4. biolib/_internal/data_record/data_record.py +97 -151
  5. biolib/_internal/data_record/remote_storage_endpoint.py +18 -7
  6. biolib/_internal/file_utils.py +77 -0
  7. biolib/_internal/fuse_mount/__init__.py +1 -0
  8. biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
  9. biolib/_internal/http_client.py +31 -9
  10. biolib/_internal/lfs/__init__.py +1 -0
  11. biolib/_internal/libs/__init__.py +1 -0
  12. biolib/_internal/libs/fusepy/__init__.py +1257 -0
  13. biolib/_internal/push_application.py +6 -1
  14. biolib/_internal/runtime.py +3 -56
  15. biolib/_internal/types/__init__.py +4 -0
  16. biolib/_internal/types/app.py +9 -0
  17. biolib/_internal/types/data_record.py +40 -0
  18. biolib/_internal/types/experiment.py +10 -0
  19. biolib/_internal/types/resource.py +14 -0
  20. biolib/_internal/types/typing.py +7 -0
  21. biolib/_internal/utils/multinode.py +264 -0
  22. biolib/_runtime/runtime.py +84 -0
  23. biolib/api/__init__.py +1 -0
  24. biolib/api/client.py +39 -17
  25. biolib/app/app.py +34 -71
  26. biolib/biolib_api_client/api_client.py +9 -2
  27. biolib/biolib_api_client/app_types.py +3 -2
  28. biolib/biolib_api_client/biolib_job_api.py +6 -0
  29. biolib/biolib_api_client/job_types.py +4 -4
  30. biolib/biolib_api_client/lfs_types.py +8 -2
  31. biolib/biolib_binary_format/remote_endpoints.py +12 -10
  32. biolib/biolib_binary_format/utils.py +23 -3
  33. biolib/cli/auth.py +1 -1
  34. biolib/cli/data_record.py +45 -6
  35. biolib/cli/lfs.py +10 -6
  36. biolib/compute_node/cloud_utils/cloud_utils.py +13 -16
  37. biolib/compute_node/job_worker/executors/docker_executor.py +127 -108
  38. biolib/compute_node/job_worker/job_storage.py +17 -5
  39. biolib/compute_node/job_worker/job_worker.py +25 -15
  40. biolib/compute_node/remote_host_proxy.py +72 -84
  41. biolib/compute_node/webserver/webserver_types.py +0 -1
  42. biolib/compute_node/webserver/worker_thread.py +42 -39
  43. biolib/experiments/experiment.py +75 -44
  44. biolib/jobs/job.py +98 -19
  45. biolib/jobs/job_result.py +46 -21
  46. biolib/jobs/types.py +1 -1
  47. biolib/runtime/__init__.py +2 -1
  48. biolib/sdk/__init__.py +18 -7
  49. biolib/typing_utils.py +2 -7
  50. biolib/user/sign_in.py +2 -2
  51. biolib/utils/seq_util.py +38 -35
  52. {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/METADATA +1 -1
  53. {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/RECORD +57 -45
  54. biolib/experiments/types.py +0 -9
  55. biolib/lfs/__init__.py +0 -4
  56. biolib/lfs/utils.py +0 -153
  57. /biolib/{lfs → _internal/lfs}/cache.py +0 -0
  58. {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/LICENSE +0 -0
  59. {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/WHEEL +0 -0
  60. {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/entry_points.txt +0 -0
biolib/experiments/experiment.py CHANGED
@@ -1,30 +1,29 @@
1
1
  import time
2
2
  from collections import OrderedDict
3
3
 
4
- from biolib.biolib_errors import BioLibError
5
- from biolib.jobs.types import JobsPaginatedResponse
6
- from biolib.typing_utils import List, Optional
7
-
4
+ import biolib._internal.types as _types
8
5
  from biolib import api
9
- from biolib.experiments.types import ExperimentDict
6
+ from biolib.biolib_errors import BioLibError
10
7
  from biolib.jobs.job import Job
11
- from biolib.typing_utils import Dict, Union
12
-
8
+ from biolib.jobs.types import JobsPaginatedResponse
13
9
  from biolib.tables import BioLibTable
10
+ from biolib.typing_utils import Dict, List, Optional, Union
14
11
 
15
12
 
16
13
  class Experiment:
17
14
  _BIOLIB_EXPERIMENTS: List['Experiment'] = []
18
15
 
19
16
  # Columns to print in table when showing Job
20
- _table_columns_to_row_map = OrderedDict({
21
- 'Name': {'key': 'name', 'params': {}},
22
- 'Job Count': {'key': 'job_count', 'params': {}},
23
- 'Created At': {'key': 'created_at', 'params': {}}
24
- })
17
+ _table_columns_to_row_map = OrderedDict(
18
+ {
19
+ 'Name': {'key': 'name', 'params': {}},
20
+ 'Job Count': {'key': 'job_count', 'params': {}},
21
+ 'Created At': {'key': 'created_at', 'params': {}},
22
+ }
23
+ )
25
24
 
26
- def __init__(self, name: str):
27
- self._experiment_dict: ExperimentDict = self._create_in_backend_or_get_experiment_dict(name)
25
+ def __init__(self, uri: str, _resource_dict: Optional[_types.ResourceDict] = None):
26
+ self._resource_dict: _types.ResourceDict = _resource_dict or self._get_or_create_resource_dict(uri)
28
27
 
29
28
  def __enter__(self):
30
29
  Experiment._BIOLIB_EXPERIMENTS.append(self)
@@ -33,18 +32,29 @@ class Experiment:
33
32
  Experiment._BIOLIB_EXPERIMENTS.pop()
34
33
 
35
34
  def __str__(self):
36
- return f'Experiment: {self.name}'
35
+ return f'Experiment: {self.uri}'
37
36
 
38
37
  def __repr__(self):
39
- return f'Experiment: {self.name}'
38
+ return f'Experiment: {self.uri}'
40
39
 
41
40
  @property
42
41
  def uuid(self) -> str:
43
- return self._experiment_dict['uuid']
42
+ return self._resource_dict['uuid']
44
43
 
45
44
  @property
46
45
  def name(self) -> str:
47
- return self._experiment_dict['name']
46
+ return self._resource_dict['name']
47
+
48
+ @property
49
+ def uri(self) -> str:
50
+ return self._resource_dict['uri']
51
+
52
+ @property
53
+ def _experiment_dict(self) -> _types.ExperimentSlimDict:
54
+ if not self._resource_dict['experiment']:
55
+ raise ValueError(f'Resource {self.uri} is not an Experiment')
56
+
57
+ return self._resource_dict['experiment']
48
58
 
49
59
  @staticmethod
50
60
  def get_experiment_in_context() -> Optional['Experiment']:
@@ -55,32 +65,46 @@ class Experiment:
55
65
  # Prints a table listing info about experiments accessible to the user
56
66
  @staticmethod
57
67
  def show_experiments(count: int = 25) -> None:
58
- experiment_dicts = api.client.get(
59
- path='/experiments/',
60
- params={
61
- 'page_size': str(count)
62
- }
63
- ).json()['results']
68
+ experiment_dicts = api.client.get(path='/experiments/', params={'page_size': str(count)}).json()['results']
64
69
  BioLibTable(
65
70
  columns_to_row_map=Experiment._table_columns_to_row_map,
66
71
  rows=experiment_dicts,
67
- title='Experiments'
72
+ title='Experiments',
68
73
  ).print_table()
69
74
 
75
+ @staticmethod
76
+ def get_by_uri(uri: str) -> 'Experiment':
77
+ query_param_key = 'uri' if '/' in uri else 'name'
78
+ resource_dict: _types.ResourceDict = api.client.get('/resource/', params={query_param_key: uri}).json()
79
+ if not resource_dict['experiment']:
80
+ raise ValueError(f'Resource {uri} is not an experiment')
81
+
82
+ return Experiment(uri=resource_dict['uri'], _resource_dict=resource_dict)
83
+
70
84
  def wait(self) -> None:
71
- self._refetch_experiment_dict()
85
+ self._refetch()
72
86
  while self._experiment_dict['job_running_count'] > 0:
73
87
  print(f"Waiting for {self._experiment_dict['job_running_count']} jobs to finish", end='\r')
74
88
  time.sleep(5)
75
- self._refetch_experiment_dict()
89
+ self._refetch()
76
90
 
77
91
  print(f'All jobs of experiment {self.name} have finished')
78
92
 
79
93
  def add_job(self, job_id: str) -> None:
80
- api.client.patch(
81
- path=f'/jobs/{job_id}/',
82
- data={'experiment_uuid': self.uuid}
83
- )
94
+ api.client.patch(path=f'/jobs/{job_id}/', data={'experiment_uuid': self.uuid})
95
+
96
+ def mount_files(self, mount_path: str) -> None:
97
+ try:
98
+ # Only attempt to import FUSE dependencies when strictly necessary
99
+ from biolib._internal.fuse_mount import ( # pylint: disable=import-outside-toplevel
100
+ ExperimentFuseMount as _ExperimentFuseMount,
101
+ )
102
+ except ImportError as error:
103
+ raise ImportError(
104
+ 'Failed to import FUSE mounting utils. Please ensure FUSE is installed on your system.'
105
+ ) from error
106
+
107
+ _ExperimentFuseMount.mount_experiment(experiment=self, mount_path=mount_path)
84
108
 
85
109
  def export_job_list(self, export_format='dicts'):
86
110
  valid_formats = ('dicts', 'dataframe')
@@ -98,7 +122,7 @@ class Experiment:
98
122
  raise ImportError(
99
123
  'Pandas must be installed to use this method. '
100
124
  'Alternatively, use .get_jobs() to get a list of job objects.'
101
- ) from error
125
+ ) from error
102
126
 
103
127
  jobs_df = pd.DataFrame.from_dict(job_dict_list)
104
128
  jobs_df.started_at = pd.to_datetime(jobs_df.started_at)
@@ -125,7 +149,7 @@ class Experiment:
125
149
  BioLibTable(
126
150
  columns_to_row_map=Job.table_columns_to_row_map,
127
151
  rows=[job._job_dict for job in jobs], # pylint: disable=protected-access
128
- title=f'Jobs in experiment: "{self.name}"'
152
+ title=f'Jobs in experiment: "{self.name}"',
129
153
  ).print_table()
130
154
 
131
155
  def get_jobs(self, status: Optional[str] = None) -> List[Job]:
@@ -147,15 +171,22 @@ class Experiment:
147
171
 
148
172
  return jobs
149
173
 
150
- def _create_in_backend_or_get_experiment_dict(self, name: str) -> ExperimentDict:
151
- # This endpoint returns experiment dict if already created
152
- experiment_dict: ExperimentDict = api.client.post(
153
- path='/experiments/',
154
- data={
155
- 'name': name
156
- }
157
- ).json()
158
- return experiment_dict
174
+ def rename(self, destination: str) -> None:
175
+ api.client.patch(f'/resources/{self.uuid}/', data={'uri': destination})
176
+ self._refetch()
177
+
178
+ @staticmethod
179
+ def _get_resource_dict_by_uuid(uuid: str) -> _types.ResourceDict:
180
+ resource_dict: _types.ResourceDict = api.client.get(f'/resources/{uuid}/').json()
181
+ if not resource_dict['experiment']:
182
+ raise ValueError('Resource from URI is not an experiment')
183
+
184
+ return resource_dict
185
+
186
+ @staticmethod
187
+ def _get_or_create_resource_dict(uri: str) -> _types.ResourceDict:
188
+ response_dict = api.client.post(path='/experiments/', data={'uri' if '/' in uri else 'name': uri}).json()
189
+ return Experiment._get_resource_dict_by_uuid(uuid=response_dict['uuid'])
159
190
 
160
- def _refetch_experiment_dict(self) -> None:
161
- self._experiment_dict = api.client.get(path=f'/experiments/{self.uuid}/').json()
191
+ def _refetch(self) -> None:
192
+ self._resource_dict = self._get_resource_dict_by_uuid(uuid=self._resource_dict['uuid'])
biolib/jobs/job.py CHANGED
@@ -1,26 +1,30 @@
1
1
  import base64
2
- from datetime import datetime, timedelta
3
2
  import sys
4
3
  import time
5
- from pathlib import Path
6
4
  from collections import OrderedDict
5
+ from datetime import datetime, timedelta
6
+ from pathlib import Path
7
7
  from urllib.parse import urlparse
8
8
 
9
9
  from biolib import api, utils
10
10
  from biolib._internal.http_client import HttpClient
11
11
  from biolib._internal.utils import open_browser_window_from_notebook
12
- from biolib.biolib_api_client import BiolibApiClient
12
+ from biolib.biolib_api_client import BiolibApiClient, CreatedJobDict
13
+ from biolib.biolib_api_client.biolib_app_api import BiolibAppApi
13
14
  from biolib.biolib_api_client.biolib_job_api import BiolibJobApi
14
- from biolib.biolib_binary_format import LazyLoadedFile, ModuleOutputV2, ModuleInput, ModuleInputDict
15
+ from biolib.biolib_binary_format import LazyLoadedFile, ModuleInput, ModuleInputDict, ModuleOutputV2
16
+ from biolib.biolib_binary_format.remote_endpoints import RemoteJobStorageEndpoint
15
17
  from biolib.biolib_binary_format.stdout_and_stderr import StdoutAndStderr
16
18
  from biolib.biolib_errors import BioLibError, CloudJobFinishedError
17
19
  from biolib.biolib_logging import logger, logger_no_user_data
20
+ from biolib.compute_node.job_worker.job_storage import JobStorage
18
21
  from biolib.compute_node.utils import SystemExceptionCodeMap, SystemExceptionCodes
19
22
  from biolib.jobs.job_result import JobResult
20
- from biolib.jobs.types import JobDict, CloudJobStartedDict, CloudJobDict
23
+ from biolib.jobs.types import CloudJobDict, CloudJobStartedDict, JobDict
21
24
  from biolib.tables import BioLibTable
22
- from biolib.typing_utils import Optional, List, cast, Dict
25
+ from biolib.typing_utils import Dict, List, Optional, cast
23
26
  from biolib.utils import IS_RUNNING_IN_NOTEBOOK
27
+ from biolib.utils.app_uri import parse_app_uri
24
28
 
25
29
 
26
30
  class Job:
@@ -56,26 +60,23 @@ class Job:
56
60
  @property
57
61
  def result(self) -> JobResult:
58
62
  if not self._result:
59
- if self.get_status() == "completed":
60
- self._result = JobResult(job_uuid=self._uuid, job_auth_token=self._auth_token)
61
- else:
62
- raise BioLibError(f"Result is not available for {self._uuid}: status is {self._job_dict['state']}.")
63
+ self._result = JobResult(job_uuid=self._uuid, job_auth_token=self._auth_token)
63
64
 
64
65
  return self._result
65
66
 
66
67
  @property
67
68
  def stdout(self) -> bytes:
68
- logger.warning("The property .stdout is deprecated, please use .get_stdout()")
69
+ logger.warning('The property .stdout is deprecated, please use .get_stdout()')
69
70
  return self.result.get_stdout()
70
71
 
71
72
  @property
72
73
  def stderr(self) -> bytes:
73
- logger.warning("The property .stderr is deprecated, please use .get_stderr()")
74
+ logger.warning('The property .stderr is deprecated, please use .get_stderr()')
74
75
  return self.result.get_stderr()
75
76
 
76
77
  @property
77
78
  def exitcode(self) -> int:
78
- logger.warning("The property .exitcode is deprecated, please use .get_exit_code()")
79
+ logger.warning('The property .exitcode is deprecated, please use .get_exit_code()')
79
80
  return self.result.get_exit_code()
80
81
 
81
82
  def is_finished(self) -> bool:
@@ -109,8 +110,8 @@ class Job:
109
110
  def load_file_as_numpy(self, *args, **kwargs):
110
111
  try:
111
112
  import numpy # type: ignore # pylint: disable=import-outside-toplevel,import-error
112
- except: # pylint: disable=raise-missing-from
113
- raise Exception("Failed to import numpy, please make sure it is installed.")
113
+ except ImportError: # pylint: disable=raise-missing-from
114
+ raise Exception('Failed to import numpy, please make sure it is installed.') from None
114
115
  file_handle = self.result.get_output_file(*args, **kwargs).get_file_handle()
115
116
  return numpy.load(file_handle, allow_pickle=False) # type: ignore
116
117
 
@@ -187,6 +188,39 @@ class Job:
187
188
  print('Please copy and paste the following link into your browser:')
188
189
  print(results_url_to_open)
189
190
 
191
+ def cancel(self) -> None:
192
+ try:
193
+ api.client.patch(
194
+ path=f'/jobs/{self._uuid}/',
195
+ headers={'Job-Auth-Token': self._auth_token} if self._auth_token else None,
196
+ data={'state': 'cancelled'},
197
+ )
198
+ logger.info(f'Job {self._uuid} canceled')
199
+ except Exception as error:
200
+ logger.error(f'Failed to cancel job {self._uuid} due to: {error}')
201
+
202
+ def recompute(self, app_uri: Optional[str] = None, machine: Optional[str] = None, blocking: bool = True) -> 'Job':
203
+ app_response = BiolibAppApi.get_by_uri(uri=app_uri or self._job_dict['app_uri'])
204
+
205
+ job_storage_input = RemoteJobStorageEndpoint(
206
+ job_auth_token=self._auth_token,
207
+ job_uuid=self._uuid,
208
+ storage_type='input',
209
+ )
210
+ http_response = HttpClient.request(url=job_storage_input.get_remote_url())
211
+ module_input_serialized = http_response.content
212
+
213
+ job = self._start_job_in_cloud(
214
+ app_uri=app_response['app_uri'],
215
+ app_version_uuid=app_response['app_version']['public_id'],
216
+ module_input_serialized=module_input_serialized,
217
+ machine=machine,
218
+ )
219
+ if blocking:
220
+ job.stream_logs()
221
+
222
+ return job
223
+
190
224
  def _get_cloud_job(self) -> CloudJobDict:
191
225
  self._refetch_job_dict(force_refetch=True)
192
226
  if self._job_dict['cloud_job'] is None:
@@ -278,7 +312,7 @@ class Job:
278
312
  status_json = self._get_job_status_from_compute_node(compute_node_url)
279
313
  if not status_json:
280
314
  # this can happen if the job is finished but already removed from the compute node
281
- logger.warning("WARN: We were unable to retrieve the full log of the job, please try again")
315
+ logger.warning('WARN: We were unable to retrieve the full log of the job, please try again')
282
316
  break
283
317
  job_is_completed = status_json['is_completed']
284
318
  for status_update in status_json['status_updates']:
@@ -320,7 +354,10 @@ class Job:
320
354
  self.print_logs_packages(response_json['streamed_logs_packages_b64'])
321
355
 
322
356
  def _get_cloud_job_awaiting_started(self) -> CloudJobStartedDict:
357
+ retry_count = 0
323
358
  while True:
359
+ retry_count += 1
360
+ time.sleep(min(10, retry_count))
324
361
  cloud_job = self._get_cloud_job()
325
362
 
326
363
  if cloud_job['finished_at']:
@@ -333,7 +370,6 @@ class Job:
333
370
  return cast(CloudJobStartedDict, cloud_job)
334
371
 
335
372
  logger.info('Cloud: The job has been queued. Please wait...')
336
- time.sleep(10)
337
373
 
338
374
  def _get_job_status_from_compute_node(self, compute_node_url):
339
375
  for _ in range(15):
@@ -341,9 +377,9 @@ class Job:
341
377
  return HttpClient.request(url=f'{compute_node_url}/v1/job/{self._uuid}/status/').json()
342
378
  except Exception: # pylint: disable=broad-except
343
379
  cloud_job = self._get_cloud_job()
344
- logger.debug("Failed to get status from compute node, retrying...")
380
+ logger.debug('Failed to get status from compute node, retrying...')
345
381
  if cloud_job['finished_at']:
346
- logger.debug("Job no longer exists on compute node, checking for error...")
382
+ logger.debug('Job no longer exists on compute node, checking for error...')
347
383
  if cloud_job['error_code'] != SystemExceptionCodes.COMPLETED_SUCCESSFULLY.value:
348
384
  error_message = SystemExceptionCodeMap.get(
349
385
  cloud_job['error_code'], f'Unknown error code {cloud_job["error_code"]}'
@@ -366,3 +402,46 @@ class Job:
366
402
 
367
403
  self._job_dict = self._get_job_dict(self._uuid, self._auth_token)
368
404
  self._job_dict_last_fetched_at = datetime.utcnow()
405
+
406
+ @staticmethod
407
+ def _start_job_in_cloud(
408
+ app_uri: str,
409
+ app_version_uuid: str,
410
+ module_input_serialized: bytes,
411
+ override_command: bool = False,
412
+ machine: Optional[str] = None,
413
+ experiment_id: Optional[str] = None,
414
+ result_prefix: Optional[str] = None,
415
+ timeout: Optional[int] = None,
416
+ notify: bool = False,
417
+ requested_machine_count: Optional[int] = None,
418
+ ) -> 'Job':
419
+ if len(module_input_serialized) < 500_000:
420
+ _job_dict = BiolibJobApi.create_job_with_data(
421
+ app_resource_name_prefix=parse_app_uri(app_uri)['resource_name_prefix'],
422
+ app_version_uuid=app_version_uuid,
423
+ arguments_override_command=override_command,
424
+ experiment_uuid=experiment_id,
425
+ module_input_serialized=module_input_serialized,
426
+ notify=notify,
427
+ requested_machine=machine,
428
+ requested_timeout_seconds=timeout,
429
+ result_name_prefix=result_prefix,
430
+ requested_machine_count=requested_machine_count,
431
+ )
432
+ return Job(cast(JobDict, _job_dict))
433
+
434
+ job_dict: CreatedJobDict = BiolibJobApi.create(
435
+ app_resource_name_prefix=parse_app_uri(app_uri)['resource_name_prefix'],
436
+ app_version_id=app_version_uuid,
437
+ experiment_uuid=experiment_id,
438
+ machine=machine,
439
+ notify=notify,
440
+ override_command=override_command,
441
+ timeout=timeout,
442
+ requested_machine_count=requested_machine_count,
443
+ )
444
+ JobStorage.upload_module_input(job=job_dict, module_input_serialized=module_input_serialized)
445
+ cloud_job = BiolibJobApi.create_cloud_job(job_id=job_dict['public_id'], result_name_prefix=result_prefix)
446
+ logger.debug(f"Cloud: Job created with id {cloud_job['public_id']}")
447
+ return Job(cast(JobDict, job_dict))
biolib/jobs/job_result.py CHANGED
@@ -1,25 +1,24 @@
1
- from pathlib import Path
2
- from fnmatch import fnmatch
3
1
  import time
2
+ from fnmatch import fnmatch
3
+ from pathlib import Path
4
4
 
5
5
  from biolib.biolib_binary_format import ModuleOutputV2
6
+ from biolib.biolib_binary_format.remote_endpoints import RemoteJobStorageEndpoint
6
7
  from biolib.biolib_binary_format.remote_stream_seeker import StreamSeeker
7
- from biolib.biolib_binary_format.utils import RemoteIndexableBuffer, LazyLoadedFile
8
- from biolib.biolib_binary_format.remote_endpoints import RemoteJobStorageResultEndpoint
8
+ from biolib.biolib_binary_format.utils import LazyLoadedFile, RemoteIndexableBuffer
9
9
  from biolib.biolib_errors import BioLibError
10
10
  from biolib.biolib_logging import logger
11
- from biolib.typing_utils import Optional, List, cast, Union, Callable
11
+ from biolib.typing_utils import Callable, List, Optional, Union, cast
12
12
 
13
13
  PathFilter = Union[str, Callable[[str], bool]]
14
14
 
15
15
 
16
16
  class JobResult:
17
-
18
17
  def __init__(
19
- self,
20
- job_uuid: str,
21
- job_auth_token: str,
22
- module_output: Optional[ModuleOutputV2] = None,
18
+ self,
19
+ job_uuid: str,
20
+ job_auth_token: str,
21
+ module_output: Optional[ModuleOutputV2] = None,
23
22
  ):
24
23
  self._job_uuid: str = job_uuid
25
24
  self._job_auth_token: str = job_auth_token
@@ -35,7 +34,12 @@ class JobResult:
35
34
  def get_exit_code(self) -> int:
36
35
  return self._get_module_output().get_exit_code()
37
36
 
38
- def save_files(self, output_dir: str, path_filter: Optional[PathFilter] = None) -> None:
37
+ def save_files(
38
+ self,
39
+ output_dir: str,
40
+ path_filter: Optional[PathFilter] = None,
41
+ skip_file_if_exists: Optional[bool] = None,
42
+ ) -> None:
39
43
  module_output = self._get_module_output()
40
44
  output_files = module_output.get_files()
41
45
  filtered_output_files = self._get_filtered_files(output_files, path_filter) if path_filter else output_files
@@ -61,24 +65,44 @@ class JobResult:
61
65
  # Remove leading slash of file_path
62
66
  destination_file_path = Path(output_dir) / Path(file.path.lstrip('/'))
63
67
  if destination_file_path.exists():
64
- destination_file_path.rename(f'{destination_file_path}.biolib-renamed.{time.strftime("%Y%m%d%H%M%S")}')
68
+ if skip_file_if_exists:
69
+ print(f'Skipping {destination_file_path} as a file with that name already exists locally.')
70
+ continue
71
+ else:
72
+ destination_file_path.rename(
73
+ f'{destination_file_path}.biolib-renamed.{time.strftime("%Y%m%d%H%M%S")}'
74
+ )
65
75
 
66
76
  dir_path = destination_file_path.parent
67
77
  if dir_path:
68
78
  dir_path.mkdir(parents=True, exist_ok=True)
69
79
 
70
- with open(destination_file_path, mode='wb') as destination_file:
71
- for chunk in stream_seeker.seek_and_read(file_start=file.start, file_length=file.length):
72
- destination_file.write(chunk)
80
+ # write content to temporary (partial) file
81
+ partial_path = destination_file_path.with_suffix(
82
+ destination_file_path.suffix + f'.{self._job_uuid}.partial_biolib_download'
83
+ )
84
+ file_start = file.start
85
+ data_to_download = file.length
86
+ if partial_path.exists():
87
+ data_already_downloaded = partial_path.stat().st_size
88
+ file_start += data_already_downloaded
89
+ data_to_download -= data_already_downloaded
90
+
91
+ with open(partial_path, mode='ab') as partial_file:
92
+ for chunk in stream_seeker.seek_and_read(file_start=file_start, file_length=data_to_download):
93
+ partial_file.write(chunk)
94
+
95
+ # rename partial file to actual file name
96
+ partial_path.rename(destination_file_path)
73
97
 
74
98
  def get_output_file(self, filename) -> LazyLoadedFile:
75
99
  files = self._get_module_output().get_files()
76
100
  filtered_files = self._get_filtered_files(files, path_filter=filename)
77
101
  if not filtered_files:
78
- raise BioLibError(f"File {filename} not found in results.")
102
+ raise BioLibError(f'File {filename} not found in results.')
79
103
 
80
104
  if len(filtered_files) != 1:
81
- raise BioLibError(f"Found multiple results for filename {filename}.")
105
+ raise BioLibError(f'Found multiple results for filename {filename}.')
82
106
 
83
107
  return filtered_files[0]
84
108
 
@@ -100,8 +124,8 @@ class JobResult:
100
124
  glob_filter = cast(str, path_filter)
101
125
 
102
126
  # since all file paths start with /, make sure filter does too
103
- if not glob_filter.startswith("/"):
104
- glob_filter = "/" + glob_filter
127
+ if not glob_filter.startswith('/'):
128
+ glob_filter = '/' + glob_filter
105
129
 
106
130
  def _filter_function(file: LazyLoadedFile) -> bool:
107
131
  return fnmatch(file.path, glob_filter)
@@ -110,9 +134,10 @@ class JobResult:
110
134
 
111
135
  def _get_module_output(self) -> ModuleOutputV2:
112
136
  if self._module_output is None:
113
- remote_job_storage_endpoint = RemoteJobStorageResultEndpoint(
114
- job_id=self._job_uuid,
137
+ remote_job_storage_endpoint = RemoteJobStorageEndpoint(
115
138
  job_auth_token=self._job_auth_token,
139
+ job_uuid=self._job_uuid,
140
+ storage_type='output',
116
141
  )
117
142
  buffer = RemoteIndexableBuffer(endpoint=remote_job_storage_endpoint)
118
143
  self._module_output = ModuleOutputV2(buffer)
biolib/jobs/types.py CHANGED
@@ -1,4 +1,4 @@
1
- from biolib.typing_utils import TypedDict, Optional, Literal, List
1
+ from biolib.typing_utils import List, Literal, Optional, TypedDict
2
2
 
3
3
  JobState = Literal['in_progress', 'completed', 'failed', 'cancelled']
4
4
 
biolib/runtime/__init__.py CHANGED
@@ -1,5 +1,6 @@
1
1
  import warnings
2
- from biolib.sdk import Runtime as _Runtime
2
+
3
+ from biolib._runtime.runtime import Runtime as _Runtime
3
4
 
4
5
 
5
6
  def set_main_result_prefix(result_prefix: str) -> None:
biolib/sdk/__init__.py CHANGED
@@ -1,12 +1,14 @@
1
+ from typing import Optional
2
+
1
3
  # Imports to hide and use as private internal utils
2
- from biolib._internal.data_record import DataRecord as _DataRecord
4
+ from biolib._data_record.data_record import DataRecord as _DataRecord
3
5
  from biolib._internal.push_application import push_application as _push_application
4
6
  from biolib._internal.push_application import set_app_version_as_active as _set_app_version_as_active
7
+ from biolib._runtime.runtime import Runtime as _Runtime
5
8
  from biolib.app import BioLibApp as _BioLibApp
6
- from biolib.typing_utils import Optional as _Optional
7
9
 
8
- # Imports to expose as public API
9
- from biolib._internal.runtime import Runtime
10
+ # Classes to expose as public API
11
+ Runtime = _Runtime
10
12
 
11
13
 
12
14
  def push_app_version(uri: str, path: str) -> _BioLibApp:
@@ -31,7 +33,7 @@ def get_app_version_pytest_plugin(app_version: _BioLibApp):
31
33
  except BaseException:
32
34
  raise Exception('Failed to import pytest; please make sure it is installed') from None
33
35
 
34
- class AppVersionFixturePlugin(object):
36
+ class AppVersionFixturePlugin:
35
37
  def __init__(self, app_version_ref):
36
38
  self.app_version_ref = app_version_ref
37
39
 
@@ -42,5 +44,14 @@ def get_app_version_pytest_plugin(app_version: _BioLibApp):
42
44
  return AppVersionFixturePlugin(app_version)
43
45
 
44
46
 
45
- def create_data_record(destination: str, data_path: str, name: _Optional[str] = None) -> _DataRecord:
46
- return _DataRecord.create(destination, data_path, name)
47
+ def create_data_record(
48
+ destination: str,
49
+ data_path: str,
50
+ name: Optional[str] = None,
51
+ record_type: Optional[str] = None,
52
+ ) -> _DataRecord:
53
+ return _DataRecord.create(
54
+ destination=f'{destination}/{name}' if name else destination,
55
+ data_path=data_path,
56
+ record_type=record_type,
57
+ )
biolib/typing_utils.py CHANGED
@@ -1,7 +1,2 @@
1
- import sys
2
-
3
- # import and expose everything from the typing module
4
- from typing import * # pylint: disable=wildcard-import, unused-wildcard-import
5
-
6
- if sys.version_info < (3, 8):
7
- from typing_extensions import TypedDict, Literal # pylint: disable=unused-import
1
+ # TODO: Deprecate and later remove this file
2
+ from biolib._internal.types.typing import * # pylint: disable=wildcard-import, unused-wildcard-import
biolib/user/sign_in.py CHANGED
@@ -14,11 +14,11 @@ def sign_out() -> None:
14
14
 
15
15
 
16
16
  def sign_in(open_in_default_browser: bool = False) -> None:
17
- api_client = BiolibApiClient.get()
18
- if api_client.is_signed_in:
17
+ if not BiolibApiClient.is_reauthentication_needed():
19
18
  logger_no_user_data.info('Already signed in')
20
19
  return
21
20
 
21
+ api_client = BiolibApiClient.get()
22
22
  auth_challenge = BiolibAuthChallengeApi.create_auth_challenge()
23
23
  auth_challenge_token = auth_challenge['token']
24
24