pybiolib 1.1.1881__py3-none-any.whl → 1.2.7.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biolib/__init__.py +11 -4
- biolib/_data_record/data_record.py +278 -0
- biolib/_internal/data_record/__init__.py +1 -1
- biolib/_internal/data_record/data_record.py +97 -151
- biolib/_internal/data_record/remote_storage_endpoint.py +18 -7
- biolib/_internal/file_utils.py +77 -0
- biolib/_internal/fuse_mount/__init__.py +1 -0
- biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
- biolib/_internal/http_client.py +31 -9
- biolib/_internal/lfs/__init__.py +1 -0
- biolib/_internal/libs/__init__.py +1 -0
- biolib/_internal/libs/fusepy/__init__.py +1257 -0
- biolib/_internal/push_application.py +6 -1
- biolib/_internal/runtime.py +3 -56
- biolib/_internal/types/__init__.py +4 -0
- biolib/_internal/types/app.py +9 -0
- biolib/_internal/types/data_record.py +40 -0
- biolib/_internal/types/experiment.py +10 -0
- biolib/_internal/types/resource.py +14 -0
- biolib/_internal/types/typing.py +7 -0
- biolib/_internal/utils/multinode.py +264 -0
- biolib/_runtime/runtime.py +84 -0
- biolib/api/__init__.py +1 -0
- biolib/api/client.py +39 -17
- biolib/app/app.py +34 -71
- biolib/biolib_api_client/api_client.py +9 -2
- biolib/biolib_api_client/app_types.py +3 -2
- biolib/biolib_api_client/biolib_job_api.py +6 -0
- biolib/biolib_api_client/job_types.py +4 -4
- biolib/biolib_api_client/lfs_types.py +8 -2
- biolib/biolib_binary_format/remote_endpoints.py +12 -10
- biolib/biolib_binary_format/utils.py +23 -3
- biolib/cli/auth.py +1 -1
- biolib/cli/data_record.py +45 -6
- biolib/cli/lfs.py +10 -6
- biolib/compute_node/cloud_utils/cloud_utils.py +13 -16
- biolib/compute_node/job_worker/executors/docker_executor.py +127 -108
- biolib/compute_node/job_worker/job_storage.py +17 -5
- biolib/compute_node/job_worker/job_worker.py +25 -15
- biolib/compute_node/remote_host_proxy.py +72 -84
- biolib/compute_node/webserver/webserver_types.py +0 -1
- biolib/compute_node/webserver/worker_thread.py +42 -39
- biolib/experiments/experiment.py +75 -44
- biolib/jobs/job.py +98 -19
- biolib/jobs/job_result.py +46 -21
- biolib/jobs/types.py +1 -1
- biolib/runtime/__init__.py +2 -1
- biolib/sdk/__init__.py +18 -7
- biolib/typing_utils.py +2 -7
- biolib/user/sign_in.py +2 -2
- biolib/utils/seq_util.py +38 -35
- {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/METADATA +1 -1
- {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/RECORD +57 -45
- biolib/experiments/types.py +0 -9
- biolib/lfs/__init__.py +0 -4
- biolib/lfs/utils.py +0 -153
- /biolib/{lfs → _internal/lfs}/cache.py +0 -0
- {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/LICENSE +0 -0
- {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/WHEEL +0 -0
- {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/entry_points.txt +0 -0
biolib/experiments/experiment.py
CHANGED
@@ -1,30 +1,29 @@
|
|
1
1
|
import time
|
2
2
|
from collections import OrderedDict
|
3
3
|
|
4
|
-
|
5
|
-
from biolib.jobs.types import JobsPaginatedResponse
|
6
|
-
from biolib.typing_utils import List, Optional
|
7
|
-
|
4
|
+
import biolib._internal.types as _types
|
8
5
|
from biolib import api
|
9
|
-
from biolib.
|
6
|
+
from biolib.biolib_errors import BioLibError
|
10
7
|
from biolib.jobs.job import Job
|
11
|
-
from biolib.
|
12
|
-
|
8
|
+
from biolib.jobs.types import JobsPaginatedResponse
|
13
9
|
from biolib.tables import BioLibTable
|
10
|
+
from biolib.typing_utils import Dict, List, Optional, Union
|
14
11
|
|
15
12
|
|
16
13
|
class Experiment:
|
17
14
|
_BIOLIB_EXPERIMENTS: List['Experiment'] = []
|
18
15
|
|
19
16
|
# Columns to print in table when showing Job
|
20
|
-
_table_columns_to_row_map = OrderedDict(
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
17
|
+
_table_columns_to_row_map = OrderedDict(
|
18
|
+
{
|
19
|
+
'Name': {'key': 'name', 'params': {}},
|
20
|
+
'Job Count': {'key': 'job_count', 'params': {}},
|
21
|
+
'Created At': {'key': 'created_at', 'params': {}},
|
22
|
+
}
|
23
|
+
)
|
25
24
|
|
26
|
-
def __init__(self,
|
27
|
-
self.
|
25
|
+
def __init__(self, uri: str, _resource_dict: Optional[_types.ResourceDict] = None):
|
26
|
+
self._resource_dict: _types.ResourceDict = _resource_dict or self._get_or_create_resource_dict(uri)
|
28
27
|
|
29
28
|
def __enter__(self):
|
30
29
|
Experiment._BIOLIB_EXPERIMENTS.append(self)
|
@@ -33,18 +32,29 @@ class Experiment:
|
|
33
32
|
Experiment._BIOLIB_EXPERIMENTS.pop()
|
34
33
|
|
35
34
|
def __str__(self):
|
36
|
-
return f'Experiment: {self.
|
35
|
+
return f'Experiment: {self.uri}'
|
37
36
|
|
38
37
|
def __repr__(self):
|
39
|
-
return f'Experiment: {self.
|
38
|
+
return f'Experiment: {self.uri}'
|
40
39
|
|
41
40
|
@property
|
42
41
|
def uuid(self) -> str:
|
43
|
-
return self.
|
42
|
+
return self._resource_dict['uuid']
|
44
43
|
|
45
44
|
@property
|
46
45
|
def name(self) -> str:
|
47
|
-
return self.
|
46
|
+
return self._resource_dict['name']
|
47
|
+
|
48
|
+
@property
|
49
|
+
def uri(self) -> str:
|
50
|
+
return self._resource_dict['uri']
|
51
|
+
|
52
|
+
@property
|
53
|
+
def _experiment_dict(self) -> _types.ExperimentSlimDict:
|
54
|
+
if not self._resource_dict['experiment']:
|
55
|
+
raise ValueError(f'Resource {self.uri} is not an Experiment')
|
56
|
+
|
57
|
+
return self._resource_dict['experiment']
|
48
58
|
|
49
59
|
@staticmethod
|
50
60
|
def get_experiment_in_context() -> Optional['Experiment']:
|
@@ -55,32 +65,46 @@ class Experiment:
|
|
55
65
|
# Prints a table listing info about experiments accessible to the user
|
56
66
|
@staticmethod
|
57
67
|
def show_experiments(count: int = 25) -> None:
|
58
|
-
experiment_dicts = api.client.get(
|
59
|
-
path='/experiments/',
|
60
|
-
params={
|
61
|
-
'page_size': str(count)
|
62
|
-
}
|
63
|
-
).json()['results']
|
68
|
+
experiment_dicts = api.client.get(path='/experiments/', params={'page_size': str(count)}).json()['results']
|
64
69
|
BioLibTable(
|
65
70
|
columns_to_row_map=Experiment._table_columns_to_row_map,
|
66
71
|
rows=experiment_dicts,
|
67
|
-
title='Experiments'
|
72
|
+
title='Experiments',
|
68
73
|
).print_table()
|
69
74
|
|
75
|
+
@staticmethod
|
76
|
+
def get_by_uri(uri: str) -> 'Experiment':
|
77
|
+
query_param_key = 'uri' if '/' in uri else 'name'
|
78
|
+
resource_dict: _types.ResourceDict = api.client.get('/resource/', params={query_param_key: uri}).json()
|
79
|
+
if not resource_dict['experiment']:
|
80
|
+
raise ValueError(f'Resource {uri} is not an experiment')
|
81
|
+
|
82
|
+
return Experiment(uri=resource_dict['uri'], _resource_dict=resource_dict)
|
83
|
+
|
70
84
|
def wait(self) -> None:
|
71
|
-
self.
|
85
|
+
self._refetch()
|
72
86
|
while self._experiment_dict['job_running_count'] > 0:
|
73
87
|
print(f"Waiting for {self._experiment_dict['job_running_count']} jobs to finish", end='\r')
|
74
88
|
time.sleep(5)
|
75
|
-
self.
|
89
|
+
self._refetch()
|
76
90
|
|
77
91
|
print(f'All jobs of experiment {self.name} have finished')
|
78
92
|
|
79
93
|
def add_job(self, job_id: str) -> None:
|
80
|
-
api.client.patch(
|
81
|
-
|
82
|
-
|
83
|
-
|
94
|
+
api.client.patch(path=f'/jobs/{job_id}/', data={'experiment_uuid': self.uuid})
|
95
|
+
|
96
|
+
def mount_files(self, mount_path: str) -> None:
|
97
|
+
try:
|
98
|
+
# Only attempt to import FUSE dependencies when strictly necessary
|
99
|
+
from biolib._internal.fuse_mount import ( # pylint: disable=import-outside-toplevel
|
100
|
+
ExperimentFuseMount as _ExperimentFuseMount,
|
101
|
+
)
|
102
|
+
except ImportError as error:
|
103
|
+
raise ImportError(
|
104
|
+
'Failed to import FUSE mounting utils. Please ensure FUSE is installed on your system.'
|
105
|
+
) from error
|
106
|
+
|
107
|
+
_ExperimentFuseMount.mount_experiment(experiment=self, mount_path=mount_path)
|
84
108
|
|
85
109
|
def export_job_list(self, export_format='dicts'):
|
86
110
|
valid_formats = ('dicts', 'dataframe')
|
@@ -98,7 +122,7 @@ class Experiment:
|
|
98
122
|
raise ImportError(
|
99
123
|
'Pandas must be installed to use this method. '
|
100
124
|
'Alternatively, use .get_jobs() to get a list of job objects.'
|
101
|
-
|
125
|
+
) from error
|
102
126
|
|
103
127
|
jobs_df = pd.DataFrame.from_dict(job_dict_list)
|
104
128
|
jobs_df.started_at = pd.to_datetime(jobs_df.started_at)
|
@@ -125,7 +149,7 @@ class Experiment:
|
|
125
149
|
BioLibTable(
|
126
150
|
columns_to_row_map=Job.table_columns_to_row_map,
|
127
151
|
rows=[job._job_dict for job in jobs], # pylint: disable=protected-access
|
128
|
-
title=f'Jobs in experiment: "{self.name}"'
|
152
|
+
title=f'Jobs in experiment: "{self.name}"',
|
129
153
|
).print_table()
|
130
154
|
|
131
155
|
def get_jobs(self, status: Optional[str] = None) -> List[Job]:
|
@@ -147,15 +171,22 @@ class Experiment:
|
|
147
171
|
|
148
172
|
return jobs
|
149
173
|
|
150
|
-
def
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
174
|
+
def rename(self, destination: str) -> None:
|
175
|
+
api.client.patch(f'/resources/{self.uuid}/', data={'uri': destination})
|
176
|
+
self._refetch()
|
177
|
+
|
178
|
+
@staticmethod
|
179
|
+
def _get_resource_dict_by_uuid(uuid: str) -> _types.ResourceDict:
|
180
|
+
resource_dict: _types.ResourceDict = api.client.get(f'/resources/{uuid}/').json()
|
181
|
+
if not resource_dict['experiment']:
|
182
|
+
raise ValueError('Resource from URI is not an experiment')
|
183
|
+
|
184
|
+
return resource_dict
|
185
|
+
|
186
|
+
@staticmethod
|
187
|
+
def _get_or_create_resource_dict(uri: str) -> _types.ResourceDict:
|
188
|
+
response_dict = api.client.post(path='/experiments/', data={'uri' if '/' in uri else 'name': uri}).json()
|
189
|
+
return Experiment._get_resource_dict_by_uuid(uuid=response_dict['uuid'])
|
159
190
|
|
160
|
-
def
|
161
|
-
self.
|
191
|
+
def _refetch(self) -> None:
|
192
|
+
self._resource_dict = self._get_resource_dict_by_uuid(uuid=self._resource_dict['uuid'])
|
biolib/jobs/job.py
CHANGED
@@ -1,26 +1,30 @@
|
|
1
1
|
import base64
|
2
|
-
from datetime import datetime, timedelta
|
3
2
|
import sys
|
4
3
|
import time
|
5
|
-
from pathlib import Path
|
6
4
|
from collections import OrderedDict
|
5
|
+
from datetime import datetime, timedelta
|
6
|
+
from pathlib import Path
|
7
7
|
from urllib.parse import urlparse
|
8
8
|
|
9
9
|
from biolib import api, utils
|
10
10
|
from biolib._internal.http_client import HttpClient
|
11
11
|
from biolib._internal.utils import open_browser_window_from_notebook
|
12
|
-
from biolib.biolib_api_client import BiolibApiClient
|
12
|
+
from biolib.biolib_api_client import BiolibApiClient, CreatedJobDict
|
13
|
+
from biolib.biolib_api_client.biolib_app_api import BiolibAppApi
|
13
14
|
from biolib.biolib_api_client.biolib_job_api import BiolibJobApi
|
14
|
-
from biolib.biolib_binary_format import LazyLoadedFile,
|
15
|
+
from biolib.biolib_binary_format import LazyLoadedFile, ModuleInput, ModuleInputDict, ModuleOutputV2
|
16
|
+
from biolib.biolib_binary_format.remote_endpoints import RemoteJobStorageEndpoint
|
15
17
|
from biolib.biolib_binary_format.stdout_and_stderr import StdoutAndStderr
|
16
18
|
from biolib.biolib_errors import BioLibError, CloudJobFinishedError
|
17
19
|
from biolib.biolib_logging import logger, logger_no_user_data
|
20
|
+
from biolib.compute_node.job_worker.job_storage import JobStorage
|
18
21
|
from biolib.compute_node.utils import SystemExceptionCodeMap, SystemExceptionCodes
|
19
22
|
from biolib.jobs.job_result import JobResult
|
20
|
-
from biolib.jobs.types import
|
23
|
+
from biolib.jobs.types import CloudJobDict, CloudJobStartedDict, JobDict
|
21
24
|
from biolib.tables import BioLibTable
|
22
|
-
from biolib.typing_utils import
|
25
|
+
from biolib.typing_utils import Dict, List, Optional, cast
|
23
26
|
from biolib.utils import IS_RUNNING_IN_NOTEBOOK
|
27
|
+
from biolib.utils.app_uri import parse_app_uri
|
24
28
|
|
25
29
|
|
26
30
|
class Job:
|
@@ -56,26 +60,23 @@ class Job:
|
|
56
60
|
@property
|
57
61
|
def result(self) -> JobResult:
|
58
62
|
if not self._result:
|
59
|
-
|
60
|
-
self._result = JobResult(job_uuid=self._uuid, job_auth_token=self._auth_token)
|
61
|
-
else:
|
62
|
-
raise BioLibError(f"Result is not available for {self._uuid}: status is {self._job_dict['state']}.")
|
63
|
+
self._result = JobResult(job_uuid=self._uuid, job_auth_token=self._auth_token)
|
63
64
|
|
64
65
|
return self._result
|
65
66
|
|
66
67
|
@property
|
67
68
|
def stdout(self) -> bytes:
|
68
|
-
logger.warning(
|
69
|
+
logger.warning('The property .stdout is deprecated, please use .get_stdout()')
|
69
70
|
return self.result.get_stdout()
|
70
71
|
|
71
72
|
@property
|
72
73
|
def stderr(self) -> bytes:
|
73
|
-
logger.warning(
|
74
|
+
logger.warning('The property .stderr is deprecated, please use .get_stderr()')
|
74
75
|
return self.result.get_stderr()
|
75
76
|
|
76
77
|
@property
|
77
78
|
def exitcode(self) -> int:
|
78
|
-
logger.warning(
|
79
|
+
logger.warning('The property .exitcode is deprecated, please use .get_exit_code()')
|
79
80
|
return self.result.get_exit_code()
|
80
81
|
|
81
82
|
def is_finished(self) -> bool:
|
@@ -109,8 +110,8 @@ class Job:
|
|
109
110
|
def load_file_as_numpy(self, *args, **kwargs):
|
110
111
|
try:
|
111
112
|
import numpy # type: ignore # pylint: disable=import-outside-toplevel,import-error
|
112
|
-
except: # pylint: disable=raise-missing-from
|
113
|
-
raise Exception(
|
113
|
+
except ImportError: # pylint: disable=raise-missing-from
|
114
|
+
raise Exception('Failed to import numpy, please make sure it is installed.') from None
|
114
115
|
file_handle = self.result.get_output_file(*args, **kwargs).get_file_handle()
|
115
116
|
return numpy.load(file_handle, allow_pickle=False) # type: ignore
|
116
117
|
|
@@ -187,6 +188,39 @@ class Job:
|
|
187
188
|
print('Please copy and paste the following link into your browser:')
|
188
189
|
print(results_url_to_open)
|
189
190
|
|
191
|
+
def cancel(self) -> None:
|
192
|
+
try:
|
193
|
+
api.client.patch(
|
194
|
+
path=f'/jobs/{self._uuid}/',
|
195
|
+
headers={'Job-Auth-Token': self._auth_token} if self._auth_token else None,
|
196
|
+
data={'state': 'cancelled'},
|
197
|
+
)
|
198
|
+
logger.info(f'Job {self._uuid} canceled')
|
199
|
+
except Exception as error:
|
200
|
+
logger.error(f'Failed to cancel job {self._uuid} due to: {error}')
|
201
|
+
|
202
|
+
def recompute(self, app_uri: Optional[str] = None, machine: Optional[str] = None, blocking: bool = True) -> 'Job':
|
203
|
+
app_response = BiolibAppApi.get_by_uri(uri=app_uri or self._job_dict['app_uri'])
|
204
|
+
|
205
|
+
job_storage_input = RemoteJobStorageEndpoint(
|
206
|
+
job_auth_token=self._auth_token,
|
207
|
+
job_uuid=self._uuid,
|
208
|
+
storage_type='input',
|
209
|
+
)
|
210
|
+
http_response = HttpClient.request(url=job_storage_input.get_remote_url())
|
211
|
+
module_input_serialized = http_response.content
|
212
|
+
|
213
|
+
job = self._start_job_in_cloud(
|
214
|
+
app_uri=app_response['app_uri'],
|
215
|
+
app_version_uuid=app_response['app_version']['public_id'],
|
216
|
+
module_input_serialized=module_input_serialized,
|
217
|
+
machine=machine,
|
218
|
+
)
|
219
|
+
if blocking:
|
220
|
+
job.stream_logs()
|
221
|
+
|
222
|
+
return job
|
223
|
+
|
190
224
|
def _get_cloud_job(self) -> CloudJobDict:
|
191
225
|
self._refetch_job_dict(force_refetch=True)
|
192
226
|
if self._job_dict['cloud_job'] is None:
|
@@ -278,7 +312,7 @@ class Job:
|
|
278
312
|
status_json = self._get_job_status_from_compute_node(compute_node_url)
|
279
313
|
if not status_json:
|
280
314
|
# this can happen if the job is finished but already removed from the compute node
|
281
|
-
logger.warning(
|
315
|
+
logger.warning('WARN: We were unable to retrieve the full log of the job, please try again')
|
282
316
|
break
|
283
317
|
job_is_completed = status_json['is_completed']
|
284
318
|
for status_update in status_json['status_updates']:
|
@@ -320,7 +354,10 @@ class Job:
|
|
320
354
|
self.print_logs_packages(response_json['streamed_logs_packages_b64'])
|
321
355
|
|
322
356
|
def _get_cloud_job_awaiting_started(self) -> CloudJobStartedDict:
|
357
|
+
retry_count = 0
|
323
358
|
while True:
|
359
|
+
retry_count += 1
|
360
|
+
time.sleep(min(10, retry_count))
|
324
361
|
cloud_job = self._get_cloud_job()
|
325
362
|
|
326
363
|
if cloud_job['finished_at']:
|
@@ -333,7 +370,6 @@ class Job:
|
|
333
370
|
return cast(CloudJobStartedDict, cloud_job)
|
334
371
|
|
335
372
|
logger.info('Cloud: The job has been queued. Please wait...')
|
336
|
-
time.sleep(10)
|
337
373
|
|
338
374
|
def _get_job_status_from_compute_node(self, compute_node_url):
|
339
375
|
for _ in range(15):
|
@@ -341,9 +377,9 @@ class Job:
|
|
341
377
|
return HttpClient.request(url=f'{compute_node_url}/v1/job/{self._uuid}/status/').json()
|
342
378
|
except Exception: # pylint: disable=broad-except
|
343
379
|
cloud_job = self._get_cloud_job()
|
344
|
-
logger.debug(
|
380
|
+
logger.debug('Failed to get status from compute node, retrying...')
|
345
381
|
if cloud_job['finished_at']:
|
346
|
-
logger.debug(
|
382
|
+
logger.debug('Job no longer exists on compute node, checking for error...')
|
347
383
|
if cloud_job['error_code'] != SystemExceptionCodes.COMPLETED_SUCCESSFULLY.value:
|
348
384
|
error_message = SystemExceptionCodeMap.get(
|
349
385
|
cloud_job['error_code'], f'Unknown error code {cloud_job["error_code"]}'
|
@@ -366,3 +402,46 @@ class Job:
|
|
366
402
|
|
367
403
|
self._job_dict = self._get_job_dict(self._uuid, self._auth_token)
|
368
404
|
self._job_dict_last_fetched_at = datetime.utcnow()
|
405
|
+
|
406
|
+
@staticmethod
|
407
|
+
def _start_job_in_cloud(
|
408
|
+
app_uri: str,
|
409
|
+
app_version_uuid: str,
|
410
|
+
module_input_serialized: bytes,
|
411
|
+
override_command: bool = False,
|
412
|
+
machine: Optional[str] = None,
|
413
|
+
experiment_id: Optional[str] = None,
|
414
|
+
result_prefix: Optional[str] = None,
|
415
|
+
timeout: Optional[int] = None,
|
416
|
+
notify: bool = False,
|
417
|
+
requested_machine_count: Optional[int] = None,
|
418
|
+
) -> 'Job':
|
419
|
+
if len(module_input_serialized) < 500_000:
|
420
|
+
_job_dict = BiolibJobApi.create_job_with_data(
|
421
|
+
app_resource_name_prefix=parse_app_uri(app_uri)['resource_name_prefix'],
|
422
|
+
app_version_uuid=app_version_uuid,
|
423
|
+
arguments_override_command=override_command,
|
424
|
+
experiment_uuid=experiment_id,
|
425
|
+
module_input_serialized=module_input_serialized,
|
426
|
+
notify=notify,
|
427
|
+
requested_machine=machine,
|
428
|
+
requested_timeout_seconds=timeout,
|
429
|
+
result_name_prefix=result_prefix,
|
430
|
+
requested_machine_count=requested_machine_count,
|
431
|
+
)
|
432
|
+
return Job(cast(JobDict, _job_dict))
|
433
|
+
|
434
|
+
job_dict: CreatedJobDict = BiolibJobApi.create(
|
435
|
+
app_resource_name_prefix=parse_app_uri(app_uri)['resource_name_prefix'],
|
436
|
+
app_version_id=app_version_uuid,
|
437
|
+
experiment_uuid=experiment_id,
|
438
|
+
machine=machine,
|
439
|
+
notify=notify,
|
440
|
+
override_command=override_command,
|
441
|
+
timeout=timeout,
|
442
|
+
requested_machine_count=requested_machine_count,
|
443
|
+
)
|
444
|
+
JobStorage.upload_module_input(job=job_dict, module_input_serialized=module_input_serialized)
|
445
|
+
cloud_job = BiolibJobApi.create_cloud_job(job_id=job_dict['public_id'], result_name_prefix=result_prefix)
|
446
|
+
logger.debug(f"Cloud: Job created with id {cloud_job['public_id']}")
|
447
|
+
return Job(cast(JobDict, job_dict))
|
biolib/jobs/job_result.py
CHANGED
@@ -1,25 +1,24 @@
|
|
1
|
-
from pathlib import Path
|
2
|
-
from fnmatch import fnmatch
|
3
1
|
import time
|
2
|
+
from fnmatch import fnmatch
|
3
|
+
from pathlib import Path
|
4
4
|
|
5
5
|
from biolib.biolib_binary_format import ModuleOutputV2
|
6
|
+
from biolib.biolib_binary_format.remote_endpoints import RemoteJobStorageEndpoint
|
6
7
|
from biolib.biolib_binary_format.remote_stream_seeker import StreamSeeker
|
7
|
-
from biolib.biolib_binary_format.utils import
|
8
|
-
from biolib.biolib_binary_format.remote_endpoints import RemoteJobStorageResultEndpoint
|
8
|
+
from biolib.biolib_binary_format.utils import LazyLoadedFile, RemoteIndexableBuffer
|
9
9
|
from biolib.biolib_errors import BioLibError
|
10
10
|
from biolib.biolib_logging import logger
|
11
|
-
from biolib.typing_utils import
|
11
|
+
from biolib.typing_utils import Callable, List, Optional, Union, cast
|
12
12
|
|
13
13
|
PathFilter = Union[str, Callable[[str], bool]]
|
14
14
|
|
15
15
|
|
16
16
|
class JobResult:
|
17
|
-
|
18
17
|
def __init__(
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
18
|
+
self,
|
19
|
+
job_uuid: str,
|
20
|
+
job_auth_token: str,
|
21
|
+
module_output: Optional[ModuleOutputV2] = None,
|
23
22
|
):
|
24
23
|
self._job_uuid: str = job_uuid
|
25
24
|
self._job_auth_token: str = job_auth_token
|
@@ -35,7 +34,12 @@ class JobResult:
|
|
35
34
|
def get_exit_code(self) -> int:
|
36
35
|
return self._get_module_output().get_exit_code()
|
37
36
|
|
38
|
-
def save_files(
|
37
|
+
def save_files(
|
38
|
+
self,
|
39
|
+
output_dir: str,
|
40
|
+
path_filter: Optional[PathFilter] = None,
|
41
|
+
skip_file_if_exists: Optional[bool] = None,
|
42
|
+
) -> None:
|
39
43
|
module_output = self._get_module_output()
|
40
44
|
output_files = module_output.get_files()
|
41
45
|
filtered_output_files = self._get_filtered_files(output_files, path_filter) if path_filter else output_files
|
@@ -61,24 +65,44 @@ class JobResult:
|
|
61
65
|
# Remove leading slash of file_path
|
62
66
|
destination_file_path = Path(output_dir) / Path(file.path.lstrip('/'))
|
63
67
|
if destination_file_path.exists():
|
64
|
-
|
68
|
+
if skip_file_if_exists:
|
69
|
+
print(f'Skipping {destination_file_path} as a file with that name already exists locally.')
|
70
|
+
continue
|
71
|
+
else:
|
72
|
+
destination_file_path.rename(
|
73
|
+
f'{destination_file_path}.biolib-renamed.{time.strftime("%Y%m%d%H%M%S")}'
|
74
|
+
)
|
65
75
|
|
66
76
|
dir_path = destination_file_path.parent
|
67
77
|
if dir_path:
|
68
78
|
dir_path.mkdir(parents=True, exist_ok=True)
|
69
79
|
|
70
|
-
|
71
|
-
|
72
|
-
|
80
|
+
# write content to temporary (partial) file
|
81
|
+
partial_path = destination_file_path.with_suffix(
|
82
|
+
destination_file_path.suffix + f'.{self._job_uuid}.partial_biolib_download'
|
83
|
+
)
|
84
|
+
file_start = file.start
|
85
|
+
data_to_download = file.length
|
86
|
+
if partial_path.exists():
|
87
|
+
data_already_downloaded = partial_path.stat().st_size
|
88
|
+
file_start += data_already_downloaded
|
89
|
+
data_to_download -= data_already_downloaded
|
90
|
+
|
91
|
+
with open(partial_path, mode='ab') as partial_file:
|
92
|
+
for chunk in stream_seeker.seek_and_read(file_start=file_start, file_length=data_to_download):
|
93
|
+
partial_file.write(chunk)
|
94
|
+
|
95
|
+
# rename partial file to actual file name
|
96
|
+
partial_path.rename(destination_file_path)
|
73
97
|
|
74
98
|
def get_output_file(self, filename) -> LazyLoadedFile:
|
75
99
|
files = self._get_module_output().get_files()
|
76
100
|
filtered_files = self._get_filtered_files(files, path_filter=filename)
|
77
101
|
if not filtered_files:
|
78
|
-
raise BioLibError(f
|
102
|
+
raise BioLibError(f'File {filename} not found in results.')
|
79
103
|
|
80
104
|
if len(filtered_files) != 1:
|
81
|
-
raise BioLibError(f
|
105
|
+
raise BioLibError(f'Found multiple results for filename {filename}.')
|
82
106
|
|
83
107
|
return filtered_files[0]
|
84
108
|
|
@@ -100,8 +124,8 @@ class JobResult:
|
|
100
124
|
glob_filter = cast(str, path_filter)
|
101
125
|
|
102
126
|
# since all file paths start with /, make sure filter does too
|
103
|
-
if not glob_filter.startswith(
|
104
|
-
glob_filter =
|
127
|
+
if not glob_filter.startswith('/'):
|
128
|
+
glob_filter = '/' + glob_filter
|
105
129
|
|
106
130
|
def _filter_function(file: LazyLoadedFile) -> bool:
|
107
131
|
return fnmatch(file.path, glob_filter)
|
@@ -110,9 +134,10 @@ class JobResult:
|
|
110
134
|
|
111
135
|
def _get_module_output(self) -> ModuleOutputV2:
|
112
136
|
if self._module_output is None:
|
113
|
-
remote_job_storage_endpoint =
|
114
|
-
job_id=self._job_uuid,
|
137
|
+
remote_job_storage_endpoint = RemoteJobStorageEndpoint(
|
115
138
|
job_auth_token=self._job_auth_token,
|
139
|
+
job_uuid=self._job_uuid,
|
140
|
+
storage_type='output',
|
116
141
|
)
|
117
142
|
buffer = RemoteIndexableBuffer(endpoint=remote_job_storage_endpoint)
|
118
143
|
self._module_output = ModuleOutputV2(buffer)
|
biolib/jobs/types.py
CHANGED
biolib/runtime/__init__.py
CHANGED
biolib/sdk/__init__.py
CHANGED
@@ -1,12 +1,14 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
1
3
|
# Imports to hide and use as private internal utils
|
2
|
-
from biolib.
|
4
|
+
from biolib._data_record.data_record import DataRecord as _DataRecord
|
3
5
|
from biolib._internal.push_application import push_application as _push_application
|
4
6
|
from biolib._internal.push_application import set_app_version_as_active as _set_app_version_as_active
|
7
|
+
from biolib._runtime.runtime import Runtime as _Runtime
|
5
8
|
from biolib.app import BioLibApp as _BioLibApp
|
6
|
-
from biolib.typing_utils import Optional as _Optional
|
7
9
|
|
8
|
-
#
|
9
|
-
|
10
|
+
# Classes to expose as public API
|
11
|
+
Runtime = _Runtime
|
10
12
|
|
11
13
|
|
12
14
|
def push_app_version(uri: str, path: str) -> _BioLibApp:
|
@@ -31,7 +33,7 @@ def get_app_version_pytest_plugin(app_version: _BioLibApp):
|
|
31
33
|
except BaseException:
|
32
34
|
raise Exception('Failed to import pytest; please make sure it is installed') from None
|
33
35
|
|
34
|
-
class AppVersionFixturePlugin
|
36
|
+
class AppVersionFixturePlugin:
|
35
37
|
def __init__(self, app_version_ref):
|
36
38
|
self.app_version_ref = app_version_ref
|
37
39
|
|
@@ -42,5 +44,14 @@ def get_app_version_pytest_plugin(app_version: _BioLibApp):
|
|
42
44
|
return AppVersionFixturePlugin(app_version)
|
43
45
|
|
44
46
|
|
45
|
-
def create_data_record(
|
46
|
-
|
47
|
+
def create_data_record(
|
48
|
+
destination: str,
|
49
|
+
data_path: str,
|
50
|
+
name: Optional[str] = None,
|
51
|
+
record_type: Optional[str] = None,
|
52
|
+
) -> _DataRecord:
|
53
|
+
return _DataRecord.create(
|
54
|
+
destination=f'{destination}/{name}' if name else destination,
|
55
|
+
data_path=data_path,
|
56
|
+
record_type=record_type,
|
57
|
+
)
|
biolib/typing_utils.py
CHANGED
@@ -1,7 +1,2 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
# import and expose everything from the typing module
|
4
|
-
from typing import * # pylint: disable=wildcard-import, unused-wildcard-import
|
5
|
-
|
6
|
-
if sys.version_info < (3, 8):
|
7
|
-
from typing_extensions import TypedDict, Literal # pylint: disable=unused-import
|
1
|
+
# TODO: Deprecate and later remove this file
|
2
|
+
from biolib._internal.types.typing import * # pylint: disable=wildcard-import, unused-wildcard-import
|
biolib/user/sign_in.py
CHANGED
@@ -14,11 +14,11 @@ def sign_out() -> None:
|
|
14
14
|
|
15
15
|
|
16
16
|
def sign_in(open_in_default_browser: bool = False) -> None:
|
17
|
-
|
18
|
-
if api_client.is_signed_in:
|
17
|
+
if not BiolibApiClient.is_reauthentication_needed():
|
19
18
|
logger_no_user_data.info('Already signed in')
|
20
19
|
return
|
21
20
|
|
21
|
+
api_client = BiolibApiClient.get()
|
22
22
|
auth_challenge = BiolibAuthChallengeApi.create_auth_challenge()
|
23
23
|
auth_challenge_token = auth_challenge['token']
|
24
24
|
|