pybiolib 1.1.1881__py3-none-any.whl → 1.1.2193__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- biolib/__init__.py +11 -4
- biolib/_data_record/data_record.py +278 -0
- biolib/_internal/data_record/__init__.py +1 -1
- biolib/_internal/data_record/data_record.py +95 -151
- biolib/_internal/data_record/remote_storage_endpoint.py +18 -7
- biolib/_internal/file_utils.py +77 -0
- biolib/_internal/fuse_mount/__init__.py +1 -0
- biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
- biolib/_internal/http_client.py +29 -9
- biolib/_internal/lfs/__init__.py +1 -0
- biolib/_internal/libs/__init__.py +1 -0
- biolib/_internal/libs/fusepy/__init__.py +1257 -0
- biolib/_internal/push_application.py +1 -1
- biolib/_internal/runtime.py +2 -56
- biolib/_internal/types/__init__.py +4 -0
- biolib/_internal/types/app.py +9 -0
- biolib/_internal/types/data_record.py +40 -0
- biolib/_internal/types/experiment.py +10 -0
- biolib/_internal/types/resource.py +14 -0
- biolib/_internal/types/typing.py +7 -0
- biolib/_runtime/runtime.py +80 -0
- biolib/api/__init__.py +1 -0
- biolib/api/client.py +39 -17
- biolib/app/app.py +34 -71
- biolib/biolib_api_client/api_client.py +9 -2
- biolib/biolib_api_client/app_types.py +2 -2
- biolib/biolib_api_client/biolib_job_api.py +6 -0
- biolib/biolib_api_client/job_types.py +4 -4
- biolib/biolib_api_client/lfs_types.py +8 -2
- biolib/biolib_binary_format/remote_endpoints.py +12 -10
- biolib/biolib_binary_format/utils.py +23 -3
- biolib/cli/auth.py +1 -1
- biolib/cli/data_record.py +43 -6
- biolib/cli/lfs.py +10 -6
- biolib/compute_node/cloud_utils/cloud_utils.py +13 -16
- biolib/compute_node/job_worker/executors/docker_executor.py +126 -108
- biolib/compute_node/job_worker/job_storage.py +3 -4
- biolib/compute_node/job_worker/job_worker.py +25 -15
- biolib/compute_node/remote_host_proxy.py +61 -84
- biolib/compute_node/webserver/webserver_types.py +0 -1
- biolib/experiments/experiment.py +75 -44
- biolib/jobs/job.py +98 -19
- biolib/jobs/job_result.py +46 -21
- biolib/jobs/types.py +1 -1
- biolib/runtime/__init__.py +2 -1
- biolib/sdk/__init__.py +18 -7
- biolib/typing_utils.py +2 -7
- biolib/user/sign_in.py +2 -2
- biolib/utils/seq_util.py +38 -35
- {pybiolib-1.1.1881.dist-info → pybiolib-1.1.2193.dist-info}/METADATA +1 -1
- {pybiolib-1.1.1881.dist-info → pybiolib-1.1.2193.dist-info}/RECORD +55 -44
- biolib/experiments/types.py +0 -9
- biolib/lfs/__init__.py +0 -4
- biolib/lfs/utils.py +0 -153
- /biolib/{lfs → _internal/lfs}/cache.py +0 -0
- {pybiolib-1.1.1881.dist-info → pybiolib-1.1.2193.dist-info}/LICENSE +0 -0
- {pybiolib-1.1.1881.dist-info → pybiolib-1.1.2193.dist-info}/WHEEL +0 -0
- {pybiolib-1.1.1881.dist-info → pybiolib-1.1.2193.dist-info}/entry_points.txt +0 -0
biolib/jobs/job.py
CHANGED
```diff
@@ -1,26 +1,30 @@
 import base64
-from datetime import datetime, timedelta
 import sys
 import time
-from pathlib import Path
 from collections import OrderedDict
+from datetime import datetime, timedelta
+from pathlib import Path
 from urllib.parse import urlparse
 
 from biolib import api, utils
 from biolib._internal.http_client import HttpClient
 from biolib._internal.utils import open_browser_window_from_notebook
-from biolib.biolib_api_client import BiolibApiClient
+from biolib.biolib_api_client import BiolibApiClient, CreatedJobDict
+from biolib.biolib_api_client.biolib_app_api import BiolibAppApi
 from biolib.biolib_api_client.biolib_job_api import BiolibJobApi
-from biolib.biolib_binary_format import LazyLoadedFile,
+from biolib.biolib_binary_format import LazyLoadedFile, ModuleInput, ModuleInputDict, ModuleOutputV2
+from biolib.biolib_binary_format.remote_endpoints import RemoteJobStorageEndpoint
 from biolib.biolib_binary_format.stdout_and_stderr import StdoutAndStderr
 from biolib.biolib_errors import BioLibError, CloudJobFinishedError
 from biolib.biolib_logging import logger, logger_no_user_data
+from biolib.compute_node.job_worker.job_storage import JobStorage
 from biolib.compute_node.utils import SystemExceptionCodeMap, SystemExceptionCodes
 from biolib.jobs.job_result import JobResult
-from biolib.jobs.types import
+from biolib.jobs.types import CloudJobDict, CloudJobStartedDict, JobDict
 from biolib.tables import BioLibTable
-from biolib.typing_utils import
+from biolib.typing_utils import Dict, List, Optional, cast
 from biolib.utils import IS_RUNNING_IN_NOTEBOOK
+from biolib.utils.app_uri import parse_app_uri
 
 
 class Job:
@@ -56,26 +60,23 @@ class Job:
     @property
     def result(self) -> JobResult:
         if not self._result:
-
-            self._result = JobResult(job_uuid=self._uuid, job_auth_token=self._auth_token)
-            else:
-                raise BioLibError(f"Result is not available for {self._uuid}: status is {self._job_dict['state']}.")
+            self._result = JobResult(job_uuid=self._uuid, job_auth_token=self._auth_token)
 
         return self._result
 
     @property
     def stdout(self) -> bytes:
-        logger.warning(
+        logger.warning('The property .stdout is deprecated, please use .get_stdout()')
         return self.result.get_stdout()
 
     @property
     def stderr(self) -> bytes:
-        logger.warning(
+        logger.warning('The property .stderr is deprecated, please use .get_stderr()')
         return self.result.get_stderr()
 
     @property
     def exitcode(self) -> int:
-        logger.warning(
+        logger.warning('The property .exitcode is deprecated, please use .get_exit_code()')
         return self.result.get_exit_code()
 
     def is_finished(self) -> bool:
@@ -109,8 +110,8 @@ class Job:
     def load_file_as_numpy(self, *args, **kwargs):
         try:
             import numpy  # type: ignore # pylint: disable=import-outside-toplevel,import-error
-        except:  # pylint: disable=raise-missing-from
-            raise Exception(
+        except ImportError:  # pylint: disable=raise-missing-from
+            raise Exception('Failed to import numpy, please make sure it is installed.') from None
         file_handle = self.result.get_output_file(*args, **kwargs).get_file_handle()
         return numpy.load(file_handle, allow_pickle=False)  # type: ignore
 
@@ -187,6 +188,39 @@ class Job:
         print('Please copy and paste the following link into your browser:')
         print(results_url_to_open)
 
+    def cancel(self) -> None:
+        try:
+            api.client.patch(
+                path=f'/jobs/{self._uuid}/',
+                headers={'Job-Auth-Token': self._auth_token} if self._auth_token else None,
+                data={'state': 'cancelled'},
+            )
+            logger.info(f'Job {self._uuid} canceled')
+        except Exception as error:
+            logger.error(f'Failed to cancel job {self._uuid} due to: {error}')
+
+    def recompute(self, app_uri: Optional[str] = None, machine: Optional[str] = None, blocking: bool = True) -> 'Job':
+        app_response = BiolibAppApi.get_by_uri(uri=app_uri or self._job_dict['app_uri'])
+
+        job_storage_input = RemoteJobStorageEndpoint(
+            job_auth_token=self._auth_token,
+            job_uuid=self._uuid,
+            storage_type='input',
+        )
+        http_response = HttpClient.request(url=job_storage_input.get_remote_url())
+        module_input_serialized = http_response.content
+
+        job = self._start_job_in_cloud(
+            app_uri=app_response['app_uri'],
+            app_version_uuid=app_response['app_version']['public_id'],
+            module_input_serialized=module_input_serialized,
+            machine=machine,
+        )
+        if blocking:
+            job.stream_logs()
+
+        return job
+
     def _get_cloud_job(self) -> CloudJobDict:
         self._refetch_job_dict(force_refetch=True)
         if self._job_dict['cloud_job'] is None:
@@ -278,7 +312,7 @@ class Job:
             status_json = self._get_job_status_from_compute_node(compute_node_url)
             if not status_json:
                 # this can happen if the job is finished but already removed from the compute node
-                logger.warning(
+                logger.warning('WARN: We were unable to retrieve the full log of the job, please try again')
                 break
             job_is_completed = status_json['is_completed']
             for status_update in status_json['status_updates']:
@@ -320,7 +354,10 @@ class Job:
             self.print_logs_packages(response_json['streamed_logs_packages_b64'])
 
     def _get_cloud_job_awaiting_started(self) -> CloudJobStartedDict:
+        retry_count = 0
         while True:
+            retry_count += 1
+            time.sleep(min(10, retry_count))
             cloud_job = self._get_cloud_job()
 
             if cloud_job['finished_at']:
@@ -333,7 +370,6 @@ class Job:
                 return cast(CloudJobStartedDict, cloud_job)
 
             logger.info('Cloud: The job has been queued. Please wait...')
-            time.sleep(10)
 
     def _get_job_status_from_compute_node(self, compute_node_url):
         for _ in range(15):
@@ -341,9 +377,9 @@ class Job:
                 return HttpClient.request(url=f'{compute_node_url}/v1/job/{self._uuid}/status/').json()
             except Exception:  # pylint: disable=broad-except
                 cloud_job = self._get_cloud_job()
-                logger.debug(
+                logger.debug('Failed to get status from compute node, retrying...')
                 if cloud_job['finished_at']:
-                    logger.debug(
+                    logger.debug('Job no longer exists on compute node, checking for error...')
                     if cloud_job['error_code'] != SystemExceptionCodes.COMPLETED_SUCCESSFULLY.value:
                         error_message = SystemExceptionCodeMap.get(
                             cloud_job['error_code'], f'Unknown error code {cloud_job["error_code"]}'
@@ -366,3 +402,46 @@ class Job:
 
         self._job_dict = self._get_job_dict(self._uuid, self._auth_token)
         self._job_dict_last_fetched_at = datetime.utcnow()
+
+    @staticmethod
+    def _start_job_in_cloud(
+        app_uri: str,
+        app_version_uuid: str,
+        module_input_serialized: bytes,
+        override_command: bool = False,
+        machine: Optional[str] = None,
+        experiment_id: Optional[str] = None,
+        result_prefix: Optional[str] = None,
+        timeout: Optional[int] = None,
+        notify: bool = False,
+        requested_machine_count: Optional[int] = None,
+    ) -> 'Job':
+        if len(module_input_serialized) < 500_000:
+            _job_dict = BiolibJobApi.create_job_with_data(
+                app_resource_name_prefix=parse_app_uri(app_uri)['resource_name_prefix'],
+                app_version_uuid=app_version_uuid,
+                arguments_override_command=override_command,
+                experiment_uuid=experiment_id,
+                module_input_serialized=module_input_serialized,
+                notify=notify,
+                requested_machine=machine,
+                requested_timeout_seconds=timeout,
+                result_name_prefix=result_prefix,
+                requested_machine_count=requested_machine_count,
+            )
+            return Job(cast(JobDict, _job_dict))
+
+        job_dict: CreatedJobDict = BiolibJobApi.create(
+            app_resource_name_prefix=parse_app_uri(app_uri)['resource_name_prefix'],
+            app_version_id=app_version_uuid,
+            experiment_uuid=experiment_id,
+            machine=machine,
+            notify=notify,
+            override_command=override_command,
+            timeout=timeout,
+            requested_machine_count=requested_machine_count,
+        )
+        JobStorage.upload_module_input(job=job_dict, module_input_serialized=module_input_serialized)
+        cloud_job = BiolibJobApi.create_cloud_job(job_id=job_dict['public_id'], result_name_prefix=result_prefix)
+        logger.debug(f"Cloud: Job created with id {cloud_job['public_id']}")
+        return Job(cast(JobDict, job_dict))
```
biolib/jobs/job_result.py
CHANGED
```diff
@@ -1,25 +1,24 @@
-from pathlib import Path
-from fnmatch import fnmatch
 import time
+from fnmatch import fnmatch
+from pathlib import Path
 
 from biolib.biolib_binary_format import ModuleOutputV2
+from biolib.biolib_binary_format.remote_endpoints import RemoteJobStorageEndpoint
 from biolib.biolib_binary_format.remote_stream_seeker import StreamSeeker
-from biolib.biolib_binary_format.utils import
-from biolib.biolib_binary_format.remote_endpoints import RemoteJobStorageResultEndpoint
+from biolib.biolib_binary_format.utils import LazyLoadedFile, RemoteIndexableBuffer
 from biolib.biolib_errors import BioLibError
 from biolib.biolib_logging import logger
-from biolib.typing_utils import
+from biolib.typing_utils import Callable, List, Optional, Union, cast
 
 PathFilter = Union[str, Callable[[str], bool]]
 
 
 class JobResult:
-
     def __init__(
-
-
-
-
+        self,
+        job_uuid: str,
+        job_auth_token: str,
+        module_output: Optional[ModuleOutputV2] = None,
     ):
         self._job_uuid: str = job_uuid
         self._job_auth_token: str = job_auth_token
@@ -35,7 +34,12 @@ class JobResult:
     def get_exit_code(self) -> int:
         return self._get_module_output().get_exit_code()
 
-    def save_files(
+    def save_files(
+        self,
+        output_dir: str,
+        path_filter: Optional[PathFilter] = None,
+        skip_file_if_exists: Optional[bool] = None,
+    ) -> None:
         module_output = self._get_module_output()
         output_files = module_output.get_files()
         filtered_output_files = self._get_filtered_files(output_files, path_filter) if path_filter else output_files
@@ -61,24 +65,44 @@ class JobResult:
             # Remove leading slash of file_path
             destination_file_path = Path(output_dir) / Path(file.path.lstrip('/'))
             if destination_file_path.exists():
-
+                if skip_file_if_exists:
+                    print(f'Skipping {destination_file_path} as a file with that name already exists locally.')
+                    continue
+                else:
+                    destination_file_path.rename(
+                        f'{destination_file_path}.biolib-renamed.{time.strftime("%Y%m%d%H%M%S")}'
+                    )
 
             dir_path = destination_file_path.parent
             if dir_path:
                 dir_path.mkdir(parents=True, exist_ok=True)
 
-
-
-
+            # write content to temporary (partial) file
+            partial_path = destination_file_path.with_suffix(
+                destination_file_path.suffix + f'.{self._job_uuid}.partial_biolib_download'
+            )
+            file_start = file.start
+            data_to_download = file.length
+            if partial_path.exists():
+                data_already_downloaded = partial_path.stat().st_size
+                file_start += data_already_downloaded
+                data_to_download -= data_already_downloaded
+
+            with open(partial_path, mode='ab') as partial_file:
+                for chunk in stream_seeker.seek_and_read(file_start=file_start, file_length=data_to_download):
+                    partial_file.write(chunk)
+
+            # rename partial file to actual file name
+            partial_path.rename(destination_file_path)
 
     def get_output_file(self, filename) -> LazyLoadedFile:
         files = self._get_module_output().get_files()
         filtered_files = self._get_filtered_files(files, path_filter=filename)
         if not filtered_files:
-            raise BioLibError(f
+            raise BioLibError(f'File {filename} not found in results.')
 
         if len(filtered_files) != 1:
-            raise BioLibError(f
+            raise BioLibError(f'Found multiple results for filename {filename}.')
 
         return filtered_files[0]
 
@@ -100,8 +124,8 @@ class JobResult:
             glob_filter = cast(str, path_filter)
 
             # since all file paths start with /, make sure filter does too
-            if not glob_filter.startswith(
-                glob_filter =
+            if not glob_filter.startswith('/'):
+                glob_filter = '/' + glob_filter
 
             def _filter_function(file: LazyLoadedFile) -> bool:
                 return fnmatch(file.path, glob_filter)
@@ -110,9 +134,10 @@ class JobResult:
 
     def _get_module_output(self) -> ModuleOutputV2:
         if self._module_output is None:
-            remote_job_storage_endpoint =
-                job_id=self._job_uuid,
+            remote_job_storage_endpoint = RemoteJobStorageEndpoint(
                 job_auth_token=self._job_auth_token,
+                job_uuid=self._job_uuid,
+                storage_type='output',
             )
             buffer = RemoteIndexableBuffer(endpoint=remote_job_storage_endpoint)
            self._module_output = ModuleOutputV2(buffer)
```
biolib/jobs/types.py
CHANGED
biolib/runtime/__init__.py
CHANGED
biolib/sdk/__init__.py
CHANGED
```diff
@@ -1,12 +1,14 @@
+from typing import Optional
+
 # Imports to hide and use as private internal utils
-from biolib.
+from biolib._data_record.data_record import DataRecord as _DataRecord
 from biolib._internal.push_application import push_application as _push_application
 from biolib._internal.push_application import set_app_version_as_active as _set_app_version_as_active
+from biolib._runtime.runtime import Runtime as _Runtime
 from biolib.app import BioLibApp as _BioLibApp
-from biolib.typing_utils import Optional as _Optional
 
-#
-
+# Classes to expose as public API
+Runtime = _Runtime
 
 
 def push_app_version(uri: str, path: str) -> _BioLibApp:
@@ -31,7 +33,7 @@ def get_app_version_pytest_plugin(app_version: _BioLibApp):
     except BaseException:
         raise Exception('Failed to import pytest; please make sure it is installed') from None
 
-    class AppVersionFixturePlugin
+    class AppVersionFixturePlugin:
        def __init__(self, app_version_ref):
            self.app_version_ref = app_version_ref
 
@@ -42,5 +44,14 @@ def get_app_version_pytest_plugin(app_version: _BioLibApp):
     return AppVersionFixturePlugin(app_version)
 
 
-def create_data_record(
-
+def create_data_record(
+    destination: str,
+    data_path: str,
+    name: Optional[str] = None,
+    record_type: Optional[str] = None,
+) -> _DataRecord:
+    return _DataRecord.create(
+        destination=f'{destination}/{name}' if name else destination,
+        data_path=data_path,
+        record_type=record_type,
+    )
```
biolib/typing_utils.py
CHANGED
```diff
@@ -1,7 +1,2 @@
-
-
-# import and expose everything from the typing module
-from typing import *  # pylint: disable=wildcard-import, unused-wildcard-import
-
-if sys.version_info < (3, 8):
-    from typing_extensions import TypedDict, Literal  # pylint: disable=unused-import
+# TODO: Deprecate and later remove this file
+from biolib._internal.types.typing import *  # pylint: disable=wildcard-import, unused-wildcard-import
```
biolib/user/sign_in.py
CHANGED
```diff
@@ -14,11 +14,11 @@ def sign_out() -> None:
 
 
 def sign_in(open_in_default_browser: bool = False) -> None:
-
-    if api_client.is_signed_in:
+    if not BiolibApiClient.is_reauthentication_needed():
         logger_no_user_data.info('Already signed in')
         return
 
+    api_client = BiolibApiClient.get()
     auth_challenge = BiolibAuthChallengeApi.create_auth_challenge()
     auth_challenge_token = auth_challenge['token']
 
```
biolib/utils/seq_util.py
CHANGED
```diff
@@ -1,32 +1,26 @@
 import re
 from io import BufferedIOBase
-from biolib.typing_utils import List, Optional, Dict, Union
 
-
-
-
-def find_invalid_sequence_characters(sequence):
-    invalid_chars = [char for char in sequence if char not in allowed_sequence_chars]
-    return invalid_chars
+from biolib.typing_utils import Dict, List, Optional, Union
 
 
 class SeqUtilRecord:
     def __init__(
-
-
-
-
-
+        self,
+        sequence: str,
+        sequence_id: str,
+        description: Optional['str'],
+        properties: Optional[Dict[str, str]] = None,
     ):
         self.sequence = sequence
         self.id = sequence_id  # pylint: disable=invalid-name
         self.description = description
 
         if properties:
-            disallowed_pattern = re.compile(r
+            disallowed_pattern = re.compile(r'[=\[\]\n]')
             for key, value in properties.items():
-                assert not bool(disallowed_pattern.search(key)),
-                assert not bool(disallowed_pattern.search(value)),
+                assert not bool(disallowed_pattern.search(key)), 'Key cannot contain characters =[] and newline'
+                assert not bool(disallowed_pattern.search(value)), 'Value cannot contain characters =[] and newline'
             self.properties = properties
         else:
             self.properties = {}
@@ -38,24 +32,24 @@ class SeqUtilRecord:
 class SeqUtil:
     @staticmethod
     def parse_fasta(
-
-
-
-
-
+        input_file: Union[str, BufferedIOBase, None] = None,
+        default_header: Optional[str] = None,
+        allow_any_sequence_characters: bool = False,
+        allow_empty_sequence: bool = True,
+        file_name: Optional[str] = None,
     ) -> List[SeqUtilRecord]:
         if input_file is None:
             if file_name:
                 input_file = file_name
             else:
-                raise ValueError(
+                raise ValueError('input_file must be a file name (str) or a BufferedIOBase object')
         if isinstance(input_file, str):
-            with open(input_file
+            with open(input_file) as file_handle:
                 data = file_handle.read().strip()
         elif isinstance(input_file, BufferedIOBase):
             data = input_file.read().decode('utf-8')
         else:
-            raise ValueError(
+            raise ValueError('input_file must be a file name (str) or a BufferedIOBase object')
         if not data:
             return []
 
@@ -71,9 +65,9 @@ class SeqUtil:
             raise Exception(f'No header line found in FASTA file "{file_name}"')
 
         splitted = []
-        tmp_data =
+        tmp_data = ''
         for line in data.splitlines():
-            if line.startswith(
+            if line.startswith('>'):
                 if tmp_data:
                     splitted.append(tmp_data)
                 tmp_data = line[1:].strip() + '\n'
@@ -89,23 +83,20 @@ class SeqUtil:
             sequence_data_splitted = sequence_data.strip().split('\n')
             header_line = sequence_data_splitted[0].split()
             sequence_id = header_line[0]
-            description = sequence_data_splitted[0][len(sequence_id):].strip()
-            sequence =
+            description = sequence_data_splitted[0][len(sequence_id) :].strip()
+            sequence = ''.join([seq.strip() for seq in sequence_data_splitted[1:]])
 
             if not allow_any_sequence_characters:
-                invalid_sequence_characters =
+                invalid_sequence_characters = SeqUtil._find_invalid_sequence_characters(sequence)
                 if len(invalid_sequence_characters) > 0:
                     raise Exception(
                         f'Error: Invalid character ("{invalid_sequence_characters[0]}") found in sequence {sequence_id}'
                     )
             if not allow_empty_sequence and len(sequence) == 0:
-                raise Exception(
-
-
+                raise Exception(f'Error: No sequence found for fasta entry {sequence_id}')
+
+            parsed_sequences.append(SeqUtilRecord(sequence=sequence, sequence_id=sequence_id, description=description))
 
-            parsed_sequences.append(
-                SeqUtilRecord(sequence=sequence, sequence_id=sequence_id, description=description)
-            )
         return parsed_sequences
 
     @staticmethod
@@ -116,5 +107,17 @@ class SeqUtil:
             if record.properties:
                 for key, value in record.properties.items():
                     optional_description += f' [{key}={value}]'
-            sequence = '\n'.join(record.sequence[i:i + 80] for i in range(0, len(record.sequence), 80))
+            sequence = '\n'.join(record.sequence[i : i + 80] for i in range(0, len(record.sequence), 80))
             file_handle.write(f'>{record.id}{optional_description}\n{sequence}\n')
+
+    @staticmethod
+    def _find_invalid_sequence_characters(sequence: str) -> List[str]:
+        allowed_sequence_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.')
+        invalid_chars = [char for char in sequence if char not in allowed_sequence_chars]
+        return invalid_chars
+
+    @staticmethod
+    def _find_invalid_sequence_id_characters(sequence: str) -> List[str]:
+        allowed_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.:*#')
+        invalid_chars = [char for char in sequence if char not in allowed_chars]
+        return invalid_chars
```