pybiolib 1.1.2173__py3-none-any.whl → 1.1.2180__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biolib/_data_record/data_record.py +9 -11
- biolib/_runtime/runtime.py +7 -6
- biolib/runtime/__init__.py +1 -0
- biolib/utils/seq_util.py +38 -35
- {pybiolib-1.1.2173.dist-info → pybiolib-1.1.2180.dist-info}/METADATA +1 -1
- {pybiolib-1.1.2173.dist-info → pybiolib-1.1.2180.dist-info}/RECORD +9 -9
- {pybiolib-1.1.2173.dist-info → pybiolib-1.1.2180.dist-info}/LICENSE +0 -0
- {pybiolib-1.1.2173.dist-info → pybiolib-1.1.2180.dist-info}/WHEEL +0 -0
- {pybiolib-1.1.2173.dist-info → pybiolib-1.1.2180.dist-info}/entry_points.txt +0 -0
biolib/_data_record/data_record.py
CHANGED
@@ -4,7 +4,7 @@ from datetime import datetime
|
|
4
4
|
from fnmatch import fnmatch
|
5
5
|
from pathlib import Path
|
6
6
|
from struct import Struct
|
7
|
-
from typing import Callable, Dict, List, Union, cast
|
7
|
+
from typing import Callable, Dict, List, Optional, Union, cast
|
8
8
|
|
9
9
|
from biolib import api, utils
|
10
10
|
from biolib._internal import types
|
@@ -20,11 +20,9 @@ from biolib.biolib_binary_format import LazyLoadedFile
|
|
20
20
|
from biolib.biolib_binary_format.utils import RemoteIndexableBuffer
|
21
21
|
from biolib.biolib_errors import BioLibError
|
22
22
|
from biolib.biolib_logging import logger
|
23
|
-
from biolib.typing_utils import Optional as _Optional
|
24
23
|
from biolib.utils.app_uri import parse_app_uri
|
25
24
|
from biolib.utils.zip.remote_zip import RemoteZip
|
26
25
|
|
27
|
-
|
28
26
|
PathFilter = Union[str, Callable[[str], bool]]
|
29
27
|
|
30
28
|
|
@@ -51,7 +49,7 @@ class DataRecord:
|
|
51
49
|
|
52
50
|
return uri_parsed['app_name']
|
53
51
|
|
54
|
-
def list_files(self, path_filter:
|
52
|
+
def list_files(self, path_filter: Optional[PathFilter] = None) -> List[LazyLoadedFile]:
|
55
53
|
remote_storage_endpoint = DataRecordRemoteStorageEndpoint(
|
56
54
|
resource_version_uuid=self._state['resource_version_uuid'],
|
57
55
|
)
|
@@ -69,7 +67,7 @@ class DataRecord:
|
|
69
67
|
)
|
70
68
|
HttpClient.request(url=remote_storage_endpoint.get_remote_url(), response_path=output_path)
|
71
69
|
|
72
|
-
def download_files(self, output_dir: str, path_filter:
|
70
|
+
def download_files(self, output_dir: str, path_filter: Optional[PathFilter] = None) -> None:
|
73
71
|
filtered_files = self.list_files(path_filter=path_filter)
|
74
72
|
|
75
73
|
if len(filtered_files) == 0:
|
@@ -83,10 +81,10 @@ class DataRecord:
|
|
83
81
|
for chunk in file.get_data_iterator():
|
84
82
|
file_handle.write(chunk)
|
85
83
|
|
86
|
-
def save_files(self, output_dir: str, path_filter:
|
84
|
+
def save_files(self, output_dir: str, path_filter: Optional[PathFilter] = None) -> None:
|
87
85
|
self.download_files(output_dir=output_dir, path_filter=path_filter)
|
88
86
|
|
89
|
-
def update(self, data_path: str, chunk_size_in_mb:
|
87
|
+
def update(self, data_path: str, chunk_size_in_mb: Optional[int] = None) -> None:
|
90
88
|
assert os.path.isdir(data_path), f'The path "{data_path}" is not a directory.'
|
91
89
|
BiolibApiClient.assert_is_signed_in(authenticated_action_description='push data to a Data Record')
|
92
90
|
|
@@ -108,11 +106,11 @@ class DataRecord:
|
|
108
106
|
logger.info(f"Validating data record of type {data_record_type['name']}")
|
109
107
|
for rule in data_record_type['validation_rules']:
|
110
108
|
logger.info(f"Validating rule {rule['type']} for {rule['path']}...")
|
111
|
-
if rule['type'] ==
|
109
|
+
if rule['type'] == 'sqlite-v1':
|
112
110
|
try:
|
113
111
|
validate_sqlite_v1(schema=rule['rule'], sqlite_file=Path(rule['path']))
|
114
112
|
except Exception as error:
|
115
|
-
raise Exception(
|
113
|
+
raise Exception('Data Record Validation failed') from error
|
116
114
|
else:
|
117
115
|
raise Exception(f"Error processing data record validation: unknown rule type {rule['type']}")
|
118
116
|
|
@@ -158,7 +156,7 @@ class DataRecord:
|
|
158
156
|
return DataRecord(_internal_state=get_data_record_state_from_uri(uri))
|
159
157
|
|
160
158
|
@staticmethod
|
161
|
-
def create(destination: str, data_path:
|
159
|
+
def create(destination: str, data_path: Optional[str] = None) -> 'DataRecord':
|
162
160
|
BiolibApiClient.assert_is_signed_in(authenticated_action_description='create a Data Record')
|
163
161
|
if data_path is not None:
|
164
162
|
assert os.path.isdir(data_path), f'The path "{data_path}" is not a directory.'
|
@@ -188,7 +186,7 @@ class DataRecord:
|
|
188
186
|
return DataRecord.get_by_uri(uri=data_record_info['uri'])
|
189
187
|
|
190
188
|
@staticmethod
|
191
|
-
def fetch(uri:
|
189
|
+
def fetch(uri: Optional[str] = None, count: Optional[int] = None) -> List['DataRecord']:
|
192
190
|
max_page_size = 1_000
|
193
191
|
params: Dict[str, Union[str, int]] = {
|
194
192
|
'page_size': str(count or max_page_size),
|
biolib/_runtime/runtime.py
CHANGED
@@ -1,13 +1,14 @@
|
|
1
|
-
from biolib import api
|
2
|
-
from biolib._internal.runtime import BioLibRuntimeError, BioLibRuntimeNotRecognizedError, RuntimeJobDataDict
|
3
|
-
from biolib.typing_utils import cast, Optional as _Optional
|
4
|
-
|
5
1
|
import json
|
6
2
|
import re
|
3
|
+
from typing import Optional
|
4
|
+
|
5
|
+
from biolib import api
|
6
|
+
from biolib._internal.runtime import BioLibRuntimeError, BioLibRuntimeNotRecognizedError, RuntimeJobDataDict
|
7
|
+
from biolib.typing_utils import cast
|
7
8
|
|
8
9
|
|
9
10
|
class Runtime:
|
10
|
-
_job_data:
|
11
|
+
_job_data: Optional[RuntimeJobDataDict] = None
|
11
12
|
|
12
13
|
@staticmethod
|
13
14
|
def check_is_environment_biolib_app() -> bool:
|
@@ -56,7 +57,7 @@ class Runtime:
|
|
56
57
|
api.client.post(data={'note': note}, path=f'/jobs/{job_id}/notes/')
|
57
58
|
|
58
59
|
@staticmethod
|
59
|
-
def _try_to_get_job_data() ->
|
60
|
+
def _try_to_get_job_data() -> Optional[RuntimeJobDataDict]:
|
60
61
|
if not Runtime._job_data:
|
61
62
|
try:
|
62
63
|
with open('/biolib/secrets/biolib_system_secret') as file:
|
biolib/runtime/__init__.py
CHANGED
biolib/utils/seq_util.py
CHANGED
@@ -1,32 +1,26 @@
|
|
1
1
|
import re
|
2
2
|
from io import BufferedIOBase
|
3
|
-
from biolib.typing_utils import List, Optional, Dict, Union
|
4
3
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
def find_invalid_sequence_characters(sequence):
|
9
|
-
invalid_chars = [char for char in sequence if char not in allowed_sequence_chars]
|
10
|
-
return invalid_chars
|
4
|
+
from biolib.typing_utils import Dict, List, Optional, Union
|
11
5
|
|
12
6
|
|
13
7
|
class SeqUtilRecord:
|
14
8
|
def __init__(
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
9
|
+
self,
|
10
|
+
sequence: str,
|
11
|
+
sequence_id: str,
|
12
|
+
description: Optional['str'],
|
13
|
+
properties: Optional[Dict[str, str]] = None,
|
20
14
|
):
|
21
15
|
self.sequence = sequence
|
22
16
|
self.id = sequence_id # pylint: disable=invalid-name
|
23
17
|
self.description = description
|
24
18
|
|
25
19
|
if properties:
|
26
|
-
disallowed_pattern = re.compile(r
|
20
|
+
disallowed_pattern = re.compile(r'[=\[\]\n]')
|
27
21
|
for key, value in properties.items():
|
28
|
-
assert not bool(disallowed_pattern.search(key)),
|
29
|
-
assert not bool(disallowed_pattern.search(value)),
|
22
|
+
assert not bool(disallowed_pattern.search(key)), 'Key cannot contain characters =[] and newline'
|
23
|
+
assert not bool(disallowed_pattern.search(value)), 'Value cannot contain characters =[] and newline'
|
30
24
|
self.properties = properties
|
31
25
|
else:
|
32
26
|
self.properties = {}
|
@@ -38,24 +32,24 @@ class SeqUtilRecord:
|
|
38
32
|
class SeqUtil:
|
39
33
|
@staticmethod
|
40
34
|
def parse_fasta(
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
35
|
+
input_file: Union[str, BufferedIOBase, None] = None,
|
36
|
+
default_header: Optional[str] = None,
|
37
|
+
allow_any_sequence_characters: bool = False,
|
38
|
+
allow_empty_sequence: bool = False,
|
39
|
+
file_name: Optional[str] = None,
|
46
40
|
) -> List[SeqUtilRecord]:
|
47
41
|
if input_file is None:
|
48
42
|
if file_name:
|
49
43
|
input_file = file_name
|
50
44
|
else:
|
51
|
-
raise ValueError(
|
45
|
+
raise ValueError('input_file must be a file name (str) or a BufferedIOBase object')
|
52
46
|
if isinstance(input_file, str):
|
53
|
-
with open(input_file
|
47
|
+
with open(input_file) as file_handle:
|
54
48
|
data = file_handle.read().strip()
|
55
49
|
elif isinstance(input_file, BufferedIOBase):
|
56
50
|
data = input_file.read().decode('utf-8')
|
57
51
|
else:
|
58
|
-
raise ValueError(
|
52
|
+
raise ValueError('input_file must be a file name (str) or a BufferedIOBase object')
|
59
53
|
if not data:
|
60
54
|
return []
|
61
55
|
|
@@ -71,9 +65,9 @@ class SeqUtil:
|
|
71
65
|
raise Exception(f'No header line found in FASTA file "{file_name}"')
|
72
66
|
|
73
67
|
splitted = []
|
74
|
-
tmp_data =
|
68
|
+
tmp_data = ''
|
75
69
|
for line in data.splitlines():
|
76
|
-
if line.startswith(
|
70
|
+
if line.startswith('>'):
|
77
71
|
if tmp_data:
|
78
72
|
splitted.append(tmp_data)
|
79
73
|
tmp_data = line[1:].strip() + '\n'
|
@@ -89,23 +83,20 @@ class SeqUtil:
|
|
89
83
|
sequence_data_splitted = sequence_data.strip().split('\n')
|
90
84
|
header_line = sequence_data_splitted[0].split()
|
91
85
|
sequence_id = header_line[0]
|
92
|
-
description = sequence_data_splitted[0][len(sequence_id):].strip()
|
93
|
-
sequence =
|
86
|
+
description = sequence_data_splitted[0][len(sequence_id) :].strip()
|
87
|
+
sequence = ''.join([seq.strip() for seq in sequence_data_splitted[1:]])
|
94
88
|
|
95
89
|
if not allow_any_sequence_characters:
|
96
|
-
invalid_sequence_characters =
|
90
|
+
invalid_sequence_characters = SeqUtil._find_invalid_sequence_characters(sequence)
|
97
91
|
if len(invalid_sequence_characters) > 0:
|
98
92
|
raise Exception(
|
99
93
|
f'Error: Invalid character ("{invalid_sequence_characters[0]}") found in sequence {sequence_id}'
|
100
94
|
)
|
101
95
|
if not allow_empty_sequence and len(sequence) == 0:
|
102
|
-
raise Exception(
|
103
|
-
|
104
|
-
|
96
|
+
raise Exception(f'Error: No sequence found for fasta entry {sequence_id}')
|
97
|
+
|
98
|
+
parsed_sequences.append(SeqUtilRecord(sequence=sequence, sequence_id=sequence_id, description=description))
|
105
99
|
|
106
|
-
parsed_sequences.append(
|
107
|
-
SeqUtilRecord(sequence=sequence, sequence_id=sequence_id, description=description)
|
108
|
-
)
|
109
100
|
return parsed_sequences
|
110
101
|
|
111
102
|
@staticmethod
|
@@ -116,5 +107,17 @@ class SeqUtil:
|
|
116
107
|
if record.properties:
|
117
108
|
for key, value in record.properties.items():
|
118
109
|
optional_description += f' [{key}={value}]'
|
119
|
-
sequence = '\n'.join(record.sequence[i:i + 80] for i in range(0, len(record.sequence), 80))
|
110
|
+
sequence = '\n'.join(record.sequence[i : i + 80] for i in range(0, len(record.sequence), 80))
|
120
111
|
file_handle.write(f'>{record.id}{optional_description}\n{sequence}\n')
|
112
|
+
|
113
|
+
@staticmethod
|
114
|
+
def _find_invalid_sequence_characters(sequence: str) -> List[str]:
|
115
|
+
allowed_sequence_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.')
|
116
|
+
invalid_chars = [char for char in sequence if char not in allowed_sequence_chars]
|
117
|
+
return invalid_chars
|
118
|
+
|
119
|
+
@staticmethod
|
120
|
+
def _find_invalid_sequence_id_characters(sequence: str) -> List[str]:
|
121
|
+
allowed_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.:*#')
|
122
|
+
invalid_chars = [char for char in sequence if char not in allowed_chars]
|
123
|
+
return invalid_chars
|
{pybiolib-1.1.2173.dist-info → pybiolib-1.1.2180.dist-info}/RECORD
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
|
2
2
|
README.md,sha256=_IH7pxFiqy2bIAmaVeA-iVTyUwWRjMIlfgtUbYTtmls,368
|
3
3
|
biolib/__init__.py,sha256=_tThyzISH81yS9KXP_X3qEiKXmsIp5XOBcJIODfLVnc,4338
|
4
|
-
biolib/_data_record/data_record.py,sha256=
|
4
|
+
biolib/_data_record/data_record.py,sha256=6PD-jBgWU2Cc4PwT-hLNPTFZygb4r0LBL617U-1eijE,12633
|
5
5
|
biolib/_internal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
6
|
biolib/_internal/data_record/__init__.py,sha256=fGdME6JGRU_2VxpJbYpGXYndjN-feUkmKY4fuMyq3cg,76
|
7
7
|
biolib/_internal/data_record/data_record.py,sha256=YmaAABR57goDCE8-rKb2j0FPMSbDtRPCm_HhT3mM074,4299
|
@@ -23,7 +23,7 @@ biolib/_internal/types/experiment.py,sha256=D94iBdn2nS92lRW-TOs1a2WKXJD5ZtmzL4yp
|
|
23
23
|
biolib/_internal/types/resource.py,sha256=G-vPkZoe4Um6FPxsQZtRzAlbSW5sDW4NFkbjn21I3V4,372
|
24
24
|
biolib/_internal/types/typing.py,sha256=D4EKKEe7kDx0K6lJi-H_XLtk-8w6nu2fdqn9bvzI-Xo,288
|
25
25
|
biolib/_internal/utils/__init__.py,sha256=p5vsIFyu-zYqBgdSMfwW9NC_jk7rXvvCbV4Bzd3As7c,630
|
26
|
-
biolib/_runtime/runtime.py,sha256=
|
26
|
+
biolib/_runtime/runtime.py,sha256=daYxzIpRoW4k-HJFu2BMXeylYSlCXn3-SqdSriCFnKw,2770
|
27
27
|
biolib/api/__init__.py,sha256=mQ4u8FijqyLzjYMezMUUbbBGNB3iFmkNdjXnWPZ7Jlw,138
|
28
28
|
biolib/api/client.py,sha256=9MD1qI52BnRC_QSydFGjyFquwFw0R9dkDfUrjUouuHQ,3490
|
29
29
|
biolib/app/__init__.py,sha256=cdPtcfb_U-bxb9iSL4fCEq2rpD9OjkyY4W-Zw60B0LI,37
|
@@ -102,7 +102,7 @@ biolib/jobs/__init__.py,sha256=aIb2H2DHjQbM2Bs-dysFijhwFcL58Blp0Co0gimED3w,32
|
|
102
102
|
biolib/jobs/job.py,sha256=OfG8cLd3AjGjiMWRlJRZdVVbLsRWSX-OM5nxJhR6mPQ,19136
|
103
103
|
biolib/jobs/job_result.py,sha256=rALHiKYNaC9lHi_JJqBob1RubzNLwG9Z386kwRJjd2M,5885
|
104
104
|
biolib/jobs/types.py,sha256=qhadtH2KDC2WUOOqPiwke0YgtQY4FtuB71Stekq1k48,970
|
105
|
-
biolib/runtime/__init__.py,sha256=
|
105
|
+
biolib/runtime/__init__.py,sha256=MlRepA11n2H-3plB5rzWyyHK2JmP6PiaP3i6x3vt0mg,506
|
106
106
|
biolib/sdk/__init__.py,sha256=qJ_V_Edxolzi4VBQCrvem5lYIkJ0FVH3VZepSDuXjTc,1895
|
107
107
|
biolib/tables.py,sha256=acH7VjwAbadLo8P84FSnKEZxCTVsF5rEg9VPuxElNs8,872
|
108
108
|
biolib/templates/__init__.py,sha256=Yx62sSyDCDesRQDQgmbDsLpfgEh93fWE8r9u4g2azXk,36
|
@@ -114,10 +114,10 @@ biolib/utils/__init__.py,sha256=fwjciJyJicvYyZcVTzfDBgD0SKY13DeXqvTeG4qZIy8,5548
|
|
114
114
|
biolib/utils/app_uri.py,sha256=Yq_-_VGugQhMMo6mM5f0G9yNlLkr0WK4j0Nrf3FE4xQ,2171
|
115
115
|
biolib/utils/cache_state.py,sha256=u256F37QSRIVwqKlbnCyzAX4EMI-kl6Dwu6qwj-Qmag,3100
|
116
116
|
biolib/utils/multipart_uploader.py,sha256=XvGP1I8tQuKhAH-QugPRoEsCi9qvbRk-DVBs5PNwwJo,8452
|
117
|
-
biolib/utils/seq_util.py,sha256=
|
117
|
+
biolib/utils/seq_util.py,sha256=WieuQ2RvV4QSJFUAMRVyvKXFs3YanFAmjh-CFIaQmQk,5184
|
118
118
|
biolib/utils/zip/remote_zip.py,sha256=0wErYlxir5921agfFeV1xVjf29l9VNgGQvNlWOlj2Yc,23232
|
119
|
-
pybiolib-1.1.
|
120
|
-
pybiolib-1.1.
|
121
|
-
pybiolib-1.1.
|
122
|
-
pybiolib-1.1.
|
123
|
-
pybiolib-1.1.
|
119
|
+
pybiolib-1.1.2180.dist-info/LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
|
120
|
+
pybiolib-1.1.2180.dist-info/METADATA,sha256=icX35ySYzpvqGrHM6Sy4NJ9tGCVWpP6U1qga8I3jgtc,1508
|
121
|
+
pybiolib-1.1.2180.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
122
|
+
pybiolib-1.1.2180.dist-info/entry_points.txt,sha256=p6DyaP_2kctxegTX23WBznnrDi4mz6gx04O5uKtRDXg,42
|
123
|
+
pybiolib-1.1.2180.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|