pybiolib 1.1.2173__py3-none-any.whl → 1.1.2175__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
1
  import warnings
2
+
2
3
  from biolib._runtime.runtime import Runtime as _Runtime
3
4
 
4
5
 
biolib/utils/seq_util.py CHANGED
@@ -1,32 +1,26 @@
1
1
  import re
2
2
  from io import BufferedIOBase
3
- from biolib.typing_utils import List, Optional, Dict, Union
4
3
 
5
- allowed_sequence_chars = set("ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.")
6
-
7
-
8
- def find_invalid_sequence_characters(sequence):
9
- invalid_chars = [char for char in sequence if char not in allowed_sequence_chars]
10
- return invalid_chars
4
+ from biolib.typing_utils import Dict, List, Optional, Union
11
5
 
12
6
 
13
7
  class SeqUtilRecord:
14
8
  def __init__(
15
- self,
16
- sequence: str,
17
- sequence_id: str,
18
- description: Optional['str'],
19
- properties: Optional[Dict[str, str]] = None,
9
+ self,
10
+ sequence: str,
11
+ sequence_id: str,
12
+ description: Optional['str'],
13
+ properties: Optional[Dict[str, str]] = None,
20
14
  ):
21
15
  self.sequence = sequence
22
16
  self.id = sequence_id # pylint: disable=invalid-name
23
17
  self.description = description
24
18
 
25
19
  if properties:
26
- disallowed_pattern = re.compile(r"[=\[\]\n]")
20
+ disallowed_pattern = re.compile(r'[=\[\]\n]')
27
21
  for key, value in properties.items():
28
- assert not bool(disallowed_pattern.search(key)), "Key cannot contain characters =[] and newline"
29
- assert not bool(disallowed_pattern.search(value)), "Value cannot contain characters =[] and newline"
22
+ assert not bool(disallowed_pattern.search(key)), 'Key cannot contain characters =[] and newline'
23
+ assert not bool(disallowed_pattern.search(value)), 'Value cannot contain characters =[] and newline'
30
24
  self.properties = properties
31
25
  else:
32
26
  self.properties = {}
@@ -38,24 +32,24 @@ class SeqUtilRecord:
38
32
  class SeqUtil:
39
33
  @staticmethod
40
34
  def parse_fasta(
41
- input_file: Union[str, BufferedIOBase, None] = None,
42
- default_header: Optional[str] = None,
43
- allow_any_sequence_characters: bool = False,
44
- allow_empty_sequence: bool = False,
45
- file_name: Optional[str] = None,
35
+ input_file: Union[str, BufferedIOBase, None] = None,
36
+ default_header: Optional[str] = None,
37
+ allow_any_sequence_characters: bool = False,
38
+ allow_empty_sequence: bool = False,
39
+ file_name: Optional[str] = None,
46
40
  ) -> List[SeqUtilRecord]:
47
41
  if input_file is None:
48
42
  if file_name:
49
43
  input_file = file_name
50
44
  else:
51
- raise ValueError("input_file must be a file name (str) or a BufferedIOBase object")
45
+ raise ValueError('input_file must be a file name (str) or a BufferedIOBase object')
52
46
  if isinstance(input_file, str):
53
- with open(input_file, 'r') as file_handle:
47
+ with open(input_file) as file_handle:
54
48
  data = file_handle.read().strip()
55
49
  elif isinstance(input_file, BufferedIOBase):
56
50
  data = input_file.read().decode('utf-8')
57
51
  else:
58
- raise ValueError("input_file must be a file name (str) or a BufferedIOBase object")
52
+ raise ValueError('input_file must be a file name (str) or a BufferedIOBase object')
59
53
  if not data:
60
54
  return []
61
55
 
@@ -71,9 +65,9 @@ class SeqUtil:
71
65
  raise Exception(f'No header line found in FASTA file "{file_name}"')
72
66
 
73
67
  splitted = []
74
- tmp_data = ""
68
+ tmp_data = ''
75
69
  for line in data.splitlines():
76
- if line.startswith(">"):
70
+ if line.startswith('>'):
77
71
  if tmp_data:
78
72
  splitted.append(tmp_data)
79
73
  tmp_data = line[1:].strip() + '\n'
@@ -89,23 +83,20 @@ class SeqUtil:
89
83
  sequence_data_splitted = sequence_data.strip().split('\n')
90
84
  header_line = sequence_data_splitted[0].split()
91
85
  sequence_id = header_line[0]
92
- description = sequence_data_splitted[0][len(sequence_id):].strip()
93
- sequence = "".join([seq.strip().upper() for seq in sequence_data_splitted[1:]])
86
+ description = sequence_data_splitted[0][len(sequence_id) :].strip()
87
+ sequence = ''.join([seq.strip() for seq in sequence_data_splitted[1:]])
94
88
 
95
89
  if not allow_any_sequence_characters:
96
- invalid_sequence_characters = find_invalid_sequence_characters(sequence)
90
+ invalid_sequence_characters = SeqUtil._find_invalid_sequence_characters(sequence)
97
91
  if len(invalid_sequence_characters) > 0:
98
92
  raise Exception(
99
93
  f'Error: Invalid character ("{invalid_sequence_characters[0]}") found in sequence {sequence_id}'
100
94
  )
101
95
  if not allow_empty_sequence and len(sequence) == 0:
102
- raise Exception(
103
- f'Error: No sequence found for fasta entry {sequence_id}'
104
- )
96
+ raise Exception(f'Error: No sequence found for fasta entry {sequence_id}')
97
+
98
+ parsed_sequences.append(SeqUtilRecord(sequence=sequence, sequence_id=sequence_id, description=description))
105
99
 
106
- parsed_sequences.append(
107
- SeqUtilRecord(sequence=sequence, sequence_id=sequence_id, description=description)
108
- )
109
100
  return parsed_sequences
110
101
 
111
102
  @staticmethod
@@ -116,5 +107,17 @@ class SeqUtil:
116
107
  if record.properties:
117
108
  for key, value in record.properties.items():
118
109
  optional_description += f' [{key}={value}]'
119
- sequence = '\n'.join(record.sequence[i:i + 80] for i in range(0, len(record.sequence), 80))
110
+ sequence = '\n'.join(record.sequence[i : i + 80] for i in range(0, len(record.sequence), 80))
120
111
  file_handle.write(f'>{record.id}{optional_description}\n{sequence}\n')
112
+
113
+ @staticmethod
114
+ def _find_invalid_sequence_characters(sequence: str) -> List[str]:
115
+ allowed_sequence_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.')
116
+ invalid_chars = [char for char in sequence if char not in allowed_sequence_chars]
117
+ return invalid_chars
118
+
119
+ @staticmethod
120
+ def _find_invalid_sequence_id_characters(sequence: str) -> List[str]:
121
+ allowed_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.:*#')
122
+ invalid_chars = [char for char in sequence if char not in allowed_chars]
123
+ return invalid_chars
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pybiolib
3
- Version: 1.1.2173
3
+ Version: 1.1.2175
4
4
  Summary: BioLib Python Client
5
5
  Home-page: https://github.com/biolib
6
6
  License: MIT
@@ -102,7 +102,7 @@ biolib/jobs/__init__.py,sha256=aIb2H2DHjQbM2Bs-dysFijhwFcL58Blp0Co0gimED3w,32
102
102
  biolib/jobs/job.py,sha256=OfG8cLd3AjGjiMWRlJRZdVVbLsRWSX-OM5nxJhR6mPQ,19136
103
103
  biolib/jobs/job_result.py,sha256=rALHiKYNaC9lHi_JJqBob1RubzNLwG9Z386kwRJjd2M,5885
104
104
  biolib/jobs/types.py,sha256=qhadtH2KDC2WUOOqPiwke0YgtQY4FtuB71Stekq1k48,970
105
- biolib/runtime/__init__.py,sha256=Fg2ZIAmUegurLKagpBNfRgLcOwR2VZSmXQpb-ryRwI0,505
105
+ biolib/runtime/__init__.py,sha256=MlRepA11n2H-3plB5rzWyyHK2JmP6PiaP3i6x3vt0mg,506
106
106
  biolib/sdk/__init__.py,sha256=qJ_V_Edxolzi4VBQCrvem5lYIkJ0FVH3VZepSDuXjTc,1895
107
107
  biolib/tables.py,sha256=acH7VjwAbadLo8P84FSnKEZxCTVsF5rEg9VPuxElNs8,872
108
108
  biolib/templates/__init__.py,sha256=Yx62sSyDCDesRQDQgmbDsLpfgEh93fWE8r9u4g2azXk,36
@@ -114,10 +114,10 @@ biolib/utils/__init__.py,sha256=fwjciJyJicvYyZcVTzfDBgD0SKY13DeXqvTeG4qZIy8,5548
114
114
  biolib/utils/app_uri.py,sha256=Yq_-_VGugQhMMo6mM5f0G9yNlLkr0WK4j0Nrf3FE4xQ,2171
115
115
  biolib/utils/cache_state.py,sha256=u256F37QSRIVwqKlbnCyzAX4EMI-kl6Dwu6qwj-Qmag,3100
116
116
  biolib/utils/multipart_uploader.py,sha256=XvGP1I8tQuKhAH-QugPRoEsCi9qvbRk-DVBs5PNwwJo,8452
117
- biolib/utils/seq_util.py,sha256=jC5WhH63FTD7SLFJbxQGA2hOt9NTwq9zHl_BEec1Z0c,4907
117
+ biolib/utils/seq_util.py,sha256=WieuQ2RvV4QSJFUAMRVyvKXFs3YanFAmjh-CFIaQmQk,5184
118
118
  biolib/utils/zip/remote_zip.py,sha256=0wErYlxir5921agfFeV1xVjf29l9VNgGQvNlWOlj2Yc,23232
119
- pybiolib-1.1.2173.dist-info/LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
120
- pybiolib-1.1.2173.dist-info/METADATA,sha256=vA4sMu0c3kO4Z5PTqIziK4BRlsjEKNe-lVtJ26RzVag,1508
121
- pybiolib-1.1.2173.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
122
- pybiolib-1.1.2173.dist-info/entry_points.txt,sha256=p6DyaP_2kctxegTX23WBznnrDi4mz6gx04O5uKtRDXg,42
123
- pybiolib-1.1.2173.dist-info/RECORD,,
119
+ pybiolib-1.1.2175.dist-info/LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
120
+ pybiolib-1.1.2175.dist-info/METADATA,sha256=2N5GV9P58sKVu7I1HPx2eQuP_I7igY1RIqfd1OZEQKI,1508
121
+ pybiolib-1.1.2175.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
122
+ pybiolib-1.1.2175.dist-info/entry_points.txt,sha256=p6DyaP_2kctxegTX23WBznnrDi4mz6gx04O5uKtRDXg,42
123
+ pybiolib-1.1.2175.dist-info/RECORD,,