pybiolib 1.2.158.dev1__py3-none-any.whl → 1.2.164.dev1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -300,6 +300,15 @@ http {{
300
300
  proxy_ssl_server_name on;
301
301
  }}
302
302
 
303
+ location /api/resources/versions/ {{
304
+ proxy_pass https://$upstream_hostname$request_uri;
305
+ proxy_set_header authorization "";
306
+ proxy_set_header compute-node-auth-token "{compute_node_auth_token}";
307
+ proxy_set_header job-uuid "{self._job_uuid}";
308
+ proxy_set_header cookie "";
309
+ proxy_ssl_server_name on;
310
+ }}
311
+
303
312
  location /api/ {{
304
313
  proxy_pass https://$upstream_hostname$request_uri;
305
314
  proxy_set_header authorization "";
biolib/utils/seq_util.py CHANGED
@@ -1,7 +1,7 @@
1
1
  import re
2
2
  from io import BufferedIOBase, TextIOBase
3
3
 
4
- from biolib.typing_utils import Dict, List, Optional, Union, Iterator
4
+ from biolib.typing_utils import Dict, Iterator, List, Optional, Union
5
5
 
6
6
 
7
7
  class SeqUtilRecord:
@@ -35,14 +35,22 @@ class SeqUtil:
35
35
  input_file: Union[str, BufferedIOBase, None] = None,
36
36
  default_header: Optional[str] = None,
37
37
  allow_any_sequence_characters: bool = False,
38
+ use_strict_alphabet: Optional[bool] = False,
38
39
  allow_empty_sequence: bool = True,
39
40
  file_name: Optional[str] = None,
40
41
  ) -> Iterator[SeqUtilRecord]:
41
42
  def process_and_yield_record(header: str, sequence_lines: List[str]):
42
43
  sequence = ''.join(sequence_lines)
43
44
  sequence_id = header.split()[0]
45
+ if allow_any_sequence_characters and use_strict_alphabet:
46
+ raise Exception(
47
+ 'Error: Please choose either allow_any_sequence_characters or use_strict_alphabet'
48
+ )
44
49
  if not allow_any_sequence_characters:
45
- invalid_sequence_characters = SeqUtil._find_invalid_sequence_characters(sequence)
50
+ if use_strict_alphabet:
51
+ invalid_sequence_characters = SeqUtil._find_invalid_sequence_characters_strict(sequence)
52
+ else:
53
+ invalid_sequence_characters = SeqUtil._find_invalid_sequence_characters(sequence)
46
54
  if invalid_sequence_characters:
47
55
  raise Exception(
48
56
  f'Error: Invalid character ("{invalid_sequence_characters[0]}") found in sequence {sequence_id}'
@@ -126,6 +134,15 @@ class SeqUtil:
126
134
  invalid_chars = [char for char in sequence if char not in allowed_sequence_chars]
127
135
  return invalid_chars
128
136
 
137
+ @staticmethod
138
+ def _find_invalid_sequence_characters_strict(sequence: str) -> List[str]:
139
+ # Equivalent to fair-esm alphabet, compatible with ESM-models
140
+ # Excludes digits, '_' and 'J' (ambiguous letter only used in mass-spec NMR)
141
+ # https://github.com/facebookresearch/esm/blob/2b369911bb5b4b0dda914521b9475cad1656b2ac/esm/constants.py#L8
142
+ allowed_sequence_chars = set('lagvsertidpkqnfymhwcxbuzoLAGVSERTIDPKQNFYMHWCXBUZO-.')
143
+ invalid_chars = [char for char in sequence if char not in allowed_sequence_chars]
144
+ return invalid_chars
145
+
129
146
  @staticmethod
130
147
  def _find_invalid_sequence_id_characters(sequence: str) -> List[str]:
131
148
  allowed_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.:*#')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pybiolib
3
- Version: 1.2.158.dev1
3
+ Version: 1.2.164.dev1
4
4
  Summary: BioLib Python Client
5
5
  Home-page: https://github.com/biolib
6
6
  License: MIT
@@ -89,7 +89,7 @@ biolib/compute_node/job_worker/large_file_system.py,sha256=XXqRlVtYhs-Ji9zQGIk5K
89
89
  biolib/compute_node/job_worker/mappings.py,sha256=Z48Kg4nbcOvsT2-9o3RRikBkqflgO4XeaWxTGz-CNvI,2499
90
90
  biolib/compute_node/job_worker/utilization_reporter_thread.py,sha256=7tm5Yk9coqJ9VbEdnO86tSXI0iM0omwIyKENxdxiVXk,8575
91
91
  biolib/compute_node/job_worker/utils.py,sha256=wgxcIA8yAhUPdCwyvuuJ0JmreyWmmUoBO33vWtG60xg,1282
92
- biolib/compute_node/remote_host_proxy.py,sha256=eTT7x7ht_cxXMQ-0yXCvhKZW6mKeYM4KrfBf75KTbc8,15651
92
+ biolib/compute_node/remote_host_proxy.py,sha256=aRo27m6KtNVLJm1nJSNv4ZKqdRj3E4hgaJ1m_912D28,16085
93
93
  biolib/compute_node/socker_listener_thread.py,sha256=T5_UikA3MB9bD5W_dckYLPTgixh72vKUlgbBvj9dbM0,1601
94
94
  biolib/compute_node/socket_sender_thread.py,sha256=YgamPHeUm2GjMFGx8qk-99WlZhEs-kAb3q_2O6qByig,971
95
95
  biolib/compute_node/utils.py,sha256=M7i_WTyxbFM3Lri9RWZ_8FeQNYrQIWpKGLfp2I55oeY,4677
@@ -117,10 +117,10 @@ biolib/utils/__init__.py,sha256=fwjciJyJicvYyZcVTzfDBgD0SKY13DeXqvTeG4qZIy8,5548
117
117
  biolib/utils/app_uri.py,sha256=Yq_-_VGugQhMMo6mM5f0G9yNlLkr0WK4j0Nrf3FE4xQ,2171
118
118
  biolib/utils/cache_state.py,sha256=u256F37QSRIVwqKlbnCyzAX4EMI-kl6Dwu6qwj-Qmag,3100
119
119
  biolib/utils/multipart_uploader.py,sha256=XvGP1I8tQuKhAH-QugPRoEsCi9qvbRk-DVBs5PNwwJo,8452
120
- biolib/utils/seq_util.py,sha256=WJnU9vZdwY8RHXvzATyV80OXzyJ7w9EkG33Tna9Nr6A,5698
120
+ biolib/utils/seq_util.py,sha256=Ozk0blGtPur_D9MwShD02r_mphyQmgZkx-lOHOwnlIM,6730
121
121
  biolib/utils/zip/remote_zip.py,sha256=0wErYlxir5921agfFeV1xVjf29l9VNgGQvNlWOlj2Yc,23232
122
- pybiolib-1.2.158.dev1.dist-info/LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
123
- pybiolib-1.2.158.dev1.dist-info/METADATA,sha256=7gYFVnp6YM8fLfHzg0S_XDeC6DxdTzeDX6dfWesenm0,1512
124
- pybiolib-1.2.158.dev1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
125
- pybiolib-1.2.158.dev1.dist-info/entry_points.txt,sha256=p6DyaP_2kctxegTX23WBznnrDi4mz6gx04O5uKtRDXg,42
126
- pybiolib-1.2.158.dev1.dist-info/RECORD,,
122
+ pybiolib-1.2.164.dev1.dist-info/LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
123
+ pybiolib-1.2.164.dev1.dist-info/METADATA,sha256=KSBtOf3yfsApxBhfxIetw5GxfEhnJvq8b1Tc0NowaeU,1512
124
+ pybiolib-1.2.164.dev1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
125
+ pybiolib-1.2.164.dev1.dist-info/entry_points.txt,sha256=p6DyaP_2kctxegTX23WBznnrDi4mz6gx04O5uKtRDXg,42
126
+ pybiolib-1.2.164.dev1.dist-info/RECORD,,