RNApolis 0.4.11__py3-none-any.whl → 0.4.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: RNApolis
3
- Version: 0.4.11
3
+ Version: 0.4.13
4
4
  Summary: A Python library containing RNA-related bioinformatics functions and classes
5
5
  Home-page: https://github.com/tzok/rnapolis-py
6
6
  Author: Tomasz Zok
@@ -2,16 +2,16 @@ rnapolis/annotator.py,sha256=_hsSX2VHFvIQ47l_EA7lwGFXLiVLbhFPEsOQzBKbjRk,22100
2
2
  rnapolis/clashfinder.py,sha256=i95kp0o6OWNqmJDBr-PbsZd7RY2iJtBDr7QqolJSuAQ,8513
3
3
  rnapolis/common.py,sha256=LY6Uz96Br8ki_gA8LpfatgtvVbt9jOTkwgagayqTgf8,31251
4
4
  rnapolis/metareader.py,sha256=I1-cXc2YNBPwa3zihAnMTjEsAo79tEKzSmWu5yvN1Pk,2071
5
- rnapolis/molecule_filter.py,sha256=hB6-nXgjmw7FAsQ3bj0cZ2FvuW2I1PXunEfcdwEUB1o,7389
5
+ rnapolis/molecule_filter.py,sha256=1TDEIy2rgWXXqXxI21vjaElWExCFZlmCHWxwCfNehaM,9152
6
6
  rnapolis/motif_extractor.py,sha256=Lfn1iEkhkP9eZD3GPEWNAfy00QO7QPCc8wM_XS1ory8,1147
7
7
  rnapolis/parser.py,sha256=lHI6LyFbEEPdHOzbged1-Ov0tl6MpSungIPacip0Py0,15838
8
8
  rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
9
9
  rnapolis/tertiary.py,sha256=6t9ZB4w33-5n_M3sns1RoFXCOTgVAgGH4WDNG5OG9Kg,23426
10
10
  rnapolis/transformer.py,sha256=aC0nBmHHJf5TyLvBIV57Jj3tlwpvHbPo347opfAOlQA,3844
11
11
  rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
12
- RNApolis-0.4.11.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
13
- RNApolis-0.4.11.dist-info/METADATA,sha256=k9B0MCkh46cvBm3aoXEgmz_fwvNg4RRyAfAPJ08_X18,54323
14
- RNApolis-0.4.11.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
15
- RNApolis-0.4.11.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
16
- RNApolis-0.4.11.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
17
- RNApolis-0.4.11.dist-info/RECORD,,
12
+ RNApolis-0.4.13.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
13
+ RNApolis-0.4.13.dist-info/METADATA,sha256=wzV8Xvo09Qv02NfCKaLTID2Sm9k07aKsSZzqVWaNwf8,54323
14
+ RNApolis-0.4.13.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
15
+ RNApolis-0.4.13.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
16
+ RNApolis-0.4.13.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
17
+ RNApolis-0.4.13.dist-info/RECORD,,
@@ -1,10 +1,11 @@
1
1
  #! /usr/bin/env python
2
2
  import argparse
3
3
  import tempfile
4
- from typing import List, Set, Tuple
4
+ from typing import Iterable, List, Set, Tuple
5
5
 
6
6
  from mmcif.io.IoAdapterPy import IoAdapterPy
7
7
  from mmcif.io.PdbxReader import DataCategory, DataContainer
8
+
8
9
  from rnapolis.util import handle_input_file
9
10
 
10
11
  # Source: https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Items/_entity_poly.type.html
@@ -20,6 +21,7 @@ ENTITY_POLY_TYPES = [
20
21
  ]
21
22
 
22
23
  CATEGORIES_WITH_ENTITY_ID = [
24
+ ("entity", "id"),
23
25
  ("atom_site", "label_entity_id"),
24
26
  ("entity_keywords", "entity_id"),
25
27
  ("entity_name_com", "entity_id"),
@@ -153,38 +155,63 @@ def select_category_by_id(
153
155
  return attributes, rows
154
156
 
155
157
 
156
- def main():
157
- parser = argparse.ArgumentParser()
158
- parser.add_argument(
159
- "--type",
160
- help="a type of molecule to select, you can provide this argument multiple times (default: polyribonucleotide)",
161
- action="append",
162
- default=["polyribonucleotide"],
163
- choices=ENTITY_POLY_TYPES,
158
+ def filter_by_poly_types(
159
+ file_content: str, entity_poly_types: Iterable[str] = ["polyribonucleotide"]
160
+ ) -> str:
161
+ adapter = IoAdapterPy()
162
+
163
+ with tempfile.NamedTemporaryFile("rt+") as f:
164
+ f.write(file_content)
165
+ f.seek(0)
166
+ data = adapter.readFile(f.name)
167
+
168
+ entity_ids = select_ids(
169
+ data, "entity_poly", "type", "entity_id", set(entity_poly_types)
164
170
  )
165
- parser.add_argument(
166
- "--chain",
167
- help="a chain ID (label_asym_id) to select, you can provide this argument multiple times (if provided, it overrides the --type argument)",
168
- action="append",
169
- default=[],
171
+ asym_ids = select_ids(data, "struct_asym", "entity_id", "id", entity_ids)
172
+ auth_asym_ids = select_ids(
173
+ data, "atom_site", "label_asym_id", "auth_asym_id", asym_ids
170
174
  )
171
- parser.add_argument("path", help="path to a PDBx/mmCIF file")
172
- args = parser.parse_args()
173
175
 
174
- file = handle_input_file(args.path)
175
- adapter = IoAdapterPy()
176
- data = adapter.readFile(file.name)
177
176
  output = DataContainer("rnapolis")
178
177
 
179
- if args.chain:
180
- entity_ids = select_ids(data, "struct_asym", "id", "entity_id", set(args.chain))
181
- asym_ids = set(args.chain)
182
- else:
183
- entity_ids = select_ids(
184
- data, "entity_poly", "type", "entity_id", set(args.type)
185
- )
186
- asym_ids = select_ids(data, "struct_asym", "entity_id", "id", entity_ids)
178
+ for table, ids in (
179
+ (CATEGORIES_WITH_ENTITY_ID, entity_ids),
180
+ (CATEGORIES_WITH_ASYM_ID, asym_ids),
181
+ (CATEGORIES_WITH_AUTH_ASYM_ID, auth_asym_ids),
182
+ ):
183
+ for category, field_name in table:
184
+ attributes, rows = select_category_by_id(data, category, field_name, ids)
187
185
 
186
+ if attributes and rows:
187
+ obj = DataCategory(category, attributes, rows)
188
+ output.append(obj)
189
+
190
+ with tempfile.NamedTemporaryFile("rt+") as tmp:
191
+ adapter.writeFile(tmp.name, [output])
192
+ tmp.seek(0)
193
+ return tmp.read()
194
+
195
+
196
+ def filter_by_chains(file_content: str, chains: Iterable[str]) -> str:
197
+ """
198
+ Filter a PDBx/mmCIF file by chain IDs. The function returns a new PDBx/mmCIF file.
199
+
200
+ Warning! The new file might contain more chains than provided in the `chains` argument.
201
+ This is because the function filters by entity, so if you ask for chain "A",
202
+ which is part of entity 1 having chains "A", "B" and "C", then you will get all three chains.
203
+ """
204
+ adapter = IoAdapterPy()
205
+
206
+ with tempfile.NamedTemporaryFile("rt+") as f:
207
+ f.write(file_content)
208
+ f.seek(0)
209
+ data = adapter.readFile(f.name)
210
+
211
+ output = DataContainer("rnapolis")
212
+
213
+ entity_ids = select_ids(data, "struct_asym", "id", "entity_id", set(chains))
214
+ asym_ids = set(chains)
188
215
  auth_asym_ids = select_ids(
189
216
  data, "atom_site", "label_asym_id", "auth_asym_id", asym_ids
190
217
  )
@@ -201,9 +228,38 @@ def main():
201
228
  obj = DataCategory(category, attributes, rows)
202
229
  output.append(obj)
203
230
 
204
- with tempfile.NamedTemporaryFile() as tmp:
231
+ with tempfile.NamedTemporaryFile("rt+") as tmp:
205
232
  adapter.writeFile(tmp.name, [output])
206
- print(tmp.read().decode())
233
+ tmp.seek(0)
234
+ return tmp.read()
235
+
236
+
237
+ def main():
238
+ parser = argparse.ArgumentParser()
239
+ parser.add_argument(
240
+ "--type",
241
+ help="a type of molecule to select, you can provide this argument multiple times (default: polyribonucleotide)",
242
+ action="append",
243
+ default=["polyribonucleotide"],
244
+ choices=ENTITY_POLY_TYPES,
245
+ )
246
+ parser.add_argument(
247
+ "--chain",
248
+ help="a chain ID (label_asym_id) to select, you can provide this argument multiple times (if provided, it overrides the --type argument)",
249
+ action="append",
250
+ default=[],
251
+ )
252
+ parser.add_argument("path", help="path to a PDBx/mmCIF file")
253
+ args = parser.parse_args()
254
+
255
+ file = handle_input_file(args.path)
256
+
257
+ if args.chain:
258
+ print(filter_by_chains(file.read(), args.chain))
259
+ elif args.type:
260
+ print(filter_by_poly_types(file.read(), args.type))
261
+ else:
262
+ parser.print_help()
207
263
 
208
264
 
209
265
  if __name__ == "__main__":