RNApolis 0.4.11__py3-none-any.whl → 0.4.12__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: RNApolis
3
- Version: 0.4.11
3
+ Version: 0.4.12
4
4
  Summary: A Python library containing RNA-related bioinformatics functions and classes
5
5
  Home-page: https://github.com/tzok/rnapolis-py
6
6
  Author: Tomasz Zok
@@ -2,16 +2,16 @@ rnapolis/annotator.py,sha256=_hsSX2VHFvIQ47l_EA7lwGFXLiVLbhFPEsOQzBKbjRk,22100
2
2
  rnapolis/clashfinder.py,sha256=i95kp0o6OWNqmJDBr-PbsZd7RY2iJtBDr7QqolJSuAQ,8513
3
3
  rnapolis/common.py,sha256=LY6Uz96Br8ki_gA8LpfatgtvVbt9jOTkwgagayqTgf8,31251
4
4
  rnapolis/metareader.py,sha256=I1-cXc2YNBPwa3zihAnMTjEsAo79tEKzSmWu5yvN1Pk,2071
5
- rnapolis/molecule_filter.py,sha256=hB6-nXgjmw7FAsQ3bj0cZ2FvuW2I1PXunEfcdwEUB1o,7389
5
+ rnapolis/molecule_filter.py,sha256=F_xkAe7q2NZAaDpRaeikv-twUvbNflWdlLte7oFn2Ms,9130
6
6
  rnapolis/motif_extractor.py,sha256=Lfn1iEkhkP9eZD3GPEWNAfy00QO7QPCc8wM_XS1ory8,1147
7
7
  rnapolis/parser.py,sha256=lHI6LyFbEEPdHOzbged1-Ov0tl6MpSungIPacip0Py0,15838
8
8
  rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
9
9
  rnapolis/tertiary.py,sha256=6t9ZB4w33-5n_M3sns1RoFXCOTgVAgGH4WDNG5OG9Kg,23426
10
10
  rnapolis/transformer.py,sha256=aC0nBmHHJf5TyLvBIV57Jj3tlwpvHbPo347opfAOlQA,3844
11
11
  rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
12
- RNApolis-0.4.11.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
13
- RNApolis-0.4.11.dist-info/METADATA,sha256=k9B0MCkh46cvBm3aoXEgmz_fwvNg4RRyAfAPJ08_X18,54323
14
- RNApolis-0.4.11.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
15
- RNApolis-0.4.11.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
16
- RNApolis-0.4.11.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
17
- RNApolis-0.4.11.dist-info/RECORD,,
12
+ RNApolis-0.4.12.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
13
+ RNApolis-0.4.12.dist-info/METADATA,sha256=fPrlpVrM83UYwPYwaeymBP7SeVLMv7XftCZOl7hUnes,54323
14
+ RNApolis-0.4.12.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
15
+ RNApolis-0.4.12.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
16
+ RNApolis-0.4.12.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
17
+ RNApolis-0.4.12.dist-info/RECORD,,
@@ -1,10 +1,11 @@
1
1
  #! /usr/bin/env python
2
2
  import argparse
3
3
  import tempfile
4
- from typing import List, Set, Tuple
4
+ from typing import Iterable, List, Set, Tuple
5
5
 
6
6
  from mmcif.io.IoAdapterPy import IoAdapterPy
7
7
  from mmcif.io.PdbxReader import DataCategory, DataContainer
8
+
8
9
  from rnapolis.util import handle_input_file
9
10
 
10
11
  # Source: https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Items/_entity_poly.type.html
@@ -153,38 +154,63 @@ def select_category_by_id(
153
154
  return attributes, rows
154
155
 
155
156
 
156
- def main():
157
- parser = argparse.ArgumentParser()
158
- parser.add_argument(
159
- "--type",
160
- help="a type of molecule to select, you can provide this argument multiple times (default: polyribonucleotide)",
161
- action="append",
162
- default=["polyribonucleotide"],
163
- choices=ENTITY_POLY_TYPES,
157
+ def filter_by_poly_types(
158
+ file_content: str, entity_poly_types: Iterable[str] = ["polyribonucleotide"]
159
+ ) -> str:
160
+ adapter = IoAdapterPy()
161
+
162
+ with tempfile.NamedTemporaryFile("rt+") as f:
163
+ f.write(file_content)
164
+ f.seek(0)
165
+ data = adapter.readFile(f.name)
166
+
167
+ entity_ids = select_ids(
168
+ data, "entity_poly", "type", "entity_id", set(entity_poly_types)
164
169
  )
165
- parser.add_argument(
166
- "--chain",
167
- help="a chain ID (label_asym_id) to select, you can provide this argument multiple times (if provided, it overrides the --type argument)",
168
- action="append",
169
- default=[],
170
+ asym_ids = select_ids(data, "struct_asym", "entity_id", "id", entity_ids)
171
+ auth_asym_ids = select_ids(
172
+ data, "atom_site", "label_asym_id", "auth_asym_id", asym_ids
170
173
  )
171
- parser.add_argument("path", help="path to a PDBx/mmCIF file")
172
- args = parser.parse_args()
173
174
 
174
- file = handle_input_file(args.path)
175
- adapter = IoAdapterPy()
176
- data = adapter.readFile(file.name)
177
175
  output = DataContainer("rnapolis")
178
176
 
179
- if args.chain:
180
- entity_ids = select_ids(data, "struct_asym", "id", "entity_id", set(args.chain))
181
- asym_ids = set(args.chain)
182
- else:
183
- entity_ids = select_ids(
184
- data, "entity_poly", "type", "entity_id", set(args.type)
185
- )
186
- asym_ids = select_ids(data, "struct_asym", "entity_id", "id", entity_ids)
177
+ for table, ids in (
178
+ (CATEGORIES_WITH_ENTITY_ID, entity_ids),
179
+ (CATEGORIES_WITH_ASYM_ID, asym_ids),
180
+ (CATEGORIES_WITH_AUTH_ASYM_ID, auth_asym_ids),
181
+ ):
182
+ for category, field_name in table:
183
+ attributes, rows = select_category_by_id(data, category, field_name, ids)
187
184
 
185
+ if attributes and rows:
186
+ obj = DataCategory(category, attributes, rows)
187
+ output.append(obj)
188
+
189
+ with tempfile.NamedTemporaryFile("rt+") as tmp:
190
+ adapter.writeFile(tmp.name, [output])
191
+ tmp.seek(0)
192
+ return tmp.read()
193
+
194
+
195
+ def filter_by_chains(file_content: str, chains: Iterable[str]) -> str:
196
+ """
197
+ Filter a PDBx/mmCIF file by chain IDs. The function returns a new PDBx/mmCIF file.
198
+
199
+ Warning! The new file might contain more chains than provided in the `chains` argument.
200
+ This is because the function filters by entity, so if you ask for chain "A",
201
+ which is part of entity 1 having chains "A", "B" and "C", then you will get all three chains.
202
+ """
203
+ adapter = IoAdapterPy()
204
+
205
+ with tempfile.NamedTemporaryFile("rt+") as f:
206
+ f.write(file_content)
207
+ f.seek(0)
208
+ data = adapter.readFile(f.name)
209
+
210
+ output = DataContainer("rnapolis")
211
+
212
+ entity_ids = select_ids(data, "struct_asym", "id", "entity_id", set(chains))
213
+ asym_ids = set(chains)
188
214
  auth_asym_ids = select_ids(
189
215
  data, "atom_site", "label_asym_id", "auth_asym_id", asym_ids
190
216
  )
@@ -201,9 +227,38 @@ def main():
201
227
  obj = DataCategory(category, attributes, rows)
202
228
  output.append(obj)
203
229
 
204
- with tempfile.NamedTemporaryFile() as tmp:
230
+ with tempfile.NamedTemporaryFile("rt+") as tmp:
205
231
  adapter.writeFile(tmp.name, [output])
206
- print(tmp.read().decode())
232
+ tmp.seek(0)
233
+ return tmp.read()
234
+
235
+
236
+ def main():
237
+ parser = argparse.ArgumentParser()
238
+ parser.add_argument(
239
+ "--type",
240
+ help="a type of molecule to select, you can provide this argument multiple times (default: polyribonucleotide)",
241
+ action="append",
242
+ default=["polyribonucleotide"],
243
+ choices=ENTITY_POLY_TYPES,
244
+ )
245
+ parser.add_argument(
246
+ "--chain",
247
+ help="a chain ID (label_asym_id) to select, you can provide this argument multiple times (if provided, it overrides the --type argument)",
248
+ action="append",
249
+ default=[],
250
+ )
251
+ parser.add_argument("path", help="path to a PDBx/mmCIF file")
252
+ args = parser.parse_args()
253
+
254
+ file = handle_input_file(args.path)
255
+
256
+ if args.chain:
257
+ print(filter_by_chains(file.read(), args.chain))
258
+ elif args.type:
259
+ print(filter_by_poly_types(file.read(), args.type))
260
+ else:
261
+ parser.print_help()
207
262
 
208
263
 
209
264
  if __name__ == "__main__":