RNApolis 0.4.10__py3-none-any.whl → 0.4.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: RNApolis
3
- Version: 0.4.10
3
+ Version: 0.4.12
4
4
  Summary: A Python library containing RNA-related bioinformatics functions and classes
5
5
  Home-page: https://github.com/tzok/rnapolis-py
6
6
  Author: Tomasz Zok
@@ -2,16 +2,16 @@ rnapolis/annotator.py,sha256=_hsSX2VHFvIQ47l_EA7lwGFXLiVLbhFPEsOQzBKbjRk,22100
2
2
  rnapolis/clashfinder.py,sha256=i95kp0o6OWNqmJDBr-PbsZd7RY2iJtBDr7QqolJSuAQ,8513
3
3
  rnapolis/common.py,sha256=LY6Uz96Br8ki_gA8LpfatgtvVbt9jOTkwgagayqTgf8,31251
4
4
  rnapolis/metareader.py,sha256=I1-cXc2YNBPwa3zihAnMTjEsAo79tEKzSmWu5yvN1Pk,2071
5
- rnapolis/molecule_filter.py,sha256=hB6-nXgjmw7FAsQ3bj0cZ2FvuW2I1PXunEfcdwEUB1o,7389
5
+ rnapolis/molecule_filter.py,sha256=F_xkAe7q2NZAaDpRaeikv-twUvbNflWdlLte7oFn2Ms,9130
6
6
  rnapolis/motif_extractor.py,sha256=Lfn1iEkhkP9eZD3GPEWNAfy00QO7QPCc8wM_XS1ory8,1147
7
- rnapolis/parser.py,sha256=2pQYy0sh8TCpeluMmmSJ7C5dudK_bsfstTWCdpwwpNU,15193
7
+ rnapolis/parser.py,sha256=lHI6LyFbEEPdHOzbged1-Ov0tl6MpSungIPacip0Py0,15838
8
8
  rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
9
9
  rnapolis/tertiary.py,sha256=6t9ZB4w33-5n_M3sns1RoFXCOTgVAgGH4WDNG5OG9Kg,23426
10
- rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
10
+ rnapolis/transformer.py,sha256=aC0nBmHHJf5TyLvBIV57Jj3tlwpvHbPo347opfAOlQA,3844
11
11
  rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
12
- RNApolis-0.4.10.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
13
- RNApolis-0.4.10.dist-info/METADATA,sha256=AiTwfWTRaaJ_Zd_E1UIYMu54Hi0vu9WN8dYA67x3SLk,54323
14
- RNApolis-0.4.10.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
15
- RNApolis-0.4.10.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
16
- RNApolis-0.4.10.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
17
- RNApolis-0.4.10.dist-info/RECORD,,
12
+ RNApolis-0.4.12.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
13
+ RNApolis-0.4.12.dist-info/METADATA,sha256=fPrlpVrM83UYwPYwaeymBP7SeVLMv7XftCZOl7hUnes,54323
14
+ RNApolis-0.4.12.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
15
+ RNApolis-0.4.12.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
16
+ RNApolis-0.4.12.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
17
+ RNApolis-0.4.12.dist-info/RECORD,,
@@ -1,10 +1,11 @@
1
1
  #! /usr/bin/env python
2
2
  import argparse
3
3
  import tempfile
4
- from typing import List, Set, Tuple
4
+ from typing import Iterable, List, Set, Tuple
5
5
 
6
6
  from mmcif.io.IoAdapterPy import IoAdapterPy
7
7
  from mmcif.io.PdbxReader import DataCategory, DataContainer
8
+
8
9
  from rnapolis.util import handle_input_file
9
10
 
10
11
  # Source: https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Items/_entity_poly.type.html
@@ -153,38 +154,63 @@ def select_category_by_id(
153
154
  return attributes, rows
154
155
 
155
156
 
156
- def main():
157
- parser = argparse.ArgumentParser()
158
- parser.add_argument(
159
- "--type",
160
- help="a type of molecule to select, you can provide this argument multiple times (default: polyribonucleotide)",
161
- action="append",
162
- default=["polyribonucleotide"],
163
- choices=ENTITY_POLY_TYPES,
157
+ def filter_by_poly_types(
158
+ file_content: str, entity_poly_types: Iterable[str] = ["polyribonucleotide"]
159
+ ) -> str:
160
+ adapter = IoAdapterPy()
161
+
162
+ with tempfile.NamedTemporaryFile("rt+") as f:
163
+ f.write(file_content)
164
+ f.seek(0)
165
+ data = adapter.readFile(f.name)
166
+
167
+ entity_ids = select_ids(
168
+ data, "entity_poly", "type", "entity_id", set(entity_poly_types)
164
169
  )
165
- parser.add_argument(
166
- "--chain",
167
- help="a chain ID (label_asym_id) to select, you can provide this argument multiple times (if provided, it overrides the --type argument)",
168
- action="append",
169
- default=[],
170
+ asym_ids = select_ids(data, "struct_asym", "entity_id", "id", entity_ids)
171
+ auth_asym_ids = select_ids(
172
+ data, "atom_site", "label_asym_id", "auth_asym_id", asym_ids
170
173
  )
171
- parser.add_argument("path", help="path to a PDBx/mmCIF file")
172
- args = parser.parse_args()
173
174
 
174
- file = handle_input_file(args.path)
175
- adapter = IoAdapterPy()
176
- data = adapter.readFile(file.name)
177
175
  output = DataContainer("rnapolis")
178
176
 
179
- if args.chain:
180
- entity_ids = select_ids(data, "struct_asym", "id", "entity_id", set(args.chain))
181
- asym_ids = set(args.chain)
182
- else:
183
- entity_ids = select_ids(
184
- data, "entity_poly", "type", "entity_id", set(args.type)
185
- )
186
- asym_ids = select_ids(data, "struct_asym", "entity_id", "id", entity_ids)
177
+ for table, ids in (
178
+ (CATEGORIES_WITH_ENTITY_ID, entity_ids),
179
+ (CATEGORIES_WITH_ASYM_ID, asym_ids),
180
+ (CATEGORIES_WITH_AUTH_ASYM_ID, auth_asym_ids),
181
+ ):
182
+ for category, field_name in table:
183
+ attributes, rows = select_category_by_id(data, category, field_name, ids)
187
184
 
185
+ if attributes and rows:
186
+ obj = DataCategory(category, attributes, rows)
187
+ output.append(obj)
188
+
189
+ with tempfile.NamedTemporaryFile("rt+") as tmp:
190
+ adapter.writeFile(tmp.name, [output])
191
+ tmp.seek(0)
192
+ return tmp.read()
193
+
194
+
195
+ def filter_by_chains(file_content: str, chains: Iterable[str]) -> str:
196
+ """
197
+ Filter a PDBx/mmCIF file by chain IDs. The function returns a new PDBx/mmCIF file.
198
+
199
+ Warning! The new file might contain more chains than provided in the `chains` argument.
200
+ This is because the function filters by entity, so if you ask for chain "A",
201
+ which is part of entity 1 having chains "A", "B" and "C", then you will get all three chains.
202
+ """
203
+ adapter = IoAdapterPy()
204
+
205
+ with tempfile.NamedTemporaryFile("rt+") as f:
206
+ f.write(file_content)
207
+ f.seek(0)
208
+ data = adapter.readFile(f.name)
209
+
210
+ output = DataContainer("rnapolis")
211
+
212
+ entity_ids = select_ids(data, "struct_asym", "id", "entity_id", set(chains))
213
+ asym_ids = set(chains)
188
214
  auth_asym_ids = select_ids(
189
215
  data, "atom_site", "label_asym_id", "auth_asym_id", asym_ids
190
216
  )
@@ -201,9 +227,38 @@ def main():
201
227
  obj = DataCategory(category, attributes, rows)
202
228
  output.append(obj)
203
229
 
204
- with tempfile.NamedTemporaryFile() as tmp:
230
+ with tempfile.NamedTemporaryFile("rt+") as tmp:
205
231
  adapter.writeFile(tmp.name, [output])
206
- print(tmp.read().decode())
232
+ tmp.seek(0)
233
+ return tmp.read()
234
+
235
+
236
+ def main():
237
+ parser = argparse.ArgumentParser()
238
+ parser.add_argument(
239
+ "--type",
240
+ help="a type of molecule to select, you can provide this argument multiple times (default: polyribonucleotide)",
241
+ action="append",
242
+ default=["polyribonucleotide"],
243
+ choices=ENTITY_POLY_TYPES,
244
+ )
245
+ parser.add_argument(
246
+ "--chain",
247
+ help="a chain ID (label_asym_id) to select, you can provide this argument multiple times (if provided, it overrides the --type argument)",
248
+ action="append",
249
+ default=[],
250
+ )
251
+ parser.add_argument("path", help="path to a PDBx/mmCIF file")
252
+ args = parser.parse_args()
253
+
254
+ file = handle_input_file(args.path)
255
+
256
+ if args.chain:
257
+ print(filter_by_chains(file.read(), args.chain))
258
+ elif args.type:
259
+ print(filter_by_poly_types(file.read(), args.type))
260
+ else:
261
+ parser.print_help()
207
262
 
208
263
 
209
264
  if __name__ == "__main__":
rnapolis/parser.py CHANGED
@@ -65,6 +65,7 @@ def parse_cif(
65
65
  atom_site = data[0].getObj("atom_site")
66
66
  mod_residue = data[0].getObj("pdbx_struct_mod_residue")
67
67
  entity_poly = data[0].getObj("entity_poly")
68
+ entity = data[0].getObj("entity")
68
69
 
69
70
  if atom_site:
70
71
  for row in atom_site.getRowList():
@@ -219,6 +220,23 @@ def parse_cif(
219
220
  if entity_id and pdbx_seq_one_letter_code_can:
220
221
  sequence_by_entity[entity_id] = pdbx_seq_one_letter_code_can
221
222
 
223
+ if entity:
224
+ for row in entity.getRowList():
225
+ row_dict = dict(zip(entity.getAttributeList(), row))
226
+
227
+ entity_id = row_dict.get("id", None)
228
+ type_ = row_dict.get("type", None)
229
+
230
+ if entity_id:
231
+ sequence_by_entity[entity_id] = sequence_by_entity.get(
232
+ entity_id, ""
233
+ )
234
+
235
+ if type_:
236
+ is_nucleic_acid_by_entity[entity_id] = (
237
+ is_nucleic_acid_by_entity.get(entity_id, type_)
238
+ )
239
+
222
240
  atoms = filter_clashing_atoms(atoms_to_process)
223
241
  return atoms, modified, sequence_by_entity, is_nucleic_acid_by_entity
224
242
 
rnapolis/transformer.py CHANGED
@@ -1,64 +1,133 @@
1
1
  #! /usr/bin/env python
2
2
  import argparse
3
- import sys
3
+ import string
4
+ import tempfile
5
+ from typing import Dict, Tuple
4
6
 
5
7
  from mmcif.io.IoAdapterPy import IoAdapterPy
6
8
  from mmcif.io.PdbxReader import DataCategory
7
9
 
8
10
 
9
- def main():
10
- parser = argparse.ArgumentParser()
11
- parser.add_argument("input", help="path to input mmCIF file")
12
- parser.add_argument("output", help="path to output mmCIF file")
13
- parser.add_argument(
14
- "--category", help="name of the category to work on, e.g., atom_site"
15
- )
16
- parser.add_argument(
17
- "--copy-from", help="name of a data item to copy from, e.g., label_asym_id"
18
- )
19
- parser.add_argument(
20
- "--copy-to", help="name of a data item to copy to, e.g., auth_asym_id"
21
- )
22
- args = parser.parse_args()
23
-
11
+ def copy_from_to(
12
+ file_content: str,
13
+ category: str = "atom_site",
14
+ copy_from: str = "label_asym_id",
15
+ copy_to: str = "auth_asym_id",
16
+ ) -> str:
24
17
  adapter = IoAdapterPy()
25
- data = adapter.readFile(args.input)
26
18
 
27
- if len(data) == 0:
28
- print("Empty mmCIF file", file=sys.stderr)
29
- sys.exit(1)
19
+ with tempfile.NamedTemporaryFile(mode="wt") as f:
20
+ f.write(file_content)
21
+ f.seek(0)
22
+ data = adapter.readFile(f.name)
30
23
 
31
- if args.category not in data[0].getObjNameList():
32
- print(f"Failed to find {args.category} in the mmCIF file", file=sys.stderr)
33
- sys.exit(1)
24
+ if len(data) == 0 or category not in data[0].getObjNameList():
25
+ return file_content
34
26
 
35
- category = data[0].getObj(args.category)
36
- attributes = category.getAttributeList()
27
+ category_obj = data[0].getObj(category)
28
+ attributes = category_obj.getAttributeList()
37
29
 
38
- if args.copy_from not in attributes:
39
- print(
40
- f"Failed to find data item {args.copy_from} in {args.category}",
41
- file=sys.stderr,
42
- )
43
- sys.exit(1)
30
+ if copy_from not in attributes:
31
+ return file_content
44
32
 
45
33
  transformed = []
46
34
 
47
- if args.copy_to not in attributes:
48
- attributes.append(args.copy_to)
35
+ if copy_to not in attributes:
36
+ attributes.append(copy_to)
49
37
 
50
- for row in category.getRowList():
51
- i = attributes.index(args.copy_from)
52
- j = attributes.index(args.copy_to)
38
+ for row in category_obj.getRowList():
39
+ i = attributes.index(copy_from)
40
+ j = attributes.index(copy_to)
53
41
  if j >= len(row):
54
42
  row.append(row[i])
55
43
  else:
56
44
  row[j] = row[i]
57
45
  transformed.append(row)
58
46
 
59
- data[0].replace(DataCategory(args.category, attributes, transformed))
47
+ data[0].replace(DataCategory(category_obj, attributes, transformed))
48
+
49
+ with tempfile.NamedTemporaryFile(mode="rt+") as f:
50
+ adapter.writeFile(f.name, data)
51
+ f.seek(0)
52
+ return f.read()
53
+
54
+
55
+ def replace_value(
56
+ file_content: str,
57
+ category: str = "atom_site",
58
+ column: str = "auth_asym_id",
59
+ values: str = "".join([c for c in string.printable if c not in string.whitespace]),
60
+ ) -> Tuple[str, Dict]:
61
+ adapter = IoAdapterPy()
62
+ with tempfile.NamedTemporaryFile(mode="wt") as f:
63
+ f.write(file_content)
64
+ f.seek(0)
65
+ data = adapter.readFile(f.name)
66
+
67
+ if len(data) == 0 or category not in data[0].getObjNameList():
68
+ return file_content, {}
69
+
70
+ category_obj = data[0].getObj(category)
71
+ attributes = category_obj.getAttributeList()
72
+
73
+ if column not in attributes:
74
+ return file_content, {}
75
+
76
+ transformed = []
77
+ mapping = {}
78
+
79
+ for row in category_obj.getRowList():
80
+ i = attributes.index(column)
81
+
82
+ if row[i] not in mapping:
83
+ mapping[row[i]] = values[len(mapping)]
84
+
85
+ row[i] = mapping[row[i]]
86
+ transformed.append(row)
87
+
88
+ data[0].replace(DataCategory(category_obj, attributes, transformed))
89
+
90
+ with tempfile.NamedTemporaryFile(mode="rt+") as f:
91
+ adapter.writeFile(f.name, data)
92
+ f.seek(0)
93
+ return f.read(), mapping
94
+
95
+
96
+ def main():
97
+ parser = argparse.ArgumentParser()
98
+ parser.add_argument("input", help="path to input mmCIF file")
99
+ parser.add_argument("output", help="path to output mmCIF file")
100
+ parser.add_argument(
101
+ "--category", help="name of the category to work on, e.g., atom_site"
102
+ )
103
+ parser.add_argument(
104
+ "--copy-from",
105
+ help="name of a data item to copy from, e.g., label_asym_id (exclusive with --replace)",
106
+ )
107
+ parser.add_argument(
108
+ "--copy-to",
109
+ help="name of a data item to copy to, e.g., auth_asym_id (exclusive with --replace)",
110
+ )
111
+ parser.add_argument(
112
+ "--replace",
113
+ help="name of a data item to replace values, e.g., auth_asym_id (exclusive with --copy-from and --copy-to)",
114
+ )
115
+ parser.add_argument(
116
+ "--values",
117
+ help="values to replace with, e.g., ABCDEFGHIJKLMNOPQRSTUVWXYZ (exclusive with --copy-from and --copy-to)",
118
+ )
119
+ args = parser.parse_args()
120
+
121
+ if args.copy_from and args.copy_to:
122
+ output = copy_from_to(args.input, args.category, args.copy_from, args.copy_to)
123
+ elif args.replace and args.values:
124
+ output = replace_value(args.input, args.category, args.replace, args.values)
125
+ else:
126
+ parser.print_help()
127
+ return
60
128
 
61
- adapter.writeFile(args.output, data)
129
+ with open(args.output, "w") as f:
130
+ f.write(output)
62
131
 
63
132
 
64
133
  if __name__ == "__main__":