RNApolis 0.4.10__tar.gz → 0.4.12__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. {rnapolis-0.4.10/src/RNApolis.egg-info → rnapolis-0.4.12}/PKG-INFO +1 -1
  2. {rnapolis-0.4.10 → rnapolis-0.4.12}/setup.py +1 -1
  3. {rnapolis-0.4.10 → rnapolis-0.4.12/src/RNApolis.egg-info}/PKG-INFO +1 -1
  4. {rnapolis-0.4.10 → rnapolis-0.4.12}/src/RNApolis.egg-info/SOURCES.txt +3 -1
  5. {rnapolis-0.4.10 → rnapolis-0.4.12}/src/rnapolis/molecule_filter.py +84 -29
  6. {rnapolis-0.4.10 → rnapolis-0.4.12}/src/rnapolis/parser.py +18 -0
  7. rnapolis-0.4.12/src/rnapolis/transformer.py +134 -0
  8. rnapolis-0.4.12/tests/test_molecule_filter.py +34 -0
  9. rnapolis-0.4.12/tests/test_transformer.py +63 -0
  10. rnapolis-0.4.10/src/rnapolis/transformer.py +0 -65
  11. {rnapolis-0.4.10 → rnapolis-0.4.12}/LICENSE +0 -0
  12. {rnapolis-0.4.10 → rnapolis-0.4.12}/README.md +0 -0
  13. {rnapolis-0.4.10 → rnapolis-0.4.12}/pyproject.toml +0 -0
  14. {rnapolis-0.4.10 → rnapolis-0.4.12}/setup.cfg +0 -0
  15. {rnapolis-0.4.10 → rnapolis-0.4.12}/src/RNApolis.egg-info/dependency_links.txt +0 -0
  16. {rnapolis-0.4.10 → rnapolis-0.4.12}/src/RNApolis.egg-info/entry_points.txt +0 -0
  17. {rnapolis-0.4.10 → rnapolis-0.4.12}/src/RNApolis.egg-info/requires.txt +0 -0
  18. {rnapolis-0.4.10 → rnapolis-0.4.12}/src/RNApolis.egg-info/top_level.txt +0 -0
  19. {rnapolis-0.4.10 → rnapolis-0.4.12}/src/rnapolis/annotator.py +0 -0
  20. {rnapolis-0.4.10 → rnapolis-0.4.12}/src/rnapolis/clashfinder.py +0 -0
  21. {rnapolis-0.4.10 → rnapolis-0.4.12}/src/rnapolis/common.py +0 -0
  22. {rnapolis-0.4.10 → rnapolis-0.4.12}/src/rnapolis/metareader.py +0 -0
  23. {rnapolis-0.4.10 → rnapolis-0.4.12}/src/rnapolis/motif_extractor.py +0 -0
  24. {rnapolis-0.4.10 → rnapolis-0.4.12}/src/rnapolis/rfam_folder.py +0 -0
  25. {rnapolis-0.4.10 → rnapolis-0.4.12}/src/rnapolis/tertiary.py +0 -0
  26. {rnapolis-0.4.10 → rnapolis-0.4.12}/src/rnapolis/util.py +0 -0
  27. {rnapolis-0.4.10 → rnapolis-0.4.12}/tests/test_annotator.py +0 -0
  28. {rnapolis-0.4.10 → rnapolis-0.4.12}/tests/test_bugfixes.py +0 -0
  29. {rnapolis-0.4.10 → rnapolis-0.4.12}/tests/test_common.py +0 -0
  30. {rnapolis-0.4.10 → rnapolis-0.4.12}/tests/test_metareader.py +0 -0
  31. {rnapolis-0.4.10 → rnapolis-0.4.12}/tests/test_parser.py +0 -0
  32. {rnapolis-0.4.10 → rnapolis-0.4.12}/tests/test_quadruplexes.py +0 -0
  33. {rnapolis-0.4.10 → rnapolis-0.4.12}/tests/test_rfam_folder.py +0 -0
  34. {rnapolis-0.4.10 → rnapolis-0.4.12}/tests/test_tertiary.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: RNApolis
3
- Version: 0.4.10
3
+ Version: 0.4.12
4
4
  Summary: A Python library containing RNA-related bioinformatics functions and classes
5
5
  Home-page: https://github.com/tzok/rnapolis-py
6
6
  Author: Tomasz Zok
@@ -5,7 +5,7 @@ with open("README.md") as f:
5
5
 
6
6
  setup(
7
7
  name="RNApolis",
8
- version="0.4.10",
8
+ version="0.4.12",
9
9
  packages=["rnapolis"],
10
10
  package_dir={"": "src"},
11
11
  author="Tomasz Zok",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: RNApolis
3
- Version: 0.4.10
3
+ Version: 0.4.12
4
4
  Summary: A Python library containing RNA-related bioinformatics functions and classes
5
5
  Home-page: https://github.com/tzok/rnapolis-py
6
6
  Author: Tomasz Zok
@@ -23,7 +23,9 @@ tests/test_annotator.py
23
23
  tests/test_bugfixes.py
24
24
  tests/test_common.py
25
25
  tests/test_metareader.py
26
+ tests/test_molecule_filter.py
26
27
  tests/test_parser.py
27
28
  tests/test_quadruplexes.py
28
29
  tests/test_rfam_folder.py
29
- tests/test_tertiary.py
30
+ tests/test_tertiary.py
31
+ tests/test_transformer.py
@@ -1,10 +1,11 @@
1
1
  #! /usr/bin/env python
2
2
  import argparse
3
3
  import tempfile
4
- from typing import List, Set, Tuple
4
+ from typing import Iterable, List, Set, Tuple
5
5
 
6
6
  from mmcif.io.IoAdapterPy import IoAdapterPy
7
7
  from mmcif.io.PdbxReader import DataCategory, DataContainer
8
+
8
9
  from rnapolis.util import handle_input_file
9
10
 
10
11
  # Source: https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Items/_entity_poly.type.html
@@ -153,38 +154,63 @@ def select_category_by_id(
153
154
  return attributes, rows
154
155
 
155
156
 
156
- def main():
157
- parser = argparse.ArgumentParser()
158
- parser.add_argument(
159
- "--type",
160
- help="a type of molecule to select, you can provide this argument multiple times (default: polyribonucleotide)",
161
- action="append",
162
- default=["polyribonucleotide"],
163
- choices=ENTITY_POLY_TYPES,
157
+ def filter_by_poly_types(
158
+ file_content: str, entity_poly_types: Iterable[str] = ["polyribonucleotide"]
159
+ ) -> str:
160
+ adapter = IoAdapterPy()
161
+
162
+ with tempfile.NamedTemporaryFile("rt+") as f:
163
+ f.write(file_content)
164
+ f.seek(0)
165
+ data = adapter.readFile(f.name)
166
+
167
+ entity_ids = select_ids(
168
+ data, "entity_poly", "type", "entity_id", set(entity_poly_types)
164
169
  )
165
- parser.add_argument(
166
- "--chain",
167
- help="a chain ID (label_asym_id) to select, you can provide this argument multiple times (if provided, it overrides the --type argument)",
168
- action="append",
169
- default=[],
170
+ asym_ids = select_ids(data, "struct_asym", "entity_id", "id", entity_ids)
171
+ auth_asym_ids = select_ids(
172
+ data, "atom_site", "label_asym_id", "auth_asym_id", asym_ids
170
173
  )
171
- parser.add_argument("path", help="path to a PDBx/mmCIF file")
172
- args = parser.parse_args()
173
174
 
174
- file = handle_input_file(args.path)
175
- adapter = IoAdapterPy()
176
- data = adapter.readFile(file.name)
177
175
  output = DataContainer("rnapolis")
178
176
 
179
- if args.chain:
180
- entity_ids = select_ids(data, "struct_asym", "id", "entity_id", set(args.chain))
181
- asym_ids = set(args.chain)
182
- else:
183
- entity_ids = select_ids(
184
- data, "entity_poly", "type", "entity_id", set(args.type)
185
- )
186
- asym_ids = select_ids(data, "struct_asym", "entity_id", "id", entity_ids)
177
+ for table, ids in (
178
+ (CATEGORIES_WITH_ENTITY_ID, entity_ids),
179
+ (CATEGORIES_WITH_ASYM_ID, asym_ids),
180
+ (CATEGORIES_WITH_AUTH_ASYM_ID, auth_asym_ids),
181
+ ):
182
+ for category, field_name in table:
183
+ attributes, rows = select_category_by_id(data, category, field_name, ids)
187
184
 
185
+ if attributes and rows:
186
+ obj = DataCategory(category, attributes, rows)
187
+ output.append(obj)
188
+
189
+ with tempfile.NamedTemporaryFile("rt+") as tmp:
190
+ adapter.writeFile(tmp.name, [output])
191
+ tmp.seek(0)
192
+ return tmp.read()
193
+
194
+
195
+ def filter_by_chains(file_content: str, chains: Iterable[str]) -> str:
196
+ """
197
+ Filter a PDBx/mmCIF file by chain IDs. The function returns a new PDBx/mmCIF file.
198
+
199
+ Warning! The new file might contain more chains than provided in the `chains` argument.
200
+ This is because the function filters by entity, so if you ask for chain "A",
201
+ which is part of entity 1 having chains "A", "B" and "C", then you will get all three chains.
202
+ """
203
+ adapter = IoAdapterPy()
204
+
205
+ with tempfile.NamedTemporaryFile("rt+") as f:
206
+ f.write(file_content)
207
+ f.seek(0)
208
+ data = adapter.readFile(f.name)
209
+
210
+ output = DataContainer("rnapolis")
211
+
212
+ entity_ids = select_ids(data, "struct_asym", "id", "entity_id", set(chains))
213
+ asym_ids = set(chains)
188
214
  auth_asym_ids = select_ids(
189
215
  data, "atom_site", "label_asym_id", "auth_asym_id", asym_ids
190
216
  )
@@ -201,9 +227,38 @@ def main():
201
227
  obj = DataCategory(category, attributes, rows)
202
228
  output.append(obj)
203
229
 
204
- with tempfile.NamedTemporaryFile() as tmp:
230
+ with tempfile.NamedTemporaryFile("rt+") as tmp:
205
231
  adapter.writeFile(tmp.name, [output])
206
- print(tmp.read().decode())
232
+ tmp.seek(0)
233
+ return tmp.read()
234
+
235
+
236
def main():
    """Command-line entry point: filter a PDBx/mmCIF file and print the result.

    When at least one ``--chain`` is given, the file is filtered by chain IDs
    and ``--type`` is ignored. Otherwise it is filtered by molecule type,
    using the ``--type`` values if any were given and ``polyribonucleotide``
    if none were.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--type",
        help="a type of molecule to select, you can provide this argument multiple times (default: polyribonucleotide)",
        action="append",
        # With action="append" argparse appends command-line values to a
        # non-empty default instead of replacing it, so the default is applied
        # manually below to let the user actually override it.
        default=None,
        choices=ENTITY_POLY_TYPES,
    )
    parser.add_argument(
        "--chain",
        help="a chain ID (label_asym_id) to select, you can provide this argument multiple times (if provided, it overrides the --type argument)",
        action="append",
        default=[],
    )
    parser.add_argument("path", help="path to a PDBx/mmCIF file")
    args = parser.parse_args()

    file = handle_input_file(args.path)
    content = file.read()

    if args.chain:
        print(filter_by_chains(content, args.chain))
    else:
        # Fall back to the documented default only when no --type was given.
        print(filter_by_poly_types(content, args.type or ["polyribonucleotide"]))
207
262
 
208
263
 
209
264
  if __name__ == "__main__":
@@ -65,6 +65,7 @@ def parse_cif(
65
65
  atom_site = data[0].getObj("atom_site")
66
66
  mod_residue = data[0].getObj("pdbx_struct_mod_residue")
67
67
  entity_poly = data[0].getObj("entity_poly")
68
+ entity = data[0].getObj("entity")
68
69
 
69
70
  if atom_site:
70
71
  for row in atom_site.getRowList():
@@ -219,6 +220,23 @@ def parse_cif(
219
220
  if entity_id and pdbx_seq_one_letter_code_can:
220
221
  sequence_by_entity[entity_id] = pdbx_seq_one_letter_code_can
221
222
 
223
+ if entity:
224
+ for row in entity.getRowList():
225
+ row_dict = dict(zip(entity.getAttributeList(), row))
226
+
227
+ entity_id = row_dict.get("id", None)
228
+ type_ = row_dict.get("type", None)
229
+
230
+ if entity_id:
231
+ sequence_by_entity[entity_id] = sequence_by_entity.get(
232
+ entity_id, ""
233
+ )
234
+
235
+ if type_:
236
+ is_nucleic_acid_by_entity[entity_id] = (
237
+ is_nucleic_acid_by_entity.get(entity_id, type_)
238
+ )
239
+
222
240
  atoms = filter_clashing_atoms(atoms_to_process)
223
241
  return atoms, modified, sequence_by_entity, is_nucleic_acid_by_entity
224
242
 
@@ -0,0 +1,134 @@
1
+ #! /usr/bin/env python
2
+ import argparse
3
+ import string
4
+ import tempfile
5
+ from typing import Dict, Tuple
6
+
7
+ from mmcif.io.IoAdapterPy import IoAdapterPy
8
+ from mmcif.io.PdbxReader import DataCategory
9
+
10
+
11
def copy_from_to(
    file_content: str,
    category: str = "atom_site",
    copy_from: str = "label_asym_id",
    copy_to: str = "auth_asym_id",
) -> str:
    """Copy the values of one data item onto another within an mmCIF category.

    Only the first data block of the file is modified. If ``copy_to`` does not
    exist in the category yet, it is added as a new attribute.

    Args:
        file_content: text of a PDBx/mmCIF file.
        category: name of the category to work on.
        copy_from: name of the data item whose values are copied.
        copy_to: name of the data item that receives the values.

    Returns:
        The text of the rewritten mmCIF file. ``file_content`` is returned
        unchanged when the file has no data blocks, the category is missing,
        or ``copy_from`` is not an attribute of the category.
    """
    adapter = IoAdapterPy()

    # The adapter reads from a path, so the content goes through a temp file.
    with tempfile.NamedTemporaryFile(mode="wt") as f:
        f.write(file_content)
        f.flush()
        data = adapter.readFile(f.name)

    if not data or category not in data[0].getObjNameList():
        return file_content

    category_obj = data[0].getObj(category)
    attributes = category_obj.getAttributeList()

    if copy_from not in attributes:
        return file_content

    if copy_to not in attributes:
        attributes.append(copy_to)

    # Column positions do not change while iterating, so look them up once.
    i = attributes.index(copy_from)
    j = attributes.index(copy_to)

    transformed = []
    for row in category_obj.getRowList():
        if j >= len(row):
            # Freshly added column: the row has no slot for it yet.
            row.append(row[i])
        else:
            row[j] = row[i]
        transformed.append(row)

    # Build the replacement under the category *name*, so that the container
    # can match it against the existing category of that name.
    data[0].replace(DataCategory(category, attributes, transformed))

    with tempfile.NamedTemporaryFile(mode="rt+") as f:
        adapter.writeFile(f.name, data)
        f.seek(0)
        return f.read()
53
+
54
+
55
def replace_value(
    file_content: str,
    category: str = "atom_site",
    column: str = "auth_asym_id",
    values: str = "".join([c for c in string.printable if c not in string.whitespace]),
) -> Tuple[str, Dict]:
    """Replace every distinct value of a data item with a single character.

    Distinct values are numbered in order of first appearance in the category
    rows; the n-th distinct value is replaced by ``values[n]``. Only the first
    data block of the file is modified.

    Args:
        file_content: text of a PDBx/mmCIF file.
        category: name of the category to work on.
        column: name of the data item whose values are replaced.
        values: replacement characters, consumed from left to right.

    Returns:
        A tuple ``(content, mapping)`` with the text of the rewritten mmCIF
        file and a dict from each original value to its replacement. When the
        file has no data blocks, the category is missing, or ``column`` is not
        an attribute of the category, ``(file_content, {})`` is returned.

    Raises:
        IndexError: if the column holds more distinct values than there are
            characters in ``values``.
    """
    adapter = IoAdapterPy()

    # The adapter reads from a path, so the content goes through a temp file.
    with tempfile.NamedTemporaryFile(mode="wt") as f:
        f.write(file_content)
        f.flush()
        data = adapter.readFile(f.name)

    if not data or category not in data[0].getObjNameList():
        return file_content, {}

    category_obj = data[0].getObj(category)
    attributes = category_obj.getAttributeList()

    if column not in attributes:
        return file_content, {}

    # The column position is the same for every row, so look it up once.
    i = attributes.index(column)

    transformed = []
    mapping = {}

    for row in category_obj.getRowList():
        original = row[i]

        if original not in mapping:
            if len(mapping) >= len(values):
                # Same exception type as plain indexing would raise, but with
                # a message that names the actual problem.
                raise IndexError(
                    f"not enough replacement values: {category}.{column} has "
                    f"more than {len(values)} distinct values"
                )
            mapping[original] = values[len(mapping)]

        row[i] = mapping[original]
        transformed.append(row)

    # Build the replacement under the category *name*, so that the container
    # can match it against the existing category of that name.
    data[0].replace(DataCategory(category, attributes, transformed))

    with tempfile.NamedTemporaryFile(mode="rt+") as f:
        adapter.writeFile(f.name, data)
        f.seek(0)
        return f.read(), mapping
94
+
95
+
96
def main():
    """Command-line entry point: transform an mmCIF file and write the result.

    Two mutually exclusive modes are supported. With both ``--copy-from`` and
    ``--copy-to`` the values of one data item are copied onto another. With
    both ``--replace`` and ``--values`` the values of a data item are replaced
    by characters taken from ``--values``. If neither pair is complete, the
    help text is printed and nothing is written. When both pairs are given,
    the copy mode wins.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input", help="path to input mmCIF file")
    parser.add_argument("output", help="path to output mmCIF file")
    parser.add_argument(
        "--category", help="name of the category to work on, e.g., atom_site"
    )
    parser.add_argument(
        "--copy-from",
        help="name of a data item to copy from, e.g., label_asym_id (exclusive with --replace)",
    )
    parser.add_argument(
        "--copy-to",
        help="name of a data item to copy to, e.g., auth_asym_id (exclusive with --replace)",
    )
    parser.add_argument(
        "--replace",
        help="name of a data item to replace values, e.g., auth_asym_id (exclusive with --copy-from and --copy-to)",
    )
    parser.add_argument(
        "--values",
        help="values to replace with, e.g., ABCDEFGHIJKLMNOPQRSTUVWXYZ (exclusive with --copy-from and --copy-to)",
    )
    args = parser.parse_args()

    copy_requested = bool(args.copy_from and args.copy_to)
    replace_requested = bool(args.replace and args.values)

    if not (copy_requested or replace_requested):
        parser.print_help()
        return

    # The transformation functions operate on file *content*, not on a path.
    with open(args.input) as f:
        file_content = f.read()

    # argparse yields None for an omitted --category; fall back to the same
    # default the transformation functions declare in their signatures.
    category = args.category or "atom_site"

    if copy_requested:
        output = copy_from_to(file_content, category, args.copy_from, args.copy_to)
    else:
        # replace_value returns (content, mapping); only the content is written.
        output, _ = replace_value(file_content, category, args.replace, args.values)

    with open(args.output, "w") as f:
        f.write(output)
131
+
132
+
133
+ if __name__ == "__main__":
134
+ main()
@@ -0,0 +1,34 @@
1
+ import tempfile
2
+
3
+ from rnapolis.molecule_filter import filter_by_chains, filter_by_poly_types
4
+ from rnapolis.parser import parse_cif
5
+
6
+
7
def test_filter_by_poly_types():
    """Filtering 1a9n by the polyribonucleotide type keeps label chains A and B."""
    with open("tests/1a9n.cif") as cif_file:
        content = cif_file.read()

    rna_only = filter_by_poly_types(content, ["polyribonucleotide"])

    # Parse the filtered text back to inspect which chains survived.
    with tempfile.NamedTemporaryFile("rt+") as tmp:
        tmp.write(rna_only)
        tmp.seek(0)
        atoms, _, _, _ = parse_cif(tmp)

    label_chains = {atom.label.chain for atom in atoms if atom.label}
    assert label_chains == {"A", "B"}
20
+
21
+
22
def test_filter_by_chains():
    """Filtering 1a9n by chains A and C yields at least those two label chains."""
    with open("tests/1a9n.cif") as cif_file:
        content = cif_file.read()

    selected = filter_by_chains(content, ["A", "C"])

    # Parse the filtered text back to inspect which chains survived.
    with tempfile.NamedTemporaryFile("rt+") as tmp:
        tmp.write(selected)
        tmp.seek(0)
        atoms, _, _, _ = parse_cif(tmp)

    label_chains = {atom.label.chain for atom in atoms if atom.label}
    # Superset check: the result may hold more chains than were requested.
    assert label_chains >= {"A", "C"}
@@ -0,0 +1,63 @@
1
+ import tempfile
2
+
3
+ from rnapolis.parser import parse_cif
4
+ from rnapolis.transformer import copy_from_to, replace_value
5
+
6
+
7
def test_replace_value():
    """replace_value renames auth chains of 4gqj-assembly1 and leaves label chains alone."""
    with open("tests/4gqj-assembly1.cif") as cif_file:
        content = cif_file.read()

    with tempfile.NamedTemporaryFile(mode="wt") as tmp:
        tmp.write(content)
        tmp.seek(0)
        org_atoms, _, _, _ = parse_cif(tmp)

    # Before the transformation both naming schemes use the same chain names.
    assembly_chains = {"A", "B", "A-2", "B-2"}
    org_label_asym_id = {atom.label.chain for atom in org_atoms if atom.label}
    org_auth_asym_id = {atom.auth.chain for atom in org_atoms if atom.auth}
    assert org_label_asym_id == assembly_chains
    assert org_auth_asym_id == assembly_chains

    replaced_content, mapping = replace_value(
        content, "atom_site", "auth_asym_id", "ABCD"
    )
    assert mapping == {"A": "A", "B": "B", "A-2": "C", "B-2": "D"}

    with tempfile.NamedTemporaryFile(mode="rt+") as tmp:
        tmp.write(replaced_content)
        tmp.seek(0)
        rep_atoms, _, _, _ = parse_cif(tmp)

    # Only the auth chain names are expected to change.
    rep_label_asym_id = {atom.label.chain for atom in rep_atoms if atom.label}
    rep_auth_asym_id = {atom.auth.chain for atom in rep_atoms if atom.auth}
    assert rep_label_asym_id == assembly_chains
    assert rep_auth_asym_id == {"A", "B", "C", "D"}
35
+
36
+
37
def test_copy_from_to():
    """copy_from_to overwrites the auth chain names of 5it9 with the label chain names."""
    with open("tests/5it9.cif") as cif_file:
        content = cif_file.read()

    with tempfile.NamedTemporaryFile(mode="wt") as tmp:
        tmp.write(content)
        tmp.seek(0)
        org_atoms, _, _, _ = parse_cif(tmp)

    # Before the transformation the two naming schemes differ.
    label_chains = {"HA", "IA"}
    org_label_asym_id = {atom.label.chain for atom in org_atoms if atom.label}
    org_auth_asym_id = {atom.auth.chain for atom in org_atoms if atom.auth}
    assert org_label_asym_id == label_chains
    assert org_auth_asym_id == {"2", "i"}

    replaced_content = copy_from_to(
        content, "atom_site", "label_asym_id", "auth_asym_id"
    )

    with tempfile.NamedTemporaryFile(mode="rt+") as tmp:
        tmp.write(replaced_content)
        tmp.seek(0)
        rep_atoms, _, _, _ = parse_cif(tmp)

    # Afterwards both schemes carry the label chain names.
    rep_label_asym_id = {atom.label.chain for atom in rep_atoms if atom.label}
    rep_auth_asym_id = {atom.auth.chain for atom in rep_atoms if atom.auth}
    assert rep_label_asym_id == label_chains
    assert rep_auth_asym_id == label_chains
@@ -1,65 +0,0 @@
1
- #! /usr/bin/env python
2
- import argparse
3
- import sys
4
-
5
- from mmcif.io.IoAdapterPy import IoAdapterPy
6
- from mmcif.io.PdbxReader import DataCategory
7
-
8
-
9
- def main():
10
- parser = argparse.ArgumentParser()
11
- parser.add_argument("input", help="path to input mmCIF file")
12
- parser.add_argument("output", help="path to output mmCIF file")
13
- parser.add_argument(
14
- "--category", help="name of the category to work on, e.g., atom_site"
15
- )
16
- parser.add_argument(
17
- "--copy-from", help="name of a data item to copy from, e.g., label_asym_id"
18
- )
19
- parser.add_argument(
20
- "--copy-to", help="name of a data item to copy to, e.g., auth_asym_id"
21
- )
22
- args = parser.parse_args()
23
-
24
- adapter = IoAdapterPy()
25
- data = adapter.readFile(args.input)
26
-
27
- if len(data) == 0:
28
- print("Empty mmCIF file", file=sys.stderr)
29
- sys.exit(1)
30
-
31
- if args.category not in data[0].getObjNameList():
32
- print(f"Failed to find {args.category} in the mmCIF file", file=sys.stderr)
33
- sys.exit(1)
34
-
35
- category = data[0].getObj(args.category)
36
- attributes = category.getAttributeList()
37
-
38
- if args.copy_from not in attributes:
39
- print(
40
- f"Failed to find data item {args.copy_from} in {args.category}",
41
- file=sys.stderr,
42
- )
43
- sys.exit(1)
44
-
45
- transformed = []
46
-
47
- if args.copy_to not in attributes:
48
- attributes.append(args.copy_to)
49
-
50
- for row in category.getRowList():
51
- i = attributes.index(args.copy_from)
52
- j = attributes.index(args.copy_to)
53
- if j >= len(row):
54
- row.append(row[i])
55
- else:
56
- row[j] = row[i]
57
- transformed.append(row)
58
-
59
- data[0].replace(DataCategory(args.category, attributes, transformed))
60
-
61
- adapter.writeFile(args.output, data)
62
-
63
-
64
- if __name__ == "__main__":
65
- main()
File without changes
File without changes
File without changes
File without changes