RNApolis 0.4.9__tar.gz → 0.4.11__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33)
  1. {rnapolis-0.4.9/src/RNApolis.egg-info → rnapolis-0.4.11}/PKG-INFO +1 -1
  2. {rnapolis-0.4.9 → rnapolis-0.4.11}/setup.py +1 -1
  3. {rnapolis-0.4.9 → rnapolis-0.4.11/src/RNApolis.egg-info}/PKG-INFO +1 -1
  4. {rnapolis-0.4.9 → rnapolis-0.4.11}/src/RNApolis.egg-info/SOURCES.txt +2 -1
  5. {rnapolis-0.4.9 → rnapolis-0.4.11}/src/rnapolis/common.py +25 -0
  6. {rnapolis-0.4.9 → rnapolis-0.4.11}/src/rnapolis/motif_extractor.py +12 -0
  7. {rnapolis-0.4.9 → rnapolis-0.4.11}/src/rnapolis/parser.py +18 -0
  8. rnapolis-0.4.11/src/rnapolis/transformer.py +134 -0
  9. {rnapolis-0.4.9 → rnapolis-0.4.11}/tests/test_common.py +34 -0
  10. rnapolis-0.4.11/tests/test_transformer.py +63 -0
  11. rnapolis-0.4.9/src/rnapolis/transformer.py +0 -65
  12. {rnapolis-0.4.9 → rnapolis-0.4.11}/LICENSE +0 -0
  13. {rnapolis-0.4.9 → rnapolis-0.4.11}/README.md +0 -0
  14. {rnapolis-0.4.9 → rnapolis-0.4.11}/pyproject.toml +0 -0
  15. {rnapolis-0.4.9 → rnapolis-0.4.11}/setup.cfg +0 -0
  16. {rnapolis-0.4.9 → rnapolis-0.4.11}/src/RNApolis.egg-info/dependency_links.txt +0 -0
  17. {rnapolis-0.4.9 → rnapolis-0.4.11}/src/RNApolis.egg-info/entry_points.txt +0 -0
  18. {rnapolis-0.4.9 → rnapolis-0.4.11}/src/RNApolis.egg-info/requires.txt +0 -0
  19. {rnapolis-0.4.9 → rnapolis-0.4.11}/src/RNApolis.egg-info/top_level.txt +0 -0
  20. {rnapolis-0.4.9 → rnapolis-0.4.11}/src/rnapolis/annotator.py +0 -0
  21. {rnapolis-0.4.9 → rnapolis-0.4.11}/src/rnapolis/clashfinder.py +0 -0
  22. {rnapolis-0.4.9 → rnapolis-0.4.11}/src/rnapolis/metareader.py +0 -0
  23. {rnapolis-0.4.9 → rnapolis-0.4.11}/src/rnapolis/molecule_filter.py +0 -0
  24. {rnapolis-0.4.9 → rnapolis-0.4.11}/src/rnapolis/rfam_folder.py +0 -0
  25. {rnapolis-0.4.9 → rnapolis-0.4.11}/src/rnapolis/tertiary.py +0 -0
  26. {rnapolis-0.4.9 → rnapolis-0.4.11}/src/rnapolis/util.py +0 -0
  27. {rnapolis-0.4.9 → rnapolis-0.4.11}/tests/test_annotator.py +0 -0
  28. {rnapolis-0.4.9 → rnapolis-0.4.11}/tests/test_bugfixes.py +0 -0
  29. {rnapolis-0.4.9 → rnapolis-0.4.11}/tests/test_metareader.py +0 -0
  30. {rnapolis-0.4.9 → rnapolis-0.4.11}/tests/test_parser.py +0 -0
  31. {rnapolis-0.4.9 → rnapolis-0.4.11}/tests/test_quadruplexes.py +0 -0
  32. {rnapolis-0.4.9 → rnapolis-0.4.11}/tests/test_rfam_folder.py +0 -0
  33. {rnapolis-0.4.9 → rnapolis-0.4.11}/tests/test_tertiary.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: RNApolis
3
- Version: 0.4.9
3
+ Version: 0.4.11
4
4
  Summary: A Python library containing RNA-related bioinformatics functions and classes
5
5
  Home-page: https://github.com/tzok/rnapolis-py
6
6
  Author: Tomasz Zok
@@ -5,7 +5,7 @@ with open("README.md") as f:
5
5
 
6
6
  setup(
7
7
  name="RNApolis",
8
- version="0.4.9",
8
+ version="0.4.11",
9
9
  packages=["rnapolis"],
10
10
  package_dir={"": "src"},
11
11
  author="Tomasz Zok",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: RNApolis
3
- Version: 0.4.9
3
+ Version: 0.4.11
4
4
  Summary: A Python library containing RNA-related bioinformatics functions and classes
5
5
  Home-page: https://github.com/tzok/rnapolis-py
6
6
  Author: Tomasz Zok
@@ -26,4 +26,5 @@ tests/test_metareader.py
26
26
  tests/test_parser.py
27
27
  tests/test_quadruplexes.py
28
28
  tests/test_rfam_folder.py
29
- tests/test_tertiary.py
29
+ tests/test_tertiary.py
30
+ tests/test_transformer.py
@@ -940,6 +940,27 @@ class BpSeq:
940
940
  solutions.add(self.__make_dot_bracket(regions, orders))
941
941
  return list(solutions)
942
942
 
943
+ def without_pseudoknots(self):
944
+ return BpSeq.from_dotbracket(self.dot_bracket.without_pseudoknots())
945
+
946
+ def without_isolated(self):
947
+ stems, _, _, _ = self.elements
948
+ to_unpair = []
949
+
950
+ for stem in stems:
951
+ if stem.strand5p.first == stem.strand5p.last:
952
+ to_unpair.append(stem.strand5p.first - 1)
953
+ to_unpair.append(stem.strand3p.first - 1)
954
+
955
+ if not to_unpair:
956
+ return self
957
+
958
+ entries = self.entries.copy()
959
+ for i in to_unpair:
960
+ entries[i].pair = 0
961
+
962
+ return BpSeq(entries)
963
+
943
964
 
944
965
  @dataclass
945
966
  class DotBracket:
@@ -990,6 +1011,10 @@ class DotBracket:
990
1011
  def __hash__(self) -> int:
991
1012
  return hash((self.sequence, self.structure))
992
1013
 
1014
+ def without_pseudoknots(self):
1015
+ structure = re.sub(r"[\[\]\{\}\<\>A-Za-z]", ".", self.structure)
1016
+ return DotBracket(self.sequence, structure)
1017
+
993
1018
 
994
1019
  @dataclass
995
1020
  class MultiStrandDotBracket(DotBracket):
@@ -9,6 +9,12 @@ def main():
9
9
  parser = argparse.ArgumentParser()
10
10
  parser.add_argument("--dbn", help="path to DotBracket file")
11
11
  parser.add_argument("--bpseq", help="path to BpSeq file")
12
+ parser.add_argument(
13
+ "--remove-pseudoknots", action="store_true", help="remove pseudoknots"
14
+ )
15
+ parser.add_argument(
16
+ "--remove-isolated", action="store_true", help="remove isolated base pairs"
17
+ )
12
18
  args = parser.parse_args()
13
19
 
14
20
  if args.dbn:
@@ -19,6 +25,12 @@ def main():
19
25
  parser.print_help()
20
26
  return
21
27
 
28
+ if args.remove_isolated:
29
+ bpseq = bpseq.without_isolated()
30
+
31
+ if args.remove_pseudoknots:
32
+ bpseq = bpseq.without_pseudoknots()
33
+
22
34
  print(f"Full dot-bracket:\n{bpseq.dot_bracket}")
23
35
  stems, single_strands, hairpins, loops = bpseq.elements
24
36
 
@@ -65,6 +65,7 @@ def parse_cif(
65
65
  atom_site = data[0].getObj("atom_site")
66
66
  mod_residue = data[0].getObj("pdbx_struct_mod_residue")
67
67
  entity_poly = data[0].getObj("entity_poly")
68
+ entity = data[0].getObj("entity")
68
69
 
69
70
  if atom_site:
70
71
  for row in atom_site.getRowList():
@@ -219,6 +220,23 @@ def parse_cif(
219
220
  if entity_id and pdbx_seq_one_letter_code_can:
220
221
  sequence_by_entity[entity_id] = pdbx_seq_one_letter_code_can
221
222
 
223
+ if entity:
224
+ for row in entity.getRowList():
225
+ row_dict = dict(zip(entity.getAttributeList(), row))
226
+
227
+ entity_id = row_dict.get("id", None)
228
+ type_ = row_dict.get("type", None)
229
+
230
+ if entity_id:
231
+ sequence_by_entity[entity_id] = sequence_by_entity.get(
232
+ entity_id, ""
233
+ )
234
+
235
+ if type_:
236
+ is_nucleic_acid_by_entity[entity_id] = (
237
+ is_nucleic_acid_by_entity.get(entity_id, type_)
238
+ )
239
+
222
240
  atoms = filter_clashing_atoms(atoms_to_process)
223
241
  return atoms, modified, sequence_by_entity, is_nucleic_acid_by_entity
224
242
 
@@ -0,0 +1,134 @@
1
+ #! /usr/bin/env python
2
+ import argparse
3
+ import string
4
+ import tempfile
5
+ from typing import Dict, Tuple
6
+
7
+ from mmcif.io.IoAdapterPy import IoAdapterPy
8
+ from mmcif.io.PdbxReader import DataCategory
9
+
10
+
11
+ def copy_from_to(
12
+ file_content: str,
13
+ category: str = "atom_site",
14
+ copy_from: str = "label_asym_id",
15
+ copy_to: str = "auth_asym_id",
16
+ ) -> str:
17
+ adapter = IoAdapterPy()
18
+
19
+ with tempfile.NamedTemporaryFile(mode="wt") as f:
20
+ f.write(file_content)
21
+ f.seek(0)
22
+ data = adapter.readFile(f.name)
23
+
24
+ if len(data) == 0 or category not in data[0].getObjNameList():
25
+ return file_content
26
+
27
+ category_obj = data[0].getObj(category)
28
+ attributes = category_obj.getAttributeList()
29
+
30
+ if copy_from not in attributes:
31
+ return file_content
32
+
33
+ transformed = []
34
+
35
+ if copy_to not in attributes:
36
+ attributes.append(copy_to)
37
+
38
+ for row in category_obj.getRowList():
39
+ i = attributes.index(copy_from)
40
+ j = attributes.index(copy_to)
41
+ if j >= len(row):
42
+ row.append(row[i])
43
+ else:
44
+ row[j] = row[i]
45
+ transformed.append(row)
46
+
47
+ data[0].replace(DataCategory(category_obj, attributes, transformed))
48
+
49
+ with tempfile.NamedTemporaryFile(mode="rt+") as f:
50
+ adapter.writeFile(f.name, data)
51
+ f.seek(0)
52
+ return f.read()
53
+
54
+
55
+ def replace_value(
56
+ file_content: str,
57
+ category: str = "atom_site",
58
+ column: str = "auth_asym_id",
59
+ values: str = "".join([c for c in string.printable if c not in string.whitespace]),
60
+ ) -> Tuple[str, Dict]:
61
+ adapter = IoAdapterPy()
62
+ with tempfile.NamedTemporaryFile(mode="wt") as f:
63
+ f.write(file_content)
64
+ f.seek(0)
65
+ data = adapter.readFile(f.name)
66
+
67
+ if len(data) == 0 or category not in data[0].getObjNameList():
68
+ return file_content, {}
69
+
70
+ category_obj = data[0].getObj(category)
71
+ attributes = category_obj.getAttributeList()
72
+
73
+ if column not in attributes:
74
+ return file_content, {}
75
+
76
+ transformed = []
77
+ mapping = {}
78
+
79
+ for row in category_obj.getRowList():
80
+ i = attributes.index(column)
81
+
82
+ if row[i] not in mapping:
83
+ mapping[row[i]] = values[len(mapping)]
84
+
85
+ row[i] = mapping[row[i]]
86
+ transformed.append(row)
87
+
88
+ data[0].replace(DataCategory(category_obj, attributes, transformed))
89
+
90
+ with tempfile.NamedTemporaryFile(mode="rt+") as f:
91
+ adapter.writeFile(f.name, data)
92
+ f.seek(0)
93
+ return f.read(), mapping
94
+
95
+
96
+ def main():
97
+ parser = argparse.ArgumentParser()
98
+ parser.add_argument("input", help="path to input mmCIF file")
99
+ parser.add_argument("output", help="path to output mmCIF file")
100
+ parser.add_argument(
101
+ "--category", help="name of the category to work on, e.g., atom_site"
102
+ )
103
+ parser.add_argument(
104
+ "--copy-from",
105
+ help="name of a data item to copy from, e.g., label_asym_id (exclusive with --replace)",
106
+ )
107
+ parser.add_argument(
108
+ "--copy-to",
109
+ help="name of a data item to copy to, e.g., auth_asym_id (exclusive with --replace)",
110
+ )
111
+ parser.add_argument(
112
+ "--replace",
113
+ help="name of a data item to replace values, e.g., auth_asym_id (exclusive with --copy-from and --copy-to)",
114
+ )
115
+ parser.add_argument(
116
+ "--values",
117
+ help="values to replace with, e.g., ABCDEFGHIJKLMNOPQRSTUVWXYZ (exclusive with --copy-from and --copy-to)",
118
+ )
119
+ args = parser.parse_args()
120
+
121
+ if args.copy_from and args.copy_to:
122
+ output = copy_from_to(args.input, args.category, args.copy_from, args.copy_to)
123
+ elif args.replace and args.values:
124
+ output = replace_value(args.input, args.category, args.replace, args.values)
125
+ else:
126
+ parser.print_help()
127
+ return
128
+
129
+ with open(args.output, "w") as f:
130
+ f.write(output)
131
+
132
+
133
+ if __name__ == "__main__":
134
+ main()
@@ -199,3 +199,37 @@ def test_high_level_pseudoknot():
199
199
  dot_bracket.structure
200
200
  == "([{<" + string.ascii_uppercase + ")]}>" + string.ascii_lowercase
201
201
  )
202
+
203
+
204
+ def test_bpseq_removal_options():
205
+ sequence = (
206
+ "GCGGAUUUAGCUCAGUUGGGAGAGCGCCAGACUGAAGAUCUGGAGGUCCUGUGUUCCAUCCACAGAAUUCGCACCA"
207
+ )
208
+ structure = (
209
+ "(((((((..((((....[[..)))).((((..(...)..)))).....(((((..]]...))))))))))))...."
210
+ )
211
+
212
+ bpseq = BpSeq.from_dotbracket(DotBracket(sequence, structure))
213
+ assert bpseq.dot_bracket.sequence == sequence
214
+ assert bpseq.dot_bracket.structure == structure
215
+
216
+ bpseq_without_isolated = bpseq.without_isolated()
217
+ assert bpseq_without_isolated.dot_bracket.sequence == sequence
218
+ assert (
219
+ bpseq_without_isolated.dot_bracket.structure
220
+ == "(((((((..((((....[[..)))).((((.........)))).....(((((..]]...))))))))))))...."
221
+ )
222
+
223
+ bpseq_without_pseudoknots = bpseq.without_pseudoknots()
224
+ assert bpseq_without_pseudoknots.dot_bracket.sequence == sequence
225
+ assert (
226
+ bpseq_without_pseudoknots.dot_bracket.structure
227
+ == "(((((((..((((........)))).((((..(...)..)))).....(((((.......))))))))))))...."
228
+ )
229
+
230
+ bpseq_without_both = bpseq.without_isolated().without_pseudoknots()
231
+ assert bpseq_without_both.dot_bracket.sequence == sequence
232
+ assert (
233
+ bpseq_without_both.dot_bracket.structure
234
+ == "(((((((..((((........)))).((((.........)))).....(((((.......))))))))))))...."
235
+ )
@@ -0,0 +1,63 @@
1
+ import tempfile
2
+
3
+ from rnapolis.parser import parse_cif
4
+ from rnapolis.transformer import copy_from_to, replace_value
5
+
6
+
7
+ def test_replace_value():
8
+ with open("tests/4gqj-assembly1.cif") as f:
9
+ content = f.read()
10
+
11
+ with tempfile.NamedTemporaryFile(mode="wt") as f:
12
+ f.write(content)
13
+ f.seek(0)
14
+ org_atoms, _, _, _ = parse_cif(f)
15
+
16
+ org_label_asym_id = set([atom.label.chain for atom in org_atoms if atom.label])
17
+ org_auth_asym_id = set([atom.auth.chain for atom in org_atoms if atom.auth])
18
+ assert org_label_asym_id == set(["A", "B", "A-2", "B-2"])
19
+ assert org_auth_asym_id == set(["A", "B", "A-2", "B-2"])
20
+
21
+ replaced_content, mapping = replace_value(
22
+ content, "atom_site", "auth_asym_id", "ABCD"
23
+ )
24
+ assert mapping == {"A": "A", "B": "B", "A-2": "C", "B-2": "D"}
25
+
26
+ with tempfile.NamedTemporaryFile(mode="rt+") as f:
27
+ f.write(replaced_content)
28
+ f.seek(0)
29
+ rep_atoms, _, _, _ = parse_cif(f)
30
+
31
+ rep_label_asym_id = set([atom.label.chain for atom in rep_atoms if atom.label])
32
+ rep_auth_asym_id = set([atom.auth.chain for atom in rep_atoms if atom.auth])
33
+ assert rep_label_asym_id == set(["A", "B", "A-2", "B-2"])
34
+ assert rep_auth_asym_id == set(["A", "B", "C", "D"])
35
+
36
+
37
+ def test_copy_from_to():
38
+ with open("tests/5it9.cif") as f:
39
+ content = f.read()
40
+
41
+ with tempfile.NamedTemporaryFile(mode="wt") as f:
42
+ f.write(content)
43
+ f.seek(0)
44
+ org_atoms, _, _, _ = parse_cif(f)
45
+
46
+ org_label_asym_id = set([atom.label.chain for atom in org_atoms if atom.label])
47
+ org_auth_asym_id = set([atom.auth.chain for atom in org_atoms if atom.auth])
48
+ assert org_label_asym_id == set(["HA", "IA"])
49
+ assert org_auth_asym_id == set(["2", "i"])
50
+
51
+ replaced_content = copy_from_to(
52
+ content, "atom_site", "label_asym_id", "auth_asym_id"
53
+ )
54
+
55
+ with tempfile.NamedTemporaryFile(mode="rt+") as f:
56
+ f.write(replaced_content)
57
+ f.seek(0)
58
+ rep_atoms, _, _, _ = parse_cif(f)
59
+
60
+ rep_label_asym_id = set([atom.label.chain for atom in rep_atoms if atom.label])
61
+ rep_auth_asym_id = set([atom.auth.chain for atom in rep_atoms if atom.auth])
62
+ assert rep_label_asym_id == set(["HA", "IA"])
63
+ assert rep_auth_asym_id == set(["HA", "IA"])
@@ -1,65 +0,0 @@
1
- #! /usr/bin/env python
2
- import argparse
3
- import sys
4
-
5
- from mmcif.io.IoAdapterPy import IoAdapterPy
6
- from mmcif.io.PdbxReader import DataCategory
7
-
8
-
9
- def main():
10
- parser = argparse.ArgumentParser()
11
- parser.add_argument("input", help="path to input mmCIF file")
12
- parser.add_argument("output", help="path to output mmCIF file")
13
- parser.add_argument(
14
- "--category", help="name of the category to work on, e.g., atom_site"
15
- )
16
- parser.add_argument(
17
- "--copy-from", help="name of a data item to copy from, e.g., label_asym_id"
18
- )
19
- parser.add_argument(
20
- "--copy-to", help="name of a data item to copy to, e.g., auth_asym_id"
21
- )
22
- args = parser.parse_args()
23
-
24
- adapter = IoAdapterPy()
25
- data = adapter.readFile(args.input)
26
-
27
- if len(data) == 0:
28
- print("Empty mmCIF file", file=sys.stderr)
29
- sys.exit(1)
30
-
31
- if args.category not in data[0].getObjNameList():
32
- print(f"Failed to find {args.category} in the mmCIF file", file=sys.stderr)
33
- sys.exit(1)
34
-
35
- category = data[0].getObj(args.category)
36
- attributes = category.getAttributeList()
37
-
38
- if args.copy_from not in attributes:
39
- print(
40
- f"Failed to find data item {args.copy_from} in {args.category}",
41
- file=sys.stderr,
42
- )
43
- sys.exit(1)
44
-
45
- transformed = []
46
-
47
- if args.copy_to not in attributes:
48
- attributes.append(args.copy_to)
49
-
50
- for row in category.getRowList():
51
- i = attributes.index(args.copy_from)
52
- j = attributes.index(args.copy_to)
53
- if j >= len(row):
54
- row.append(row[i])
55
- else:
56
- row[j] = row[i]
57
- transformed.append(row)
58
-
59
- data[0].replace(DataCategory(args.category, attributes, transformed))
60
-
61
- adapter.writeFile(args.output, data)
62
-
63
-
64
- if __name__ == "__main__":
65
- main()
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes