RNApolis 0.4.9__py3-none-any.whl → 0.4.11__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: RNApolis
3
- Version: 0.4.9
3
+ Version: 0.4.11
4
4
  Summary: A Python library containing RNA-related bioinformatics functions and classes
5
5
  Home-page: https://github.com/tzok/rnapolis-py
6
6
  Author: Tomasz Zok
@@ -0,0 +1,17 @@
1
+ rnapolis/annotator.py,sha256=_hsSX2VHFvIQ47l_EA7lwGFXLiVLbhFPEsOQzBKbjRk,22100
2
+ rnapolis/clashfinder.py,sha256=i95kp0o6OWNqmJDBr-PbsZd7RY2iJtBDr7QqolJSuAQ,8513
3
+ rnapolis/common.py,sha256=LY6Uz96Br8ki_gA8LpfatgtvVbt9jOTkwgagayqTgf8,31251
4
+ rnapolis/metareader.py,sha256=I1-cXc2YNBPwa3zihAnMTjEsAo79tEKzSmWu5yvN1Pk,2071
5
+ rnapolis/molecule_filter.py,sha256=hB6-nXgjmw7FAsQ3bj0cZ2FvuW2I1PXunEfcdwEUB1o,7389
6
+ rnapolis/motif_extractor.py,sha256=Lfn1iEkhkP9eZD3GPEWNAfy00QO7QPCc8wM_XS1ory8,1147
7
+ rnapolis/parser.py,sha256=lHI6LyFbEEPdHOzbged1-Ov0tl6MpSungIPacip0Py0,15838
8
+ rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
9
+ rnapolis/tertiary.py,sha256=6t9ZB4w33-5n_M3sns1RoFXCOTgVAgGH4WDNG5OG9Kg,23426
10
+ rnapolis/transformer.py,sha256=aC0nBmHHJf5TyLvBIV57Jj3tlwpvHbPo347opfAOlQA,3844
11
+ rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
12
+ RNApolis-0.4.11.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
13
+ RNApolis-0.4.11.dist-info/METADATA,sha256=k9B0MCkh46cvBm3aoXEgmz_fwvNg4RRyAfAPJ08_X18,54323
14
+ RNApolis-0.4.11.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
15
+ RNApolis-0.4.11.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
16
+ RNApolis-0.4.11.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
17
+ RNApolis-0.4.11.dist-info/RECORD,,
rnapolis/common.py CHANGED
@@ -940,6 +940,27 @@ class BpSeq:
940
940
  solutions.add(self.__make_dot_bracket(regions, orders))
941
941
  return list(solutions)
942
942
 
943
def without_pseudoknots(self):
    """Return a new BpSeq rebuilt from this structure's pseudoknot-free dot-bracket.

    The work is delegated: the dot-bracket form of this object is asked for
    its own ``without_pseudoknots()`` variant, and the result is converted
    back with ``BpSeq.from_dotbracket``.
    """
    flattened = self.dot_bracket.without_pseudoknots()
    return BpSeq.from_dotbracket(flattened)
945
+
946
def without_isolated(self):
    """Return a BpSeq in which isolated base pairs are unpaired.

    A stem counts as isolated when its 5' strand starts and ends at the
    same position, i.e. the stem consists of a single base pair. Both
    partners of such a pair get ``pair = 0``.

    Returns:
        ``self`` when there is nothing to unpair; otherwise a new ``BpSeq``
        built from copies of the entries, so the entries of this object are
        left untouched.
    """
    import copy  # local import: only this method needs it

    stems, _, _, _ = self.elements
    to_unpair = []

    for stem in stems:
        if stem.strand5p.first == stem.strand5p.last:
            # Strand positions are 1-based here; entries are indexed from 0.
            to_unpair.append(stem.strand5p.first - 1)
            to_unpair.append(stem.strand3p.first - 1)

    if not to_unpair:
        return self

    # list.copy() would share the entry objects with self.entries, and
    # the assignment below would then modify the original structure too.
    entries = [copy.copy(entry) for entry in self.entries]
    for i in to_unpair:
        entries[i].pair = 0

    return BpSeq(entries)
963
+
943
964
 
944
965
  @dataclass
945
966
  class DotBracket:
@@ -990,6 +1011,10 @@ class DotBracket:
990
1011
def __hash__(self) -> int:
    """Hash the object by its (sequence, structure) pair."""
    identity = (self.sequence, self.structure)
    return hash(identity)
992
1013
 
1014
def without_pseudoknots(self):
    """Return a DotBracket with every non-parenthesis bracket symbol replaced by a dot.

    Square, curly and angle brackets as well as ASCII letters in the
    structure string become ``.``; the sequence is carried over unchanged.
    """
    higher_order_symbols = r"[\[\]\{\}\<\>A-Za-z]"
    flattened = re.sub(higher_order_symbols, ".", self.structure)
    return DotBracket(self.sequence, flattened)
1017
+
993
1018
 
994
1019
  @dataclass
995
1020
  class MultiStrandDotBracket(DotBracket):
@@ -9,6 +9,12 @@ def main():
9
9
  parser = argparse.ArgumentParser()
10
10
  parser.add_argument("--dbn", help="path to DotBracket file")
11
11
  parser.add_argument("--bpseq", help="path to BpSeq file")
12
+ parser.add_argument(
13
+ "--remove-pseudoknots", action="store_true", help="remove pseudoknots"
14
+ )
15
+ parser.add_argument(
16
+ "--remove-isolated", action="store_true", help="remove isolated base pairs"
17
+ )
12
18
  args = parser.parse_args()
13
19
 
14
20
  if args.dbn:
@@ -19,6 +25,12 @@ def main():
19
25
  parser.print_help()
20
26
  return
21
27
 
28
+ if args.remove_isolated:
29
+ bpseq = bpseq.without_isolated()
30
+
31
+ if args.remove_pseudoknots:
32
+ bpseq = bpseq.without_pseudoknots()
33
+
22
34
  print(f"Full dot-bracket:\n{bpseq.dot_bracket}")
23
35
  stems, single_strands, hairpins, loops = bpseq.elements
24
36
 
rnapolis/parser.py CHANGED
@@ -65,6 +65,7 @@ def parse_cif(
65
65
  atom_site = data[0].getObj("atom_site")
66
66
  mod_residue = data[0].getObj("pdbx_struct_mod_residue")
67
67
  entity_poly = data[0].getObj("entity_poly")
68
+ entity = data[0].getObj("entity")
68
69
 
69
70
  if atom_site:
70
71
  for row in atom_site.getRowList():
@@ -219,6 +220,23 @@ def parse_cif(
219
220
  if entity_id and pdbx_seq_one_letter_code_can:
220
221
  sequence_by_entity[entity_id] = pdbx_seq_one_letter_code_can
221
222
 
223
+ if entity:
224
+ for row in entity.getRowList():
225
+ row_dict = dict(zip(entity.getAttributeList(), row))
226
+
227
+ entity_id = row_dict.get("id", None)
228
+ type_ = row_dict.get("type", None)
229
+
230
+ if entity_id:
231
+ sequence_by_entity[entity_id] = sequence_by_entity.get(
232
+ entity_id, ""
233
+ )
234
+
235
+ if type_:
236
+ is_nucleic_acid_by_entity[entity_id] = (
237
+ is_nucleic_acid_by_entity.get(entity_id, type_)
238
+ )
239
+
222
240
  atoms = filter_clashing_atoms(atoms_to_process)
223
241
  return atoms, modified, sequence_by_entity, is_nucleic_acid_by_entity
224
242
 
rnapolis/transformer.py CHANGED
@@ -1,64 +1,133 @@
1
1
  #! /usr/bin/env python
2
2
  import argparse
3
- import sys
3
+ import string
4
+ import tempfile
5
+ from typing import Dict, Tuple
4
6
 
5
7
  from mmcif.io.IoAdapterPy import IoAdapterPy
6
8
  from mmcif.io.PdbxReader import DataCategory
7
9
 
8
10
 
9
- def main():
10
- parser = argparse.ArgumentParser()
11
- parser.add_argument("input", help="path to input mmCIF file")
12
- parser.add_argument("output", help="path to output mmCIF file")
13
- parser.add_argument(
14
- "--category", help="name of the category to work on, e.g., atom_site"
15
- )
16
- parser.add_argument(
17
- "--copy-from", help="name of a data item to copy from, e.g., label_asym_id"
18
- )
19
- parser.add_argument(
20
- "--copy-to", help="name of a data item to copy to, e.g., auth_asym_id"
21
- )
22
- args = parser.parse_args()
23
-
11
def copy_from_to(
    file_content: str,
    category: str = "atom_site",
    copy_from: str = "label_asym_id",
    copy_to: str = "auth_asym_id",
) -> str:
    """Copy the values of one data item onto another within an mmCIF category.

    Args:
        file_content: text of an mmCIF file (the content, not a path).
        category: name of the category to work on.
        copy_from: name of the data item whose values are read.
        copy_to: name of the data item that receives the values; it is
            added to the category when it does not exist yet.

    Returns:
        The rewritten mmCIF text. ``file_content`` is returned unchanged
        when the file holds no data, the category is missing, or
        ``copy_from`` is not an item of that category.
    """
    adapter = IoAdapterPy()

    # The adapter is given a path, so the text goes through a temp file.
    with tempfile.NamedTemporaryFile(mode="wt") as f:
        f.write(file_content)
        f.flush()  # push buffered text to disk before the adapter opens f.name
        data = adapter.readFile(f.name)

    if len(data) == 0 or category not in data[0].getObjNameList():
        return file_content

    category_obj = data[0].getObj(category)
    attributes = category_obj.getAttributeList()

    if copy_from not in attributes:
        return file_content

    if copy_to not in attributes:
        attributes.append(copy_to)

    # Both positions are fixed once the attribute list is final.
    source = attributes.index(copy_from)
    target = attributes.index(copy_to)

    transformed = []

    for row in category_obj.getRowList():
        if target >= len(row):
            # Row predates the newly added item: extend it.
            row.append(row[source])
        else:
            row[target] = row[source]
        transformed.append(row)

    # The replacement category is built from the category *name*,
    # not from the category object that was read above.
    data[0].replace(DataCategory(category, attributes, transformed))

    with tempfile.NamedTemporaryFile(mode="rt+") as f:
        adapter.writeFile(f.name, data)
        f.seek(0)
        return f.read()
53
+
54
+
55
def replace_value(
    file_content: str,
    category: str = "atom_site",
    column: str = "auth_asym_id",
    values: str = "".join([c for c in string.printable if c not in string.whitespace]),
) -> Tuple[str, Dict]:
    """Replace each distinct value of an mmCIF data item with a single character.

    Distinct values are assigned characters from ``values`` in order of
    first appearance in the category rows.

    Args:
        file_content: text of an mmCIF file (the content, not a path).
        category: name of the category to work on.
        column: name of the data item whose values are replaced.
        values: pool of replacement characters, consumed left to right.

    Returns:
        A pair ``(text, mapping)`` with the rewritten mmCIF text and a dict
        from original value to its replacement. ``(file_content, {})`` is
        returned when the file holds no data, the category is missing, or
        ``column`` is not an item of that category.

    Raises:
        IndexError: when the item holds more distinct values than there
            are characters in ``values``.
    """
    adapter = IoAdapterPy()

    # The adapter is given a path, so the text goes through a temp file.
    with tempfile.NamedTemporaryFile(mode="wt") as f:
        f.write(file_content)
        f.flush()  # push buffered text to disk before the adapter opens f.name
        data = adapter.readFile(f.name)

    if len(data) == 0 or category not in data[0].getObjNameList():
        return file_content, {}

    category_obj = data[0].getObj(category)
    attributes = category_obj.getAttributeList()

    if column not in attributes:
        return file_content, {}

    i = attributes.index(column)
    transformed = []
    mapping = {}

    for row in category_obj.getRowList():
        original = row[i]

        if original not in mapping:
            if len(mapping) >= len(values):
                raise IndexError(
                    f"Not enough replacement values ({len(values)}) "
                    f"for the distinct values of {column} in {category}"
                )
            mapping[original] = values[len(mapping)]

        row[i] = mapping[original]
        transformed.append(row)

    # The replacement category is built from the category *name*,
    # not from the category object that was read above.
    data[0].replace(DataCategory(category, attributes, transformed))

    with tempfile.NamedTemporaryFile(mode="rt+") as f:
        adapter.writeFile(f.name, data)
        f.seek(0)
        return f.read(), mapping
94
+
95
+
96
def main():
    """Command-line entry point: transform an mmCIF file and write the result.

    Two modes are available. With ``--copy-from`` and ``--copy-to`` the
    values of one data item are copied onto another; with ``--replace`` and
    ``--values`` the distinct values of a data item are replaced by
    characters from the given pool. When neither pair is complete, the help
    text is printed and nothing is written. If both pairs are given, the
    copy mode is used.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input", help="path to input mmCIF file")
    parser.add_argument("output", help="path to output mmCIF file")
    parser.add_argument(
        "--category",
        default="atom_site",  # same default as copy_from_to / replace_value
        help="name of the category to work on, e.g., atom_site",
    )
    parser.add_argument(
        "--copy-from",
        help="name of a data item to copy from, e.g., label_asym_id (exclusive with --replace)",
    )
    parser.add_argument(
        "--copy-to",
        help="name of a data item to copy to, e.g., auth_asym_id (exclusive with --replace)",
    )
    parser.add_argument(
        "--replace",
        help="name of a data item to replace values, e.g., auth_asym_id (exclusive with --copy-from and --copy-to)",
    )
    parser.add_argument(
        "--values",
        help="values to replace with, e.g., ABCDEFGHIJKLMNOPQRSTUVWXYZ (exclusive with --copy-from and --copy-to)",
    )
    args = parser.parse_args()

    copy_mode = bool(args.copy_from and args.copy_to)
    replace_mode = bool(args.replace and args.values)

    if not (copy_mode or replace_mode):
        parser.print_help()
        return

    # The transformation functions take the file's text, not its path.
    with open(args.input) as f:
        file_content = f.read()

    if copy_mode:
        output = copy_from_to(
            file_content, args.category, args.copy_from, args.copy_to
        )
    else:
        # replace_value returns (text, mapping); only the text is written out.
        output, _ = replace_value(
            file_content, args.category, args.replace, args.values
        )

    with open(args.output, "w") as f:
        f.write(output)
62
131
 
63
132
 
64
133
  if __name__ == "__main__":
@@ -1,17 +0,0 @@
1
- rnapolis/annotator.py,sha256=_hsSX2VHFvIQ47l_EA7lwGFXLiVLbhFPEsOQzBKbjRk,22100
2
- rnapolis/clashfinder.py,sha256=i95kp0o6OWNqmJDBr-PbsZd7RY2iJtBDr7QqolJSuAQ,8513
3
- rnapolis/common.py,sha256=7KSWZzqR7ntpaS6VRDYgpP2pC1dnBttOzYB06hQzWEI,30499
4
- rnapolis/metareader.py,sha256=I1-cXc2YNBPwa3zihAnMTjEsAo79tEKzSmWu5yvN1Pk,2071
5
- rnapolis/molecule_filter.py,sha256=hB6-nXgjmw7FAsQ3bj0cZ2FvuW2I1PXunEfcdwEUB1o,7389
6
- rnapolis/motif_extractor.py,sha256=duHvpi9Ulcny9K60E6VBpz5RpJZw-KdTB4_Ph0iP478,774
7
- rnapolis/parser.py,sha256=2pQYy0sh8TCpeluMmmSJ7C5dudK_bsfstTWCdpwwpNU,15193
8
- rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
9
- rnapolis/tertiary.py,sha256=6t9ZB4w33-5n_M3sns1RoFXCOTgVAgGH4WDNG5OG9Kg,23426
10
- rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
11
- rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
12
- RNApolis-0.4.9.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
13
- RNApolis-0.4.9.dist-info/METADATA,sha256=gOlfioYZ9tvECL9I5UWTNeWkXnJ1nv3se8kys2jAzjw,54322
14
- RNApolis-0.4.9.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
15
- RNApolis-0.4.9.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
16
- RNApolis-0.4.9.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
17
- RNApolis-0.4.9.dist-info/RECORD,,