RNApolis 0.4.6__tar.gz → 0.4.7__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (31) hide show
  1. {rnapolis-0.4.6/src/RNApolis.egg-info → rnapolis-0.4.7}/PKG-INFO +1 -1
  2. {rnapolis-0.4.6 → rnapolis-0.4.7}/setup.py +1 -1
  3. {rnapolis-0.4.6 → rnapolis-0.4.7/src/RNApolis.egg-info}/PKG-INFO +1 -1
  4. {rnapolis-0.4.6 → rnapolis-0.4.7}/src/rnapolis/common.py +4 -1
  5. {rnapolis-0.4.6 → rnapolis-0.4.7}/src/rnapolis/tertiary.py +59 -29
  6. {rnapolis-0.4.6 → rnapolis-0.4.7}/tests/test_bugfixes.py +1 -1
  7. {rnapolis-0.4.6 → rnapolis-0.4.7}/tests/test_common.py +19 -0
  8. {rnapolis-0.4.6 → rnapolis-0.4.7}/LICENSE +0 -0
  9. {rnapolis-0.4.6 → rnapolis-0.4.7}/README.md +0 -0
  10. {rnapolis-0.4.6 → rnapolis-0.4.7}/pyproject.toml +0 -0
  11. {rnapolis-0.4.6 → rnapolis-0.4.7}/setup.cfg +0 -0
  12. {rnapolis-0.4.6 → rnapolis-0.4.7}/src/RNApolis.egg-info/SOURCES.txt +0 -0
  13. {rnapolis-0.4.6 → rnapolis-0.4.7}/src/RNApolis.egg-info/dependency_links.txt +0 -0
  14. {rnapolis-0.4.6 → rnapolis-0.4.7}/src/RNApolis.egg-info/entry_points.txt +0 -0
  15. {rnapolis-0.4.6 → rnapolis-0.4.7}/src/RNApolis.egg-info/requires.txt +0 -0
  16. {rnapolis-0.4.6 → rnapolis-0.4.7}/src/RNApolis.egg-info/top_level.txt +0 -0
  17. {rnapolis-0.4.6 → rnapolis-0.4.7}/src/rnapolis/annotator.py +0 -0
  18. {rnapolis-0.4.6 → rnapolis-0.4.7}/src/rnapolis/clashfinder.py +0 -0
  19. {rnapolis-0.4.6 → rnapolis-0.4.7}/src/rnapolis/metareader.py +0 -0
  20. {rnapolis-0.4.6 → rnapolis-0.4.7}/src/rnapolis/molecule_filter.py +0 -0
  21. {rnapolis-0.4.6 → rnapolis-0.4.7}/src/rnapolis/motif_extractor.py +0 -0
  22. {rnapolis-0.4.6 → rnapolis-0.4.7}/src/rnapolis/parser.py +0 -0
  23. {rnapolis-0.4.6 → rnapolis-0.4.7}/src/rnapolis/rfam_folder.py +0 -0
  24. {rnapolis-0.4.6 → rnapolis-0.4.7}/src/rnapolis/transformer.py +0 -0
  25. {rnapolis-0.4.6 → rnapolis-0.4.7}/src/rnapolis/util.py +0 -0
  26. {rnapolis-0.4.6 → rnapolis-0.4.7}/tests/test_annotator.py +0 -0
  27. {rnapolis-0.4.6 → rnapolis-0.4.7}/tests/test_metareader.py +0 -0
  28. {rnapolis-0.4.6 → rnapolis-0.4.7}/tests/test_parser.py +0 -0
  29. {rnapolis-0.4.6 → rnapolis-0.4.7}/tests/test_quadruplexes.py +0 -0
  30. {rnapolis-0.4.6 → rnapolis-0.4.7}/tests/test_rfam_folder.py +0 -0
  31. {rnapolis-0.4.6 → rnapolis-0.4.7}/tests/test_tertiary.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: RNApolis
3
- Version: 0.4.6
3
+ Version: 0.4.7
4
4
  Summary: A Python library containing RNA-related bioinformatics functions and classes
5
5
  Home-page: https://github.com/tzok/rnapolis-py
6
6
  Author: Tomasz Zok
@@ -5,7 +5,7 @@ with open("README.md") as f:
5
5
 
6
6
  setup(
7
7
  name="RNApolis",
8
- version="0.4.6",
8
+ version="0.4.7",
9
9
  packages=["rnapolis"],
10
10
  package_dir={"": "src"},
11
11
  author="Tomasz Zok",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: RNApolis
3
- Version: 0.4.6
3
+ Version: 0.4.7
4
4
  Summary: A Python library containing RNA-related bioinformatics functions and classes
5
5
  Home-page: https://github.com/tzok/rnapolis-py
6
6
  Author: Tomasz Zok
@@ -338,6 +338,9 @@ class Entry(Sequence):
338
338
  return self.pair
339
339
  raise IndexError()
340
340
 
341
+ def __lt__(self, other):
342
+ return self.index_ < other.index_
343
+
341
344
  def __len__(self) -> int:
342
345
  return 3
343
346
 
@@ -838,7 +841,7 @@ class BpSeq:
838
841
 
839
842
  for i in range(1, len(regions)):
840
843
  k, l, _ = regions[i]
841
- available = [True for i in range(10)]
844
+ available = [True for _ in range(len("([{<" + string.ascii_uppercase))]
842
845
 
843
846
  for j in range(i):
844
847
  m, n, _ = regions[j]
@@ -124,36 +124,17 @@ class Residue3D(Residue):
124
124
  outermost_atoms = {"A": "N9", "G": "N9", "C": "N1", "U": "N1", "T": "N1"}
125
125
  # Dist representing expected name of atom closest to the tetrad center
126
126
  innermost_atoms = {"A": "N6", "G": "O6", "C": "N4", "U": "O4", "T": "O4"}
127
+ # Heavy atoms in phosphate and ribose
128
+ phosphate_atoms = {"P", "OP1", "OP2", "O3'", "O5'"}
129
+ sugar_atoms = {"C1'", "C2'", "C3'", "C4'", "C5'", "O4'"}
127
130
  # Heavy atoms for each main nucleobase
128
131
  nucleobase_heavy_atoms = {
129
132
  "A": set(["N1", "C2", "N3", "C4", "C5", "C6", "N6", "N7", "C8", "N9"]),
130
133
  "G": set(["N1", "C2", "N2", "N3", "C4", "C5", "C6", "O6", "N7", "C8", "N9"]),
131
134
  "C": set(["N1", "C2", "O2", "N3", "C4", "N4", "C5", "C6"]),
132
135
  "U": set(["N1", "C2", "O2", "N3", "C4", "O4", "C5", "C6"]),
136
+ "T": set(["N1", "C2", "O2", "N3", "C4", "O4", "C5", "C5M", "C6"]),
133
137
  }
134
- # Heavy atoms in nucleotide
135
- nucleotide_heavy_atoms = (
136
- set(
137
- [
138
- "P",
139
- "OP1",
140
- "OP2",
141
- "O5'",
142
- "C5'",
143
- "C4'",
144
- "O4'",
145
- "C3'",
146
- "O3'",
147
- "C2'",
148
- "O2'",
149
- "C1'",
150
- ]
151
- )
152
- .union(nucleobase_heavy_atoms["A"])
153
- .union(nucleobase_heavy_atoms["G"])
154
- .union(nucleobase_heavy_atoms["C"])
155
- .union(nucleobase_heavy_atoms["U"])
156
- )
157
138
 
158
139
  def __lt__(self, other):
159
140
  return (self.model, self.chain, self.number, self.icode or " ") < (
@@ -202,9 +183,59 @@ class Residue3D(Residue):
202
183
 
203
184
  @cached_property
204
185
  def is_nucleotide(self) -> bool:
205
- return self.nucleotide_heavy_atoms.intersection(
206
- set([atom.name for atom in self.atoms])
186
+ scores = {"phosphate": 0.0, "sugar": 0.0, "base": 0.0, "connections": 0.0}
187
+ weights = {"phosphate": 0.25, "sugar": 0.25, "base": 0.25, "connections": 0.25}
188
+
189
+ residue_atoms = {atom.name for atom in self.atoms}
190
+
191
+ phosphate_match = len(residue_atoms.intersection(self.phosphate_atoms))
192
+ scores["phosphate"] = phosphate_match / len(self.phosphate_atoms)
193
+
194
+ sugar_match = len(residue_atoms.intersection(self.sugar_atoms))
195
+ scores["sugar"] = sugar_match / len(self.sugar_atoms)
196
+
197
+ nucleobase_atoms = {
198
+ key: self.nucleobase_heavy_atoms[key] for key in self.nucleobase_heavy_atoms
199
+ }
200
+ matches = {
201
+ key: len(residue_atoms.intersection(nucleobase_atoms[key]))
202
+ / len(nucleobase_atoms[key])
203
+ for key in nucleobase_atoms
204
+ }
205
+ best_match = max(matches.items(), key=lambda x: x[1])
206
+ scores["base"] = best_match[1]
207
+
208
+ connection_score = 0.0
209
+ distance_threshold = 2.0
210
+
211
+ if "P" in residue_atoms and "O5'" in residue_atoms:
212
+ p_atom = next(atom for atom in self.atoms if atom.name == "P")
213
+ o5_atom = next(atom for atom in self.atoms if atom.name == "O5'")
214
+ if (
215
+ numpy.linalg.norm(p_atom.coordinates - o5_atom.coordinates)
216
+ <= distance_threshold
217
+ ):
218
+ connection_score += 0.5
219
+ if "C1'" in residue_atoms:
220
+ c1_atom = next(atom for atom in self.atoms if atom.name == "C1'")
221
+ for base_connection in ["N9", "N1"]:
222
+ if base_connection in residue_atoms:
223
+ base_atom = next(
224
+ atom for atom in self.atoms if atom.name == base_connection
225
+ )
226
+ if (
227
+ numpy.linalg.norm(c1_atom.coordinates - base_atom.coordinates)
228
+ <= distance_threshold
229
+ ):
230
+ connection_score += 0.5
231
+ break
232
+
233
+ scores["connections"] = connection_score
234
+
235
+ probability = sum(
236
+ scores[component] * weights[component] for component in scores.keys()
207
237
  )
238
+ return probability > 0.5
208
239
 
209
240
  @cached_property
210
241
  def base_normal_vector(self) -> Optional[numpy.typing.NDArray[numpy.floating]]:
@@ -566,15 +597,14 @@ class Mapping2D3D:
566
597
  return self.__generate_bpseq(canonical)
567
598
 
568
599
  def __generate_bpseq(self, base_pairs):
600
+ nucleotides = list(filter(lambda r: r.is_nucleotide, self.structure3d.residues))
569
601
  result: Dict[int, List] = {}
570
602
  residue_map: Dict[Residue3D, int] = {}
571
603
  i = 1
572
604
 
573
- for j, residue in enumerate(
574
- filter(lambda r: r.is_nucleotide, self.structure3d.residues)
575
- ):
605
+ for j, residue in enumerate(nucleotides):
576
606
  if self.find_gaps and j > 0:
577
- previous = self.structure3d.residues[j - 1]
607
+ previous = nucleotides[j - 1]
578
608
 
579
609
  if (
580
610
  not previous.is_connected(residue)
@@ -42,7 +42,7 @@ def test_4WTI():
42
42
  mapping = Mapping2D3D(
43
43
  structure3d, base_interactions.basePairs, base_interactions.stackings, True
44
44
  )
45
- assert mapping.dot_bracket == ">strand_T\nACGG\n..((\n>strand_P\nCC\n))"
45
+ assert mapping.dot_bracket == ">strand_T\nCGG\n.((\n>strand_P\nCC\n))"
46
46
 
47
47
 
48
48
  # in 1HMH the bases are oriented in 45 degrees and it caused the program to identify invalid base pair
@@ -1,3 +1,4 @@
1
+ import string
1
2
  from collections import Counter
2
3
 
3
4
  import orjson
@@ -11,6 +12,7 @@ from rnapolis.common import (
11
12
  BaseRibose,
12
13
  BpSeq,
13
14
  DotBracket,
15
+ Entry,
14
16
  Interaction,
15
17
  LeontisWesthof,
16
18
  MultiStrandDotBracket,
@@ -180,3 +182,20 @@ def test_conflicted_base_pairs():
180
182
  assert (
181
183
  mapping.dot_bracket == ">strand_B\nGGACUAGCGGAGGCUAGUCC\n((((((((....))))))))"
182
184
  )
185
+
186
+
187
+ def test_high_level_pseudoknot():
188
+ entries = []
189
+ brackets = "([{<" + string.ascii_uppercase
190
+
191
+ for i in range(len(brackets)):
192
+ entries.append(Entry(i + 1, "C", i + len(brackets) + 1))
193
+ entries.append(Entry(i + len(brackets) + 1, "G", i + 1))
194
+
195
+ bpseq = BpSeq(sorted(entries))
196
+ dot_bracket = bpseq.fcfs
197
+ assert dot_bracket.sequence == "C" * len(brackets) + "G" * len(brackets)
198
+ assert (
199
+ dot_bracket.structure
200
+ == "([{<" + string.ascii_uppercase + ")]}>" + string.ascii_lowercase
201
+ )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes