RNApolis 0.8.0__py3-none-any.whl → 0.8.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rnapolis/adapter.py +27 -117
- rnapolis/annotator.py +256 -20
- rnapolis/common.py +13 -0
- rnapolis/parser_v2.py +774 -240
- rnapolis/splitter.py +18 -5
- rnapolis/tertiary.py +383 -10
- rnapolis/unifier.py +20 -5
- {rnapolis-0.8.0.dist-info → rnapolis-0.8.2.dist-info}/METADATA +1 -1
- {rnapolis-0.8.0.dist-info → rnapolis-0.8.2.dist-info}/RECORD +13 -13
- {rnapolis-0.8.0.dist-info → rnapolis-0.8.2.dist-info}/WHEEL +0 -0
- {rnapolis-0.8.0.dist-info → rnapolis-0.8.2.dist-info}/entry_points.txt +0 -0
- {rnapolis-0.8.0.dist-info → rnapolis-0.8.2.dist-info}/licenses/LICENSE +0 -0
- {rnapolis-0.8.0.dist-info → rnapolis-0.8.2.dist-info}/top_level.txt +0 -0
rnapolis/splitter.py
CHANGED
@@ -3,10 +3,14 @@ import argparse
|
|
3
3
|
import os
|
4
4
|
import sys
|
5
5
|
|
6
|
-
import pandas as pd
|
7
|
-
|
8
6
|
from rnapolis.parser import is_cif
|
9
|
-
from rnapolis.parser_v2 import
|
7
|
+
from rnapolis.parser_v2 import (
|
8
|
+
fit_to_pdb,
|
9
|
+
parse_cif_atoms,
|
10
|
+
parse_pdb_atoms,
|
11
|
+
write_cif,
|
12
|
+
write_pdb,
|
13
|
+
)
|
10
14
|
|
11
15
|
|
12
16
|
def main():
|
@@ -97,12 +101,21 @@ def main():
|
|
97
101
|
|
98
102
|
try:
|
99
103
|
if output_format == "PDB":
|
100
|
-
|
104
|
+
df_to_write = fit_to_pdb(model_df)
|
105
|
+
write_pdb(df_to_write, output_path)
|
101
106
|
else: # mmCIF
|
102
107
|
write_cif(model_df, output_path)
|
108
|
+
except ValueError as e:
|
109
|
+
# Handle errors specifically from fit_to_pdb
|
110
|
+
print(
|
111
|
+
f"Error fitting model {model_num} from {args.file} to PDB: {e}. Skipping model.",
|
112
|
+
file=sys.stderr,
|
113
|
+
)
|
114
|
+
continue
|
103
115
|
except Exception as e:
|
116
|
+
# Handle general writing errors
|
104
117
|
print(
|
105
|
-
f"Error writing file {output_path}: {e}",
|
118
|
+
f"Error writing file {output_path} for model {model_num}: {e}",
|
106
119
|
file=sys.stderr,
|
107
120
|
)
|
108
121
|
# Optionally continue to next model or exit
|
rnapolis/tertiary.py
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
import itertools
|
1
2
|
import logging
|
2
3
|
import math
|
3
4
|
from collections import defaultdict
|
@@ -7,18 +8,22 @@ from typing import Dict, List, Optional, Set, Tuple, Union
|
|
7
8
|
|
8
9
|
import numpy
|
9
10
|
import numpy.typing
|
11
|
+
from scipy.stats import vonmises
|
10
12
|
|
11
13
|
from rnapolis.common import (
|
12
14
|
BasePair,
|
13
15
|
BpSeq,
|
14
16
|
Entry,
|
15
17
|
GlycosidicBond,
|
18
|
+
InterStemParameters,
|
16
19
|
LeontisWesthof,
|
17
20
|
Residue,
|
18
21
|
ResidueAuth,
|
19
22
|
ResidueLabel,
|
20
23
|
Saenger,
|
21
24
|
Stacking,
|
25
|
+
Stem,
|
26
|
+
Strand,
|
22
27
|
)
|
23
28
|
|
24
29
|
BASE_ATOMS = {
|
@@ -579,6 +584,57 @@ class Mapping2D3D:
|
|
579
584
|
if base_pair.is_canonical and base_pair.nt1 < base_pair.nt2
|
580
585
|
]
|
581
586
|
|
587
|
+
while True:
|
588
|
+
matches = defaultdict(set)
|
589
|
+
|
590
|
+
for base_pair in canonical:
|
591
|
+
matches[base_pair.nt1_3d].add(base_pair)
|
592
|
+
matches[base_pair.nt2_3d].add(base_pair)
|
593
|
+
|
594
|
+
for pairs in matches.values():
|
595
|
+
if len(pairs) > 1:
|
596
|
+
pairs = sorted(pairs, key=pair_scoring_function)
|
597
|
+
canonical.remove(pairs[-1])
|
598
|
+
break
|
599
|
+
else:
|
600
|
+
break
|
601
|
+
|
602
|
+
return self._generated_bpseq_data[0]
|
603
|
+
|
604
|
+
@cached_property
|
605
|
+
def bpseq_index_to_residue_map(self) -> Dict[int, Residue3D]:
|
606
|
+
"""Mapping from BpSeq entry index to the corresponding Residue3D object."""
|
607
|
+
return self._generated_bpseq_data[1]
|
608
|
+
|
609
|
+
@cached_property
|
610
|
+
def _generated_bpseq_data(self) -> Tuple[BpSeq, Dict[int, Residue3D]]:
|
611
|
+
"""Helper property to compute BpSeq and index map simultaneously."""
|
612
|
+
|
613
|
+
def pair_scoring_function(pair: BasePair3D) -> int:
|
614
|
+
if pair.saenger is not None:
|
615
|
+
if pair.saenger in (Saenger.XIX, Saenger.XX):
|
616
|
+
return 0, pair.nt1, pair.nt2
|
617
|
+
else:
|
618
|
+
return 1, pair.nt1, pair.nt2
|
619
|
+
|
620
|
+
sequence = "".join(
|
621
|
+
sorted(
|
622
|
+
[
|
623
|
+
pair.nt1_3d.one_letter_name.upper(),
|
624
|
+
pair.nt2_3d.one_letter_name.upper(),
|
625
|
+
]
|
626
|
+
)
|
627
|
+
)
|
628
|
+
if sequence in ("AU", "AT", "CG"):
|
629
|
+
return 0, pair.nt1, pair.nt2
|
630
|
+
return 1, pair.nt1, pair.nt2
|
631
|
+
|
632
|
+
canonical = [
|
633
|
+
base_pair
|
634
|
+
for base_pair in self.base_pairs
|
635
|
+
if base_pair.is_canonical and base_pair.nt1 < base_pair.nt2
|
636
|
+
]
|
637
|
+
|
582
638
|
while True:
|
583
639
|
matches = defaultdict(set)
|
584
640
|
|
@@ -596,10 +652,12 @@ class Mapping2D3D:
|
|
596
652
|
|
597
653
|
return self.__generate_bpseq(canonical)
|
598
654
|
|
599
|
-
def __generate_bpseq(self, base_pairs):
|
655
|
+
def __generate_bpseq(self, base_pairs) -> Tuple[BpSeq, Dict[int, Residue3D]]:
|
656
|
+
"""Generates BpSeq entries and a map from index to Residue3D."""
|
600
657
|
nucleotides = list(filter(lambda r: r.is_nucleotide, self.structure3d.residues))
|
601
658
|
result: Dict[int, List] = {}
|
602
659
|
residue_map: Dict[Residue3D, int] = {}
|
660
|
+
index_to_residue_map: Dict[int, Residue3D] = {}
|
603
661
|
i = 1
|
604
662
|
|
605
663
|
for j, residue in enumerate(nucleotides):
|
@@ -616,6 +674,7 @@ class Mapping2D3D:
|
|
616
674
|
|
617
675
|
result[i] = [i, residue.one_letter_name, 0]
|
618
676
|
residue_map[residue] = i
|
677
|
+
index_to_residue_map[i] = residue
|
619
678
|
i += 1
|
620
679
|
|
621
680
|
for base_pair in base_pairs:
|
@@ -631,7 +690,21 @@ class Mapping2D3D:
|
|
631
690
|
Entry(index_, sequence, pair)
|
632
691
|
for index_, sequence, pair in result.values()
|
633
692
|
]
|
634
|
-
)
|
693
|
+
), index_to_residue_map
|
694
|
+
|
695
|
+
def find_residue_for_entry(self, entry: Entry) -> Optional[Residue3D]:
|
696
|
+
"""Finds the Residue3D object corresponding to a BpSeq Entry."""
|
697
|
+
return self.bpseq_index_to_residue_map.get(entry.index_)
|
698
|
+
|
699
|
+
def get_residues_for_strand(self, strand: Strand) -> List[Residue3D]:
|
700
|
+
"""Retrieves the list of Residue3D objects corresponding to a Strand."""
|
701
|
+
residues = []
|
702
|
+
# Strand indices are 1-based and inclusive
|
703
|
+
for index_ in range(strand.first, strand.last + 1):
|
704
|
+
residue = self.bpseq_index_to_residue_map.get(index_)
|
705
|
+
if residue:
|
706
|
+
residues.append(residue)
|
707
|
+
return residues
|
635
708
|
|
636
709
|
@cached_property
|
637
710
|
def dot_bracket(self) -> str:
|
@@ -647,6 +720,196 @@ class Mapping2D3D:
|
|
647
720
|
i += len(sequence)
|
648
721
|
return "\n".join(result)
|
649
722
|
|
723
|
+
def _calculate_pair_centroid(
|
724
|
+
self, residue1: Residue3D, residue2: Residue3D
|
725
|
+
) -> Optional[numpy.typing.NDArray[numpy.floating]]:
|
726
|
+
"""Calculates the geometric mean of base atoms for a pair of residues."""
|
727
|
+
base_atoms = []
|
728
|
+
for residue in [residue1, residue2]:
|
729
|
+
base_atom_names = Residue3D.nucleobase_heavy_atoms.get(
|
730
|
+
residue.one_letter_name.upper(), set()
|
731
|
+
)
|
732
|
+
if not base_atom_names:
|
733
|
+
logging.warning(
|
734
|
+
f"Could not find base atom definition for residue {residue.full_name}"
|
735
|
+
)
|
736
|
+
continue
|
737
|
+
for atom in residue.atoms:
|
738
|
+
if atom.name in base_atom_names:
|
739
|
+
base_atoms.append(atom)
|
740
|
+
|
741
|
+
if not base_atoms:
|
742
|
+
logging.warning(
|
743
|
+
f"No base atoms found for pair {residue1.full_name} - {residue2.full_name}"
|
744
|
+
)
|
745
|
+
return None
|
746
|
+
|
747
|
+
coordinates = [atom.coordinates for atom in base_atoms]
|
748
|
+
return numpy.mean(coordinates, axis=0)
|
749
|
+
|
750
|
+
def get_stem_coordinates(
|
751
|
+
self, stem: Stem
|
752
|
+
) -> List[numpy.typing.NDArray[numpy.floating]]:
|
753
|
+
"""
|
754
|
+
Calculates the geometric centroid for each base pair in the stem.
|
755
|
+
|
756
|
+
Args:
|
757
|
+
stem: The Stem object.
|
758
|
+
|
759
|
+
Returns:
|
760
|
+
A list of numpy arrays, where each array is the centroid of a
|
761
|
+
base pair in the stem. Returns an empty list if no centroids
|
762
|
+
can be calculated.
|
763
|
+
"""
|
764
|
+
all_pair_centroids = []
|
765
|
+
stem_len = stem.strand5p.last - stem.strand5p.first + 1
|
766
|
+
|
767
|
+
for i in range(stem_len):
|
768
|
+
idx5p = stem.strand5p.first + i
|
769
|
+
idx3p = stem.strand3p.last - i
|
770
|
+
try:
|
771
|
+
res5p = self.bpseq_index_to_residue_map[idx5p]
|
772
|
+
res3p = self.bpseq_index_to_residue_map[idx3p]
|
773
|
+
centroid = self._calculate_pair_centroid(res5p, res3p)
|
774
|
+
if centroid is not None:
|
775
|
+
all_pair_centroids.append(centroid)
|
776
|
+
except KeyError:
|
777
|
+
logging.warning(
|
778
|
+
f"Could not find residues for pair {idx5p}-{idx3p} in stem {stem}"
|
779
|
+
)
|
780
|
+
continue # Continue calculating other centroids
|
781
|
+
|
782
|
+
return all_pair_centroids
|
783
|
+
|
784
|
+
def calculate_inter_stem_parameters(
|
785
|
+
self, stem1: Stem, stem2: Stem, kappa: float = 10.0
|
786
|
+
) -> Optional[Dict[str, Union[str, float]]]:
|
787
|
+
"""
|
788
|
+
Calculates geometric parameters between two stems based on closest endpoints
|
789
|
+
and the probability of the observed torsion angle based on an expected
|
790
|
+
A-RNA twist using a von Mises distribution.
|
791
|
+
|
792
|
+
Args:
|
793
|
+
stem1: The first Stem object.
|
794
|
+
stem2: The second Stem object.
|
795
|
+
kappa: Concentration parameter for the von Mises distribution (default: 10.0).
|
796
|
+
|
797
|
+
Returns:
|
798
|
+
A dictionary containing:
|
799
|
+
- 'type': The type of closest endpoint pair ('cs55', 'cs53', 'cs35', 'cs33').
|
800
|
+
- 'torsion_angle': The calculated torsion angle in degrees.
|
801
|
+
- 'min_endpoint_distance': The minimum distance between the endpoints.
|
802
|
+
- 'torsion_angle_pdf': The probability density function (PDF) value of the
|
803
|
+
torsion angle under the von Mises distribution.
|
804
|
+
- 'min_endpoint_distance_pdf': The probability density function (PDF) value
|
805
|
+
based on the minimum endpoint distance using a Lennard-Jones-like function.
|
806
|
+
- 'coaxial_probability': The normalized product of the torsion angle PDF and
|
807
|
+
distance PDF, indicating the likelihood of coaxial stacking (0-1).
|
808
|
+
Returns None if either stem has fewer than 2 base pairs or centroids
|
809
|
+
cannot be calculated.
|
810
|
+
"""
|
811
|
+
stem1_centroids = self.get_stem_coordinates(stem1)
|
812
|
+
stem2_centroids = self.get_stem_coordinates(stem2)
|
813
|
+
|
814
|
+
# Need at least 2 centroids (base pairs) per stem
|
815
|
+
if len(stem1_centroids) < 2 or len(stem2_centroids) < 2:
|
816
|
+
logging.warning(
|
817
|
+
f"Cannot calculate inter-stem parameters for stems {stem1} and {stem2}: "
|
818
|
+
f"Insufficient base pairs ({len(stem1_centroids)} and {len(stem2_centroids)} respectively)."
|
819
|
+
)
|
820
|
+
return None
|
821
|
+
|
822
|
+
# Define the endpoints for each stem
|
823
|
+
s1_first, s1_last = stem1_centroids[0], stem1_centroids[-1]
|
824
|
+
s2_first, s2_last = stem2_centroids[0], stem2_centroids[-1]
|
825
|
+
|
826
|
+
# Calculate distances between the four endpoint pairs
|
827
|
+
endpoint_distances = {
|
828
|
+
"cs55": numpy.linalg.norm(s1_first - s2_first),
|
829
|
+
"cs53": numpy.linalg.norm(s1_first - s2_last),
|
830
|
+
"cs35": numpy.linalg.norm(s1_last - s2_first),
|
831
|
+
"cs33": numpy.linalg.norm(s1_last - s2_last),
|
832
|
+
}
|
833
|
+
|
834
|
+
# Find the minimum endpoint distance and the corresponding pair
|
835
|
+
min_endpoint_distance = min(endpoint_distances.values())
|
836
|
+
closest_pair_key = min(endpoint_distances, key=endpoint_distances.get)
|
837
|
+
|
838
|
+
# Select the points for torsion and determine mu based on the closest pair.
|
839
|
+
# s1p2 and s2p1 must be the endpoints involved in the minimum distance.
|
840
|
+
a_rna_twist = 32.7
|
841
|
+
mu_degrees = 0.0
|
842
|
+
|
843
|
+
if closest_pair_key == "cs55":
|
844
|
+
# Closest: s1_first and s2_first
|
845
|
+
# Torsion points: s1_second, s1_first, s2_first, s2_second
|
846
|
+
s1p1, s1p2 = stem1_centroids[1], stem1_centroids[0]
|
847
|
+
s2p1, s2p2 = stem2_centroids[0], stem2_centroids[1]
|
848
|
+
mu_degrees = 180.0 - a_rna_twist
|
849
|
+
elif closest_pair_key == "cs53":
|
850
|
+
# Closest: s1_first and s2_last
|
851
|
+
# Torsion points: s1_second, s1_first, s2_last, s2_second_last
|
852
|
+
s1p1, s1p2 = stem1_centroids[1], stem1_centroids[0]
|
853
|
+
s2p1, s2p2 = stem2_centroids[-1], stem2_centroids[-2]
|
854
|
+
mu_degrees = 0.0 - a_rna_twist
|
855
|
+
elif closest_pair_key == "cs35":
|
856
|
+
# Closest: s1_last and s2_first
|
857
|
+
# Torsion points: s1_second_last, s1_last, s2_first, s2_second
|
858
|
+
s1p1, s1p2 = stem1_centroids[-2], stem1_centroids[-1]
|
859
|
+
s2p1, s2p2 = stem2_centroids[0], stem2_centroids[1]
|
860
|
+
mu_degrees = 0.0 + a_rna_twist
|
861
|
+
elif closest_pair_key == "cs33":
|
862
|
+
# Closest: s1_last and s2_last
|
863
|
+
# Torsion points: s1_second_last, s1_last, s2_last, s2_second_last
|
864
|
+
s1p1, s1p2 = stem1_centroids[-2], stem1_centroids[-1]
|
865
|
+
s2p1, s2p2 = stem2_centroids[-1], stem2_centroids[-2]
|
866
|
+
mu_degrees = 180.0 + a_rna_twist
|
867
|
+
else:
|
868
|
+
# This case should ideally not be reached if endpoint_distances is not empty
|
869
|
+
logging.error(
|
870
|
+
f"Unexpected closest pair key: {closest_pair_key}. Cannot calculate parameters."
|
871
|
+
)
|
872
|
+
return None
|
873
|
+
|
874
|
+
# Calculate torsion angle (in radians)
|
875
|
+
torsion_radians = calculate_torsion_angle_coords(s1p1, s1p2, s2p1, s2p2)
|
876
|
+
|
877
|
+
# Create von Mises distribution instance
|
878
|
+
mu_radians = math.radians(mu_degrees)
|
879
|
+
vm_dist = vonmises(kappa=kappa, loc=mu_radians)
|
880
|
+
|
881
|
+
# Calculate the probability density function (PDF) value for the torsion angle
|
882
|
+
torsion_probability = vm_dist.pdf(torsion_radians)
|
883
|
+
|
884
|
+
# Calculate the probability density for the minimum endpoint distance
|
885
|
+
distance_probability = distance_pdf(
|
886
|
+
min_endpoint_distance
|
887
|
+
) # Use the new function
|
888
|
+
|
889
|
+
# Calculate the coaxial probability
|
890
|
+
# Max torsion probability occurs at mu (location of the distribution)
|
891
|
+
max_torsion_probability = vm_dist.pdf(mu_radians)
|
892
|
+
# Max distance probability is 1.0 by design of lennard_jones_like_pdf
|
893
|
+
max_distance_probability = 1.0
|
894
|
+
# Normalization factor is the product of maximum possible probabilities
|
895
|
+
normalization_factor = max_torsion_probability * max_distance_probability
|
896
|
+
|
897
|
+
coaxial_probability = 0.0
|
898
|
+
if normalization_factor > 1e-9: # Avoid division by zero
|
899
|
+
probability_product = torsion_probability * distance_probability
|
900
|
+
coaxial_probability = probability_product / normalization_factor
|
901
|
+
# Clamp between 0 and 1
|
902
|
+
coaxial_probability = max(0.0, min(1.0, coaxial_probability))
|
903
|
+
|
904
|
+
return {
|
905
|
+
"type": closest_pair_key,
|
906
|
+
"torsion_angle": math.degrees(torsion_radians),
|
907
|
+
"min_endpoint_distance": min_endpoint_distance,
|
908
|
+
"torsion_angle_pdf": torsion_probability,
|
909
|
+
"min_endpoint_distance_pdf": distance_probability,
|
910
|
+
"coaxial_probability": coaxial_probability,
|
911
|
+
}
|
912
|
+
|
650
913
|
def __generate_dot_bracket_per_strand(self, dbn_structure: str) -> List[str]:
|
651
914
|
dbn = dbn_structure
|
652
915
|
i = 0
|
@@ -698,7 +961,7 @@ class Mapping2D3D:
|
|
698
961
|
|
699
962
|
for row in [row1, row2]:
|
700
963
|
if row:
|
701
|
-
bpseq = self.__generate_bpseq(row)
|
964
|
+
bpseq, _ = self.__generate_bpseq(row) # Unpack the tuple
|
702
965
|
dbns = self.__generate_dot_bracket_per_strand(
|
703
966
|
bpseq.dot_bracket.structure
|
704
967
|
)
|
@@ -709,11 +972,121 @@ class Mapping2D3D:
|
|
709
972
|
return "\n".join(["\n".join(r) for r in result])
|
710
973
|
|
711
974
|
|
975
|
+
def distance_pdf(
|
976
|
+
x: float, lower_bound: float = 3.0, upper_bound: float = 7.0, steepness: float = 5.0
|
977
|
+
) -> float:
|
978
|
+
"""
|
979
|
+
Calculates a probability density based on distance using a plateau function.
|
980
|
+
|
981
|
+
The function uses the product of two sigmoid functions to create a distribution
|
982
|
+
that is close to 1.0 between lower_bound and upper_bound, and drops off
|
983
|
+
rapidly outside this range.
|
984
|
+
|
985
|
+
Args:
|
986
|
+
x: The distance value.
|
987
|
+
lower_bound: The start of the high-probability plateau (default: 3.0).
|
988
|
+
upper_bound: The end of the high-probability plateau (default: 7.0).
|
989
|
+
steepness: Controls how quickly the probability drops outside the plateau
|
990
|
+
(default: 5.0). Higher values mean steeper drops.
|
991
|
+
|
992
|
+
Returns:
|
993
|
+
The calculated probability density (between 0.0 and 1.0).
|
994
|
+
"""
|
995
|
+
# Define a maximum exponent value to prevent overflow
|
996
|
+
max_exponent = 700.0
|
997
|
+
|
998
|
+
# Calculate exponent for the first sigmoid (increasing)
|
999
|
+
exponent1 = -steepness * (x - lower_bound)
|
1000
|
+
# Clamp the exponent if it's excessively large (which happens when x << lower_bound)
|
1001
|
+
exponent1 = min(exponent1, max_exponent)
|
1002
|
+
sigmoid1 = 1.0 / (1.0 + math.exp(exponent1))
|
1003
|
+
|
1004
|
+
# Calculate exponent for the second sigmoid (decreasing)
|
1005
|
+
exponent2 = steepness * (x - upper_bound)
|
1006
|
+
# Clamp the exponent if it's excessively large (which happens when x >> upper_bound)
|
1007
|
+
exponent2 = min(exponent2, max_exponent)
|
1008
|
+
sigmoid2 = 1.0 / (1.0 + math.exp(exponent2))
|
1009
|
+
|
1010
|
+
# The product creates the plateau effect
|
1011
|
+
probability = sigmoid1 * sigmoid2
|
1012
|
+
# Clamp to handle potential floating point inaccuracies near 0 and 1
|
1013
|
+
return max(0.0, min(1.0, probability))
|
1014
|
+
|
1015
|
+
|
1016
|
+
def calculate_all_inter_stem_parameters(
|
1017
|
+
mapping: Mapping2D3D,
|
1018
|
+
) -> List[InterStemParameters]:
|
1019
|
+
"""
|
1020
|
+
Calculates InterStemParameters for all valid pairs of stems found in the mapping.
|
1021
|
+
|
1022
|
+
Args:
|
1023
|
+
mapping: The Mapping2D3D object containing structure, 2D info, and mapping.
|
1024
|
+
|
1025
|
+
"""
|
1026
|
+
stems = mapping.bpseq.elements[0] # Get stems from mapping
|
1027
|
+
inter_stem_params = []
|
1028
|
+
for i, j in itertools.combinations(range(len(stems)), 2):
|
1029
|
+
stem1 = stems[i]
|
1030
|
+
stem2 = stems[j]
|
1031
|
+
|
1032
|
+
# Ensure both stems have at least 2 base pairs for parameter calculation
|
1033
|
+
if (stem1.strand5p.last - stem1.strand5p.first + 1) > 1 and (
|
1034
|
+
stem2.strand5p.last - stem2.strand5p.first + 1
|
1035
|
+
) > 1:
|
1036
|
+
params = mapping.calculate_inter_stem_parameters(stem1, stem2)
|
1037
|
+
# Only add if calculation returned valid values
|
1038
|
+
if params is not None:
|
1039
|
+
inter_stem_params.append(
|
1040
|
+
InterStemParameters(
|
1041
|
+
stem1_idx=i,
|
1042
|
+
stem2_idx=j,
|
1043
|
+
type=params["type"],
|
1044
|
+
torsion=params["torsion_angle"],
|
1045
|
+
min_endpoint_distance=params["min_endpoint_distance"],
|
1046
|
+
torsion_angle_pdf=params["torsion_angle_pdf"],
|
1047
|
+
min_endpoint_distance_pdf=params["min_endpoint_distance_pdf"],
|
1048
|
+
coaxial_probability=params["coaxial_probability"],
|
1049
|
+
)
|
1050
|
+
)
|
1051
|
+
return inter_stem_params
|
1052
|
+
|
1053
|
+
|
712
1054
|
def torsion_angle(a1: Atom, a2: Atom, a3: Atom, a4: Atom) -> float:
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
1055
|
+
"""Calculates the torsion angle between four atoms."""
|
1056
|
+
return calculate_torsion_angle_coords(
|
1057
|
+
a1.coordinates, a2.coordinates, a3.coordinates, a4.coordinates
|
1058
|
+
)
|
1059
|
+
|
1060
|
+
|
1061
|
+
def calculate_torsion_angle_coords(
|
1062
|
+
p1: numpy.typing.NDArray[numpy.floating],
|
1063
|
+
p2: numpy.typing.NDArray[numpy.floating],
|
1064
|
+
p3: numpy.typing.NDArray[numpy.floating],
|
1065
|
+
p4: numpy.typing.NDArray[numpy.floating],
|
1066
|
+
) -> float:
|
1067
|
+
"""Calculates the torsion angle between four points defined by their coordinates."""
|
1068
|
+
v1 = p2 - p1
|
1069
|
+
v2 = p3 - p2
|
1070
|
+
v3 = p4 - p3
|
1071
|
+
|
1072
|
+
# Normalize vectors to avoid issues with very short vectors
|
1073
|
+
v1_norm = v1 / numpy.linalg.norm(v1) if numpy.linalg.norm(v1) > 1e-6 else v1
|
1074
|
+
v2_norm = v2 / numpy.linalg.norm(v2) if numpy.linalg.norm(v2) > 1e-6 else v2
|
1075
|
+
v3_norm = v3 / numpy.linalg.norm(v3) if numpy.linalg.norm(v3) > 1e-6 else v3
|
1076
|
+
|
1077
|
+
t1 = numpy.cross(v1_norm, v2_norm)
|
1078
|
+
t2 = numpy.cross(v2_norm, v3_norm)
|
1079
|
+
t3 = v1_norm * numpy.linalg.norm(v2_norm)
|
1080
|
+
|
1081
|
+
# Ensure t1 and t2 are not zero vectors before calculating dot products
|
1082
|
+
if numpy.linalg.norm(t1) < 1e-6 or numpy.linalg.norm(t2) < 1e-6:
|
1083
|
+
return 0.0 # Or handle as undefined/error
|
1084
|
+
|
1085
|
+
dot_t1_t2 = numpy.dot(t1, t2)
|
1086
|
+
dot_t2_t3 = numpy.dot(t2, t3)
|
1087
|
+
|
1088
|
+
# Clamp dot product arguments for acos/atan2 to avoid domain errors
|
1089
|
+
dot_t1_t2 = numpy.clip(dot_t1_t2, -1.0, 1.0)
|
1090
|
+
|
1091
|
+
angle = math.atan2(dot_t2_t3, dot_t1_t2)
|
1092
|
+
return angle if not math.isnan(angle) else 0.0
|
rnapolis/unifier.py
CHANGED
@@ -7,7 +7,13 @@ from collections import Counter
|
|
7
7
|
import pandas as pd
|
8
8
|
|
9
9
|
from rnapolis.parser import is_cif
|
10
|
-
from rnapolis.parser_v2 import
|
10
|
+
from rnapolis.parser_v2 import (
|
11
|
+
fit_to_pdb,
|
12
|
+
parse_cif_atoms,
|
13
|
+
parse_pdb_atoms,
|
14
|
+
write_cif,
|
15
|
+
write_pdb,
|
16
|
+
)
|
11
17
|
from rnapolis.tertiary_v2 import Structure
|
12
18
|
|
13
19
|
|
@@ -140,13 +146,22 @@ def main():
|
|
140
146
|
|
141
147
|
ext = ".pdb" if format == "PDB" else ".cif"
|
142
148
|
|
143
|
-
|
144
|
-
df = pd.concat([residue.atoms for residue in residues])
|
149
|
+
df = pd.concat([residue.atoms for residue in residues])
|
145
150
|
|
151
|
+
try:
|
146
152
|
if format == "PDB":
|
147
|
-
|
153
|
+
df_to_write = fit_to_pdb(df)
|
154
|
+
with open(f"{args.output}/{base}{ext}", "w") as f:
|
155
|
+
write_pdb(df_to_write, f)
|
148
156
|
else:
|
149
|
-
|
157
|
+
with open(f"{args.output}/{base}{ext}", "w") as f:
|
158
|
+
write_cif(df, f)
|
159
|
+
except ValueError as e:
|
160
|
+
print(
|
161
|
+
f"Error processing {path} for PDB output: {e}. Skipping file.",
|
162
|
+
file=sys.stderr,
|
163
|
+
)
|
164
|
+
continue
|
150
165
|
|
151
166
|
|
152
167
|
if __name__ == "__main__":
|
@@ -1,8 +1,8 @@
|
|
1
|
-
rnapolis/adapter.py,sha256=
|
1
|
+
rnapolis/adapter.py,sha256=hgOPzbvLhdPxuqpV2fLqizHQSpAtglIXrySf_SzsxCc,15379
|
2
2
|
rnapolis/aligner.py,sha256=o7rQyjAZ3n4VXcnSPY3HVB8nLNRkVbl552O3NVh0mfg,3429
|
3
|
-
rnapolis/annotator.py,sha256=
|
3
|
+
rnapolis/annotator.py,sha256=zzjyZ13JYd32E_SUcTCyfV4XYpWHtgzuBsacNDHCMI8,31835
|
4
4
|
rnapolis/clashfinder.py,sha256=AC9_tIx7QIk57sELq_aKfU1u3UMOXbgcccQeGHhMR6c,8517
|
5
|
-
rnapolis/common.py,sha256=
|
5
|
+
rnapolis/common.py,sha256=p70gydcG8bcA8_NXPef40efvz9Jrt4TAKPfhNpg4iKg,31896
|
6
6
|
rnapolis/component_A.csv,sha256=koirS-AwUZwoYGItT8yn3wS6Idvmh2FANfTQcOS_xh8,2897
|
7
7
|
rnapolis/component_C.csv,sha256=NtvsAu_YrUgTjzZm3j4poW4IZ99x3dPARB09XVIiMCc,2803
|
8
8
|
rnapolis/component_G.csv,sha256=Z5wl8OnHRyx4XhTyBiWgRZiEvmZXhoxtVRH8bn6Vxf0,2898
|
@@ -12,17 +12,17 @@ rnapolis/mmcif_pdbx_v50.dic,sha256=5QFx1ssDaehR4_DQ-tS9VQux262SiLXaqcwmwwejF5c,5
|
|
12
12
|
rnapolis/molecule_filter.py,sha256=jgcpJxx_oXEBX0d30v4k_FdwRouRUPUsEtCYWgLGpD4,7310
|
13
13
|
rnapolis/motif_extractor.py,sha256=Lfn1iEkhkP9eZD3GPEWNAfy00QO7QPCc8wM_XS1ory8,1147
|
14
14
|
rnapolis/parser.py,sha256=3g4mtFvpiEENFcSBBtx_E_x1vJPF9BujWnts0kb9XjE,16340
|
15
|
-
rnapolis/parser_v2.py,sha256=
|
15
|
+
rnapolis/parser_v2.py,sha256=qG6CO3or7zmuJu368g9Nzokiqdeip4yjD14F163uH6w,40618
|
16
16
|
rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
|
17
|
-
rnapolis/splitter.py,sha256=
|
18
|
-
rnapolis/tertiary.py,sha256=
|
17
|
+
rnapolis/splitter.py,sha256=x-Zn21mkiMgvYPptUFD9BbdNIvoaM6b8GzGf6uYXEwE,4052
|
18
|
+
rnapolis/tertiary.py,sha256=mTVpz8rz5Q9s5_QVSMdEMXSooCce0rAD5RQMh00bHm4,39200
|
19
19
|
rnapolis/tertiary_v2.py,sha256=I1uyHWIUePNGO5m-suoL4ibtz02qAJUMvYm0BUKUygY,22480
|
20
20
|
rnapolis/transformer.py,sha256=aC0nBmHHJf5TyLvBIV57Jj3tlwpvHbPo347opfAOlQA,3844
|
21
|
-
rnapolis/unifier.py,sha256=
|
21
|
+
rnapolis/unifier.py,sha256=2ge7IB9FdRgzSAiVD39U_ciwtdDJ2fGzf8mUIudbrqY,5820
|
22
22
|
rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
|
23
|
-
rnapolis-0.8.
|
24
|
-
rnapolis-0.8.
|
25
|
-
rnapolis-0.8.
|
26
|
-
rnapolis-0.8.
|
27
|
-
rnapolis-0.8.
|
28
|
-
rnapolis-0.8.
|
23
|
+
rnapolis-0.8.2.dist-info/licenses/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
|
24
|
+
rnapolis-0.8.2.dist-info/METADATA,sha256=1_ITZKV6JF324k9MawJIQCYH1VI8fQffIp9IRquKwnE,54537
|
25
|
+
rnapolis-0.8.2.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
26
|
+
rnapolis-0.8.2.dist-info/entry_points.txt,sha256=H00KoN54wU3dFOofAu3H_3PADmZOBTB1hXf5TUU2uzo,438
|
27
|
+
rnapolis-0.8.2.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
|
28
|
+
rnapolis-0.8.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|