RNApolis 0.9.1__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rnapolis/adapter.py +791 -37
- rnapolis/annotator.py +101 -27
- rnapolis/distiller.py +1119 -0
- rnapolis/parser.py +4 -0
- rnapolis/tertiary_v2.py +482 -18
- {rnapolis-0.9.1.dist-info → rnapolis-0.10.0.dist-info}/METADATA +4 -1
- {rnapolis-0.9.1.dist-info → rnapolis-0.10.0.dist-info}/RECORD +11 -10
- {rnapolis-0.9.1.dist-info → rnapolis-0.10.0.dist-info}/entry_points.txt +1 -0
- {rnapolis-0.9.1.dist-info → rnapolis-0.10.0.dist-info}/WHEEL +0 -0
- {rnapolis-0.9.1.dist-info → rnapolis-0.10.0.dist-info}/licenses/LICENSE +0 -0
- {rnapolis-0.9.1.dist-info → rnapolis-0.10.0.dist-info}/top_level.txt +0 -0
rnapolis/annotator.py
CHANGED
@@ -24,6 +24,7 @@ from rnapolis.common import (
|
|
24
24
|
BPh,
|
25
25
|
BpSeq,
|
26
26
|
LeontisWesthof,
|
27
|
+
OtherInteraction,
|
27
28
|
Residue,
|
28
29
|
Saenger,
|
29
30
|
Stacking,
|
@@ -660,12 +661,6 @@ def write_bpseq(path: str, bpseq: BpSeq):
|
|
660
661
|
|
661
662
|
def add_common_output_arguments(parser: argparse.ArgumentParser):
|
662
663
|
"""Adds common output and processing arguments to the parser."""
|
663
|
-
parser.add_argument(
|
664
|
-
"-a",
|
665
|
-
"--all-dot-brackets",
|
666
|
-
action="store_true",
|
667
|
-
help="(optional) print all dot-brackets, not only optimal one (exclusive with -e/--extended)",
|
668
|
-
)
|
669
664
|
parser.add_argument("-b", "--bpseq", help="(optional) path to output BPSEQ file")
|
670
665
|
parser.add_argument("-c", "--csv", help="(optional) path to output CSV file")
|
671
666
|
parser.add_argument(
|
@@ -693,42 +688,126 @@ def add_common_output_arguments(parser: argparse.ArgumentParser):
|
|
693
688
|
)
|
694
689
|
|
695
690
|
|
691
|
+
def unify_structure_data(
    structure2d: Structure2D, mapping: Mapping2D3D, find_gaps: bool = False
) -> Structure2D:
    """
    Build a new Structure2D whose interactions carry unified residue data.

    Two things are unified:
    1. Base pairs whose ``saenger`` is None get a classification from
       ``detect_saenger`` when both partners can be found in the 3D structure.
    2. Residues whose ``label`` is None get the label of the first 3D residue
       sharing the same ``auth``.

    Args:
        structure2d: Source of base pairs, stackings, base-ribose,
            base-phosphate and other interactions.
        mapping: Provides ``structure3d``, whose residues are used for label
            filling and Saenger detection, and which rebuilds the result.
        find_gaps: Forwarded as the second argument of
            ``structure3d.extract_secondary_structure``. Defaults to False,
            the value that was previously hard-coded.

    Returns:
        The Structure2D produced by ``extract_secondary_structure`` for the
        unified interactions. The second element returned by that call is
        discarded.
    """
    residues3d = mapping.structure3d.residues

    # (label, auth) -> residue3d, used for Saenger detection. On duplicate
    # keys the later residue wins, as with plain sequential assignment.
    residue_to_residue3d = {
        Residue(residue3d.label, residue3d.auth): residue3d
        for residue3d in residues3d
    }

    # auth -> first residue3d with that auth. Built once so that label filling
    # is a dict lookup instead of a scan over every 3D residue per residue.
    # NOTE(review): assumes ``auth`` values are hashable with hash consistent
    # with ``==`` (they already take part in Residue dict keys above) - confirm.
    first_residue3d_by_auth = {}
    for residue3d in residues3d:
        first_residue3d_by_auth.setdefault(residue3d.auth, residue3d)

    def fill_residue_label(residue: Residue) -> Residue:
        """Fill empty label from Structure3D if available."""
        if residue.label is not None:
            return residue

        match = first_residue3d_by_auth.get(residue.auth)
        if match is None:
            # No 3D residue shares this auth; leave the residue untouched.
            return residue
        return Residue(match.label, residue.auth)

    def fill_pair(interaction):
        """Return the (nt1, nt2) of an interaction with labels filled in."""
        return (
            fill_residue_label(interaction.nt1),
            fill_residue_label(interaction.nt2),
        )

    # Base pairs: fill labels and, where missing, the Saenger classification.
    unified_base_pairs = []
    for base_pair in structure2d.base_pairs:
        nt1, nt2 = fill_pair(base_pair)

        saenger = base_pair.saenger
        if saenger is None:
            # Look the partners up by their (possibly just filled) identity.
            residue3d_1 = residue_to_residue3d.get(Residue(nt1.label, nt1.auth))
            residue3d_2 = residue_to_residue3d.get(Residue(nt2.label, nt2.auth))

            if residue3d_1 is not None and residue3d_2 is not None:
                saenger = detect_saenger(residue3d_1, residue3d_2, base_pair.lw)

        unified_base_pairs.append(BasePair(nt1, nt2, base_pair.lw, saenger))

    # Remaining interaction types only need their labels filled.
    unified_stackings = [
        Stacking(*fill_pair(stacking), stacking.topology)
        for stacking in structure2d.stackings
    ]
    unified_base_ribose = [
        BaseRibose(*fill_pair(base_ribose), base_ribose.br)
        for base_ribose in structure2d.base_ribose_interactions
    ]
    unified_base_phosphate = [
        BasePhosphate(*fill_pair(base_phosphate), base_phosphate.bph)
        for base_phosphate in structure2d.base_phosphate_interactions
    ]
    unified_other = [
        OtherInteraction(*fill_pair(other))
        for other in structure2d.other_interactions
    ]

    unified_base_interactions = BaseInteractions(
        unified_base_pairs,
        unified_stackings,
        unified_base_ribose,
        unified_base_phosphate,
        unified_other,
    )

    # Recreate Structure2D from the unified interactions.
    unified_structure2d, _ = mapping.structure3d.extract_secondary_structure(
        unified_base_interactions, find_gaps
    )

    return unified_structure2d
|
774
|
+
|
775
|
+
|
696
776
|
def handle_output_arguments(
|
697
777
|
args: argparse.Namespace,
|
698
778
|
structure2d: Structure2D,
|
699
|
-
dot_brackets: List[str],
|
700
779
|
mapping: Mapping2D3D,
|
701
780
|
input_filename: str,
|
702
781
|
):
|
703
782
|
"""Handles writing output based on provided arguments."""
|
783
|
+
# Unify the structure data before processing outputs
|
784
|
+
unified_structure2d = unify_structure_data(structure2d, mapping)
|
785
|
+
|
704
786
|
input_basename = os.path.basename(input_filename)
|
705
787
|
if args.csv:
|
706
|
-
write_csv(args.csv,
|
788
|
+
write_csv(args.csv, unified_structure2d)
|
707
789
|
|
708
790
|
if args.json:
|
709
|
-
write_json(args.json,
|
791
|
+
write_json(args.json, unified_structure2d)
|
710
792
|
|
711
793
|
if args.bpseq:
|
712
|
-
write_bpseq(args.bpseq,
|
794
|
+
write_bpseq(args.bpseq, unified_structure2d.bpseq)
|
713
795
|
|
714
796
|
if args.extended:
|
715
|
-
print(
|
716
|
-
elif args.all_dot_brackets:
|
717
|
-
for dot_bracket in dot_brackets:
|
718
|
-
print(dot_bracket)
|
797
|
+
print(unified_structure2d.extended_dot_bracket)
|
719
798
|
else:
|
720
|
-
print(
|
799
|
+
print(unified_structure2d.dot_bracket)
|
721
800
|
|
722
801
|
if args.dot:
|
723
|
-
print(BpSeq.from_string(
|
802
|
+
print(BpSeq.from_string(unified_structure2d.bpseq).graphviz)
|
724
803
|
|
725
804
|
if args.pml:
|
726
|
-
pml_script = generate_pymol_script(mapping,
|
805
|
+
pml_script = generate_pymol_script(mapping, unified_structure2d.stems)
|
727
806
|
with open(args.pml, "w") as f:
|
728
807
|
f.write(pml_script)
|
729
808
|
|
730
809
|
if args.inter_stem_csv:
|
731
|
-
if
|
810
|
+
if unified_structure2d.inter_stem_parameters:
|
732
811
|
# Convert list of dataclasses to list of dicts
|
733
812
|
params_list = [
|
734
813
|
{
|
@@ -741,7 +820,7 @@ def handle_output_arguments(
|
|
741
820
|
"min_endpoint_distance_pdf": p.min_endpoint_distance_pdf,
|
742
821
|
"coaxial_probability": p.coaxial_probability,
|
743
822
|
}
|
744
|
-
for p in
|
823
|
+
for p in unified_structure2d.interStemParameters
|
745
824
|
]
|
746
825
|
df = pd.DataFrame(params_list)
|
747
826
|
df["input_basename"] = input_basename
|
@@ -759,9 +838,9 @@ def handle_output_arguments(
|
|
759
838
|
# pd.DataFrame(columns=['input_basename', 'stem1_idx', ...]).to_csv(args.inter_stem_csv, index=False)
|
760
839
|
|
761
840
|
if args.stems_csv:
|
762
|
-
if
|
841
|
+
if unified_structure2d.stems:
|
763
842
|
stems_data = []
|
764
|
-
for i, stem in enumerate(
|
843
|
+
for i, stem in enumerate(unified_structure2d.stems):
|
765
844
|
try:
|
766
845
|
res5p_first = mapping.bpseq_index_to_residue_map.get(
|
767
846
|
stem.strand5p.first
|
@@ -842,12 +921,7 @@ def main():
|
|
842
921
|
base_interactions, args.find_gaps
|
843
922
|
)
|
844
923
|
|
845
|
-
|
846
|
-
dot_brackets = mapping.all_dot_brackets
|
847
|
-
else:
|
848
|
-
dot_brackets = [mapping.dot_bracket]
|
849
|
-
|
850
|
-
handle_output_arguments(args, structure2d, dot_brackets, mapping, args.input)
|
924
|
+
handle_output_arguments(args, structure2d, mapping, args.input)
|
851
925
|
|
852
926
|
|
853
927
|
if __name__ == "__main__":
|