RNApolis 0.8.1__py3-none-any.whl → 0.8.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rnapolis/adapter.py +27 -117
- rnapolis/annotator.py +256 -20
- rnapolis/common.py +13 -0
- rnapolis/tertiary.py +383 -10
- {rnapolis-0.8.1.dist-info → rnapolis-0.8.2.dist-info}/METADATA +1 -1
- {rnapolis-0.8.1.dist-info → rnapolis-0.8.2.dist-info}/RECORD +10 -10
- {rnapolis-0.8.1.dist-info → rnapolis-0.8.2.dist-info}/WHEEL +0 -0
- {rnapolis-0.8.1.dist-info → rnapolis-0.8.2.dist-info}/entry_points.txt +0 -0
- {rnapolis-0.8.1.dist-info → rnapolis-0.8.2.dist-info}/licenses/LICENSE +0 -0
- {rnapolis-0.8.1.dist-info → rnapolis-0.8.2.dist-info}/top_level.txt +0 -0
rnapolis/adapter.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
#! /usr/bin/env python
|
2
2
|
import argparse
|
3
|
-
import csv
|
4
3
|
import logging
|
5
4
|
import os
|
6
5
|
from enum import Enum
|
@@ -8,6 +7,10 @@ from typing import Dict, List, Optional, Tuple
|
|
8
7
|
|
9
8
|
import orjson
|
10
9
|
|
10
|
+
from rnapolis.annotator import (
|
11
|
+
add_common_output_arguments,
|
12
|
+
handle_output_arguments,
|
13
|
+
)
|
11
14
|
from rnapolis.common import (
|
12
15
|
BR,
|
13
16
|
BaseInteractions,
|
@@ -15,7 +18,6 @@ from rnapolis.common import (
|
|
15
18
|
BasePhosphate,
|
16
19
|
BaseRibose,
|
17
20
|
BPh,
|
18
|
-
BpSeq,
|
19
21
|
LeontisWesthof,
|
20
22
|
OtherInteraction,
|
21
23
|
Residue,
|
@@ -25,7 +27,11 @@ from rnapolis.common import (
|
|
25
27
|
Structure2D,
|
26
28
|
)
|
27
29
|
from rnapolis.parser import read_3d_structure
|
28
|
-
from rnapolis.tertiary import
|
30
|
+
from rnapolis.tertiary import (
|
31
|
+
Mapping2D3D,
|
32
|
+
Structure3D,
|
33
|
+
calculate_all_inter_stem_parameters, # Import the new helper function
|
34
|
+
)
|
29
35
|
from rnapolis.util import handle_input_file
|
30
36
|
|
31
37
|
|
@@ -317,7 +323,7 @@ def process_external_tool_output(
|
|
317
323
|
model: Optional[int] = None,
|
318
324
|
find_gaps: bool = False,
|
319
325
|
all_dot_brackets: bool = False,
|
320
|
-
) -> Tuple[Structure2D, List[str]]:
|
326
|
+
) -> Tuple[Structure2D, List[str], Mapping2D3D]: # Added Mapping2D3D to return tuple
|
321
327
|
"""
|
322
328
|
Process external tool output and create a secondary structure representation.
|
323
329
|
|
@@ -333,7 +339,8 @@ def process_external_tool_output(
|
|
333
339
|
all_dot_brackets: Whether to return all possible dot-bracket notations
|
334
340
|
|
335
341
|
Returns:
|
336
|
-
A tuple containing the Structure2D object
|
342
|
+
A tuple containing the Structure2D object, a list of dot-bracket notations,
|
343
|
+
and the Mapping2D3D object.
|
337
344
|
"""
|
338
345
|
# Parse external tool output
|
339
346
|
base_interactions = parse_external_output(external_file_path, tool, structure3d)
|
@@ -350,7 +357,7 @@ def extract_secondary_structure_from_external(
|
|
350
357
|
model: Optional[int] = None,
|
351
358
|
find_gaps: bool = False,
|
352
359
|
all_dot_brackets: bool = False,
|
353
|
-
) -> Tuple[Structure2D, List[str]]:
|
360
|
+
) -> Tuple[Structure2D, List[str], Mapping2D3D]: # Added Mapping2D3D to return tuple
|
354
361
|
"""
|
355
362
|
Create a secondary structure representation using interactions from an external tool.
|
356
363
|
|
@@ -362,7 +369,8 @@ def extract_secondary_structure_from_external(
|
|
362
369
|
all_dot_brackets: Whether to return all possible dot-bracket notations
|
363
370
|
|
364
371
|
Returns:
|
365
|
-
A tuple containing the Structure2D object
|
372
|
+
A tuple containing the Structure2D object, a list of dot-bracket notations,
|
373
|
+
and the Mapping2D3D object.
|
366
374
|
"""
|
367
375
|
mapping = Mapping2D3D(
|
368
376
|
tertiary_structure,
|
@@ -371,6 +379,10 @@ def extract_secondary_structure_from_external(
|
|
371
379
|
find_gaps,
|
372
380
|
)
|
373
381
|
stems, single_strands, hairpins, loops = mapping.bpseq.elements
|
382
|
+
|
383
|
+
# Calculate inter-stem parameters using the helper function
|
384
|
+
inter_stem_params = calculate_all_inter_stem_parameters(mapping)
|
385
|
+
|
374
386
|
structure2d = Structure2D(
|
375
387
|
base_interactions,
|
376
388
|
str(mapping.bpseq),
|
@@ -380,81 +392,15 @@ def extract_secondary_structure_from_external(
|
|
380
392
|
single_strands,
|
381
393
|
hairpins,
|
382
394
|
loops,
|
395
|
+
inter_stem_params, # Added inter-stem parameters
|
383
396
|
)
|
384
397
|
if all_dot_brackets:
|
385
|
-
return structure2d, mapping.all_dot_brackets
|
398
|
+
return structure2d, mapping.all_dot_brackets, mapping # Return mapping
|
386
399
|
else:
|
387
|
-
return structure2d, [structure2d.dotBracket]
|
388
|
-
|
389
|
-
|
390
|
-
def write_json(path: str, structure2d: BaseInteractions):
|
391
|
-
with open(path, "wb") as f:
|
392
|
-
f.write(orjson.dumps(structure2d))
|
393
|
-
|
394
|
-
|
395
|
-
def write_csv(path: str, structure2d: Structure2D):
|
396
|
-
with open(path, "w") as f:
|
397
|
-
writer = csv.writer(f)
|
398
|
-
writer.writerow(["nt1", "nt2", "type", "classification-1", "classification-2"])
|
399
|
-
for base_pair in structure2d.baseInteractions.basePairs:
|
400
|
-
writer.writerow(
|
401
|
-
[
|
402
|
-
base_pair.nt1.full_name,
|
403
|
-
base_pair.nt2.full_name,
|
404
|
-
"base pair",
|
405
|
-
base_pair.lw.value,
|
406
|
-
(
|
407
|
-
base_pair.saenger.value or ""
|
408
|
-
if base_pair.saenger is not None
|
409
|
-
else ""
|
410
|
-
),
|
411
|
-
]
|
412
|
-
)
|
413
|
-
for stacking in structure2d.baseInteractions.stackings:
|
414
|
-
writer.writerow(
|
415
|
-
[
|
416
|
-
stacking.nt1.full_name,
|
417
|
-
stacking.nt2.full_name,
|
418
|
-
"stacking",
|
419
|
-
stacking.topology.value if stacking.topology is not None else "",
|
420
|
-
"",
|
421
|
-
]
|
422
|
-
)
|
423
|
-
for base_phosphate in structure2d.baseInteractions.basePhosphateInteractions:
|
424
|
-
writer.writerow(
|
425
|
-
[
|
426
|
-
base_phosphate.nt1.full_name,
|
427
|
-
base_phosphate.nt2.full_name,
|
428
|
-
"base-phosphate interaction",
|
429
|
-
base_phosphate.bph.value if base_phosphate.bph is not None else "",
|
430
|
-
"",
|
431
|
-
]
|
432
|
-
)
|
433
|
-
for base_ribose in structure2d.baseInteractions.baseRiboseInteractions:
|
434
|
-
writer.writerow(
|
435
|
-
[
|
436
|
-
base_ribose.nt1.full_name,
|
437
|
-
base_ribose.nt2.full_name,
|
438
|
-
"base-ribose interaction",
|
439
|
-
base_ribose.br.value if base_ribose.br is not None else "",
|
440
|
-
"",
|
441
|
-
]
|
442
|
-
)
|
443
|
-
for other in structure2d.baseInteractions.otherInteractions:
|
444
|
-
writer.writerow(
|
445
|
-
[
|
446
|
-
other.nt1.full_name,
|
447
|
-
other.nt2.full_name,
|
448
|
-
"other interaction",
|
449
|
-
"",
|
450
|
-
"",
|
451
|
-
]
|
452
|
-
)
|
400
|
+
return structure2d, [structure2d.dotBracket], mapping # Return mapping
|
453
401
|
|
454
402
|
|
455
|
-
|
456
|
-
with open(path, "w") as f:
|
457
|
-
f.write(str(bpseq))
|
403
|
+
# Removed duplicate functions - now imported from annotator
|
458
404
|
|
459
405
|
|
460
406
|
def main():
|
@@ -471,39 +417,21 @@ def main():
|
|
471
417
|
required=True,
|
472
418
|
help="External tool that generated the output file",
|
473
419
|
)
|
474
|
-
parser.add_argument(
|
475
|
-
"-a",
|
476
|
-
"--all-dot-brackets",
|
477
|
-
action="store_true",
|
478
|
-
help="(optional) print all dot-brackets, not only optimal one (exclusive with -e/--extended)",
|
479
|
-
)
|
480
|
-
parser.add_argument("-b", "--bpseq", help="(optional) path to output BPSEQ file")
|
481
|
-
parser.add_argument("-c", "--csv", help="(optional) path to output CSV file")
|
482
|
-
parser.add_argument(
|
483
|
-
"-j",
|
484
|
-
"--json",
|
485
|
-
help="(optional) path to output JSON file",
|
486
|
-
)
|
487
|
-
parser.add_argument(
|
488
|
-
"-e",
|
489
|
-
"--extended",
|
490
|
-
action="store_true",
|
491
|
-
help="(optional) if set, the program will print extended secondary structure to the standard output",
|
492
|
-
)
|
493
420
|
parser.add_argument(
|
494
421
|
"-f",
|
495
422
|
"--find-gaps",
|
496
423
|
action="store_true",
|
497
424
|
help="(optional) if set, the program will detect gaps and break the PDB chain into two or more strands",
|
498
425
|
)
|
499
|
-
parser
|
426
|
+
add_common_output_arguments(parser)
|
427
|
+
# The --inter-stem-csv and --stems-csv arguments are now added by add_common_output_arguments
|
500
428
|
args = parser.parse_args()
|
501
429
|
|
502
430
|
file = handle_input_file(args.input)
|
503
431
|
structure3d = read_3d_structure(file, None)
|
504
432
|
|
505
433
|
# Process external tool output and get secondary structure
|
506
|
-
structure2d, dot_brackets = process_external_tool_output(
|
434
|
+
structure2d, dot_brackets, mapping = process_external_tool_output(
|
507
435
|
structure3d,
|
508
436
|
args.external,
|
509
437
|
ExternalTool(args.tool),
|
@@ -512,25 +440,7 @@ def main():
|
|
512
440
|
args.all_dot_brackets,
|
513
441
|
)
|
514
442
|
|
515
|
-
|
516
|
-
write_csv(args.csv, structure2d)
|
517
|
-
|
518
|
-
if args.json:
|
519
|
-
write_json(args.json, structure2d)
|
520
|
-
|
521
|
-
if args.bpseq:
|
522
|
-
write_bpseq(args.bpseq, structure2d.bpseq)
|
523
|
-
|
524
|
-
if args.extended:
|
525
|
-
print(structure2d.extendedDotBracket)
|
526
|
-
elif args.all_dot_brackets:
|
527
|
-
for dot_bracket in dot_brackets:
|
528
|
-
print(dot_bracket)
|
529
|
-
else:
|
530
|
-
print(structure2d.dotBracket)
|
531
|
-
|
532
|
-
if args.dot:
|
533
|
-
print(BpSeq.from_string(structure2d.bpseq).graphviz)
|
443
|
+
handle_output_arguments(args, structure2d, dot_brackets, mapping, args.input)
|
534
444
|
|
535
445
|
|
536
446
|
if __name__ == "__main__":
|
rnapolis/annotator.py
CHANGED
@@ -10,6 +10,7 @@ from typing import Dict, List, Optional, Set, Tuple
|
|
10
10
|
import numpy
|
11
11
|
import numpy.typing
|
12
12
|
import orjson
|
13
|
+
import pandas as pd
|
13
14
|
from ordered_set import OrderedSet
|
14
15
|
from scipy.spatial import KDTree
|
15
16
|
|
@@ -26,6 +27,7 @@ from rnapolis.common import (
|
|
26
27
|
Saenger,
|
27
28
|
Stacking,
|
28
29
|
StackingTopology,
|
30
|
+
Stem,
|
29
31
|
Structure2D,
|
30
32
|
)
|
31
33
|
from rnapolis.parser import read_3d_structure
|
@@ -38,9 +40,10 @@ from rnapolis.tertiary import (
|
|
38
40
|
PHOSPHATE_ACCEPTORS,
|
39
41
|
RIBOSE_ACCEPTORS,
|
40
42
|
Atom,
|
41
|
-
Mapping2D3D,
|
43
|
+
Mapping2D3D, # Added import
|
42
44
|
Residue3D,
|
43
45
|
Structure3D,
|
46
|
+
calculate_all_inter_stem_parameters, # Import the new helper function
|
44
47
|
torsion_angle,
|
45
48
|
)
|
46
49
|
from rnapolis.util import handle_input_file
|
@@ -496,6 +499,10 @@ def extract_secondary_structure(
|
|
496
499
|
find_gaps,
|
497
500
|
)
|
498
501
|
stems, single_strands, hairpins, loops = mapping.bpseq.elements
|
502
|
+
|
503
|
+
# Calculate inter-stem parameters using the helper function
|
504
|
+
inter_stem_params = calculate_all_inter_stem_parameters(mapping)
|
505
|
+
|
499
506
|
structure2d = Structure2D(
|
500
507
|
base_interactions,
|
501
508
|
str(mapping.bpseq),
|
@@ -505,6 +512,7 @@ def extract_secondary_structure(
|
|
505
512
|
single_strands,
|
506
513
|
hairpins,
|
507
514
|
loops,
|
515
|
+
inter_stem_params, # Added inter-stem parameters
|
508
516
|
)
|
509
517
|
if all_dot_brackets:
|
510
518
|
return structure2d, mapping.all_dot_brackets
|
@@ -512,9 +520,102 @@ def extract_secondary_structure(
|
|
512
520
|
return structure2d, [structure2d.dotBracket]
|
513
521
|
|
514
522
|
|
515
|
-
def
|
523
|
+
def _preferred_residue_ids(residue):
    """Return the author identifiers of ``residue`` when present, else its label identifiers."""
    return residue.auth if residue.auth else residue.label


def generate_pymol_script(mapping: "Mapping2D3D", stems: List["Stem"]) -> str:
    """Generates a PyMOL script to draw stems as cylinders.

    For every stem the script contains, in this order:

    * a ``select stem<N>, ...`` command covering both strands (skipped with
      a warning when a boundary residue or its identifiers are missing),
    * one ``pseudoatom`` per base-pair centroid,
    * one ``cmd.load_cgo`` cylinder per pair of consecutive centroids,
    * one ``dihedral`` per four consecutive centroids.

    Stems for which fewer than two centroids are available get no
    pseudoatoms, cylinders or dihedrals.

    Args:
        mapping: Provides ``bpseq_index_to_residue_map`` and
            ``get_stem_coordinates``.
        stems: Stems to draw; the position in this list becomes the stem
            number used in all generated object names.

    Returns:
        The PyMOL commands joined with newlines (an empty string when
        nothing was generated).
    """
    radius = 0.5
    r, g, b = 1.0, 0.0, 0.0  # Red color, used for both ends of each cylinder
    pymol_commands = []

    for stem_idx, stem in enumerate(stems):
        try:
            residue_map = mapping.bpseq_index_to_residue_map
            first5p = _preferred_residue_ids(residue_map[stem.strand5p.first])
            last5p = _preferred_residue_ids(residue_map[stem.strand5p.last])
            first3p = _preferred_residue_ids(residue_map[stem.strand3p.first])
            last3p = _preferred_residue_ids(residue_map[stem.strand3p.last])

            # Format selection string: select stem0, A/1-5/ or A/10-15/
            # The chain is taken from the first residue of each strand.
            selection_str = (
                f"{first5p.chain}/{first5p.number}-{last5p.number}/"
                f" or {first3p.chain}/{first3p.number}-{last3p.number}/"
            )
            pymol_commands.append(f"select stem{stem_idx}, {selection_str}")
        except (KeyError, AttributeError) as e:
            # Best effort: the selection is optional, the geometry below may
            # still be drawable, so only report the problem and carry on.
            logging.warning(
                "Could not generate selection string for stem %s: Missing residue data (%s)",
                stem_idx,
                e,
            )

        centroids = mapping.get_stem_coordinates(stem)

        # Need at least 2 centroids to draw a segment
        if len(centroids) < 2:
            continue

        # Create pseudoatoms for each centroid
        for centroid_idx, (x, y, z) in enumerate(centroids):
            pymol_commands.append(
                f"pseudoatom stem{stem_idx}_centroid{centroid_idx}, "
                f"pos=[{x:.3f}, {y:.3f}, {z:.3f}]"
            )

        # Draw cylinders between consecutive centroids.
        # Format: [CYLINDER, x1, y1, z1, x2, y2, z2, radius, r1, g1, b1, r2, g2, b2]
        # with 9.0 as the CYLINDER code.
        for seg_idx, (start, end) in enumerate(zip(centroids, centroids[1:])):
            x1, y1, z1 = start
            x2, y2, z2 = end
            cgo_object = (
                f"[ 9.0, {x1:.3f}, {y1:.3f}, {z1:.3f}, {x2:.3f}, {y2:.3f}, {z2:.3f}, "
                f"{radius}, {r}, {g}, {b}, {r}, {g}, {b} ]"
            )
            pymol_commands.append(
                f'cmd.load_cgo({cgo_object}, "stem_{stem_idx}_seg_{seg_idx}")'
            )

        # Dihedral angles over every window of four consecutive centroids;
        # the range is empty when there are fewer than four.
        for i in range(len(centroids) - 3):
            atoms = ", ".join(
                f"stem{stem_idx}_centroid{i + offset}" for offset in range(4)
            )
            pymol_commands.append(f"dihedral stem{stem_idx}_dihedral{i}, {atoms}")

    return "\n".join(pymol_commands)
|
613
|
+
|
614
|
+
|
615
|
+
def write_json(path: str, structure2d: Structure2D):
    """Serializes ``structure2d`` to JSON with orjson and writes it to ``path``.

    The payload is produced before the output file is opened, so a
    serialization error cannot leave an empty or truncated file behind.

    Args:
        path: Destination file; opened in binary mode and overwritten.
        structure2d: The object to serialize.
    """
    # OPT_SERIALIZE_NUMPY lets orjson handle numpy types like float64.
    payload = orjson.dumps(structure2d, option=orjson.OPT_SERIALIZE_NUMPY)
    with open(path, "wb") as f:
        f.write(payload)
|
518
619
|
|
519
620
|
|
520
621
|
def write_csv(path: str, structure2d: Structure2D):
|
@@ -555,13 +656,13 @@ def write_csv(path: str, structure2d: Structure2D):
|
|
555
656
|
"",
|
556
657
|
]
|
557
658
|
)
|
558
|
-
for base_ribose in structure2d.baseInteractions.
|
659
|
+
for base_ribose in structure2d.baseInteractions.baseRiboseInteractions:
|
559
660
|
writer.writerow(
|
560
661
|
[
|
561
662
|
base_ribose.nt1.full_name,
|
562
663
|
base_ribose.nt2.full_name,
|
563
664
|
"base-ribose interaction",
|
564
|
-
base_ribose.
|
665
|
+
base_ribose.br.value if base_ribose.br is not None else "",
|
565
666
|
"",
|
566
667
|
]
|
567
668
|
)
|
@@ -582,9 +683,8 @@ def write_bpseq(path: str, bpseq: BpSeq):
|
|
582
683
|
f.write(str(bpseq))
|
583
684
|
|
584
685
|
|
585
|
-
def
|
586
|
-
|
587
|
-
parser.add_argument("input", help="Path to PDB or mmCIF file")
|
686
|
+
def add_common_output_arguments(parser: argparse.ArgumentParser):
|
687
|
+
"""Adds common output and processing arguments to the parser."""
|
588
688
|
parser.add_argument(
|
589
689
|
"-a",
|
590
690
|
"--all-dot-brackets",
|
@@ -604,22 +704,29 @@ def main():
|
|
604
704
|
action="store_true",
|
605
705
|
help="(optional) if set, the program will print extended secondary structure to the standard output",
|
606
706
|
)
|
707
|
+
parser.add_argument("-d", "--dot", help="(optional) path to output DOT file")
|
607
708
|
parser.add_argument(
|
608
|
-
"-
|
609
|
-
"--find-gaps",
|
610
|
-
action="store_true",
|
611
|
-
help="(optional) if set, the program will detect gaps and break the PDB chain into two or more strands; "
|
612
|
-
f"the gap is defined as O3'-P distance greater then {1.5 * AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT}",
|
709
|
+
"-p", "--pml", help="(optional) path to output PyMOL PML script for stems"
|
613
710
|
)
|
614
|
-
parser.add_argument(
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
711
|
+
parser.add_argument(
|
712
|
+
"--inter-stem-csv",
|
713
|
+
help="(optional) path to output CSV file for inter-stem parameters",
|
714
|
+
)
|
715
|
+
parser.add_argument(
|
716
|
+
"--stems-csv",
|
717
|
+
help="(optional) path to output CSV file for stem details",
|
621
718
|
)
|
622
719
|
|
720
|
+
|
721
|
+
def handle_output_arguments(
|
722
|
+
args: argparse.Namespace,
|
723
|
+
structure2d: Structure2D,
|
724
|
+
dot_brackets: List[str],
|
725
|
+
mapping: Mapping2D3D,
|
726
|
+
input_filename: str,
|
727
|
+
):
|
728
|
+
"""Handles writing output based on provided arguments."""
|
729
|
+
input_basename = os.path.basename(input_filename)
|
623
730
|
if args.csv:
|
624
731
|
write_csv(args.csv, structure2d)
|
625
732
|
|
@@ -640,6 +747,135 @@ def main():
|
|
640
747
|
if args.dot:
|
641
748
|
print(BpSeq.from_string(structure2d.bpseq).graphviz)
|
642
749
|
|
750
|
+
if args.pml:
|
751
|
+
pml_script = generate_pymol_script(mapping, structure2d.stems)
|
752
|
+
with open(args.pml, "w") as f:
|
753
|
+
f.write(pml_script)
|
754
|
+
|
755
|
+
if args.inter_stem_csv:
|
756
|
+
if structure2d.interStemParameters:
|
757
|
+
# Convert list of dataclasses to list of dicts
|
758
|
+
params_list = [
|
759
|
+
{
|
760
|
+
"stem1_idx": p.stem1_idx,
|
761
|
+
"stem2_idx": p.stem2_idx,
|
762
|
+
"type": p.type,
|
763
|
+
"torsion": p.torsion,
|
764
|
+
"min_endpoint_distance": p.min_endpoint_distance,
|
765
|
+
"torsion_angle_pdf": p.torsion_angle_pdf,
|
766
|
+
"min_endpoint_distance_pdf": p.min_endpoint_distance_pdf,
|
767
|
+
"coaxial_probability": p.coaxial_probability,
|
768
|
+
}
|
769
|
+
for p in structure2d.interStemParameters
|
770
|
+
]
|
771
|
+
df = pd.DataFrame(params_list)
|
772
|
+
df["input_basename"] = input_basename
|
773
|
+
# Reorder columns to put input_basename first
|
774
|
+
cols = ["input_basename"] + [
|
775
|
+
col for col in df.columns if col != "input_basename"
|
776
|
+
]
|
777
|
+
df = df[cols]
|
778
|
+
df.to_csv(args.inter_stem_csv, index=False)
|
779
|
+
else:
|
780
|
+
logging.warning(
|
781
|
+
f"No inter-stem parameters calculated for {input_basename}, CSV file '{args.inter_stem_csv}' will be empty or not created."
|
782
|
+
)
|
783
|
+
# Optionally create an empty file with headers
|
784
|
+
# pd.DataFrame(columns=['input_basename', 'stem1_idx', ...]).to_csv(args.inter_stem_csv, index=False)
|
785
|
+
|
786
|
+
if args.stems_csv:
|
787
|
+
if structure2d.stems:
|
788
|
+
stems_data = []
|
789
|
+
for i, stem in enumerate(structure2d.stems):
|
790
|
+
try:
|
791
|
+
res5p_first = mapping.bpseq_index_to_residue_map.get(
|
792
|
+
stem.strand5p.first
|
793
|
+
)
|
794
|
+
res5p_last = mapping.bpseq_index_to_residue_map.get(
|
795
|
+
stem.strand5p.last
|
796
|
+
)
|
797
|
+
res3p_first = mapping.bpseq_index_to_residue_map.get(
|
798
|
+
stem.strand3p.first
|
799
|
+
)
|
800
|
+
res3p_last = mapping.bpseq_index_to_residue_map.get(
|
801
|
+
stem.strand3p.last
|
802
|
+
)
|
803
|
+
|
804
|
+
stems_data.append(
|
805
|
+
{
|
806
|
+
"stem_idx": i,
|
807
|
+
"strand5p_first_nt_id": res5p_first.full_name
|
808
|
+
if res5p_first
|
809
|
+
else None,
|
810
|
+
"strand5p_last_nt_id": res5p_last.full_name
|
811
|
+
if res5p_last
|
812
|
+
else None,
|
813
|
+
"strand3p_first_nt_id": res3p_first.full_name
|
814
|
+
if res3p_first
|
815
|
+
else None,
|
816
|
+
"strand3p_last_nt_id": res3p_last.full_name
|
817
|
+
if res3p_last
|
818
|
+
else None,
|
819
|
+
"strand5p_sequence": stem.strand5p.sequence,
|
820
|
+
"strand3p_sequence": stem.strand3p.sequence,
|
821
|
+
}
|
822
|
+
)
|
823
|
+
except KeyError as e:
|
824
|
+
logging.warning(
|
825
|
+
f"Could not find residue for stem {i} (index {e}), skipping stem details."
|
826
|
+
)
|
827
|
+
continue
|
828
|
+
|
829
|
+
if stems_data:
|
830
|
+
df_stems = pd.DataFrame(stems_data)
|
831
|
+
df_stems["input_basename"] = input_basename
|
832
|
+
# Reorder columns
|
833
|
+
stem_cols = ["input_basename", "stem_idx"] + [
|
834
|
+
col
|
835
|
+
for col in df_stems.columns
|
836
|
+
if col not in ["input_basename", "stem_idx"]
|
837
|
+
]
|
838
|
+
df_stems = df_stems[stem_cols]
|
839
|
+
df_stems.to_csv(args.stems_csv, index=False)
|
840
|
+
else:
|
841
|
+
logging.warning(
|
842
|
+
f"No valid stem data generated for {input_basename}, CSV file '{args.stems_csv}' will be empty or not created."
|
843
|
+
)
|
844
|
+
else:
|
845
|
+
logging.warning(
|
846
|
+
f"No stems found for {input_basename}, CSV file '{args.stems_csv}' will be empty or not created."
|
847
|
+
)
|
848
|
+
|
849
|
+
|
850
|
+
def main():
    """Command-line entry point of the annotator.

    Parses the arguments, reads the 3D structure, extracts its secondary
    structure and delegates all output handling to
    ``handle_output_arguments``.
    """
    gap_threshold = 1.5 * AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT

    cli = argparse.ArgumentParser()
    cli.add_argument("input", help="Path to PDB or mmCIF file")
    cli.add_argument(
        "-f",
        "--find-gaps",
        action="store_true",
        help="(optional) if set, the program will detect gaps and break the PDB chain into two or more strands; "
        f"the gap is defined as O3'-P distance greater then {gap_threshold}",
    )
    add_common_output_arguments(cli)
    options = cli.parse_args()

    structure3d = read_3d_structure(handle_input_file(options.input), None)
    structure2d, dot_brackets = extract_secondary_structure(
        structure3d, None, options.find_gaps, options.all_dot_brackets
    )

    # extract_secondary_structure does not hand back its mapping, so an
    # equivalent one is rebuilt here from the extracted interactions; the
    # output handler receives it alongside the 2D structure.
    interactions = structure2d.baseInteractions
    mapping = Mapping2D3D(
        structure3d,
        interactions.basePairs,
        interactions.stackings,
        options.find_gaps,
    )

    handle_output_arguments(
        options, structure2d, dot_brackets, mapping, options.input
    )
|
878
|
+
|
643
879
|
|
644
880
|
if __name__ == "__main__":
|
645
881
|
main()
|
rnapolis/common.py
CHANGED
@@ -1057,6 +1057,18 @@ class BaseInteractions:
|
|
1057
1057
|
otherInteractions: List[OtherInteraction]
|
1058
1058
|
|
1059
1059
|
|
1060
|
+
@dataclass(frozen=True, order=True)
class InterStemParameters:
    """Geometric parameters describing the relation between two stems.

    Attributes:
        stem1_idx: Index of the first stem (presumably its position in the
            stem list of the owning structure; confirm against the producer).
        stem2_idx: Index of the second stem (same convention as above).
        type: Type of closest endpoint pair ('cs55', 'cs53', etc.).
        torsion: Torsion angle between stem segments (degrees).
        min_endpoint_distance: Minimum distance between stem endpoints.
        torsion_angle_pdf: PDF value of the torsion angle.
        min_endpoint_distance_pdf: PDF value of the min endpoint distance.
        coaxial_probability: Probability of stems being coaxial (0-1).
    """

    stem1_idx: int
    stem2_idx: int
    type: Optional[str]
    torsion: Optional[float]
    min_endpoint_distance: Optional[float]
    torsion_angle_pdf: Optional[float]
    min_endpoint_distance_pdf: Optional[float]
    coaxial_probability: Optional[float]
|
1070
|
+
|
1071
|
+
|
1060
1072
|
@dataclass(frozen=True, order=True)
|
1061
1073
|
class Structure2D:
|
1062
1074
|
baseInteractions: BaseInteractions
|
@@ -1067,3 +1079,4 @@ class Structure2D:
|
|
1067
1079
|
singleStrands: List[SingleStrand]
|
1068
1080
|
hairpins: List[Hairpin]
|
1069
1081
|
loops: List[Loop]
|
1082
|
+
interStemParameters: List[InterStemParameters]
|
rnapolis/tertiary.py
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
import itertools
|
1
2
|
import logging
|
2
3
|
import math
|
3
4
|
from collections import defaultdict
|
@@ -7,18 +8,22 @@ from typing import Dict, List, Optional, Set, Tuple, Union
|
|
7
8
|
|
8
9
|
import numpy
|
9
10
|
import numpy.typing
|
11
|
+
from scipy.stats import vonmises
|
10
12
|
|
11
13
|
from rnapolis.common import (
|
12
14
|
BasePair,
|
13
15
|
BpSeq,
|
14
16
|
Entry,
|
15
17
|
GlycosidicBond,
|
18
|
+
InterStemParameters,
|
16
19
|
LeontisWesthof,
|
17
20
|
Residue,
|
18
21
|
ResidueAuth,
|
19
22
|
ResidueLabel,
|
20
23
|
Saenger,
|
21
24
|
Stacking,
|
25
|
+
Stem,
|
26
|
+
Strand,
|
22
27
|
)
|
23
28
|
|
24
29
|
BASE_ATOMS = {
|
@@ -579,6 +584,57 @@ class Mapping2D3D:
|
|
579
584
|
if base_pair.is_canonical and base_pair.nt1 < base_pair.nt2
|
580
585
|
]
|
581
586
|
|
587
|
+
while True:
|
588
|
+
matches = defaultdict(set)
|
589
|
+
|
590
|
+
for base_pair in canonical:
|
591
|
+
matches[base_pair.nt1_3d].add(base_pair)
|
592
|
+
matches[base_pair.nt2_3d].add(base_pair)
|
593
|
+
|
594
|
+
for pairs in matches.values():
|
595
|
+
if len(pairs) > 1:
|
596
|
+
pairs = sorted(pairs, key=pair_scoring_function)
|
597
|
+
canonical.remove(pairs[-1])
|
598
|
+
break
|
599
|
+
else:
|
600
|
+
break
|
601
|
+
|
602
|
+
return self._generated_bpseq_data[0]
|
603
|
+
|
604
|
+
@cached_property
def bpseq_index_to_residue_map(self) -> Dict[int, "Residue3D"]:
    """Return the dictionary that maps a BpSeq entry index to its Residue3D.

    This is the second element of ``_generated_bpseq_data``.
    """
    _bpseq, index_to_residue = self._generated_bpseq_data
    return index_to_residue
|
608
|
+
|
609
|
+
@cached_property
|
610
|
+
def _generated_bpseq_data(self) -> Tuple[BpSeq, Dict[int, Residue3D]]:
|
611
|
+
"""Helper property to compute BpSeq and index map simultaneously."""
|
612
|
+
|
613
|
+
def pair_scoring_function(pair: "BasePair3D") -> tuple:
    """Return the sort key ``(rank, nt1, nt2)`` of a base pair.

    ``rank`` is 0 for the preferred pairs and 1 otherwise:

    * when the Saenger class is known, rank 0 means class XIX or XX;
    * otherwise rank 0 means the upper-cased, sorted one-letter names
      form "AU", "AT" or "CG".

    ``nt1`` and ``nt2`` are appended to the key as tie-breakers.
    """
    if pair.saenger is not None:
        rank = 0 if pair.saenger in (Saenger.XIX, Saenger.XX) else 1
        return rank, pair.nt1, pair.nt2

    # No Saenger class available: fall back to the residue letters.
    sequence = "".join(
        sorted(
            [
                pair.nt1_3d.one_letter_name.upper(),
                pair.nt2_3d.one_letter_name.upper(),
            ]
        )
    )
    rank = 0 if sequence in ("AU", "AT", "CG") else 1
    return rank, pair.nt1, pair.nt2
|
631
|
+
|
632
|
+
canonical = [
|
633
|
+
base_pair
|
634
|
+
for base_pair in self.base_pairs
|
635
|
+
if base_pair.is_canonical and base_pair.nt1 < base_pair.nt2
|
636
|
+
]
|
637
|
+
|
582
638
|
while True:
|
583
639
|
matches = defaultdict(set)
|
584
640
|
|
@@ -596,10 +652,12 @@ class Mapping2D3D:
|
|
596
652
|
|
597
653
|
return self.__generate_bpseq(canonical)
|
598
654
|
|
599
|
-
def __generate_bpseq(self, base_pairs):
|
655
|
+
def __generate_bpseq(self, base_pairs) -> Tuple[BpSeq, Dict[int, Residue3D]]:
|
656
|
+
"""Generates BpSeq entries and a map from index to Residue3D."""
|
600
657
|
nucleotides = list(filter(lambda r: r.is_nucleotide, self.structure3d.residues))
|
601
658
|
result: Dict[int, List] = {}
|
602
659
|
residue_map: Dict[Residue3D, int] = {}
|
660
|
+
index_to_residue_map: Dict[int, Residue3D] = {}
|
603
661
|
i = 1
|
604
662
|
|
605
663
|
for j, residue in enumerate(nucleotides):
|
@@ -616,6 +674,7 @@ class Mapping2D3D:
|
|
616
674
|
|
617
675
|
result[i] = [i, residue.one_letter_name, 0]
|
618
676
|
residue_map[residue] = i
|
677
|
+
index_to_residue_map[i] = residue
|
619
678
|
i += 1
|
620
679
|
|
621
680
|
for base_pair in base_pairs:
|
@@ -631,7 +690,21 @@ class Mapping2D3D:
|
|
631
690
|
Entry(index_, sequence, pair)
|
632
691
|
for index_, sequence, pair in result.values()
|
633
692
|
]
|
634
|
-
)
|
693
|
+
), index_to_residue_map
|
694
|
+
|
695
|
+
def find_residue_for_entry(self, entry: "Entry") -> Optional["Residue3D"]:
    """Look up the Residue3D registered under the index of a BpSeq entry.

    Returns ``None`` when the index is not present in
    ``bpseq_index_to_residue_map``.
    """
    index_to_residue = self.bpseq_index_to_residue_map
    return index_to_residue.get(entry.index_)
|
698
|
+
|
699
|
+
def get_residues_for_strand(self, strand: "Strand") -> List["Residue3D"]:
    """Collect the Residue3D objects covered by a strand, in index order.

    The strand bounds ``first`` and ``last`` are 1-based and inclusive.
    Indices for which the map yields no (truthy) residue are left out.
    """
    looked_up = (
        self.bpseq_index_to_residue_map.get(position)
        for position in range(strand.first, strand.last + 1)
    )
    return [residue for residue in looked_up if residue]
|
635
708
|
|
636
709
|
@cached_property
|
637
710
|
def dot_bracket(self) -> str:
|
@@ -647,6 +720,196 @@ class Mapping2D3D:
|
|
647
720
|
i += len(sequence)
|
648
721
|
return "\n".join(result)
|
649
722
|
|
723
|
+
def _calculate_pair_centroid(
    self, residue1: Residue3D, residue2: Residue3D
) -> Optional[numpy.typing.NDArray[numpy.floating]]:
    """Calculates the centroid of the base atoms of a pair of residues.

    The centroid is the arithmetic mean of the coordinates of all atoms
    whose names are listed in ``Residue3D.nucleobase_heavy_atoms`` for the
    respective residue's upper-cased one-letter name.

    A residue without such a definition is reported with a warning and
    contributes no atoms, so the result may be based on one residue only.

    Args:
        residue1: First residue of the pair.
        residue2: Second residue of the pair.

    Returns:
        The mean coordinate vector, or ``None`` (after a warning) when no
        base atom was found in either residue.
    """
    base_atoms = []
    for residue in (residue1, residue2):
        base_atom_names = Residue3D.nucleobase_heavy_atoms.get(
            residue.one_letter_name.upper(), set()
        )
        if not base_atom_names:
            logging.warning(
                "Could not find base atom definition for residue %s",
                residue.full_name,
            )
            continue
        base_atoms.extend(
            atom for atom in residue.atoms if atom.name in base_atom_names
        )

    if not base_atoms:
        logging.warning(
            "No base atoms found for pair %s - %s",
            residue1.full_name,
            residue2.full_name,
        )
        return None

    return numpy.mean([atom.coordinates for atom in base_atoms], axis=0)
|
749
|
+
|
750
|
+
def get_stem_coordinates(
    self, stem: "Stem"
) -> List[numpy.typing.NDArray[numpy.floating]]:
    """
    Compute one centroid per base pair of a stem.

    The pairs are walked from the first index of the 5' strand upwards and,
    in lockstep, from the last index of the 3' strand downwards; the number
    of pairs is taken from the length of the 5' strand.

    Args:
        stem: The Stem object.

    Returns:
        The centroids that could be calculated, in walking order. Pairs
        whose residues are missing from the index map (a warning is
        logged) or whose centroid is ``None`` are left out, so the list
        may be shorter than the stem or empty.
    """
    pair_count = stem.strand5p.last - stem.strand5p.first + 1
    indices_5p = range(stem.strand5p.first, stem.strand5p.last + 1)
    indices_3p = range(stem.strand3p.last, stem.strand3p.last - pair_count, -1)

    centroids = []
    for idx5p, idx3p in zip(indices_5p, indices_3p):
        try:
            centroid = self._calculate_pair_centroid(
                self.bpseq_index_to_residue_map[idx5p],
                self.bpseq_index_to_residue_map[idx3p],
            )
        except KeyError:
            logging.warning(
                f"Could not find residues for pair {idx5p}-{idx3p} in stem {stem}"
            )
            continue  # Keep going with the remaining pairs
        if centroid is not None:
            centroids.append(centroid)

    return centroids
|
783
|
+
|
784
|
+
def calculate_inter_stem_parameters(
    self, stem1: Stem, stem2: Stem, kappa: float = 10.0
) -> Optional[Dict[str, Union[str, float]]]:
    """
    Describe the relative geometry of two stems at their closest ends.

    Each stem is reduced to its list of base-pair centroids. The closest of
    the four endpoint combinations is selected, a torsion angle is measured
    across that junction, and the angle and the distance are each scored.

    Args:
        stem1: The first Stem object.
        stem2: The second Stem object.
        kappa: Concentration parameter of the von Mises distribution used
            to score the torsion angle (default: 10.0).

    Returns:
        None when either stem yields fewer than 2 centroids. Otherwise a
        dictionary with:
        - 'type': closest endpoint pair, one of 'cs55', 'cs53', 'cs35',
          'cs33'. The first digit refers to stem1 and the second to stem2;
          '5' is the first centroid of a stem and '3' is the last one.
        - 'torsion_angle': torsion angle across the junction, in degrees.
        - 'min_endpoint_distance': distance between the two closest
          endpoints.
        - 'torsion_angle_pdf': von Mises density of the torsion angle,
          centred on the expected angle for the endpoint type.
        - 'min_endpoint_distance_pdf': value of distance_pdf for the
          minimum endpoint distance.
        - 'coaxial_probability': product of the two scores divided by the
          von Mises density at its centre, clamped to the range 0-1.
    """
    centroids1 = self.get_stem_coordinates(stem1)
    centroids2 = self.get_stem_coordinates(stem2)

    # A torsion needs an endpoint and its neighbour on both stems.
    if len(centroids1) < 2 or len(centroids2) < 2:
        logging.warning(
            f"Cannot calculate inter-stem parameters for stems {stem1} and {stem2}: "
            f"Insufficient base pairs ({len(centroids1)} and {len(centroids2)} respectively)."
        )
        return None

    a_rna_twist = 32.7

    # For each stem end: (index of the endpoint, index of its neighbour).
    end_indices = {"5": (0, 1), "3": (-1, -2)}

    # key -> (end of stem1, end of stem2, expected torsion angle in degrees).
    # Insertion order matters: it decides which key wins on equal distances.
    layouts = {
        "cs55": ("5", "5", 180.0 - a_rna_twist),
        "cs53": ("5", "3", 0.0 - a_rna_twist),
        "cs35": ("3", "5", 0.0 + a_rna_twist),
        "cs33": ("3", "3", 180.0 + a_rna_twist),
    }

    endpoint_distances = {
        key: numpy.linalg.norm(
            centroids1[end_indices[end1][0]] - centroids2[end_indices[end2][0]]
        )
        for key, (end1, end2, _) in layouts.items()
    }

    closest_pair_key = min(endpoint_distances, key=endpoint_distances.get)
    min_endpoint_distance = endpoint_distances[closest_pair_key]

    layout = layouts.get(closest_pair_key)
    if layout is None:
        # Defensive only: the key always comes from the table above.
        logging.error(
            f"Unexpected closest pair key: {closest_pair_key}. Cannot calculate parameters."
        )
        return None

    end1, end2, mu_degrees = layout
    s1_end, s1_adjacent = end_indices[end1]
    s2_end, s2_adjacent = end_indices[end2]

    # Torsion runs neighbour -> endpoint on stem1, then endpoint -> neighbour
    # on stem2, so the two middle points are the closest endpoints.
    torsion_radians = calculate_torsion_angle_coords(
        centroids1[s1_adjacent],
        centroids1[s1_end],
        centroids2[s2_end],
        centroids2[s2_adjacent],
    )

    mu_radians = math.radians(mu_degrees)
    vm_dist = vonmises(kappa=kappa, loc=mu_radians)
    torsion_probability = vm_dist.pdf(torsion_radians)

    distance_probability = distance_pdf(min_endpoint_distance)

    # The torsion score peaks at mu and the distance score is capped at 1.0,
    # so the density at mu is the largest value the product can take.
    normalization_factor = vm_dist.pdf(mu_radians) * 1.0

    if normalization_factor > 1e-9:  # Avoid division by zero
        ratio = torsion_probability * distance_probability / normalization_factor
        coaxial_probability = max(0.0, min(1.0, ratio))
    else:
        coaxial_probability = 0.0

    return {
        "type": closest_pair_key,
        "torsion_angle": math.degrees(torsion_radians),
        "min_endpoint_distance": min_endpoint_distance,
        "torsion_angle_pdf": torsion_probability,
        "min_endpoint_distance_pdf": distance_probability,
        "coaxial_probability": coaxial_probability,
    }
|
912
|
+
|
650
913
|
def __generate_dot_bracket_per_strand(self, dbn_structure: str) -> List[str]:
|
651
914
|
dbn = dbn_structure
|
652
915
|
i = 0
|
@@ -698,7 +961,7 @@ class Mapping2D3D:
|
|
698
961
|
|
699
962
|
for row in [row1, row2]:
|
700
963
|
if row:
|
701
|
-
bpseq = self.__generate_bpseq(row)
|
964
|
+
bpseq, _ = self.__generate_bpseq(row) # Unpack the tuple
|
702
965
|
dbns = self.__generate_dot_bracket_per_strand(
|
703
966
|
bpseq.dot_bracket.structure
|
704
967
|
)
|
@@ -709,11 +972,121 @@ class Mapping2D3D:
|
|
709
972
|
return "\n".join(["\n".join(r) for r in result])
|
710
973
|
|
711
974
|
|
975
|
+
def distance_pdf(
    x: float, lower_bound: float = 3.0, upper_bound: float = 7.0, steepness: float = 5.0
) -> float:
    """
    Score a distance with a smooth plateau function.

    The score is the product of a rising sigmoid centred on lower_bound and
    a falling sigmoid centred on upper_bound. It is close to 1.0 between
    the two bounds, equals roughly 0.5 at each bound, and drops off rapidly
    outside the range. Despite the name, the function is not normalised to
    integrate to 1; it is a score whose maximum is 1.0.

    Args:
        x: The distance value.
        lower_bound: The start of the high-score plateau (default: 3.0).
        upper_bound: The end of the high-score plateau (default: 7.0).
        steepness: Controls how quickly the score drops outside the plateau
            (default: 5.0). Higher values mean steeper drops.

    Returns:
        The score, between 0.0 and 1.0. If the computation yields NaN (for
        example because x is NaN), 0.0 is returned.
    """
    # math.exp overflows slightly above 709, so cap the exponents below that.
    max_exponent = 700.0

    # Rising edge: the exponent is large when x is far below lower_bound.
    rising_exponent = min(-steepness * (x - lower_bound), max_exponent)
    rising = 1.0 / (1.0 + math.exp(rising_exponent))

    # Falling edge: the exponent is large when x is far above upper_bound.
    falling_exponent = min(steepness * (x - upper_bound), max_exponent)
    falling = 1.0 / (1.0 + math.exp(falling_exponent))

    probability = rising * falling

    # Comparisons with NaN are always false, so the clamp below would turn
    # NaN into 1.0 and report an undefined distance as a perfect match.
    if math.isnan(probability):
        return 0.0

    # Clamp to handle floating point inaccuracies near 0 and 1.
    return max(0.0, min(1.0, probability))
|
1014
|
+
|
1015
|
+
|
1016
|
+
def calculate_all_inter_stem_parameters(
    mapping: Mapping2D3D,
) -> List[InterStemParameters]:
    """
    Compute InterStemParameters for every usable pair of stems in the mapping.

    Stems are taken from the first entry of ``mapping.bpseq.elements`` and
    every unordered pair is visited once. A pair is skipped when either
    stem spans fewer than 2 positions on its 5' strand, or when
    ``mapping.calculate_inter_stem_parameters`` returns None for it.

    Args:
        mapping: The Mapping2D3D object containing structure, 2D info, and mapping.

    Returns:
        One InterStemParameters per successfully evaluated pair. The
        ``stem1_idx`` and ``stem2_idx`` fields are the positions of the two
        stems in the stem list, with ``stem1_idx < stem2_idx``.
    """

    def pair_count(stem) -> int:
        # Number of positions covered by the 5' strand of the stem.
        return stem.strand5p.last - stem.strand5p.first + 1

    stems = mapping.bpseq.elements[0]
    results = []

    for (i, stem1), (j, stem2) in itertools.combinations(enumerate(stems), 2):
        # The geometry needs at least 2 base pairs on each side.
        if pair_count(stem1) <= 1 or pair_count(stem2) <= 1:
            continue

        params = mapping.calculate_inter_stem_parameters(stem1, stem2)
        if params is None:
            continue

        results.append(
            InterStemParameters(
                stem1_idx=i,
                stem2_idx=j,
                type=params["type"],
                torsion=params["torsion_angle"],
                min_endpoint_distance=params["min_endpoint_distance"],
                torsion_angle_pdf=params["torsion_angle_pdf"],
                min_endpoint_distance_pdf=params["min_endpoint_distance_pdf"],
                coaxial_probability=params["coaxial_probability"],
            )
        )

    return results
|
1052
|
+
|
1053
|
+
|
712
1054
|
def torsion_angle(a1: Atom, a2: Atom, a3: Atom, a4: Atom) -> float:
    """
    Calculate the torsion angle defined by four atoms.

    Thin wrapper: the ``coordinates`` of the atoms are passed, in the given
    order, to calculate_torsion_angle_coords and its result is returned
    unchanged.
    """
    points = [atom.coordinates for atom in (a1, a2, a3, a4)]
    return calculate_torsion_angle_coords(*points)
|
1059
|
+
|
1060
|
+
|
1061
|
+
def calculate_torsion_angle_coords(
    p1: numpy.typing.NDArray[numpy.floating],
    p2: numpy.typing.NDArray[numpy.floating],
    p3: numpy.typing.NDArray[numpy.floating],
    p4: numpy.typing.NDArray[numpy.floating],
) -> float:
    """
    Calculate the torsion angle defined by four points.

    The angle is measured between the plane through (p1, p2, p3) and the
    plane through (p2, p3, p4), using atan2 on the bond vectors and their
    cross products.

    Args:
        p1, p2, p3, p4: Coordinates of the four consecutive points.

    Returns:
        The signed torsion angle in radians. 0.0 is returned when the angle
        is undefined, i.e. when either plane normal is (near) zero or the
        result is NaN.
    """

    def unit(vector):
        # Leave very short vectors untouched to avoid dividing by ~0.
        length = numpy.linalg.norm(vector)
        return vector / length if length > 1e-6 else vector

    bond1 = unit(p2 - p1)
    bond2 = unit(p3 - p2)
    bond3 = unit(p4 - p3)

    normal1 = numpy.cross(bond1, bond2)
    normal2 = numpy.cross(bond2, bond3)

    # Collinear or coincident points give no plane, hence no torsion.
    if numpy.linalg.norm(normal1) < 1e-6 or numpy.linalg.norm(normal2) < 1e-6:
        return 0.0

    scaled_bond1 = bond1 * numpy.linalg.norm(bond2)

    # Keep the cosine-like term inside [-1, 1] against rounding errors.
    cos_term = numpy.clip(numpy.dot(normal1, normal2), -1.0, 1.0)
    sin_term = numpy.dot(normal2, scaled_bond1)

    angle = math.atan2(sin_term, cos_term)
    return 0.0 if math.isnan(angle) else angle
|
@@ -1,8 +1,8 @@
|
|
1
|
-
rnapolis/adapter.py,sha256=
|
1
|
+
rnapolis/adapter.py,sha256=hgOPzbvLhdPxuqpV2fLqizHQSpAtglIXrySf_SzsxCc,15379
|
2
2
|
rnapolis/aligner.py,sha256=o7rQyjAZ3n4VXcnSPY3HVB8nLNRkVbl552O3NVh0mfg,3429
|
3
|
-
rnapolis/annotator.py,sha256=
|
3
|
+
rnapolis/annotator.py,sha256=zzjyZ13JYd32E_SUcTCyfV4XYpWHtgzuBsacNDHCMI8,31835
|
4
4
|
rnapolis/clashfinder.py,sha256=AC9_tIx7QIk57sELq_aKfU1u3UMOXbgcccQeGHhMR6c,8517
|
5
|
-
rnapolis/common.py,sha256=
|
5
|
+
rnapolis/common.py,sha256=p70gydcG8bcA8_NXPef40efvz9Jrt4TAKPfhNpg4iKg,31896
|
6
6
|
rnapolis/component_A.csv,sha256=koirS-AwUZwoYGItT8yn3wS6Idvmh2FANfTQcOS_xh8,2897
|
7
7
|
rnapolis/component_C.csv,sha256=NtvsAu_YrUgTjzZm3j4poW4IZ99x3dPARB09XVIiMCc,2803
|
8
8
|
rnapolis/component_G.csv,sha256=Z5wl8OnHRyx4XhTyBiWgRZiEvmZXhoxtVRH8bn6Vxf0,2898
|
@@ -15,14 +15,14 @@ rnapolis/parser.py,sha256=3g4mtFvpiEENFcSBBtx_E_x1vJPF9BujWnts0kb9XjE,16340
|
|
15
15
|
rnapolis/parser_v2.py,sha256=qG6CO3or7zmuJu368g9Nzokiqdeip4yjD14F163uH6w,40618
|
16
16
|
rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
|
17
17
|
rnapolis/splitter.py,sha256=x-Zn21mkiMgvYPptUFD9BbdNIvoaM6b8GzGf6uYXEwE,4052
|
18
|
-
rnapolis/tertiary.py,sha256=
|
18
|
+
rnapolis/tertiary.py,sha256=mTVpz8rz5Q9s5_QVSMdEMXSooCce0rAD5RQMh00bHm4,39200
|
19
19
|
rnapolis/tertiary_v2.py,sha256=I1uyHWIUePNGO5m-suoL4ibtz02qAJUMvYm0BUKUygY,22480
|
20
20
|
rnapolis/transformer.py,sha256=aC0nBmHHJf5TyLvBIV57Jj3tlwpvHbPo347opfAOlQA,3844
|
21
21
|
rnapolis/unifier.py,sha256=2ge7IB9FdRgzSAiVD39U_ciwtdDJ2fGzf8mUIudbrqY,5820
|
22
22
|
rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
|
23
|
-
rnapolis-0.8.
|
24
|
-
rnapolis-0.8.
|
25
|
-
rnapolis-0.8.
|
26
|
-
rnapolis-0.8.
|
27
|
-
rnapolis-0.8.
|
28
|
-
rnapolis-0.8.
|
23
|
+
rnapolis-0.8.2.dist-info/licenses/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
|
24
|
+
rnapolis-0.8.2.dist-info/METADATA,sha256=1_ITZKV6JF324k9MawJIQCYH1VI8fQffIp9IRquKwnE,54537
|
25
|
+
rnapolis-0.8.2.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
26
|
+
rnapolis-0.8.2.dist-info/entry_points.txt,sha256=H00KoN54wU3dFOofAu3H_3PADmZOBTB1hXf5TUU2uzo,438
|
27
|
+
rnapolis-0.8.2.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
|
28
|
+
rnapolis-0.8.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|