RNApolis 0.8.0__py3-none-any.whl → 0.8.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rnapolis/adapter.py +27 -117
- rnapolis/annotator.py +256 -20
- rnapolis/common.py +13 -0
- rnapolis/parser_v2.py +774 -240
- rnapolis/splitter.py +18 -5
- rnapolis/tertiary.py +383 -10
- rnapolis/unifier.py +20 -5
- {rnapolis-0.8.0.dist-info → rnapolis-0.8.2.dist-info}/METADATA +1 -1
- {rnapolis-0.8.0.dist-info → rnapolis-0.8.2.dist-info}/RECORD +13 -13
- {rnapolis-0.8.0.dist-info → rnapolis-0.8.2.dist-info}/WHEEL +0 -0
- {rnapolis-0.8.0.dist-info → rnapolis-0.8.2.dist-info}/entry_points.txt +0 -0
- {rnapolis-0.8.0.dist-info → rnapolis-0.8.2.dist-info}/licenses/LICENSE +0 -0
- {rnapolis-0.8.0.dist-info → rnapolis-0.8.2.dist-info}/top_level.txt +0 -0
rnapolis/adapter.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
#! /usr/bin/env python
|
2
2
|
import argparse
|
3
|
-
import csv
|
4
3
|
import logging
|
5
4
|
import os
|
6
5
|
from enum import Enum
|
@@ -8,6 +7,10 @@ from typing import Dict, List, Optional, Tuple
|
|
8
7
|
|
9
8
|
import orjson
|
10
9
|
|
10
|
+
from rnapolis.annotator import (
|
11
|
+
add_common_output_arguments,
|
12
|
+
handle_output_arguments,
|
13
|
+
)
|
11
14
|
from rnapolis.common import (
|
12
15
|
BR,
|
13
16
|
BaseInteractions,
|
@@ -15,7 +18,6 @@ from rnapolis.common import (
|
|
15
18
|
BasePhosphate,
|
16
19
|
BaseRibose,
|
17
20
|
BPh,
|
18
|
-
BpSeq,
|
19
21
|
LeontisWesthof,
|
20
22
|
OtherInteraction,
|
21
23
|
Residue,
|
@@ -25,7 +27,11 @@ from rnapolis.common import (
|
|
25
27
|
Structure2D,
|
26
28
|
)
|
27
29
|
from rnapolis.parser import read_3d_structure
|
28
|
-
from rnapolis.tertiary import
|
30
|
+
from rnapolis.tertiary import (
|
31
|
+
Mapping2D3D,
|
32
|
+
Structure3D,
|
33
|
+
calculate_all_inter_stem_parameters, # Import the new helper function
|
34
|
+
)
|
29
35
|
from rnapolis.util import handle_input_file
|
30
36
|
|
31
37
|
|
@@ -317,7 +323,7 @@ def process_external_tool_output(
|
|
317
323
|
model: Optional[int] = None,
|
318
324
|
find_gaps: bool = False,
|
319
325
|
all_dot_brackets: bool = False,
|
320
|
-
) -> Tuple[Structure2D, List[str]]:
|
326
|
+
) -> Tuple[Structure2D, List[str], Mapping2D3D]: # Added Mapping2D3D to return tuple
|
321
327
|
"""
|
322
328
|
Process external tool output and create a secondary structure representation.
|
323
329
|
|
@@ -333,7 +339,8 @@ def process_external_tool_output(
|
|
333
339
|
all_dot_brackets: Whether to return all possible dot-bracket notations
|
334
340
|
|
335
341
|
Returns:
|
336
|
-
A tuple containing the Structure2D object
|
342
|
+
A tuple containing the Structure2D object, a list of dot-bracket notations,
|
343
|
+
and the Mapping2D3D object.
|
337
344
|
"""
|
338
345
|
# Parse external tool output
|
339
346
|
base_interactions = parse_external_output(external_file_path, tool, structure3d)
|
@@ -350,7 +357,7 @@ def extract_secondary_structure_from_external(
|
|
350
357
|
model: Optional[int] = None,
|
351
358
|
find_gaps: bool = False,
|
352
359
|
all_dot_brackets: bool = False,
|
353
|
-
) -> Tuple[Structure2D, List[str]]:
|
360
|
+
) -> Tuple[Structure2D, List[str], Mapping2D3D]: # Added Mapping2D3D to return tuple
|
354
361
|
"""
|
355
362
|
Create a secondary structure representation using interactions from an external tool.
|
356
363
|
|
@@ -362,7 +369,8 @@ def extract_secondary_structure_from_external(
|
|
362
369
|
all_dot_brackets: Whether to return all possible dot-bracket notations
|
363
370
|
|
364
371
|
Returns:
|
365
|
-
A tuple containing the Structure2D object
|
372
|
+
A tuple containing the Structure2D object, a list of dot-bracket notations,
|
373
|
+
and the Mapping2D3D object.
|
366
374
|
"""
|
367
375
|
mapping = Mapping2D3D(
|
368
376
|
tertiary_structure,
|
@@ -371,6 +379,10 @@ def extract_secondary_structure_from_external(
|
|
371
379
|
find_gaps,
|
372
380
|
)
|
373
381
|
stems, single_strands, hairpins, loops = mapping.bpseq.elements
|
382
|
+
|
383
|
+
# Calculate inter-stem parameters using the helper function
|
384
|
+
inter_stem_params = calculate_all_inter_stem_parameters(mapping)
|
385
|
+
|
374
386
|
structure2d = Structure2D(
|
375
387
|
base_interactions,
|
376
388
|
str(mapping.bpseq),
|
@@ -380,81 +392,15 @@ def extract_secondary_structure_from_external(
|
|
380
392
|
single_strands,
|
381
393
|
hairpins,
|
382
394
|
loops,
|
395
|
+
inter_stem_params, # Added inter-stem parameters
|
383
396
|
)
|
384
397
|
if all_dot_brackets:
|
385
|
-
return structure2d, mapping.all_dot_brackets
|
398
|
+
return structure2d, mapping.all_dot_brackets, mapping # Return mapping
|
386
399
|
else:
|
387
|
-
return structure2d, [structure2d.dotBracket]
|
388
|
-
|
389
|
-
|
390
|
-
def write_json(path: str, structure2d: BaseInteractions):
|
391
|
-
with open(path, "wb") as f:
|
392
|
-
f.write(orjson.dumps(structure2d))
|
393
|
-
|
394
|
-
|
395
|
-
def write_csv(path: str, structure2d: Structure2D):
|
396
|
-
with open(path, "w") as f:
|
397
|
-
writer = csv.writer(f)
|
398
|
-
writer.writerow(["nt1", "nt2", "type", "classification-1", "classification-2"])
|
399
|
-
for base_pair in structure2d.baseInteractions.basePairs:
|
400
|
-
writer.writerow(
|
401
|
-
[
|
402
|
-
base_pair.nt1.full_name,
|
403
|
-
base_pair.nt2.full_name,
|
404
|
-
"base pair",
|
405
|
-
base_pair.lw.value,
|
406
|
-
(
|
407
|
-
base_pair.saenger.value or ""
|
408
|
-
if base_pair.saenger is not None
|
409
|
-
else ""
|
410
|
-
),
|
411
|
-
]
|
412
|
-
)
|
413
|
-
for stacking in structure2d.baseInteractions.stackings:
|
414
|
-
writer.writerow(
|
415
|
-
[
|
416
|
-
stacking.nt1.full_name,
|
417
|
-
stacking.nt2.full_name,
|
418
|
-
"stacking",
|
419
|
-
stacking.topology.value if stacking.topology is not None else "",
|
420
|
-
"",
|
421
|
-
]
|
422
|
-
)
|
423
|
-
for base_phosphate in structure2d.baseInteractions.basePhosphateInteractions:
|
424
|
-
writer.writerow(
|
425
|
-
[
|
426
|
-
base_phosphate.nt1.full_name,
|
427
|
-
base_phosphate.nt2.full_name,
|
428
|
-
"base-phosphate interaction",
|
429
|
-
base_phosphate.bph.value if base_phosphate.bph is not None else "",
|
430
|
-
"",
|
431
|
-
]
|
432
|
-
)
|
433
|
-
for base_ribose in structure2d.baseInteractions.baseRiboseInteractions:
|
434
|
-
writer.writerow(
|
435
|
-
[
|
436
|
-
base_ribose.nt1.full_name,
|
437
|
-
base_ribose.nt2.full_name,
|
438
|
-
"base-ribose interaction",
|
439
|
-
base_ribose.br.value if base_ribose.br is not None else "",
|
440
|
-
"",
|
441
|
-
]
|
442
|
-
)
|
443
|
-
for other in structure2d.baseInteractions.otherInteractions:
|
444
|
-
writer.writerow(
|
445
|
-
[
|
446
|
-
other.nt1.full_name,
|
447
|
-
other.nt2.full_name,
|
448
|
-
"other interaction",
|
449
|
-
"",
|
450
|
-
"",
|
451
|
-
]
|
452
|
-
)
|
400
|
+
return structure2d, [structure2d.dotBracket], mapping # Return mapping
|
453
401
|
|
454
402
|
|
455
|
-
|
456
|
-
with open(path, "w") as f:
|
457
|
-
f.write(str(bpseq))
|
403
|
+
# Removed duplicate functions - now imported from annotator
|
458
404
|
|
459
405
|
|
460
406
|
def main():
|
@@ -471,39 +417,21 @@ def main():
|
|
471
417
|
required=True,
|
472
418
|
help="External tool that generated the output file",
|
473
419
|
)
|
474
|
-
parser.add_argument(
|
475
|
-
"-a",
|
476
|
-
"--all-dot-brackets",
|
477
|
-
action="store_true",
|
478
|
-
help="(optional) print all dot-brackets, not only optimal one (exclusive with -e/--extended)",
|
479
|
-
)
|
480
|
-
parser.add_argument("-b", "--bpseq", help="(optional) path to output BPSEQ file")
|
481
|
-
parser.add_argument("-c", "--csv", help="(optional) path to output CSV file")
|
482
|
-
parser.add_argument(
|
483
|
-
"-j",
|
484
|
-
"--json",
|
485
|
-
help="(optional) path to output JSON file",
|
486
|
-
)
|
487
|
-
parser.add_argument(
|
488
|
-
"-e",
|
489
|
-
"--extended",
|
490
|
-
action="store_true",
|
491
|
-
help="(optional) if set, the program will print extended secondary structure to the standard output",
|
492
|
-
)
|
493
420
|
parser.add_argument(
|
494
421
|
"-f",
|
495
422
|
"--find-gaps",
|
496
423
|
action="store_true",
|
497
424
|
help="(optional) if set, the program will detect gaps and break the PDB chain into two or more strands",
|
498
425
|
)
|
499
|
-
parser
|
426
|
+
add_common_output_arguments(parser)
|
427
|
+
# The --inter-stem-csv and --stems-csv arguments are now added by add_common_output_arguments
|
500
428
|
args = parser.parse_args()
|
501
429
|
|
502
430
|
file = handle_input_file(args.input)
|
503
431
|
structure3d = read_3d_structure(file, None)
|
504
432
|
|
505
433
|
# Process external tool output and get secondary structure
|
506
|
-
structure2d, dot_brackets = process_external_tool_output(
|
434
|
+
structure2d, dot_brackets, mapping = process_external_tool_output(
|
507
435
|
structure3d,
|
508
436
|
args.external,
|
509
437
|
ExternalTool(args.tool),
|
@@ -512,25 +440,7 @@ def main():
|
|
512
440
|
args.all_dot_brackets,
|
513
441
|
)
|
514
442
|
|
515
|
-
|
516
|
-
write_csv(args.csv, structure2d)
|
517
|
-
|
518
|
-
if args.json:
|
519
|
-
write_json(args.json, structure2d)
|
520
|
-
|
521
|
-
if args.bpseq:
|
522
|
-
write_bpseq(args.bpseq, structure2d.bpseq)
|
523
|
-
|
524
|
-
if args.extended:
|
525
|
-
print(structure2d.extendedDotBracket)
|
526
|
-
elif args.all_dot_brackets:
|
527
|
-
for dot_bracket in dot_brackets:
|
528
|
-
print(dot_bracket)
|
529
|
-
else:
|
530
|
-
print(structure2d.dotBracket)
|
531
|
-
|
532
|
-
if args.dot:
|
533
|
-
print(BpSeq.from_string(structure2d.bpseq).graphviz)
|
443
|
+
handle_output_arguments(args, structure2d, dot_brackets, mapping, args.input)
|
534
444
|
|
535
445
|
|
536
446
|
if __name__ == "__main__":
|
rnapolis/annotator.py
CHANGED
@@ -10,6 +10,7 @@ from typing import Dict, List, Optional, Set, Tuple
|
|
10
10
|
import numpy
|
11
11
|
import numpy.typing
|
12
12
|
import orjson
|
13
|
+
import pandas as pd
|
13
14
|
from ordered_set import OrderedSet
|
14
15
|
from scipy.spatial import KDTree
|
15
16
|
|
@@ -26,6 +27,7 @@ from rnapolis.common import (
|
|
26
27
|
Saenger,
|
27
28
|
Stacking,
|
28
29
|
StackingTopology,
|
30
|
+
Stem,
|
29
31
|
Structure2D,
|
30
32
|
)
|
31
33
|
from rnapolis.parser import read_3d_structure
|
@@ -38,9 +40,10 @@ from rnapolis.tertiary import (
|
|
38
40
|
PHOSPHATE_ACCEPTORS,
|
39
41
|
RIBOSE_ACCEPTORS,
|
40
42
|
Atom,
|
41
|
-
Mapping2D3D,
|
43
|
+
Mapping2D3D, # Added import
|
42
44
|
Residue3D,
|
43
45
|
Structure3D,
|
46
|
+
calculate_all_inter_stem_parameters, # Import the new helper function
|
44
47
|
torsion_angle,
|
45
48
|
)
|
46
49
|
from rnapolis.util import handle_input_file
|
@@ -496,6 +499,10 @@ def extract_secondary_structure(
|
|
496
499
|
find_gaps,
|
497
500
|
)
|
498
501
|
stems, single_strands, hairpins, loops = mapping.bpseq.elements
|
502
|
+
|
503
|
+
# Calculate inter-stem parameters using the helper function
|
504
|
+
inter_stem_params = calculate_all_inter_stem_parameters(mapping)
|
505
|
+
|
499
506
|
structure2d = Structure2D(
|
500
507
|
base_interactions,
|
501
508
|
str(mapping.bpseq),
|
@@ -505,6 +512,7 @@ def extract_secondary_structure(
|
|
505
512
|
single_strands,
|
506
513
|
hairpins,
|
507
514
|
loops,
|
515
|
+
inter_stem_params, # Added inter-stem parameters
|
508
516
|
)
|
509
517
|
if all_dot_brackets:
|
510
518
|
return structure2d, mapping.all_dot_brackets
|
@@ -512,9 +520,102 @@ def extract_secondary_structure(
|
|
512
520
|
return structure2d, [structure2d.dotBracket]
|
513
521
|
|
514
522
|
|
515
|
-
def
|
523
|
+
def generate_pymol_script(mapping: Mapping2D3D, stems: List[Stem]) -> str:
|
524
|
+
"""Generates a PyMOL script to draw stems as cylinders."""
|
525
|
+
pymol_commands = []
|
526
|
+
radius = 0.5
|
527
|
+
r, g, b = 1.0, 0.0, 0.0 # Red color
|
528
|
+
|
529
|
+
for stem_idx, stem in enumerate(stems):
|
530
|
+
# Get residues for selection string
|
531
|
+
try:
|
532
|
+
res5p_first = mapping.bpseq_index_to_residue_map[stem.strand5p.first]
|
533
|
+
res5p_last = mapping.bpseq_index_to_residue_map[stem.strand5p.last]
|
534
|
+
res3p_first = mapping.bpseq_index_to_residue_map[stem.strand3p.first]
|
535
|
+
res3p_last = mapping.bpseq_index_to_residue_map[stem.strand3p.last]
|
536
|
+
|
537
|
+
# Prefer auth chain/number if available
|
538
|
+
chain5p = (
|
539
|
+
res5p_first.auth.chain if res5p_first.auth else res5p_first.label.chain
|
540
|
+
)
|
541
|
+
num5p_first = (
|
542
|
+
res5p_first.auth.number
|
543
|
+
if res5p_first.auth
|
544
|
+
else res5p_first.label.number
|
545
|
+
)
|
546
|
+
num5p_last = (
|
547
|
+
res5p_last.auth.number if res5p_last.auth else res5p_last.label.number
|
548
|
+
)
|
549
|
+
|
550
|
+
chain3p = (
|
551
|
+
res3p_first.auth.chain if res3p_first.auth else res3p_first.label.chain
|
552
|
+
)
|
553
|
+
num3p_first = (
|
554
|
+
res3p_first.auth.number
|
555
|
+
if res3p_first.auth
|
556
|
+
else res3p_first.label.number
|
557
|
+
)
|
558
|
+
num3p_last = (
|
559
|
+
res3p_last.auth.number if res3p_last.auth else res3p_last.label.number
|
560
|
+
)
|
561
|
+
|
562
|
+
# Format selection string: select stem0, A/1-5/ or A/10-15/
|
563
|
+
selection_str = f"{chain5p}/{num5p_first}-{num5p_last}/ or {chain3p}/{num3p_first}-{num3p_last}/"
|
564
|
+
pymol_commands.append(f"select stem{stem_idx}, {selection_str}")
|
565
|
+
|
566
|
+
except (KeyError, AttributeError) as e:
|
567
|
+
logging.warning(
|
568
|
+
f"Could not generate selection string for stem {stem_idx}: Missing residue data ({e})"
|
569
|
+
)
|
570
|
+
|
571
|
+
centroids = mapping.get_stem_coordinates(stem)
|
572
|
+
|
573
|
+
# Need at least 2 centroids to draw a segment
|
574
|
+
if len(centroids) < 2:
|
575
|
+
# Removed warning log for stems with < 2 base pairs
|
576
|
+
continue
|
577
|
+
|
578
|
+
# Create pseudoatoms for each centroid
|
579
|
+
for centroid_idx, centroid in enumerate(centroids):
|
580
|
+
x, y, z = centroid
|
581
|
+
pseudoatom_name = f"stem{stem_idx}_centroid{centroid_idx}"
|
582
|
+
pymol_commands.append(
|
583
|
+
f"pseudoatom {pseudoatom_name}, pos=[{x:.3f}, {y:.3f}, {z:.3f}]"
|
584
|
+
)
|
585
|
+
|
586
|
+
# Draw cylinders between consecutive centroids
|
587
|
+
for seg_idx in range(len(centroids) - 1):
|
588
|
+
p1 = centroids[seg_idx]
|
589
|
+
p2 = centroids[seg_idx + 1]
|
590
|
+
x1, y1, z1 = p1
|
591
|
+
x2, y2, z2 = p2
|
592
|
+
# Format: [CYLINDER, x1, y1, z1, x2, y2, z2, radius, r1, g1, b1, r2, g2, b2]
|
593
|
+
# Use 9.0 for CYLINDER code
|
594
|
+
# Use same color for both ends
|
595
|
+
cgo_object = f"[ 9.0, {x1:.3f}, {y1:.3f}, {z1:.3f}, {x2:.3f}, {y2:.3f}, {z2:.3f}, {radius}, {r}, {g}, {b}, {r}, {g}, {b} ]"
|
596
|
+
pymol_commands.append(
|
597
|
+
f'cmd.load_cgo({cgo_object}, "stem_{stem_idx}_seg_{seg_idx}")'
|
598
|
+
)
|
599
|
+
|
600
|
+
# Calculate and display dihedral angles between consecutive centroids
|
601
|
+
if len(centroids) >= 4:
|
602
|
+
for i in range(len(centroids) - 3):
|
603
|
+
pa1 = f"stem{stem_idx}_centroid{i}"
|
604
|
+
pa2 = f"stem{stem_idx}_centroid{i + 1}"
|
605
|
+
pa3 = f"stem{stem_idx}_centroid{i + 2}"
|
606
|
+
pa4 = f"stem{stem_idx}_centroid{i + 3}"
|
607
|
+
dihedral_name = f"stem{stem_idx}_dihedral{i}"
|
608
|
+
pymol_commands.append(
|
609
|
+
f"dihedral {dihedral_name}, {pa1}, {pa2}, {pa3}, {pa4}"
|
610
|
+
)
|
611
|
+
|
612
|
+
return "\n".join(pymol_commands)
|
613
|
+
|
614
|
+
|
615
|
+
def write_json(path: str, structure2d: Structure2D):
|
516
616
|
with open(path, "wb") as f:
|
517
|
-
|
617
|
+
# Add OPT_SERIALIZE_NUMPY to handle numpy types like float64
|
618
|
+
f.write(orjson.dumps(structure2d, option=orjson.OPT_SERIALIZE_NUMPY))
|
518
619
|
|
519
620
|
|
520
621
|
def write_csv(path: str, structure2d: Structure2D):
|
@@ -555,13 +656,13 @@ def write_csv(path: str, structure2d: Structure2D):
|
|
555
656
|
"",
|
556
657
|
]
|
557
658
|
)
|
558
|
-
for base_ribose in structure2d.baseInteractions.
|
659
|
+
for base_ribose in structure2d.baseInteractions.baseRiboseInteractions:
|
559
660
|
writer.writerow(
|
560
661
|
[
|
561
662
|
base_ribose.nt1.full_name,
|
562
663
|
base_ribose.nt2.full_name,
|
563
664
|
"base-ribose interaction",
|
564
|
-
base_ribose.
|
665
|
+
base_ribose.br.value if base_ribose.br is not None else "",
|
565
666
|
"",
|
566
667
|
]
|
567
668
|
)
|
@@ -582,9 +683,8 @@ def write_bpseq(path: str, bpseq: BpSeq):
|
|
582
683
|
f.write(str(bpseq))
|
583
684
|
|
584
685
|
|
585
|
-
def
|
586
|
-
|
587
|
-
parser.add_argument("input", help="Path to PDB or mmCIF file")
|
686
|
+
def add_common_output_arguments(parser: argparse.ArgumentParser):
|
687
|
+
"""Adds common output and processing arguments to the parser."""
|
588
688
|
parser.add_argument(
|
589
689
|
"-a",
|
590
690
|
"--all-dot-brackets",
|
@@ -604,22 +704,29 @@ def main():
|
|
604
704
|
action="store_true",
|
605
705
|
help="(optional) if set, the program will print extended secondary structure to the standard output",
|
606
706
|
)
|
707
|
+
parser.add_argument("-d", "--dot", help="(optional) path to output DOT file")
|
607
708
|
parser.add_argument(
|
608
|
-
"-
|
609
|
-
"--find-gaps",
|
610
|
-
action="store_true",
|
611
|
-
help="(optional) if set, the program will detect gaps and break the PDB chain into two or more strands; "
|
612
|
-
f"the gap is defined as O3'-P distance greater then {1.5 * AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT}",
|
709
|
+
"-p", "--pml", help="(optional) path to output PyMOL PML script for stems"
|
613
710
|
)
|
614
|
-
parser.add_argument(
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
711
|
+
parser.add_argument(
|
712
|
+
"--inter-stem-csv",
|
713
|
+
help="(optional) path to output CSV file for inter-stem parameters",
|
714
|
+
)
|
715
|
+
parser.add_argument(
|
716
|
+
"--stems-csv",
|
717
|
+
help="(optional) path to output CSV file for stem details",
|
621
718
|
)
|
622
719
|
|
720
|
+
|
721
|
+
def handle_output_arguments(
|
722
|
+
args: argparse.Namespace,
|
723
|
+
structure2d: Structure2D,
|
724
|
+
dot_brackets: List[str],
|
725
|
+
mapping: Mapping2D3D,
|
726
|
+
input_filename: str,
|
727
|
+
):
|
728
|
+
"""Handles writing output based on provided arguments."""
|
729
|
+
input_basename = os.path.basename(input_filename)
|
623
730
|
if args.csv:
|
624
731
|
write_csv(args.csv, structure2d)
|
625
732
|
|
@@ -640,6 +747,135 @@ def main():
|
|
640
747
|
if args.dot:
|
641
748
|
print(BpSeq.from_string(structure2d.bpseq).graphviz)
|
642
749
|
|
750
|
+
if args.pml:
|
751
|
+
pml_script = generate_pymol_script(mapping, structure2d.stems)
|
752
|
+
with open(args.pml, "w") as f:
|
753
|
+
f.write(pml_script)
|
754
|
+
|
755
|
+
if args.inter_stem_csv:
|
756
|
+
if structure2d.interStemParameters:
|
757
|
+
# Convert list of dataclasses to list of dicts
|
758
|
+
params_list = [
|
759
|
+
{
|
760
|
+
"stem1_idx": p.stem1_idx,
|
761
|
+
"stem2_idx": p.stem2_idx,
|
762
|
+
"type": p.type,
|
763
|
+
"torsion": p.torsion,
|
764
|
+
"min_endpoint_distance": p.min_endpoint_distance,
|
765
|
+
"torsion_angle_pdf": p.torsion_angle_pdf,
|
766
|
+
"min_endpoint_distance_pdf": p.min_endpoint_distance_pdf,
|
767
|
+
"coaxial_probability": p.coaxial_probability,
|
768
|
+
}
|
769
|
+
for p in structure2d.interStemParameters
|
770
|
+
]
|
771
|
+
df = pd.DataFrame(params_list)
|
772
|
+
df["input_basename"] = input_basename
|
773
|
+
# Reorder columns to put input_basename first
|
774
|
+
cols = ["input_basename"] + [
|
775
|
+
col for col in df.columns if col != "input_basename"
|
776
|
+
]
|
777
|
+
df = df[cols]
|
778
|
+
df.to_csv(args.inter_stem_csv, index=False)
|
779
|
+
else:
|
780
|
+
logging.warning(
|
781
|
+
f"No inter-stem parameters calculated for {input_basename}, CSV file '{args.inter_stem_csv}' will be empty or not created."
|
782
|
+
)
|
783
|
+
# Optionally create an empty file with headers
|
784
|
+
# pd.DataFrame(columns=['input_basename', 'stem1_idx', ...]).to_csv(args.inter_stem_csv, index=False)
|
785
|
+
|
786
|
+
if args.stems_csv:
|
787
|
+
if structure2d.stems:
|
788
|
+
stems_data = []
|
789
|
+
for i, stem in enumerate(structure2d.stems):
|
790
|
+
try:
|
791
|
+
res5p_first = mapping.bpseq_index_to_residue_map.get(
|
792
|
+
stem.strand5p.first
|
793
|
+
)
|
794
|
+
res5p_last = mapping.bpseq_index_to_residue_map.get(
|
795
|
+
stem.strand5p.last
|
796
|
+
)
|
797
|
+
res3p_first = mapping.bpseq_index_to_residue_map.get(
|
798
|
+
stem.strand3p.first
|
799
|
+
)
|
800
|
+
res3p_last = mapping.bpseq_index_to_residue_map.get(
|
801
|
+
stem.strand3p.last
|
802
|
+
)
|
803
|
+
|
804
|
+
stems_data.append(
|
805
|
+
{
|
806
|
+
"stem_idx": i,
|
807
|
+
"strand5p_first_nt_id": res5p_first.full_name
|
808
|
+
if res5p_first
|
809
|
+
else None,
|
810
|
+
"strand5p_last_nt_id": res5p_last.full_name
|
811
|
+
if res5p_last
|
812
|
+
else None,
|
813
|
+
"strand3p_first_nt_id": res3p_first.full_name
|
814
|
+
if res3p_first
|
815
|
+
else None,
|
816
|
+
"strand3p_last_nt_id": res3p_last.full_name
|
817
|
+
if res3p_last
|
818
|
+
else None,
|
819
|
+
"strand5p_sequence": stem.strand5p.sequence,
|
820
|
+
"strand3p_sequence": stem.strand3p.sequence,
|
821
|
+
}
|
822
|
+
)
|
823
|
+
except KeyError as e:
|
824
|
+
logging.warning(
|
825
|
+
f"Could not find residue for stem {i} (index {e}), skipping stem details."
|
826
|
+
)
|
827
|
+
continue
|
828
|
+
|
829
|
+
if stems_data:
|
830
|
+
df_stems = pd.DataFrame(stems_data)
|
831
|
+
df_stems["input_basename"] = input_basename
|
832
|
+
# Reorder columns
|
833
|
+
stem_cols = ["input_basename", "stem_idx"] + [
|
834
|
+
col
|
835
|
+
for col in df_stems.columns
|
836
|
+
if col not in ["input_basename", "stem_idx"]
|
837
|
+
]
|
838
|
+
df_stems = df_stems[stem_cols]
|
839
|
+
df_stems.to_csv(args.stems_csv, index=False)
|
840
|
+
else:
|
841
|
+
logging.warning(
|
842
|
+
f"No valid stem data generated for {input_basename}, CSV file '{args.stems_csv}' will be empty or not created."
|
843
|
+
)
|
844
|
+
else:
|
845
|
+
logging.warning(
|
846
|
+
f"No stems found for {input_basename}, CSV file '{args.stems_csv}' will be empty or not created."
|
847
|
+
)
|
848
|
+
|
849
|
+
|
850
|
+
def main():
|
851
|
+
parser = argparse.ArgumentParser()
|
852
|
+
parser.add_argument("input", help="Path to PDB or mmCIF file")
|
853
|
+
parser.add_argument(
|
854
|
+
"-f",
|
855
|
+
"--find-gaps",
|
856
|
+
action="store_true",
|
857
|
+
help="(optional) if set, the program will detect gaps and break the PDB chain into two or more strands; "
|
858
|
+
f"the gap is defined as O3'-P distance greater then {1.5 * AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT}",
|
859
|
+
)
|
860
|
+
add_common_output_arguments(parser)
|
861
|
+
args = parser.parse_args()
|
862
|
+
|
863
|
+
file = handle_input_file(args.input)
|
864
|
+
structure3d = read_3d_structure(file, None)
|
865
|
+
structure2d, dot_brackets = extract_secondary_structure(
|
866
|
+
structure3d, None, args.find_gaps, args.all_dot_brackets
|
867
|
+
)
|
868
|
+
|
869
|
+
# Need the mapping object for PML generation
|
870
|
+
mapping = Mapping2D3D(
|
871
|
+
structure3d,
|
872
|
+
structure2d.baseInteractions.basePairs,
|
873
|
+
structure2d.baseInteractions.stackings,
|
874
|
+
args.find_gaps,
|
875
|
+
)
|
876
|
+
|
877
|
+
handle_output_arguments(args, structure2d, dot_brackets, mapping, args.input)
|
878
|
+
|
643
879
|
|
644
880
|
if __name__ == "__main__":
|
645
881
|
main()
|
rnapolis/common.py
CHANGED
@@ -1057,6 +1057,18 @@ class BaseInteractions:
|
|
1057
1057
|
otherInteractions: List[OtherInteraction]
|
1058
1058
|
|
1059
1059
|
|
1060
|
+
@dataclass(frozen=True, order=True)
|
1061
|
+
class InterStemParameters:
|
1062
|
+
stem1_idx: int
|
1063
|
+
stem2_idx: int
|
1064
|
+
type: Optional[str] # Type of closest endpoint pair ('cs55', 'cs53', etc.)
|
1065
|
+
torsion: Optional[float] # Torsion angle between stem segments (degrees)
|
1066
|
+
min_endpoint_distance: Optional[float] # Minimum distance between stem endpoints
|
1067
|
+
torsion_angle_pdf: Optional[float] # PDF value of the torsion angle
|
1068
|
+
min_endpoint_distance_pdf: Optional[float] # PDF value of the min endpoint distance
|
1069
|
+
coaxial_probability: Optional[float] # Probability of stems being coaxial (0-1)
|
1070
|
+
|
1071
|
+
|
1060
1072
|
@dataclass(frozen=True, order=True)
|
1061
1073
|
class Structure2D:
|
1062
1074
|
baseInteractions: BaseInteractions
|
@@ -1067,3 +1079,4 @@ class Structure2D:
|
|
1067
1079
|
singleStrands: List[SingleStrand]
|
1068
1080
|
hairpins: List[Hairpin]
|
1069
1081
|
loops: List[Loop]
|
1082
|
+
interStemParameters: List[InterStemParameters]
|