RNApolis 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rnapolis/aligner.py ADDED
@@ -0,0 +1,103 @@
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import os
4
+ import tempfile
5
+
6
+ import pandas as pd
7
+
8
+ from rnapolis.parser import is_cif
9
+ from rnapolis.parser_v2 import parse_cif_atoms, parse_pdb_atoms, write_cif, write_pdb
10
+ from rnapolis.tertiary_v2 import Structure
11
+
12
+
13
def _residue_indices_to_remove(aligned1, aligned2):
    """Translate two gapped alignment rows into residue indices to delete.

    Both arguments are alignment rows of equal length in which "-" marks a
    gap. A residue is scheduled for removal when it faces a gap or a different
    letter in the other row. Indices are counted separately for each row and
    advance only on non-gap columns, so they refer to positions in the
    ungapped sequence of the respective structure.

    Returns a dict with keys "pdb1" and "pdb2", each holding a list of indices
    in ascending order.
    """
    to_remove = {"pdb1": [], "pdb2": []}
    index1 = 0
    index2 = 0

    for c1, c2 in zip(aligned1, aligned2):
        present1 = c1 != "-"
        present2 = c2 != "-"

        if c1 != c2:
            # Either one side is a gap or the two letters differ; every
            # residue that is actually present in this column is dropped.
            if present1:
                to_remove["pdb1"].append(index1)
            if present2:
                to_remove["pdb2"].append(index2)

        # bool is an int subclass: advance only when a residue was consumed
        index1 += present1
        index2 += present2

    return to_remove


def main():
    """Main function to run the aligner tool.

    Loads two PDB/mmCIF files into PyMOL, aligns them, exports the alignment
    to a temporary .aln file, removes from each structure the residues that
    face a gap or a different letter in the other structure, and writes both
    trimmed structures into the output directory.
    """
    parser = argparse.ArgumentParser(description="Align two PDB or mmCIF files.")
    parser.add_argument("--output", "-o", help="Output directory", required=True)
    parser.add_argument(
        "--format",
        "-f",
        help="Output format (possible values: PDB, mmCIF, keep. Default: keep)",
        default="keep",
    )
    parser.add_argument("pdb1", help="First PDB or mmCIF file")
    parser.add_argument("pdb2", help="Second PDB or mmCIF file")
    args = parser.parse_args()

    # Imported here rather than at module level, so importing this module
    # does not require PyMOL.
    from pymol import cmd

    cmd.load(args.pdb1, "pdb1")
    cmd.load(args.pdb2, "pdb2")
    cmd.align("pdb1", "pdb2", object="aligned", cycles=0)

    aligned = {"pdb1": [], "pdb2": []}

    with tempfile.NamedTemporaryFile("wt+", suffix=".aln") as f:
        cmd.save(f.name, "aligned")
        f.seek(0)

        for line in f:
            fields = line.split()
            # Sequence lines look like "<object name> <sequence chunk>"
            if len(fields) >= 2 and fields[0] in aligned:
                aligned[fields[0]].append(fields[1])

    # Chunks are concatenated without a separator: a separator would be
    # counted as an alignment column and shift all subsequent indices.
    residues_to_remove = _residue_indices_to_remove(
        "".join(aligned["pdb1"]), "".join(aligned["pdb2"])
    )

    if not residues_to_remove["pdb1"] and not residues_to_remove["pdb2"]:
        print("Structures are already aligned")

    paths = {"pdb1": args.pdb1, "pdb2": args.pdb2}
    structures = {}

    for key, path in paths.items():
        with open(path) as f:
            if is_cif(f):
                atoms = parse_cif_atoms(f)
            else:
                atoms = parse_pdb_atoms(f)

        structures[key] = Structure(atoms).residues

    for key, residues in structures.items():
        # Delete from the end so that earlier indices stay valid
        for i in sorted(residues_to_remove[key], reverse=True):
            del residues[i]

    # Write output
    os.makedirs(args.output, exist_ok=True)

    for key, residues in structures.items():
        path = paths[key]

        if not residues:
            # Nothing to concatenate and no residue to read the format from
            print(f"No residues left in {path} after alignment, skipping output")
            continue

        base, _ = os.path.splitext(os.path.basename(path))

        if args.format == "keep":
            output_format = residues[0].atoms.attrs["format"]
        else:
            output_format = args.format

        ext = ".pdb" if output_format == "PDB" else ".cif"
        df = pd.concat([residue.atoms for residue in residues])

        with open(os.path.join(args.output, f"{base}{ext}"), "w") as f:
            if output_format == "PDB":
                write_pdb(df, f)
            else:
                write_cif(df, f)
100
+
101
+
102
+ if __name__ == "__main__":
103
+ main()
rnapolis/clashfinder.py CHANGED
@@ -179,20 +179,20 @@ def main():
179
179
  for ci, cj in sorted(clashing_chains):
180
180
  if ci == cj:
181
181
  print(
182
- f"Clashes found in chain {ci} with maximum occupancy sum equal to {max_occupancy_chains[(ci,cj)]}"
182
+ f"Clashes found in chain {ci} with maximum occupancy sum equal to {max_occupancy_chains[(ci, cj)]}"
183
183
  )
184
184
  else:
185
185
  print(
186
- f"Clashes found between chains {ci} and {cj} with maximum occupancy sum equal to {max_occupancy_chains[(ci,cj)]}"
186
+ f"Clashes found between chains {ci} and {cj} with maximum occupancy sum equal to {max_occupancy_chains[(ci, cj)]}"
187
187
  )
188
188
  for ri, rj in clashing_chains[(ci, cj)]:
189
189
  if ri == rj:
190
190
  print(
191
- f" Clashes found in residue {ri} with maximum occupancy sum equal to {max_occupancy_residues[(ri,rj)]}"
191
+ f" Clashes found in residue {ri} with maximum occupancy sum equal to {max_occupancy_residues[(ri, rj)]}"
192
192
  )
193
193
  else:
194
194
  print(
195
- f" Clashes found between residues {ri} and {rj} with maximum occupancy sum equal to {max_occupancy_residues[(ri,rj)]}"
195
+ f" Clashes found between residues {ri} and {rj} with maximum occupancy sum equal to {max_occupancy_residues[(ri, rj)]}"
196
196
  )
197
197
  for ai, aj, occupancy in sorted(clashing_chains[(ci, cj)][(ri, rj)]):
198
198
  print(
@@ -0,0 +1,38 @@
1
+ comp_id,atom_id,alt_atom_id,type_symbol,charge,pdbx_align,pdbx_aromatic_flag,pdbx_leaving_atom_flag,pdbx_stereo_config,model_Cartn_x,model_Cartn_y,model_Cartn_z,pdbx_model_Cartn_x_ideal,pdbx_model_Cartn_y_ideal,pdbx_model_Cartn_z_ideal,pdbx_component_atom_id,pdbx_component_comp_id,pdbx_ordinal
2
+ A,OP3,O3P,O,0,1,N,Y,N,22.586,9.736,-6.030,2.135,-1.141,-5.313,OP3,A,1
3
+ A,P,P,P,0,1,N,N,N,23.014,10.223,-7.491,1.024,-0.137,-4.723,P,A,2
4
+ A,OP1,O1P,O,0,1,N,N,N,21.938,9.966,-8.412,1.633,1.190,-4.488,OP1,A,3
5
+ A,OP2,O2P,O,0,1,N,N,N,24.378,9.686,-7.860,-0.183,0.005,-5.778,OP2,A,4
6
+ A,O5',O5*,O,0,1,N,N,N,23.144,11.720,-7.092,0.456,-0.720,-3.334,O5',A,5
7
+ A,C5',C5*,C,0,1,N,N,N,24.013,12.484,-7.839,-0.520,0.209,-2.863,C5',A,6
8
+ A,C4',C4*,C,0,1,N,N,R,23.996,13.899,-7.276,-1.101,-0.287,-1.538,C4',A,7
9
+ A,O4',O4*,O,0,1,N,N,N,24.523,13.928,-5.945,-0.064,-0.383,-0.538,O4',A,8
10
+ A,C3',C3*,C,0,1,N,N,S,24.868,14.877,-8.041,-2.105,0.739,-0.969,C3',A,9
11
+ A,O3',O3*,O,0,1,N,N,N,24.195,15.389,-9.178,-3.445,0.360,-1.287,O3',A,10
12
+ A,C2',C2*,C,0,1,N,N,R,25.172,15.942,-7.004,-1.874,0.684,0.558,C2',A,11
13
+ A,O2',O2*,O,0,1,N,N,N,24.060,16.759,-6.874,-3.065,0.271,1.231,O2',A,12
14
+ A,C1',C1*,C,0,1,N,N,R,25.387,15.094,-5.814,-0.755,-0.367,0.729,C1',A,13
15
+ A,N9,N9,N,0,1,Y,N,N,26.745,14.503,-5.630,0.158,0.029,1.803,N9,A,14
16
+ A,C8,C8,C,0,1,Y,N,N,27.163,13.163,-5.773,1.265,0.813,1.672,C8,A,15
17
+ A,N7,N7,N,0,1,Y,N,N,28.488,13.043,-5.514,1.843,0.963,2.828,N7,A,16
18
+ A,C5,C5,C,0,1,Y,N,N,28.887,14.292,-5.222,1.143,0.292,3.773,C5,A,17
19
+ A,C6,C6,C,0,1,Y,N,N,30.177,14.748,-4.871,1.290,0.091,5.156,C6,A,18
20
+ A,N6,N6,N,0,1,N,N,N,31.245,13.997,-4.775,2.344,0.664,5.846,N6,A,19
21
+ A,N1,N1,N,0,1,Y,N,N,30.286,16.119,-4.615,0.391,-0.656,5.787,N1,A,20
22
+ A,C2,C2,C,0,1,Y,N,N,29.272,16.866,-4.702,-0.617,-1.206,5.136,C2,A,21
23
+ A,N3,N3,N,0,1,Y,N,N,27.992,16.509,-5.030,-0.792,-1.051,3.841,N3,A,22
24
+ A,C4,C4,C,0,1,Y,N,N,27.856,15.249,-5.271,0.056,-0.320,3.126,C4,A,23
25
+ A,HOP3,3HOP,H,0,0,N,N,N,23.296,9.905,-5.422,2.448,-0.755,-6.142,HOP3,A,24
26
+ A,HOP2,2HOP,H,0,0,N,N,N,25.088,9.855,-7.252,-0.552,-0.879,-5.902,HOP2,A,25
27
+ A,H5',1H5*,H,0,1,N,N,N,25.039,12.050,-7.885,-1.319,0.301,-3.599,H5',A,26
28
+ A,H5'',2H5*,H,0,0,N,N,N,23.788,12.450,-8.930,-0.052,1.182,-2.712,H5'',A,27
29
+ A,H4',H4*,H,0,1,N,N,N,22.923,14.196,-7.338,-1.586,-1.254,-1.677,H4',A,28
30
+ A,H3',H3*,H,0,1,N,N,N,25.794,14.420,-8.461,-1.890,1.736,-1.353,H3',A,29
31
+ A,HO3',H3T,H,0,0,N,Y,N,24.741,16.001,-9.657,-4.024,1.035,-0.908,HO3',A,30
32
+ A,H2',H2*,H,0,1,N,N,N,26.025,16.626,-7.219,-1.543,1.654,0.930,H2',A,31
33
+ A,HO2',2HO*,H,0,0,N,N,N,24.250,17.425,-6.224,-3.740,0.936,1.037,HO2',A,32
34
+ A,H1',H1*,H,0,1,N,N,N,25.197,15.783,-4.958,-1.185,-1.346,0.940,H1',A,33
35
+ A,H8,H8,H,0,1,N,N,N,26.526,12.307,-6.055,1.611,1.246,0.745,H8,A,34
36
+ A,H61,1H6,H,0,1,N,N,N,32.176,14.326,-4.521,2.432,0.522,6.801,H61,A,35
37
+ A,H62,2H6,H,0,1,N,N,N,31.340,13.509,-5.666,2.996,1.205,5.374,H62,A,36
38
+ A,H2,H2,H,0,1,N,N,N,29.524,17.915,-4.473,-1.325,-1.807,5.688,H2,A,37
@@ -0,0 +1,36 @@
1
+ comp_id,atom_id,alt_atom_id,type_symbol,charge,pdbx_align,pdbx_aromatic_flag,pdbx_leaving_atom_flag,pdbx_stereo_config,model_Cartn_x,model_Cartn_y,model_Cartn_z,pdbx_model_Cartn_x_ideal,pdbx_model_Cartn_y_ideal,pdbx_model_Cartn_z_ideal,pdbx_component_atom_id,pdbx_component_comp_id,pdbx_ordinal
2
+ C,OP3,O3P,O,0,1,N,Y,N,26.803,20.514,-11.017,2.147,-1.021,-4.678,OP3,C,1
3
+ C,P,P,P,0,1,N,N,N,27.386,20.433,-12.503,1.049,-0.039,-4.028,P,C,2
4
+ C,OP1,O1P,O,0,1,N,N,N,26.539,21.293,-13.322,1.692,1.237,-3.646,OP1,C,3
5
+ C,OP2,O2P,O,0,1,N,N,N,27.570,19.015,-12.877,-0.116,0.246,-5.102,OP2,C,4
6
+ C,O5',O5*,O,0,1,N,N,N,28.830,21.055,-12.361,0.415,-0.733,-2.721,O5',C,5
7
+ C,C5',C5*,C,0,1,N,N,N,29.051,22.423,-11.973,-0.546,0.181,-2.193,C5',C,6
8
+ C,C4',C4*,C,0,1,N,N,R,30.525,22.652,-11.850,-1.189,-0.419,-0.942,C4',C,7
9
+ C,O4',O4*,O,0,1,N,N,N,30.993,22.001,-10.600,-0.190,-0.648,0.076,O4',C,8
10
+ C,C3',C3*,C,0,1,N,N,S,31.457,22.096,-12.933,-2.178,0.583,-0.307,C3',C,9
11
+ C,O3',O3*,O,0,1,N,N,N,31.346,22.915,-14.074,-3.518,0.283,-0.703,O3',C,10
12
+ C,C2',C2*,C,0,1,N,N,R,32.751,22.157,-12.194,-2.001,0.373,1.215,C2',C,11
13
+ C,O2',O2*,O,0,1,N,N,N,33.186,23.463,-12.031,-3.228,-0.059,1.806,O2',C,12
14
+ C,C1',C1*,C,0,1,N,N,R,32.361,21.627,-10.851,-0.924,-0.729,1.317,C1',C,13
15
+ C,N1,N1,N,0,1,N,N,N,32.476,20.131,-10.779,-0.036,-0.470,2.453,N1,C,14
16
+ C,C2,C2,C,0,1,N,N,N,33.674,19.589,-10.493,0.652,0.683,2.514,C2,C,15
17
+ C,O2,O2,O,0,1,N,N,N,34.680,20.354,-10.277,0.529,1.504,1.620,O2,C,16
18
+ C,N3,N3,N,0,1,N,N,N,33.855,18.230,-10.434,1.467,0.945,3.535,N3,C,17
19
+ C,C4,C4,C,0,1,N,N,N,32.804,17.495,-10.663,1.620,0.070,4.520,C4,C,18
20
+ C,N4,N4,N,0,1,N,N,N,32.905,16.139,-10.606,2.464,0.350,5.569,N4,C,19
21
+ C,C5,C5,C,0,1,N,N,N,31.488,18.044,-10.975,0.916,-1.151,4.483,C5,C,20
22
+ C,C6,C6,C,0,1,N,N,N,31.389,19.360,-11.041,0.087,-1.399,3.442,C6,C,21
23
+ C,HOP3,3HOP,H,0,0,N,N,N,27.354,19.953,-10.483,2.501,-0.569,-5.456,HOP3,C,22
24
+ C,HOP2,2HOP,H,0,0,N,N,N,28.121,18.454,-12.343,-0.508,-0.608,-5.323,HOP2,C,23
25
+ C,H5',1H5*,H,0,1,N,N,N,28.562,23.147,-12.665,-1.315,0.371,-2.941,H5',C,24
26
+ C,H5'',2H5*,H,0,0,N,N,N,28.496,22.699,-11.045,-0.052,1.118,-1.933,H5'',C,25
27
+ C,H4',H4*,H,0,1,N,N,N,30.596,23.763,-11.911,-1.699,-1.350,-1.188,H4',C,26
28
+ C,H3',H3*,H,0,1,N,N,N,31.269,21.074,-13.339,-1.917,1.604,-0.586,H3',C,27
29
+ C,HO3',H3T,H,0,0,N,Y,N,31.923,22.570,-14.745,-4.088,0.939,-0.278,HO3',C,28
30
+ C,H2',H2*,H,0,1,N,N,N,33.568,21.607,-12.716,-1.653,1.290,1.689,H2',C,29
31
+ C,HO2',2HO*,H,0,0,N,N,N,34.010,23.501,-11.560,-3.874,0.644,1.656,HO2',C,30
32
+ C,H1',H1*,H,0,1,N,N,N,33.051,22.057,-10.088,-1.392,-1.708,1.418,H1',C,31
33
+ C,H41,1H4,H,0,1,N,N,N,33.824,15.755,-10.388,2.950,1.189,5.590,H41,C,32
34
+ C,H42,2H4,H,0,1,N,N,N,32.564,15.734,-11.478,2.571,-0.289,6.290,H42,C,33
35
+ C,H5,H5,H,0,1,N,N,N,30.568,17.464,-11.160,1.030,-1.873,5.278,H5,C,34
36
+ C,H6,H6,H,0,1,N,N,N,30.417,19.806,-11.309,-0.465,-2.326,3.393,H6,C,35
@@ -0,0 +1,39 @@
1
+ comp_id,atom_id,alt_atom_id,type_symbol,charge,pdbx_align,pdbx_aromatic_flag,pdbx_leaving_atom_flag,pdbx_stereo_config,model_Cartn_x,model_Cartn_y,model_Cartn_z,pdbx_model_Cartn_x_ideal,pdbx_model_Cartn_y_ideal,pdbx_model_Cartn_z_ideal,pdbx_component_atom_id,pdbx_component_comp_id,pdbx_ordinal
2
+ G,OP3,O3P,O,0,1,N,Y,N,34.313,3.314,-0.422,-1.945,-1.360,5.599,OP3,G,1
3
+ G,P,P,P,0,1,N,N,N,33.741,4.431,0.367,-0.911,-0.277,5.008,P,G,2
4
+ G,OP1,O1P,O,0,1,N,N,N,33.537,5.671,-0.451,-1.598,1.022,4.844,OP1,G,3
5
+ G,OP2,O2P,O,0,1,N,N,N,34.442,4.727,1.661,0.325,-0.105,6.025,OP2,G,4
6
+ G,O5',O5*,O,0,1,N,N,N,32.289,3.932,0.811,-0.365,-0.780,3.580,O5',G,5
7
+ G,C5',C5*,C,0,1,N,N,N,32.101,2.551,1.198,0.542,0.217,3.109,C5',G,6
8
+ G,C4',C4*,C,0,1,N,N,R,30.760,2.450,1.879,1.100,-0.200,1.748,C4',G,7
9
+ G,O4',O4*,O,0,1,N,N,N,30.797,3.202,3.104,0.033,-0.318,0.782,O4',G,8
10
+ G,C3',C3*,C,0,1,N,N,S,29.597,3.022,1.070,2.025,0.898,1.182,C3',G,9
11
+ G,O3',O3*,O,0,1,N,N,N,29.106,2.045,0.152,3.395,0.582,1.439,O3',G,10
12
+ G,C2',C2*,C,0,1,N,N,R,28.603,3.421,2.118,1.741,0.884,-0.338,C2',G,11
13
+ G,O2',O2*,O,0,1,N,N,N,27.930,2.319,2.657,2.927,0.560,-1.066,O2',G,12
14
+ G,C1',C1*,C,0,1,N,N,R,29.487,3.936,3.170,0.675,-0.220,-0.507,C1',G,13
15
+ G,N9,N9,N,0,1,Y,N,N,29.942,5.378,3.195,-0.297,0.162,-1.534,N9,G,14
16
+ G,C8,C8,C,0,1,Y,N,N,31.187,5.907,3.065,-1.440,0.880,-1.334,C8,G,15
17
+ G,N7,N7,N,0,1,Y,N,N,31.237,7.191,3.136,-2.066,1.037,-2.464,N7,G,16
18
+ G,C5,C5,C,0,1,Y,N,N,29.896,7.536,3.341,-1.364,0.431,-3.453,C5,G,17
19
+ G,C6,C6,C,0,1,N,N,N,29.331,8.813,3.503,-1.556,0.279,-4.846,C6,G,18
20
+ G,O6,O6,O,0,1,N,N,N,29.901,9.926,3.495,-2.534,0.755,-5.397,O6,G,19
21
+ G,N1,N1,N,0,1,N,N,N,27.948,8.749,3.683,-0.626,-0.401,-5.551,N1,G,20
22
+ G,C2,C2,C,0,1,N,N,N,27.233,7.615,3.707,0.459,-0.934,-4.923,C2,G,21
23
+ G,N2,N2,N,0,1,N,N,N,25.894,7.743,3.899,1.384,-1.626,-5.664,N2,G,22
24
+ G,N3,N3,N,0,1,N,N,N,27.758,6.393,3.559,0.649,-0.800,-3.630,N3,G,23
25
+ G,C4,C4,C,0,1,Y,N,N,29.079,6.431,3.382,-0.226,-0.134,-2.868,C4,G,24
26
+ G,HOP3,3HOP,H,0,0,N,N,N,34.442,2.528,0.096,-2.247,-1.021,6.453,HOP3,G,25
27
+ G,HOP2,2HOP,H,0,0,N,N,N,34.571,3.941,2.179,0.745,-0.973,6.104,HOP2,G,26
28
+ G,H5',1H5*,H,0,1,N,N,N,32.209,1.841,0.344,1.362,0.327,3.820,H5',G,27
29
+ G,H5'',2H5*,H,0,0,N,N,N,32.936,2.156,1.822,0.018,1.168,3.011,H5'',G,28
30
+ G,H4',H4*,H,0,1,N,N,N,30.585,1.358,2.025,1.640,-1.144,1.833,H4',G,29
31
+ G,H3',H3*,H,0,1,N,N,N,29.867,3.891,0.426,1.772,1.868,1.610,H3',G,30
32
+ G,HO3',H3T,H,0,0,N,Y,N,28.382,2.400,-0.351,3.923,1.300,1.065,HO3',G,31
33
+ G,H2',H2*,H,0,1,N,N,N,27.827,4.115,1.719,1.346,1.847,-0.662,H2',G,32
34
+ G,HO2',2HO*,H,0,0,N,N,N,27.299,2.572,3.321,3.573,1.254,-0.871,HO2',G,33
35
+ G,H1',H1*,H,0,1,N,N,N,28.814,3.801,4.048,1.148,-1.167,-0.769,H1',G,34
36
+ G,H8,H8,H,0,1,N,N,N,32.110,5.323,2.909,-1.776,1.261,-0.381,H8,G,35
37
+ G,H1,H1,H,0,1,N,N,N,27.411,9.607,3.808,-0.736,-0.518,-6.508,H1,G,36
38
+ G,H21,1H2,H,0,1,N,N,N,25.350,6.880,3.917,2.165,-2.007,-5.232,H21,G,37
39
+ G,H22,2H2,H,0,1,N,N,N,25.507,8.377,3.200,1.256,-1.736,-6.619,H22,G,38
@@ -0,0 +1,35 @@
1
+ comp_id,atom_id,alt_atom_id,type_symbol,charge,pdbx_align,pdbx_aromatic_flag,pdbx_leaving_atom_flag,pdbx_stereo_config,model_Cartn_x,model_Cartn_y,model_Cartn_z,pdbx_model_Cartn_x_ideal,pdbx_model_Cartn_y_ideal,pdbx_model_Cartn_z_ideal,pdbx_component_atom_id,pdbx_component_comp_id,pdbx_ordinal
2
+ U,OP3,O3P,O,0,1,N,Y,N,29.106,2.045,0.152,-2.122,1.033,-4.690,OP3,U,1
3
+ U,P,P,P,0,1,N,N,N,28.940,2.442,-1.379,-1.030,0.047,-4.037,P,U,2
4
+ U,OP1,O1P,O,0,1,N,N,N,28.520,1.217,-2.078,-1.679,-1.228,-3.660,OP1,U,3
5
+ U,OP2,O2P,O,0,1,N,N,N,30.133,3.129,-1.866,0.138,-0.241,-5.107,OP2,U,4
6
+ U,O5',O5*,O,0,1,N,N,N,27.784,3.522,-1.387,-0.399,0.736,-2.726,O5',U,5
7
+ U,C5',C5*,C,0,1,N,N,N,26.432,3.117,-1.047,0.557,-0.182,-2.196,C5',U,6
8
+ U,C4',C4*,C,0,1,N,N,R,25.647,4.373,-0.834,1.197,0.415,-0.942,C4',U,7
9
+ U,O4',O4*,O,0,1,N,N,N,26.122,5.093,0.327,0.194,0.645,0.074,O4',U,8
10
+ U,C3',C3*,C,0,1,N,N,S,25.763,5.465,-1.895,2.181,-0.588,-0.301,C3',U,9
11
+ U,O3',O3*,O,0,1,N,N,N,25.041,5.077,-3.062,3.524,-0.288,-0.686,O3',U,10
12
+ U,C2',C2*,C,0,1,N,N,R,25.213,6.663,-1.148,1.995,-0.383,1.218,C2',U,11
13
+ U,O2',O2*,O,0,1,N,N,N,23.829,6.500,-1.012,3.219,0.046,1.819,O2',U,12
14
+ U,C1',C1*,C,0,1,N,N,R,25.917,6.524,0.133,0.922,0.723,1.319,C1',U,13
15
+ U,N1,N1,N,0,1,N,N,N,27.224,7.194,0.137,0.028,0.464,2.451,N1,U,14
16
+ U,C2,C2,C,0,1,N,N,N,27.201,8.578,0.406,-0.690,-0.671,2.486,C2,U,15
17
+ U,O2,O2,O,0,1,N,N,N,26.156,9.121,0.619,-0.587,-1.474,1.580,O2,U,16
18
+ U,N3,N3,N,0,1,N,N,N,28.408,9.189,0.403,-1.515,-0.936,3.517,N3,U,17
19
+ U,C4,C4,C,0,1,N,N,N,29.660,8.606,0.152,-1.641,-0.055,4.530,C4,U,18
20
+ U,O4,O4,O,0,1,N,N,N,30.676,9.330,0.195,-2.391,-0.292,5.460,O4,U,19
21
+ U,C5,C5,C,0,1,N,N,N,29.604,7.215,-0.113,-0.894,1.146,4.502,C5,U,20
22
+ U,C6,C6,C,0,1,N,N,N,28.447,6.605,-0.111,-0.070,1.384,3.459,C6,U,21
23
+ U,HOP3,3HOP,H,0,0,N,N,N,29.377,2.835,0.603,-2.475,0.583,-5.470,HOP3,U,22
24
+ U,HOP2,2HOP,H,0,0,N,N,N,30.404,3.919,-1.414,0.534,0.613,-5.325,HOP2,U,23
25
+ U,H5',1H5*,H,0,1,N,N,N,25.974,2.434,-1.800,1.329,-0.373,-2.942,H5',U,24
26
+ U,H5'',2H5*,H,0,0,N,N,N,26.387,2.417,-0.179,0.060,-1.117,-1.940,H5'',U,25
27
+ U,H4',H4*,H,0,1,N,N,N,24.613,3.957,-0.795,1.712,1.345,-1.185,H4',U,26
28
+ U,H3',H3*,H,0,1,N,N,N,26.782,5.677,-2.293,1.923,-1.609,-0.583,H3',U,27
29
+ U,HO3',H3T,H,0,0,N,Y,N,25.113,5.756,-3.722,4.094,-0.926,-0.234,HO3',U,28
30
+ U,H2',H2*,H,0,1,N,N,N,25.358,7.661,-1.622,1.643,-1.301,1.688,H2',U,29
31
+ U,HO2',2HO*,H,0,0,N,N,N,23.484,7.251,-0.543,3.865,-0.657,1.671,HO2',U,30
32
+ U,H1',H1*,H,0,1,N,N,N,25.312,6.996,0.941,1.392,1.700,1.423,H1',U,31
33
+ U,H3,H3,H,0,1,N,N,N,28.370,10.187,0.610,-2.024,-1.762,3.528,H3,U,32
34
+ U,H5,H5,H,0,1,N,N,N,30.486,6.589,-0.327,-0.982,1.863,5.305,H5,U,33
35
+ U,H6,H6,H,0,1,N,N,N,28.506,5.526,-0.332,0.507,2.295,3.421,H6,U,34
rnapolis/parser_v2.py CHANGED
@@ -1,7 +1,10 @@
1
- from typing import IO, Union
1
+ import io
2
+ import tempfile
3
+ from typing import IO, TextIO, Union
2
4
 
3
5
  import pandas as pd
4
6
  from mmcif.io.IoAdapterPy import IoAdapterPy
7
+ from mmcif.io.PdbxReader import DataCategory, DataContainer
5
8
 
6
9
 
7
10
  def parse_pdb_atoms(content: Union[str, IO[str]]) -> pd.DataFrame:
@@ -200,3 +203,304 @@ def parse_cif_atoms(content: Union[str, IO[str]]) -> pd.DataFrame:
200
203
  df.attrs["format"] = "mmCIF"
201
204
 
202
205
  return df
206
+
207
+
208
def _pdb_text(value, placeholders=()):
    """Return value as text for a PDB column; None, NaN and placeholders become ""."""
    if value is None:
        return ""
    if not isinstance(value, str):
        if pd.isna(value):
            return ""
        value = str(value)
    return "" if value in placeholders else value


def _pdb_atom_name_field(atom_name):
    """Return the four-character atom name field (columns 13-16)."""
    # Names that fill the whole field, or that start with a digit, begin in
    # column 13; shorter names begin in column 14.
    if len(atom_name) >= 4 or atom_name[:1].isdigit():
        return atom_name[:4].ljust(4)
    return " " + atom_name.ljust(3)


def _pdb_charge_field(charge):
    """Return the charge in PDB notation (e.g. "1+", "2-") or "" if absent or zero."""
    text = _pdb_text(charge, placeholders=("?", "."))
    if not text:
        return ""
    try:
        charge_val = int(float(text))
    except (ValueError, OverflowError):
        # Not a plain number (e.g. already "1+"): keep it unchanged
        return text
    if charge_val == 0:
        return ""
    return f"{abs(charge_val)}{'+' if charge_val > 0 else '-'}"


def write_pdb(
    df: pd.DataFrame, output: Union[str, TextIO, None] = None
) -> Union[str, None]:
    """
    Write a DataFrame of atom records to PDB format.

    One ATOM/HETATM line is produced per row, followed by a final END record.
    Column names are chosen according to ``df.attrs["format"]`` ("PDB" when
    the attribute is absent; any other value is treated as mmCIF). Missing
    values (None/NaN) in text columns are written as blanks, and for mmCIF
    input the "?" and "." placeholders in the alternate location and
    insertion code columns are written as blanks too.

    Parameters:
    -----------
    df : pd.DataFrame
        DataFrame containing atom records, as created by parse_pdb_atoms or parse_cif_atoms
    output : Union[str, TextIO, None], optional
        Output file path or file-like object. If None, returns the PDB content as a string.

    Returns:
    --------
    Union[str, None]
        If output is None, returns the PDB content as a string. Otherwise, returns None.
    """
    is_pdb = df.attrs.get("format", "PDB") == "PDB"
    cif_null = ("?", ".")
    lines = []

    for _, row in df.iterrows():
        if is_pdb:
            record_type = _pdb_text(row["record_type"])
            serial = int(row["serial"])
            atom_name = _pdb_text(row["name"])
            alt_loc = _pdb_text(row.get("altLoc", ""))
            res_name = _pdb_text(row["resName"])
            chain_id = _pdb_text(row["chainID"])
            res_seq = int(row["resSeq"])
            icode = _pdb_text(row["iCode"])
            x = float(row["x"])
            y = float(row["y"])
            z = float(row["z"])
            occupancy = float(row["occupancy"])
            temp_factor = float(row["tempFactor"])
            element = _pdb_text(row["element"])
            charge = _pdb_charge_field(row["charge"])
        else:  # mmCIF
            record_type = _pdb_text(row.get("group_PDB", "ATOM"))
            serial = int(row["id"])
            atom_name = _pdb_text(
                row.get("auth_atom_id", row.get("label_atom_id", ""))
            )
            alt_loc = _pdb_text(row.get("label_alt_id", ""), cif_null)
            res_name = _pdb_text(
                row.get("auth_comp_id", row.get("label_comp_id", ""))
            )
            chain_id = _pdb_text(
                row.get("auth_asym_id", row.get("label_asym_id", ""))
            )
            res_seq = int(row.get("auth_seq_id", row.get("label_seq_id", 0)))
            icode = _pdb_text(row.get("pdbx_PDB_ins_code", ""), cif_null)
            x = float(row["Cartn_x"])
            y = float(row["Cartn_y"])
            z = float(row["Cartn_z"])
            occupancy = float(row.get("occupancy", 1.0))
            temp_factor = float(row.get("B_iso_or_equiv", 0.0))
            element = _pdb_text(row.get("type_symbol", ""))
            charge = _pdb_charge_field(row.get("pdbx_formal_charge", ""))

        # (start, end, text) with 0-based, end-exclusive column positions.
        # Fields are applied left to right onto a blank 80-column line.
        fields = (
            (0, 6, record_type.ljust(6)),
            (6, 11, str(serial).rjust(5)),
            (12, 16, _pdb_atom_name_field(atom_name)),
            (16, 17, alt_loc or " "),
            (17, 20, res_name.ljust(3)),
            (21, 22, chain_id or " "),
            (22, 26, str(res_seq).rjust(4)),
            (26, 27, icode or " "),
            (30, 38, f"{x:8.3f}"),
            (38, 46, f"{y:8.3f}"),
            (46, 54, f"{z:8.3f}"),
            (54, 60, f"{occupancy:6.2f}"),
            (60, 66, f"{temp_factor:6.2f}"),
            (76, 78, element.rjust(2)),
            (78, 80, charge.ljust(2)),
        )

        line = " " * 80
        for start, end, text in fields:
            line = line[:start] + text + line[end:]

        lines.append(line.rstrip() + "\n")

    # Add END record
    lines.append("END\n")
    content = "".join(lines)

    # Return the content as a string when no destination was given
    if output is None:
        return content

    if isinstance(output, str):
        with open(output, "w") as f:
            f.write(content)
    else:
        output.write(content)
    return None
382
+
383
+
384
def _cif_value(value, missing="?"):
    """Return value as an mmCIF item string; None, NaN and "" become `missing`."""
    if value is None:
        return missing
    if not isinstance(value, str):
        if pd.isna(value):
            return missing
        value = str(value)
    return value if value else missing


def write_cif(
    df: pd.DataFrame, output: Union[str, TextIO, None] = None
) -> Union[str, None]:
    """
    Write a DataFrame of atom records to mmCIF format.

    A single ``atom_site`` category is written. When ``df.attrs["format"]`` is
    "mmCIF" the DataFrame columns are used as the category attributes as-is;
    otherwise the rows are treated as PDB records and mapped onto a fixed set
    of ``atom_site`` attributes, with entity id and model number both set to
    "1". Missing values (None/NaN/empty string) are written as the "?"
    placeholder, except for the PDB alternate location, which is written as ".".

    Parameters:
    -----------
    df : pd.DataFrame
        DataFrame containing atom records, as created by parse_pdb_atoms or parse_cif_atoms
    output : Union[str, TextIO, None], optional
        Output file path or file-like object. If None, returns the mmCIF content as a string.

    Returns:
    --------
    Union[str, None]
        If output is None, returns the mmCIF content as a string. Otherwise, returns None.
    """
    # Get the format of the DataFrame
    format_type = df.attrs.get("format", "PDB")

    # Create a new DataContainer
    data_container = DataContainer("data_structure")

    # Define the attributes for atom_site category
    if format_type == "mmCIF":
        # Use existing mmCIF attributes
        attributes = list(df.columns)
    else:  # PDB format
        # Map PDB columns to mmCIF attributes
        attributes = [
            "group_PDB",  # record_type
            "id",  # serial
            "type_symbol",  # element
            "label_atom_id",  # name
            "label_alt_id",  # altLoc
            "label_comp_id",  # resName
            "label_asym_id",  # chainID
            "label_entity_id",  # (generated)
            "label_seq_id",  # resSeq
            "pdbx_PDB_ins_code",  # iCode
            "Cartn_x",  # x
            "Cartn_y",  # y
            "Cartn_z",  # z
            "occupancy",  # occupancy
            "B_iso_or_equiv",  # tempFactor
            "pdbx_formal_charge",  # charge
            "auth_seq_id",  # resSeq
            "auth_comp_id",  # resName
            "auth_asym_id",  # chainID
            "auth_atom_id",  # name
            "pdbx_PDB_model_num",  # (generated)
        ]

    # Prepare rows for the atom_site category
    rows = []

    for _, row in df.iterrows():
        if format_type == "mmCIF":
            # Use existing mmCIF data
            row_data = [_cif_value(row.get(attr, "?")) for attr in attributes]
        else:  # PDB format
            # Map PDB data to mmCIF format
            entity_id = "1"  # Default entity ID
            model_num = "1"  # Default model number
            res_seq = str(int(row["resSeq"]))

            row_data = [
                _cif_value(row["record_type"]),  # group_PDB
                str(int(row["serial"])),  # id
                _cif_value(row["element"]),  # type_symbol
                _cif_value(row["name"]),  # label_atom_id
                _cif_value(row.get("altLoc", ""), missing="."),  # label_alt_id
                _cif_value(row["resName"]),  # label_comp_id
                _cif_value(row["chainID"]),  # label_asym_id
                entity_id,  # label_entity_id
                res_seq,  # label_seq_id
                _cif_value(row["iCode"]),  # pdbx_PDB_ins_code
                f"{float(row['x']):.3f}",  # Cartn_x
                f"{float(row['y']):.3f}",  # Cartn_y
                f"{float(row['z']):.3f}",  # Cartn_z
                f"{float(row['occupancy']):.2f}",  # occupancy
                f"{float(row['tempFactor']):.2f}",  # B_iso_or_equiv
                _cif_value(row.get("charge", "")),  # pdbx_formal_charge
                res_seq,  # auth_seq_id
                _cif_value(row["resName"]),  # auth_comp_id
                _cif_value(row["chainID"]),  # auth_asym_id
                _cif_value(row["name"]),  # auth_atom_id
                model_num,  # pdbx_PDB_model_num
            ]

        rows.append(row_data)

    # Create the atom_site category and add it to the data container
    atom_site_category = DataCategory("atom_site", attributes, rows)
    data_container.append(atom_site_category)

    # Create an IoAdapter for writing
    adapter = IoAdapterPy()

    if isinstance(output, str):
        # Write straight to the given file path
        adapter.writeFile(output, [data_container])
        return None

    # The adapter is given a file path here, so the content is written to a
    # temporary file and read back for the string / file-like object cases.
    with tempfile.NamedTemporaryFile(mode="w+", suffix=".cif") as temp_file:
        adapter.writeFile(temp_file.name, [data_container])
        temp_file.seek(0)
        content = temp_file.read()

    if output is None:
        return content

    output.write(content)
    return None
rnapolis/tertiary_v2.py CHANGED
@@ -379,7 +379,7 @@ class Residue:
379
379
  self.atoms = residue_df
380
380
  self.format = residue_df.attrs.get("format", "unknown")
381
381
 
382
- @cached_property
382
+ @property
383
383
  def chain_id(self) -> str:
384
384
  """Get the chain identifier for this residue."""
385
385
  if self.format == "PDB":
@@ -391,7 +391,18 @@ class Residue:
391
391
  return self.atoms["label_asym_id"].iloc[0]
392
392
  return ""
393
393
 
394
- @cached_property
394
@chain_id.setter
def chain_id(self, value: str) -> None:
    """Assign a new chain identifier to every atom row of this residue.

    For PDB data the "chainID" column is overwritten. For mmCIF data the
    "auth_asym_id" and "label_asym_id" columns are overwritten, each only if
    it exists. Any other format leaves the atoms untouched.
    """
    if self.format == "PDB":
        self.atoms["chainID"] = value
        return

    if self.format != "mmCIF":
        return

    for column in ("auth_asym_id", "label_asym_id"):
        if column in self.atoms.columns:
            self.atoms[column] = value
404
+
405
+ @property
395
406
  def residue_number(self) -> int:
396
407
  """Get the residue sequence number."""
397
408
  if self.format == "PDB":
@@ -403,7 +414,18 @@ class Residue:
403
414
  return int(self.atoms["label_seq_id"].iloc[0])
404
415
  return 0
405
416
 
406
- @cached_property
417
@residue_number.setter
def residue_number(self, value: int) -> None:
    """Assign a new sequence number to every atom row of this residue.

    For PDB data the "resSeq" column is overwritten. For mmCIF data the
    "auth_seq_id" and "label_seq_id" columns are overwritten, each only if
    it exists. Any other format leaves the atoms untouched.
    """
    if self.format == "PDB":
        self.atoms["resSeq"] = value
        return

    if self.format != "mmCIF":
        return

    for column in ("auth_seq_id", "label_seq_id"):
        if column in self.atoms.columns:
            self.atoms[column] = value
427
+
428
+ @property
407
429
  def insertion_code(self) -> Optional[str]:
408
430
  """Get the insertion code, if any."""
409
431
  if self.format == "PDB":
@@ -415,6 +437,15 @@ class Residue:
415
437
  return icode if pd.notna(icode) else None
416
438
  return None
417
439
 
440
@insertion_code.setter
def insertion_code(self, value: Optional[str]) -> None:
    """Assign a new insertion code to every atom row of this residue.

    For PDB data the "iCode" column is overwritten. For mmCIF data the
    "pdbx_PDB_ins_code" column is overwritten, but only if it exists. Any
    other format leaves the atoms untouched.
    """
    if self.format == "PDB":
        column = "iCode"
    elif self.format == "mmCIF" and "pdbx_PDB_ins_code" in self.atoms.columns:
        column = "pdbx_PDB_ins_code"
    else:
        return

    self.atoms[column] = value
448
+
418
449
  @cached_property
419
450
  def residue_name(self) -> str:
420
451
  """Get the residue name (e.g., 'A', 'G', 'C', 'U', etc.)."""
@@ -494,10 +525,11 @@ class Residue:
494
525
  def __str__(self) -> str:
495
526
  """String representation of the residue."""
496
527
  # Start with chain ID and residue name
497
- if self.chain_id.isspace() or not self.chain_id:
528
+ chain = self.chain_id
529
+ if chain.isspace() or not chain:
498
530
  builder = f"{self.residue_name}"
499
531
  else:
500
- builder = f"{self.chain_id}.{self.residue_name}"
532
+ builder = f"{chain}.{self.residue_name}"
501
533
 
502
534
  # Add a separator if the residue name ends with a digit
503
535
  if len(self.residue_name) > 0 and self.residue_name[-1] in string.digits:
@@ -507,8 +539,9 @@ class Residue:
507
539
  builder += f"{self.residue_number}"
508
540
 
509
541
  # Add insertion code if present
510
- if self.insertion_code is not None:
511
- builder += f"^{self.insertion_code}"
542
+ icode = self.insertion_code
543
+ if icode is not None:
544
+ builder += f"^{icode}"
512
545
 
513
546
  return builder
514
547
 
rnapolis/unifier.py ADDED
@@ -0,0 +1,153 @@
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import os
4
+ import sys
5
+ from collections import Counter
6
+
7
+ import pandas as pd
8
+
9
+ from rnapolis.parser import is_cif
10
+ from rnapolis.parser_v2 import parse_cif_atoms, parse_pdb_atoms, write_cif, write_pdb
11
+ from rnapolis.tertiary_v2 import Structure
12
+
13
+
14
def load_components():
    """Read the per-nucleotide component tables stored next to this module.

    The files ``component_A.csv``, ``component_C.csv``, ``component_G.csv``
    and ``component_U.csv`` are looked up in the directory that contains
    this source file.

    Returns:
        dict: Maps each of ``"A"``, ``"C"``, ``"G"`` and ``"U"`` to the
        DataFrame parsed from the corresponding CSV file.
    """
    module_dir = os.path.abspath(os.path.dirname(__file__))
    return {
        letter: pd.read_csv(os.path.join(module_dir, f"component_{letter}.csv"))
        for letter in "ACGU"
    }
22
+
23
+
24
def _component_lookup(component):
    """Return (alt-name -> standard-name map, heavy-atom order) for one component table."""
    rename = dict(zip(component["alt_atom_id"], component["atom_id"]))
    names = component["atom_id"]
    # Atoms whose standard name starts with "H" are dropped from the order.
    kept = names[~names.str.startswith("H")].tolist()
    return rename, {name: idx for idx, name in enumerate(kept)}


def main():
    """Main function to run the unifier tool.

    Steps, in order:

    1. Parse every input file (mmCIF when ``is_cif`` says so, PDB otherwise)
       and keep only residues named exactly A, C, G or U. Their atom names
       are mapped to the standard names from the component tables, atoms
       outside the non-"H" standard set are dropped, and the remaining atoms
       are sorted into the component-table order.
    2. Compare each structure with the first one: residue counts and residue
       names must match (otherwise exit with status 1). Residues whose atom
       count differs are removed from every structure.
    3. For every remaining residue position, take the most common
       (chain, number, insertion code) triple and assign it in all
       structures when they disagree.
    4. Write each structure to the output directory as PDB or mmCIF.
    """
    parser = argparse.ArgumentParser(
        description="Unify content of a set of PDB or mmCIF files."
    )
    parser.add_argument("--output", "-o", help="Output directory", required=True)
    parser.add_argument(
        "--format",
        "-f",
        help="Output format (possible values: PDB, mmCIF, keep. Default: keep)",
        default="keep",
    )
    parser.add_argument("files", nargs="+", help="PDB or mmCIF files to compare")
    args = parser.parse_args()

    # Build the rename map and atom order once per nucleotide instead of
    # once per residue.
    lookups = {
        name: _component_lookup(table) for name, table in load_components().items()
    }
    structures = []

    for path in args.files:
        with open(path) as f:
            if is_cif(f):
                atoms = parse_cif_atoms(f)
            else:
                atoms = parse_pdb_atoms(f)

        residues = []

        for residue in Structure(atoms).residues:
            # Exact key match: a substring test against "ACGU" would also let
            # through names such as "" or "CG" and then fail on the lookup.
            lookup = lookups.get(residue.residue_name)
            if lookup is None:
                continue

            rename, valid_order = lookup
            column = "name" if residue.format == "PDB" else "auth_atom_id"

            # Replace alternative name with standard name
            residue.atoms[column] = residue.atoms[column].replace(rename)
            # Leave only standard, non-hydrogen atoms
            residue.atoms = residue.atoms[residue.atoms[column].isin(list(valid_order))]
            # Reorder atoms
            residue.atoms = residue.atoms.sort_values(
                by=[column], key=lambda col, order=valid_order: col.map(order)
            )
            residues.append(residue)

        structures.append((path, residues))

    # The first structure is the reference. Its residue list is mutated in
    # place below, so this alias always reflects the current state.
    ref_path, ref_residues = structures[0]

    for path, residues in structures:
        # Validity check 1: residue count must be equal
        if len(residues) != len(ref_residues):
            print(
                f"Number of residues in {path} does not match {ref_path}, cannot continue"
            )
            sys.exit(1)

        # Validity check 2: residue names must be equal
        for residue, ref_residue in zip(residues, ref_residues):
            if residue.residue_name != ref_residue.residue_name:
                print(
                    f"Residue {str(residue)} in {path} does not match {str(ref_residue)} in {ref_path}, cannot continue"
                )
                sys.exit(1)

        # Find residues with different number of atoms
        residues_to_remove = set()
        for i, (residue, ref_residue) in enumerate(zip(residues, ref_residues)):
            if len(residue.atoms) != len(ref_residue.atoms):
                print(
                    f"Number of atoms in {str(residue)} in {path} does not match {str(ref_residue)} in {ref_path}, will unify this"
                )
                residues_to_remove.add(i)

        # Remove residues with different number of atoms from every structure
        # (highest index first so earlier indices stay valid). This keeps all
        # residue lists aligned for the comparisons that follow.
        for _, other_residues in structures:
            for i in sorted(residues_to_remove, reverse=True):
                del other_residues[i]

    # Find most common residue identifiers for each residue
    n = len(ref_residues)
    if n == 0:
        # Without residues there is no format to inherit and nothing to
        # concatenate; stop with a message instead of an IndexError/ValueError.
        print("No residues left after unification, nothing to write")
        sys.exit(1)

    counters = [Counter() for _ in range(n)]
    for _, residues in structures:
        for i, residue in enumerate(residues):
            counters[i].update(
                [(residue.chain_id, residue.residue_number, residue.insertion_code)]
            )

    # If any residue has different identifiers, use the most common one in all structures
    for i, counter in enumerate(counters):
        (chain_id, residue_number, insertion_code), count = counter.most_common(1)[0]
        if count != len(structures):
            print(
                f"Residue {i + 1} has different identifiers in different structures, will unify this"
            )
            for _, residues in structures:
                residue = residues[i]
                residue.chain_id = chain_id
                residue.residue_number = residue_number
                residue.insertion_code = insertion_code

    # Write output
    os.makedirs(args.output, exist_ok=True)

    for path, residues in structures:
        base, _ = os.path.splitext(os.path.basename(path))

        if args.format == "keep":
            output_format = residues[0].atoms.attrs["format"]
        else:
            output_format = args.format

        ext = ".pdb" if output_format == "PDB" else ".cif"

        # Build the table before opening the file so a failure here does not
        # leave an empty output file behind.
        df = pd.concat([residue.atoms for residue in residues])

        with open(os.path.join(args.output, f"{base}{ext}"), "w") as f:
            if output_format == "PDB":
                write_pdb(df, f)
            else:
                write_cif(df, f)
151
+
152
+ if __name__ == "__main__":
153
+ main()
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: RNApolis
3
- Version: 0.5.0
3
+ Version: 0.6.0
4
4
  Summary: A Python library containing RNA-related bioinformatics functions and classes
5
5
  Home-page: https://github.com/tzok/rnapolis-py
6
6
  Author: Tomasz Zok
@@ -32,6 +32,7 @@ Dynamic: classifier
32
32
  Dynamic: description
33
33
  Dynamic: description-content-type
34
34
  Dynamic: home-page
35
+ Dynamic: license-file
35
36
  Dynamic: project-url
36
37
  Dynamic: requires-dist
37
38
  Dynamic: summary
@@ -0,0 +1,26 @@
1
+ rnapolis/aligner.py,sha256=oJ81FrjlEEzqJcYJdZUE1PrPjabIOT7j0idwAHXVQMI,3156
2
+ rnapolis/annotator.py,sha256=hRRzRmneYxbg2tvwVHMWLfzmJb4szV0JL_6EOC09Gwg,22101
3
+ rnapolis/clashfinder.py,sha256=AC9_tIx7QIk57sELq_aKfU1u3UMOXbgcccQeGHhMR6c,8517
4
+ rnapolis/common.py,sha256=LY6Uz96Br8ki_gA8LpfatgtvVbt9jOTkwgagayqTgf8,31251
5
+ rnapolis/component_A.csv,sha256=koirS-AwUZwoYGItT8yn3wS6Idvmh2FANfTQcOS_xh8,2897
6
+ rnapolis/component_C.csv,sha256=NtvsAu_YrUgTjzZm3j4poW4IZ99x3dPARB09XVIiMCc,2803
7
+ rnapolis/component_G.csv,sha256=Z5wl8OnHRyx4XhTyBiWgRZiEvmZXhoxtVRH8bn6Vxf0,2898
8
+ rnapolis/component_U.csv,sha256=8BUoU1m2YzGmi8_kw1xdpf3pucszHjFEtTex87CuXiE,2645
9
+ rnapolis/metareader.py,sha256=I1-cXc2YNBPwa3zihAnMTjEsAo79tEKzSmWu5yvN1Pk,2071
10
+ rnapolis/mmcif_pdbx_v50.dic,sha256=5QFx1ssDaehR4_DQ-tS9VQux262SiLXaqcwmwwejF5c,5744659
11
+ rnapolis/molecule_filter.py,sha256=jgcpJxx_oXEBX0d30v4k_FdwRouRUPUsEtCYWgLGpD4,7310
12
+ rnapolis/motif_extractor.py,sha256=Lfn1iEkhkP9eZD3GPEWNAfy00QO7QPCc8wM_XS1ory8,1147
13
+ rnapolis/parser.py,sha256=3g4mtFvpiEENFcSBBtx_E_x1vJPF9BujWnts0kb9XjE,16340
14
+ rnapolis/parser_v2.py,sha256=ltesVKBiIKk9JlM02ttTJzLm1g5MHdPzDgQTcl40GP8,16257
15
+ rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
16
+ rnapolis/tertiary.py,sha256=6t9ZB4w33-5n_M3sns1RoFXCOTgVAgGH4WDNG5OG9Kg,23426
17
+ rnapolis/tertiary_v2.py,sha256=I1uyHWIUePNGO5m-suoL4ibtz02qAJUMvYm0BUKUygY,22480
18
+ rnapolis/transformer.py,sha256=aC0nBmHHJf5TyLvBIV57Jj3tlwpvHbPo347opfAOlQA,3844
19
+ rnapolis/unifier.py,sha256=bXscX3lxeSxT4K1fm2UEURcU9_0JA0HdTbd8ZoHZFAY,5442
20
+ rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
21
+ rnapolis-0.6.0.dist-info/licenses/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
22
+ rnapolis-0.6.0.dist-info/METADATA,sha256=TcGmjLlYH8jPvWJr48a2ce-UhIIl_dAO_wygm4ZPrKY,54537
23
+ rnapolis-0.6.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
24
+ rnapolis-0.6.0.dist-info/entry_points.txt,sha256=kS_Ji3_6UaomxkOaYpGHh4aZKaIh9CAfzoexbaS3y50,372
25
+ rnapolis-0.6.0.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
26
+ rnapolis-0.6.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (76.0.0)
2
+ Generator: setuptools (78.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,4 +1,5 @@
1
1
  [console_scripts]
2
+ aligner = rnapolis.aligner:main
2
3
  annotator = rnapolis.annotator:main
3
4
  clashfinder = rnapolis.clashfinder:main
4
5
  metareader = rnapolis.metareader:main
@@ -6,3 +7,4 @@ molecule-filter = rnapolis.molecule_filter:main
6
7
  motif-extractor = rnapolis.motif_extractor:main
7
8
  rfam-folder = rnapolis.rfam_folder:main
8
9
  transformer = rnapolis.transformer:main
10
+ unifier = rnapolis.unifier:main
@@ -1,20 +0,0 @@
1
- rnapolis/annotator.py,sha256=hRRzRmneYxbg2tvwVHMWLfzmJb4szV0JL_6EOC09Gwg,22101
2
- rnapolis/clashfinder.py,sha256=i95kp0o6OWNqmJDBr-PbsZd7RY2iJtBDr7QqolJSuAQ,8513
3
- rnapolis/common.py,sha256=LY6Uz96Br8ki_gA8LpfatgtvVbt9jOTkwgagayqTgf8,31251
4
- rnapolis/metareader.py,sha256=I1-cXc2YNBPwa3zihAnMTjEsAo79tEKzSmWu5yvN1Pk,2071
5
- rnapolis/mmcif_pdbx_v50.dic,sha256=5QFx1ssDaehR4_DQ-tS9VQux262SiLXaqcwmwwejF5c,5744659
6
- rnapolis/molecule_filter.py,sha256=jgcpJxx_oXEBX0d30v4k_FdwRouRUPUsEtCYWgLGpD4,7310
7
- rnapolis/motif_extractor.py,sha256=Lfn1iEkhkP9eZD3GPEWNAfy00QO7QPCc8wM_XS1ory8,1147
8
- rnapolis/parser.py,sha256=3g4mtFvpiEENFcSBBtx_E_x1vJPF9BujWnts0kb9XjE,16340
9
- rnapolis/parser_v2.py,sha256=L85dRYlh_aOcSvt2ZtRJYFhYa0bwvYgoTQi9kUSqDGQ,5803
10
- rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
11
- rnapolis/tertiary.py,sha256=6t9ZB4w33-5n_M3sns1RoFXCOTgVAgGH4WDNG5OG9Kg,23426
12
- rnapolis/tertiary_v2.py,sha256=GuTSEtbkMlYks6XA-P8pbLaT4M1cVS1T8gb8zcaGRzQ,21250
13
- rnapolis/transformer.py,sha256=aC0nBmHHJf5TyLvBIV57Jj3tlwpvHbPo347opfAOlQA,3844
14
- rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
15
- rnapolis-0.5.0.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
16
- rnapolis-0.5.0.dist-info/METADATA,sha256=gq8j-Oln2H84wuzLZNvilJ5m1dPYtvm7vX2cpEunHYg,54515
17
- rnapolis-0.5.0.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
18
- rnapolis-0.5.0.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
19
- rnapolis-0.5.0.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
20
- rnapolis-0.5.0.dist-info/RECORD,,