RNADiscrepancy 0.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rnadiscrepancy-0.0.2/.gitignore +22 -0
- rnadiscrepancy-0.0.2/LICENSE +17 -0
- rnadiscrepancy-0.0.2/PKG-INFO +63 -0
- rnadiscrepancy-0.0.2/README.md +49 -0
- rnadiscrepancy-0.0.2/pyproject.toml +24 -0
- rnadiscrepancy-0.0.2/src/RNADiscrepancy/.test.py.swp +0 -0
- rnadiscrepancy-0.0.2/src/RNADiscrepancy/__init__.py +0 -0
- rnadiscrepancy-0.0.2/src/RNADiscrepancy/measures.py +171 -0
- rnadiscrepancy-0.0.2/src/RNADiscrepancy/references.py +66 -0
- rnadiscrepancy-0.0.2/src/RNADiscrepancy/rna.py +57 -0
- rnadiscrepancy-0.0.2/src/RNADiscrepancy/test.py +61 -0
- rnadiscrepancy-0.0.2/src/RNADiscrepancy/utilities.py +7 -0
- rnadiscrepancy-0.0.2/test/10samples.pickle +0 -0
- rnadiscrepancy-0.0.2/test/test.py +1 -0
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
VEnv/
|
|
2
|
+
__pycache__/
|
|
3
|
+
|
|
4
|
+
# Distribution / packaging
|
|
5
|
+
.Python
|
|
6
|
+
build/
|
|
7
|
+
develop-eggs/
|
|
8
|
+
dist/
|
|
9
|
+
downloads/
|
|
10
|
+
eggs/
|
|
11
|
+
.eggs/
|
|
12
|
+
lib/
|
|
13
|
+
lib64/
|
|
14
|
+
parts/
|
|
15
|
+
sdist/
|
|
16
|
+
var/
|
|
17
|
+
wheels/
|
|
18
|
+
share/python-wheels/
|
|
19
|
+
*.egg-info/
|
|
20
|
+
.installed.cfg
|
|
21
|
+
*.egg
|
|
22
|
+
MANIFEST
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
RNADiscrepancy - Computing RNA discrepancy values
|
|
2
|
+
|
|
3
|
+
Copyright (C) 2026 Vladimir Reinharz
|
|
4
|
+
|
|
5
|
+
This program is free software: you can redistribute it and/or modify
|
|
6
|
+
it under the terms of the GNU General Public License as published by
|
|
7
|
+
the Free Software Foundation, either version 3 of the License, or
|
|
8
|
+
(at your option) any later version.
|
|
9
|
+
|
|
10
|
+
This program is distributed in the hope that it will be useful,
|
|
11
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13
|
+
GNU General Public License for more details.
|
|
14
|
+
|
|
15
|
+
You should have received a copy of the GNU General Public License
|
|
16
|
+
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
17
|
+
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: RNADiscrepancy
|
|
3
|
+
Version: 0.0.2
|
|
4
|
+
Summary: Compute discrepancy between 3D RNA (sub)structures
|
|
5
|
+
Project-URL: Homepage, https://codeberg.org/vreinharz/RNADiscrepancy
|
|
6
|
+
Project-URL: Issues, https://codeberg.org/vreinharz/RNADiscrepancy/issues
|
|
7
|
+
Author-email: Vladimir Reinharz <reinharz.vladimir@uqam.ca>
|
|
8
|
+
License-Expression: GPL-3.0
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Requires-Python: >=3.9
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
|
|
15
|
+
# RNADiscrepancy
|
|
16
|
+
|
|
17
|
+
isostericity
|
|
18
|
+
RMSD
|
|
19
|
+
maybe more?
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
```python
|
|
23
|
+
>>> from RNADiscrepancy.utilities import rawdata_format_nucleotide
|
|
24
|
+
>>> from RNADiscrepancy.measures import isodiscrepancy
|
|
25
|
+
>>> cwwAU_A_list_atoms = ["P","OP1","OP2","O5'","C5'","C4'","O4'","C3'","O3'","C2'","O2'","C1'","N9","C8","N7","C5","C6","N6","N1","C2","N3","C4"]
|
|
26
|
+
>>> cwwAU_A_list_atoms_elements = ["P","O","O","O","C","C","O","C","O","C","O","C","N","C","N","C","C","N","N","C","N","C"]
|
|
27
|
+
>>> cwwAU_A_list_positions = [(-46.6870002746582, -80.67400360107422, -42.457000732421875),(-45.249000549316406, -80.74099731445312, -42.09299850463867),(-47.303001403808594, -79.36100006103516, -42.77299880981445),(-46.92900085449219, -81.6449966430664, -43.694000244140625),(-47.27000045776367, -83.01000213623047, -43.49399948120117),(-47.70500183105469, -83.62999725341797, -44.79600143432617),(-49.13199996948242, -83.92400360107422, -44.75299835205078),(-47.60100173950195, -82.73999786376953, -46.02299880981445),(-46.290000915527344, -82.58599853515625, -46.52899932861328),(-48.512001037597656, -83.47599792480469, -46.97999954223633),(-47.96799850463867, -84.72100067138672, -47.375),(-49.6870002746582, -83.72000122070312, -46.04600143432617),(-50.590999603271484, -82.5719985961914, -46.012001037597656),(-50.654998779296875, -81.52100372314453, -45.125),(-51.59299850463867, -80.6449966430664, -45.40800094604492),(-52.19300079345703, -81.16400146484375, -46.55400085449219),(-53.26100158691406, -80.71800231933594, -47.35499954223633),(-53.959999084472656, -79.60900115966797, -47.10100173950195),(-53.59199905395508, -81.46099853515625, -48.4379997253418),(-52.902000427246094, -82.58399963378906, -48.68299865722656),(-51.887001037597656, -83.11399841308594, -48.000999450683594),(-51.58000183105469, -82.34600067138672, -46.9370002746582)]
|
|
28
|
+
>>> cwwAU_U_list_atoms = ["P","OP1","OP2","O5'","C5'","C4'","O4'","C3'","O3'","C2'","O2'","C1'","N1","C2","O2","N3","C4","O4","C5","C6"]
|
|
29
|
+
>>> cwwAU_U_list_atoms_elements = ["P","O","O","O","C","C","O","C","O","C","O","C","N","C","O","N","C","O","C","C"]
|
|
30
|
+
>>> cwwAU_U_list_positions = [(-62.04399871826172, -78.197998046875, -52.75299835205078),(-63.35599899291992, -78.33399963378906, -53.4370002746582),(-61.986000061035156, -77.62999725341797, -51.382999420166016),(-61.347999572753906, -79.62899780273438, -52.72700119018555),(-61.56800079345703, -80.56199645996094, -53.777000427246094),(-60.268001556396484, -81.21900177001953, -54.17399978637695),(-59.19599914550781, -80.2249984741211, -54.268001556396484),(-59.689998626708984, -82.20500183105469, -53.178001403808594),(-60.340999603271484, -83.45899963378906, -53.125),(-58.26900100708008, -82.29499816894531, -53.6879997253418),(-58.19900131225586, -82.8759994506836, -54.974998474121094),(-57.983001708984375, -80.80400085449219, -53.79199981689453),(-57.66400146484375, -80.25, -52.470001220703125),(-56.564998626708984, -80.7750015258789, -51.81100082397461),(-55.832000732421875, -81.61399841308594, -52.30500030517578),(-56.35599899291992, -80.28099822998047, -50.54899978637695),(-57.10900115966797, -79.3290023803711, -49.89699935913086),(-56.82600021362305, -79.02300262451172, -48.737998962402344),(-58.20600128173828, -78.81199645996094, -50.659000396728516),(-58.439998626708984, -79.2750015258789, -51.88999938964844)]
|
|
31
|
+
>>> twwUC_U_list_atoms = ["P","OP1","OP2","O5'","C5'","C4'","O4'","C3'","O3'","C2'","O2'","C1'","N1","C2","O2","N3","C4","O4","C5","C6"]
|
|
32
|
+
>>> twwUC_U_list_atoms_elements = ["P","O","O","O","C","C","O","C","O","C","O","C","N","C","O","N","C","O","C","C"]
|
|
33
|
+
>>> twwUC_U_list_positions = [(52.62300109863281, 163.0240020751953, 82.34400177001953),(51.7239990234375, 162.08799743652344, 81.61499786376953),(53.42900085449219, 164.01199340820312, 81.5770034790039),(51.78300094604492, 163.79200744628906, 83.45999908447266),(50.75199890136719, 163.1179962158203, 84.20899963378906),(50.0260009765625, 164.10000610351562, 85.0989990234375),(50.957000732421875, 164.63400268554688, 86.0719985961914),(49.45600128173828, 165.31300354003906, 84.38600158691406),(48.15599822998047, 165.03599548339844, 83.86499786376953),(49.45199966430664, 166.3730010986328, 85.48100280761719),(48.358001708984375, 166.2570037841797, 86.36199951171875),(50.731998443603516, 166.02000427246094, 86.24299621582031),(51.94499969482422, 166.73500061035156, 85.80999755859375),(51.97999954223633, 168.10499572753906, 85.95700073242188),(51.03900146484375, 168.75, 86.38800048828125),(53.1619987487793, 168.69700622558594, 85.58200073242188),(54.284000396728516, 168.07400512695312, 85.08399963378906),(55.29899978637695, 168.73300170898438, 84.87999725341797),(54.16299819946289, 166.66700744628906, 84.93399810791016),(53.02899932861328, 166.0590057373047, 85.28900146484375)]
|
|
34
|
+
>>> twwUC_C_list_atoms = ["P","OP1","OP2","O5'","C5'","C4'","O4'","C3'","O3'","C2'","O2'","C1'","N1","C2","O2","N3","C4","N4","C5","C6"]
|
|
35
|
+
>>> twwUC_C_list_atoms_elements = ["P","O","O","O","C","C","O","C","O","C","O","C","N","C","O","N","C","N","C","C"]
|
|
36
|
+
>>> twwUC_C_list_positions = [(51.45399856567383, 178.2270050048828, 85.64199829101562),(51.52799987792969, 179.64500427246094, 85.23200225830078),(50.196998596191406, 177.45799255371094, 85.41799926757812),(52.66400146484375, 177.4499969482422, 84.95800018310547),(54.012001037597656, 177.9250030517578, 85.10099792480469),(54.97700119018555, 176.7689971923828, 85.08399963378906),(54.652000427246094, 175.8520050048828, 86.15599822998047),(54.93899917602539, 175.89599609375, 83.8489990234375),(55.71099853515625, 176.46200561523438, 82.81400299072266),(55.54800033569336, 174.60000610351562, 84.3550033569336),(56.957000732421875, 174.67799377441406, 84.4469985961914),(54.93899917602539, 174.52000427246094, 85.75399780273438),(53.6879997253418, 173.73199462890625, 85.80500030517578),(53.76300048828125, 172.32899475097656, 85.83999633789062),(54.87699890136719, 171.78500366210938, 85.81199645996094),(52.61399841308594, 171.60699462890625, 85.89900207519531),(51.43299865722656, 172.22999572753906, 85.91899871826172),(50.332000732421875, 171.48800659179688, 85.98400115966797),(51.32899856567383, 173.6479949951172, 85.87699890136719),(52.46799850463867, 174.35299682617188, 85.82099914550781)]
|
|
37
|
+
>>> cww_AU_A = rawdata_format_nucleotide('A', cwwAU_A_list_atoms, cwwAU_A_list_atoms_elements, cwwAU_A_list_positions)
|
|
38
|
+
>>> cww_AU_U = rawdata_format_nucleotide('U', cwwAU_U_list_atoms, cwwAU_U_list_atoms_elements, cwwAU_U_list_positions)
|
|
39
|
+
>>> tww_UC_U = rawdata_format_nucleotide('U', twwUC_U_list_atoms, twwUC_U_list_atoms_elements, twwUC_U_list_positions)
|
|
40
|
+
>>> tww_UC_C = rawdata_format_nucleotide('C', twwUC_C_list_atoms, twwUC_C_list_atoms_elements, twwUC_C_list_positions)
|
|
41
|
+
>>> from RNADiscrepancy.utilities import rawdata_format_nucleotide
|
|
42
|
+
>>> from RNADiscrepancy.measures import isodiscrepancy
|
|
43
|
+
>>> cwwAU_A_list_atoms = ["P","OP1","OP2","O5'","C5'","C4'","O4'","C3'","O3'","C2'","O2'","C1'","N9","C8","N7","C5","C6","N6","N1","C2","N3","C4"]
|
|
44
|
+
>>> cwwAU_A_list_atoms_elements = ["P","O","O","O","C","C","O","C","O","C","O","C","N","C","N","C","C","N","N","C","N","C"]
|
|
45
|
+
>>> cwwAU_A_list_positions = [(-46.6870002746582, -80.67400360107422, -42.457000732421875),(-45.249000549316406, -80.74099731445312, -42.09299850463867),(-47.303001403808594, -79.36100006103516, -42.77299880981445),(-46.92900085449219, -81.6449966430664, -43.694000244140625),(-47.27000045776367, -83.01000213623047, -43.49399948120117),(-47.70500183105469, -83.62999725341797, -44.79600143432617),(-49.13199996948242, -83.92400360107422, -44.75299835205078),(-47.60100173950195, -82.73999786376953, -46.02299880981445),(-46.290000915527344, -82.58599853515625, -46.52899932861328),(-48.512001037597656, -83.47599792480469, -46.97999954223633),(-47.96799850463867, -84.72100067138672, -47.375),(-49.6870002746582, -83.72000122070312, -46.04600143432617),(-50.590999603271484, -82.5719985961914, -46.012001037597656),(-50.654998779296875, -81.52100372314453, -45.125),(-51.59299850463867, -80.6449966430664, -45.40800094604492),(-52.19300079345703, -81.16400146484375, -46.55400085449219),(-53.26100158691406, -80.71800231933594, -47.35499954223633),(-53.959999084472656, -79.60900115966797, -47.10100173950195),(-53.59199905395508, -81.46099853515625, -48.4379997253418),(-52.902000427246094, -82.58399963378906, -48.68299865722656),(-51.887001037597656, -83.11399841308594, -48.000999450683594),(-51.58000183105469, -82.34600067138672, -46.9370002746582)]
|
|
46
|
+
>>> cwwAU_U_list_atoms = ["P","OP1","OP2","O5'","C5'","C4'","O4'","C3'","O3'","C2'","O2'","C1'","N1","C2","O2","N3","C4","O4","C5","C6"]
|
|
47
|
+
>>> cwwAU_U_list_atoms_elements = ["P","O","O","O","C","C","O","C","O","C","O","C","N","C","O","N","C","O","C","C"]
|
|
48
|
+
>>> cwwAU_U_list_positions = [(-62.04399871826172, -78.197998046875, -52.75299835205078),(-63.35599899291992, -78.33399963378906, -53.4370002746582),(-61.986000061035156, -77.62999725341797, -51.382999420166016),(-61.347999572753906, -79.62899780273438, -52.72700119018555),(-61.56800079345703, -80.56199645996094, -53.777000427246094),(-60.268001556396484, -81.21900177001953, -54.17399978637695),(-59.19599914550781, -80.2249984741211, -54.268001556396484),(-59.689998626708984, -82.20500183105469, -53.178001403808594),(-60.340999603271484, -83.45899963378906, -53.125),(-58.26900100708008, -82.29499816894531, -53.6879997253418),(-58.19900131225586, -82.8759994506836, -54.974998474121094),(-57.983001708984375, -80.80400085449219, -53.79199981689453),(-57.66400146484375, -80.25, -52.470001220703125),(-56.564998626708984, -80.7750015258789, -51.81100082397461),(-55.832000732421875, -81.61399841308594, -52.30500030517578),(-56.35599899291992, -80.28099822998047, -50.54899978637695),(-57.10900115966797, -79.3290023803711, -49.89699935913086),(-56.82600021362305, -79.02300262451172, -48.737998962402344),(-58.20600128173828, -78.81199645996094, -50.659000396728516),(-58.439998626708984, -79.2750015258789, -51.88999938964844)]
|
|
49
|
+
>>> twwUC_U_list_atoms = ["P","OP1","OP2","O5'","C5'","C4'","O4'","C3'","O3'","C2'","O2'","C1'","N1","C2","O2","N3","C4","O4","C5","C6"]
|
|
50
|
+
>>> twwUC_U_list_atoms_elements = ["P","O","O","O","C","C","O","C","O","C","O","C","N","C","O","N","C","O","C","C"]
|
|
51
|
+
>>> twwUC_U_list_positions = [(52.62300109863281, 163.0240020751953, 82.34400177001953),(51.7239990234375, 162.08799743652344, 81.61499786376953),(53.42900085449219, 164.01199340820312, 81.5770034790039),(51.78300094604492, 163.79200744628906, 83.45999908447266),(50.75199890136719, 163.1179962158203, 84.20899963378906),(50.0260009765625, 164.10000610351562, 85.0989990234375),(50.957000732421875, 164.63400268554688, 86.0719985961914),(49.45600128173828, 165.31300354003906, 84.38600158691406),(48.15599822998047, 165.03599548339844, 83.86499786376953),(49.45199966430664, 166.3730010986328, 85.48100280761719),(48.358001708984375, 166.2570037841797, 86.36199951171875),(50.731998443603516, 166.02000427246094, 86.24299621582031),(51.94499969482422, 166.73500061035156, 85.80999755859375),(51.97999954223633, 168.10499572753906, 85.95700073242188),(51.03900146484375, 168.75, 86.38800048828125),(53.1619987487793, 168.69700622558594, 85.58200073242188),(54.284000396728516, 168.07400512695312, 85.08399963378906),(55.29899978637695, 168.73300170898438, 84.87999725341797),(54.16299819946289, 166.66700744628906, 84.93399810791016),(53.02899932861328, 166.0590057373047, 85.28900146484375)]
|
|
52
|
+
>>> twwUC_C_list_atoms = ["P","OP1","OP2","O5'","C5'","C4'","O4'","C3'","O3'","C2'","O2'","C1'","N1","C2","O2","N3","C4","N4","C5","C6"]
|
|
53
|
+
>>> twwUC_C_list_atoms_elements = ["P","O","O","O","C","C","O","C","O","C","O","C","N","C","O","N","C","N","C","C"]
|
|
54
|
+
>>> twwUC_C_list_positions = [(51.45399856567383, 178.2270050048828, 85.64199829101562),(51.52799987792969, 179.64500427246094, 85.23200225830078),(50.196998596191406, 177.45799255371094, 85.41799926757812),(52.66400146484375, 177.4499969482422, 84.95800018310547),(54.012001037597656, 177.9250030517578, 85.10099792480469),(54.97700119018555, 176.7689971923828, 85.08399963378906),(54.652000427246094, 175.8520050048828, 86.15599822998047),(54.93899917602539, 175.89599609375, 83.8489990234375),(55.71099853515625, 176.46200561523438, 82.81400299072266),(55.54800033569336, 174.60000610351562, 84.3550033569336),(56.957000732421875, 174.67799377441406, 84.4469985961914),(54.93899917602539, 174.52000427246094, 85.75399780273438),(53.6879997253418, 173.73199462890625, 85.80500030517578),(53.76300048828125, 172.32899475097656, 85.83999633789062),(54.87699890136719, 171.78500366210938, 85.81199645996094),(52.61399841308594, 171.60699462890625, 85.89900207519531),(51.43299865722656, 172.22999572753906, 85.91899871826172),(50.332000732421875, 171.48800659179688, 85.98400115966797),(51.32899856567383, 173.6479949951172, 85.87699890136719),(52.46799850463867, 174.35299682617188, 85.82099914550781)]
|
|
55
|
+
>>> cww_AU_A = rawdata_format_nucleotide('A', cwwAU_A_list_atoms, cwwAU_A_list_atoms_elements, cwwAU_A_list_positions)
|
|
56
|
+
>>> cww_AU_U = rawdata_format_nucleotide('U', cwwAU_U_list_atoms, cwwAU_U_list_atoms_elements, cwwAU_U_list_positions)
|
|
57
|
+
>>> tww_UC_U = rawdata_format_nucleotide('U', twwUC_U_list_atoms, twwUC_U_list_atoms_elements, twwUC_U_list_positions)
|
|
58
|
+
>>> tww_UC_C = rawdata_format_nucleotide('C', twwUC_C_list_atoms, twwUC_C_list_atoms_elements, twwUC_C_list_positions)
|
|
59
|
+
>>> cww_AU = (cww_AU_A, cww_AU_U)
|
|
60
|
+
>>> tww_UC = (tww_UC_U, tww_UC_C)
|
|
61
|
+
>>> isodiscrepancy(cww_AU, tww_UC)
|
|
62
|
+
15.096495858783285
|
|
63
|
+
```
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# RNADiscrepancy
|
|
2
|
+
|
|
3
|
+
isostericity
|
|
4
|
+
RMSD
|
|
5
|
+
maybe more?
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
```python
|
|
9
|
+
>>> from RNADiscrepancy.utilities import rawdata_format_nucleotide
|
|
10
|
+
>>> from RNADiscrepancy.measures import isodiscrepancy
|
|
11
|
+
>>> cwwAU_A_list_atoms = ["P","OP1","OP2","O5'","C5'","C4'","O4'","C3'","O3'","C2'","O2'","C1'","N9","C8","N7","C5","C6","N6","N1","C2","N3","C4"]
|
|
12
|
+
>>> cwwAU_A_list_atoms_elements = ["P","O","O","O","C","C","O","C","O","C","O","C","N","C","N","C","C","N","N","C","N","C"]
|
|
13
|
+
>>> cwwAU_A_list_positions = [(-46.6870002746582, -80.67400360107422, -42.457000732421875),(-45.249000549316406, -80.74099731445312, -42.09299850463867),(-47.303001403808594, -79.36100006103516, -42.77299880981445),(-46.92900085449219, -81.6449966430664, -43.694000244140625),(-47.27000045776367, -83.01000213623047, -43.49399948120117),(-47.70500183105469, -83.62999725341797, -44.79600143432617),(-49.13199996948242, -83.92400360107422, -44.75299835205078),(-47.60100173950195, -82.73999786376953, -46.02299880981445),(-46.290000915527344, -82.58599853515625, -46.52899932861328),(-48.512001037597656, -83.47599792480469, -46.97999954223633),(-47.96799850463867, -84.72100067138672, -47.375),(-49.6870002746582, -83.72000122070312, -46.04600143432617),(-50.590999603271484, -82.5719985961914, -46.012001037597656),(-50.654998779296875, -81.52100372314453, -45.125),(-51.59299850463867, -80.6449966430664, -45.40800094604492),(-52.19300079345703, -81.16400146484375, -46.55400085449219),(-53.26100158691406, -80.71800231933594, -47.35499954223633),(-53.959999084472656, -79.60900115966797, -47.10100173950195),(-53.59199905395508, -81.46099853515625, -48.4379997253418),(-52.902000427246094, -82.58399963378906, -48.68299865722656),(-51.887001037597656, -83.11399841308594, -48.000999450683594),(-51.58000183105469, -82.34600067138672, -46.9370002746582)]
|
|
14
|
+
>>> cwwAU_U_list_atoms = ["P","OP1","OP2","O5'","C5'","C4'","O4'","C3'","O3'","C2'","O2'","C1'","N1","C2","O2","N3","C4","O4","C5","C6"]
|
|
15
|
+
>>> cwwAU_U_list_atoms_elements = ["P","O","O","O","C","C","O","C","O","C","O","C","N","C","O","N","C","O","C","C"]
|
|
16
|
+
>>> cwwAU_U_list_positions = [(-62.04399871826172, -78.197998046875, -52.75299835205078),(-63.35599899291992, -78.33399963378906, -53.4370002746582),(-61.986000061035156, -77.62999725341797, -51.382999420166016),(-61.347999572753906, -79.62899780273438, -52.72700119018555),(-61.56800079345703, -80.56199645996094, -53.777000427246094),(-60.268001556396484, -81.21900177001953, -54.17399978637695),(-59.19599914550781, -80.2249984741211, -54.268001556396484),(-59.689998626708984, -82.20500183105469, -53.178001403808594),(-60.340999603271484, -83.45899963378906, -53.125),(-58.26900100708008, -82.29499816894531, -53.6879997253418),(-58.19900131225586, -82.8759994506836, -54.974998474121094),(-57.983001708984375, -80.80400085449219, -53.79199981689453),(-57.66400146484375, -80.25, -52.470001220703125),(-56.564998626708984, -80.7750015258789, -51.81100082397461),(-55.832000732421875, -81.61399841308594, -52.30500030517578),(-56.35599899291992, -80.28099822998047, -50.54899978637695),(-57.10900115966797, -79.3290023803711, -49.89699935913086),(-56.82600021362305, -79.02300262451172, -48.737998962402344),(-58.20600128173828, -78.81199645996094, -50.659000396728516),(-58.439998626708984, -79.2750015258789, -51.88999938964844)]
|
|
17
|
+
>>> twwUC_U_list_atoms = ["P","OP1","OP2","O5'","C5'","C4'","O4'","C3'","O3'","C2'","O2'","C1'","N1","C2","O2","N3","C4","O4","C5","C6"]
|
|
18
|
+
>>> twwUC_U_list_atoms_elements = ["P","O","O","O","C","C","O","C","O","C","O","C","N","C","O","N","C","O","C","C"]
|
|
19
|
+
>>> twwUC_U_list_positions = [(52.62300109863281, 163.0240020751953, 82.34400177001953),(51.7239990234375, 162.08799743652344, 81.61499786376953),(53.42900085449219, 164.01199340820312, 81.5770034790039),(51.78300094604492, 163.79200744628906, 83.45999908447266),(50.75199890136719, 163.1179962158203, 84.20899963378906),(50.0260009765625, 164.10000610351562, 85.0989990234375),(50.957000732421875, 164.63400268554688, 86.0719985961914),(49.45600128173828, 165.31300354003906, 84.38600158691406),(48.15599822998047, 165.03599548339844, 83.86499786376953),(49.45199966430664, 166.3730010986328, 85.48100280761719),(48.358001708984375, 166.2570037841797, 86.36199951171875),(50.731998443603516, 166.02000427246094, 86.24299621582031),(51.94499969482422, 166.73500061035156, 85.80999755859375),(51.97999954223633, 168.10499572753906, 85.95700073242188),(51.03900146484375, 168.75, 86.38800048828125),(53.1619987487793, 168.69700622558594, 85.58200073242188),(54.284000396728516, 168.07400512695312, 85.08399963378906),(55.29899978637695, 168.73300170898438, 84.87999725341797),(54.16299819946289, 166.66700744628906, 84.93399810791016),(53.02899932861328, 166.0590057373047, 85.28900146484375)]
|
|
20
|
+
>>> twwUC_C_list_atoms = ["P","OP1","OP2","O5'","C5'","C4'","O4'","C3'","O3'","C2'","O2'","C1'","N1","C2","O2","N3","C4","N4","C5","C6"]
|
|
21
|
+
>>> twwUC_C_list_atoms_elements = ["P","O","O","O","C","C","O","C","O","C","O","C","N","C","O","N","C","N","C","C"]
|
|
22
|
+
>>> twwUC_C_list_positions = [(51.45399856567383, 178.2270050048828, 85.64199829101562),(51.52799987792969, 179.64500427246094, 85.23200225830078),(50.196998596191406, 177.45799255371094, 85.41799926757812),(52.66400146484375, 177.4499969482422, 84.95800018310547),(54.012001037597656, 177.9250030517578, 85.10099792480469),(54.97700119018555, 176.7689971923828, 85.08399963378906),(54.652000427246094, 175.8520050048828, 86.15599822998047),(54.93899917602539, 175.89599609375, 83.8489990234375),(55.71099853515625, 176.46200561523438, 82.81400299072266),(55.54800033569336, 174.60000610351562, 84.3550033569336),(56.957000732421875, 174.67799377441406, 84.4469985961914),(54.93899917602539, 174.52000427246094, 85.75399780273438),(53.6879997253418, 173.73199462890625, 85.80500030517578),(53.76300048828125, 172.32899475097656, 85.83999633789062),(54.87699890136719, 171.78500366210938, 85.81199645996094),(52.61399841308594, 171.60699462890625, 85.89900207519531),(51.43299865722656, 172.22999572753906, 85.91899871826172),(50.332000732421875, 171.48800659179688, 85.98400115966797),(51.32899856567383, 173.6479949951172, 85.87699890136719),(52.46799850463867, 174.35299682617188, 85.82099914550781)]
|
|
23
|
+
>>> cww_AU_A = rawdata_format_nucleotide('A', cwwAU_A_list_atoms, cwwAU_A_list_atoms_elements, cwwAU_A_list_positions)
|
|
24
|
+
>>> cww_AU_U = rawdata_format_nucleotide('U', cwwAU_U_list_atoms, cwwAU_U_list_atoms_elements, cwwAU_U_list_positions)
|
|
25
|
+
>>> tww_UC_U = rawdata_format_nucleotide('U', twwUC_U_list_atoms, twwUC_U_list_atoms_elements, twwUC_U_list_positions)
|
|
26
|
+
>>> tww_UC_C = rawdata_format_nucleotide('C', twwUC_C_list_atoms, twwUC_C_list_atoms_elements, twwUC_C_list_positions)
|
|
27
|
+
>>> from RNADiscrepancy.utilities import rawdata_format_nucleotide
|
|
28
|
+
>>> from RNADiscrepancy.measures import isodiscrepancy
|
|
29
|
+
>>> cwwAU_A_list_atoms = ["P","OP1","OP2","O5'","C5'","C4'","O4'","C3'","O3'","C2'","O2'","C1'","N9","C8","N7","C5","C6","N6","N1","C2","N3","C4"]
|
|
30
|
+
>>> cwwAU_A_list_atoms_elements = ["P","O","O","O","C","C","O","C","O","C","O","C","N","C","N","C","C","N","N","C","N","C"]
|
|
31
|
+
>>> cwwAU_A_list_positions = [(-46.6870002746582, -80.67400360107422, -42.457000732421875),(-45.249000549316406, -80.74099731445312, -42.09299850463867),(-47.303001403808594, -79.36100006103516, -42.77299880981445),(-46.92900085449219, -81.6449966430664, -43.694000244140625),(-47.27000045776367, -83.01000213623047, -43.49399948120117),(-47.70500183105469, -83.62999725341797, -44.79600143432617),(-49.13199996948242, -83.92400360107422, -44.75299835205078),(-47.60100173950195, -82.73999786376953, -46.02299880981445),(-46.290000915527344, -82.58599853515625, -46.52899932861328),(-48.512001037597656, -83.47599792480469, -46.97999954223633),(-47.96799850463867, -84.72100067138672, -47.375),(-49.6870002746582, -83.72000122070312, -46.04600143432617),(-50.590999603271484, -82.5719985961914, -46.012001037597656),(-50.654998779296875, -81.52100372314453, -45.125),(-51.59299850463867, -80.6449966430664, -45.40800094604492),(-52.19300079345703, -81.16400146484375, -46.55400085449219),(-53.26100158691406, -80.71800231933594, -47.35499954223633),(-53.959999084472656, -79.60900115966797, -47.10100173950195),(-53.59199905395508, -81.46099853515625, -48.4379997253418),(-52.902000427246094, -82.58399963378906, -48.68299865722656),(-51.887001037597656, -83.11399841308594, -48.000999450683594),(-51.58000183105469, -82.34600067138672, -46.9370002746582)]
|
|
32
|
+
>>> cwwAU_U_list_atoms = ["P","OP1","OP2","O5'","C5'","C4'","O4'","C3'","O3'","C2'","O2'","C1'","N1","C2","O2","N3","C4","O4","C5","C6"]
|
|
33
|
+
>>> cwwAU_U_list_atoms_elements = ["P","O","O","O","C","C","O","C","O","C","O","C","N","C","O","N","C","O","C","C"]
|
|
34
|
+
>>> cwwAU_U_list_positions = [(-62.04399871826172, -78.197998046875, -52.75299835205078),(-63.35599899291992, -78.33399963378906, -53.4370002746582),(-61.986000061035156, -77.62999725341797, -51.382999420166016),(-61.347999572753906, -79.62899780273438, -52.72700119018555),(-61.56800079345703, -80.56199645996094, -53.777000427246094),(-60.268001556396484, -81.21900177001953, -54.17399978637695),(-59.19599914550781, -80.2249984741211, -54.268001556396484),(-59.689998626708984, -82.20500183105469, -53.178001403808594),(-60.340999603271484, -83.45899963378906, -53.125),(-58.26900100708008, -82.29499816894531, -53.6879997253418),(-58.19900131225586, -82.8759994506836, -54.974998474121094),(-57.983001708984375, -80.80400085449219, -53.79199981689453),(-57.66400146484375, -80.25, -52.470001220703125),(-56.564998626708984, -80.7750015258789, -51.81100082397461),(-55.832000732421875, -81.61399841308594, -52.30500030517578),(-56.35599899291992, -80.28099822998047, -50.54899978637695),(-57.10900115966797, -79.3290023803711, -49.89699935913086),(-56.82600021362305, -79.02300262451172, -48.737998962402344),(-58.20600128173828, -78.81199645996094, -50.659000396728516),(-58.439998626708984, -79.2750015258789, -51.88999938964844)]
|
|
35
|
+
>>> twwUC_U_list_atoms = ["P","OP1","OP2","O5'","C5'","C4'","O4'","C3'","O3'","C2'","O2'","C1'","N1","C2","O2","N3","C4","O4","C5","C6"]
|
|
36
|
+
>>> twwUC_U_list_atoms_elements = ["P","O","O","O","C","C","O","C","O","C","O","C","N","C","O","N","C","O","C","C"]
|
|
37
|
+
>>> twwUC_U_list_positions = [(52.62300109863281, 163.0240020751953, 82.34400177001953),(51.7239990234375, 162.08799743652344, 81.61499786376953),(53.42900085449219, 164.01199340820312, 81.5770034790039),(51.78300094604492, 163.79200744628906, 83.45999908447266),(50.75199890136719, 163.1179962158203, 84.20899963378906),(50.0260009765625, 164.10000610351562, 85.0989990234375),(50.957000732421875, 164.63400268554688, 86.0719985961914),(49.45600128173828, 165.31300354003906, 84.38600158691406),(48.15599822998047, 165.03599548339844, 83.86499786376953),(49.45199966430664, 166.3730010986328, 85.48100280761719),(48.358001708984375, 166.2570037841797, 86.36199951171875),(50.731998443603516, 166.02000427246094, 86.24299621582031),(51.94499969482422, 166.73500061035156, 85.80999755859375),(51.97999954223633, 168.10499572753906, 85.95700073242188),(51.03900146484375, 168.75, 86.38800048828125),(53.1619987487793, 168.69700622558594, 85.58200073242188),(54.284000396728516, 168.07400512695312, 85.08399963378906),(55.29899978637695, 168.73300170898438, 84.87999725341797),(54.16299819946289, 166.66700744628906, 84.93399810791016),(53.02899932861328, 166.0590057373047, 85.28900146484375)]
|
|
38
|
+
>>> twwUC_C_list_atoms = ["P","OP1","OP2","O5'","C5'","C4'","O4'","C3'","O3'","C2'","O2'","C1'","N1","C2","O2","N3","C4","N4","C5","C6"]
|
|
39
|
+
>>> twwUC_C_list_atoms_elements = ["P","O","O","O","C","C","O","C","O","C","O","C","N","C","O","N","C","N","C","C"]
|
|
40
|
+
>>> twwUC_C_list_positions = [(51.45399856567383, 178.2270050048828, 85.64199829101562),(51.52799987792969, 179.64500427246094, 85.23200225830078),(50.196998596191406, 177.45799255371094, 85.41799926757812),(52.66400146484375, 177.4499969482422, 84.95800018310547),(54.012001037597656, 177.9250030517578, 85.10099792480469),(54.97700119018555, 176.7689971923828, 85.08399963378906),(54.652000427246094, 175.8520050048828, 86.15599822998047),(54.93899917602539, 175.89599609375, 83.8489990234375),(55.71099853515625, 176.46200561523438, 82.81400299072266),(55.54800033569336, 174.60000610351562, 84.3550033569336),(56.957000732421875, 174.67799377441406, 84.4469985961914),(54.93899917602539, 174.52000427246094, 85.75399780273438),(53.6879997253418, 173.73199462890625, 85.80500030517578),(53.76300048828125, 172.32899475097656, 85.83999633789062),(54.87699890136719, 171.78500366210938, 85.81199645996094),(52.61399841308594, 171.60699462890625, 85.89900207519531),(51.43299865722656, 172.22999572753906, 85.91899871826172),(50.332000732421875, 171.48800659179688, 85.98400115966797),(51.32899856567383, 173.6479949951172, 85.87699890136719),(52.46799850463867, 174.35299682617188, 85.82099914550781)]
|
|
41
|
+
>>> cww_AU_A = rawdata_format_nucleotide('A', cwwAU_A_list_atoms, cwwAU_A_list_atoms_elements, cwwAU_A_list_positions)
|
|
42
|
+
>>> cww_AU_U = rawdata_format_nucleotide('U', cwwAU_U_list_atoms, cwwAU_U_list_atoms_elements, cwwAU_U_list_positions)
|
|
43
|
+
>>> tww_UC_U = rawdata_format_nucleotide('U', twwUC_U_list_atoms, twwUC_U_list_atoms_elements, twwUC_U_list_positions)
|
|
44
|
+
>>> tww_UC_C = rawdata_format_nucleotide('C', twwUC_C_list_atoms, twwUC_C_list_atoms_elements, twwUC_C_list_positions)
|
|
45
|
+
>>> cww_AU = (cww_AU_A, cww_AU_U)
|
|
46
|
+
>>> tww_UC = (tww_UC_U, tww_UC_C)
|
|
47
|
+
>>> isodiscrepancy(cww_AU, tww_UC)
|
|
48
|
+
15.096495858783285
|
|
49
|
+
```
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling >= 1.26", "numpy >= 2.4.3", "scipy >= 1.17.0"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
[project]
|
|
7
|
+
name = "RNADiscrepancy"
|
|
8
|
+
version = "0.0.2"
|
|
9
|
+
authors = [
|
|
10
|
+
{ name="Vladimir Reinharz", email="reinharz.vladimir@uqam.ca" },
|
|
11
|
+
]
|
|
12
|
+
description = "Compute discrepancy between 3D RNA (sub)structures"
|
|
13
|
+
readme = "README.md"
|
|
14
|
+
requires-python = ">=3.9"
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Operating System :: OS Independent",
|
|
18
|
+
]
|
|
19
|
+
license = "gpl-3.0"
|
|
20
|
+
license-files = ["LICENSE*"]
|
|
21
|
+
|
|
22
|
+
[project.urls]
|
|
23
|
+
Homepage = "https://codeberg.org/vreinharz/RNADiscrepancy"
|
|
24
|
+
Issues = "https://codeberg.org/vreinharz/RNADiscrepancy/issues"
|
|
Binary file
|
|
File without changes
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
import math
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
from scipy.spatial.transform import Rotation
|
|
6
|
+
|
|
7
|
+
from RNADiscrepancy import references
|
|
8
|
+
from RNADiscrepancy.rna import Atom, Nucleotide
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def c1prime_distance_difference(pair1, pair2):
    """Return the absolute difference between the C1'-C1' distances of two pairs.

    Each pair is a ``(left, right)`` tuple of nucleotides that can be indexed
    by atom name; the atoms must provide ``distance(other)``.  C1' is the
    sugar atom connected to the base.  The result is always non-negative.
    """
    (nt1_left, nt1_right), (nt2_left, nt2_right) = pair1, pair2

    dist_pair1 = nt1_left["C1'"].distance(nt1_right["C1'"])
    # Both pairs are measured C1' to C1'; using any other atom of the right
    # nucleotide would compare two different kinds of distance.
    dist_pair2 = nt2_left["C1'"].distance(nt2_right["C1'"])

    return abs(dist_pair1 - dist_pair2)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _matched_positions(nt, ref_nt):
    """Collect positions of the atoms of ``nt`` that also exist in ``ref_nt``.

    Returns ``(observed, ideal)``: two lists of positions in the same order.
    """
    observed, ideal = [], []
    for atom_name, atom in nt.items():
        try:
            ideal.append(ref_nt[atom_name].position)
        except KeyError:
            # No idealized counterpart for this atom: leave it out of the fit.
            continue
        observed.append(atom.position)
    return observed, ideal


def _rotation_to_reference(nt):
    """Return the rotation fitting ``nt`` onto its idealized reference base.

    Both point sets are centered on their own ribose fulcrum (N1/N9).
    """
    ref_nt = references.nucleotide[nt.name]
    observed, ideal = _matched_positions(nt, ref_nt)
    return best_rotation(ideal, np.array(ref_nt.ribose_fulcrum.position),
                         observed, np.array(nt.ribose_fulcrum.position))[0]


def _to_anchor_frame(points, anchor_nt, anchor_rotation):
    """Shift ``points`` by the fulcrum of ``anchor_nt``, then apply its rotation."""
    origin = np.array(anchor_nt.ribose_fulcrum.position)
    return (np.asarray(points) - origin) @ anchor_rotation


def _planar_distance(p, q):
    """Distance between ``p`` and ``q`` using only their first two coordinates."""
    return math.sqrt((p[0] - q[0])**2 + (p[1] - q[1])**2)


def _c1prime_fulcrum_bond(nt):
    """Return the two end points of the C1' - N1/N9 bond of ``nt``."""
    return [nt["C1'"].position, nt.ribose_fulcrum.position]


def _planar_bond_angle(bond1, bond2):
    """Angle taken from the rotation aligning two bonds with their third coordinate zeroed.

    Both arrays are modified in place (third coordinate set to 0).
    """
    bond1[:, 2:] = 0
    bond2[:, 2:] = 0
    rotation = best_rotation(bond1, 0, bond2, 0)[0]
    # Rounding can push the matrix entry marginally outside [-1, 1], which
    # would make math.acos raise ValueError.
    cos_theta = max(-1.0, min(1.0, float(rotation[0][0])))
    return math.acos(cos_theta)


def c1prime_translation(pair1, pair2):
    """Compare where the partner C1' atom and C1'-N bond sit once one base is aligned.

    Here we have to do things in regards to the _left and _right side
    symmetrically:

    1'. Use N1/N9 of the anchoring nucleotide as the origin.
    2'. Rotate to align the anchoring base to its reference (and apply that
        rotation to the partner nucleotide).

    Parameters
    ----------
    pair1, pair2 : tuple
        ``(left, right)`` nucleotides; every nucleotide needs a ``name``
        present in ``references.nucleotide``, a ``ribose_fulcrum`` atom and
        a ``"C1'"`` atom.

    Returns
    -------
    tuple
        ``(c1p_t1, c1p_t2, theta1, theta2)``: planar distance between the
        right C1' atoms with the left bases as anchors, the same for the left
        C1' atoms with the right bases as anchors, and the two corresponding
        bond angles in radians.
    """
    (nt1_left, nt1_right), (nt2_left, nt2_right) = pair1, pair2

    nt1_left_rotation = _rotation_to_reference(nt1_left)
    nt1_right_rotation = _rotation_to_reference(nt1_right)
    nt2_left_rotation = _rotation_to_reference(nt2_left)
    nt2_right_rotation = _rotation_to_reference(nt2_right)

    # Left bases as anchors: compare the right C1' atoms.
    c1p_1right = _to_anchor_frame(nt1_right["C1'"].position, nt1_left, nt1_left_rotation)
    c1p_2right = _to_anchor_frame(nt2_right["C1'"].position, nt2_left, nt2_left_rotation)
    c1p_t1 = _planar_distance(c1p_1right, c1p_2right)

    # Right bases as anchors: compare the left C1' atoms.
    c1p_1left = _to_anchor_frame(nt1_left["C1'"].position, nt1_right, nt1_right_rotation)
    c1p_2left = _to_anchor_frame(nt2_left["C1'"].position, nt2_right, nt2_right_rotation)
    c1p_t2 = _planar_distance(c1p_1left, c1p_2left)

    # Left bases as anchors: compare the right C1'-N bonds.  Each bond uses
    # the fulcrum of the nucleotide it belongs to (the right one), while the
    # anchor fulcrum only defines the origin.
    bond_1right = _to_anchor_frame(_c1prime_fulcrum_bond(nt1_right), nt1_left, nt1_left_rotation)
    bond_2right = _to_anchor_frame(_c1prime_fulcrum_bond(nt2_right), nt2_left, nt2_left_rotation)
    theta1 = _planar_bond_angle(bond_1right, bond_2right)

    # Right bases as anchors: compare the left C1'-N bonds.
    bond_1left = _to_anchor_frame(_c1prime_fulcrum_bond(nt1_left), nt1_right, nt1_right_rotation)
    bond_2left = _to_anchor_frame(_c1prime_fulcrum_bond(nt2_left), nt2_right, nt2_right_rotation)
    theta2 = _planar_bond_angle(bond_1left, bond_2left)

    return c1p_t1, c1p_t2, theta1, theta2
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def isodiscrepancy(pair1, pair2):
    """Return the IsoDiscrepancy Index (IDI) between two base pairs.

    The pairs are ``(nt1_left, nt1_right)`` and ``(nt2_left, nt2_right)``;
    the orientation is assumed to align nt1_left with nt2_left and nt1_right
    with nt2_right.

    Following https://doi.org/10.1093/nar/gkp011, three terms are combined:

    1) the difference of the C1'-C1' distances,
    2) the C1' translation seen after aligning on one N1/N9,
    3) the in-plane rotation orienting the second base relative to the first.
    """
    delta_c1p = c1prime_distance_difference(pair1, pair2)
    shift_a, shift_b, angle_a, angle_b = c1prime_translation(pair1, pair2)

    distance_term = delta_c1p**2
    translation_term = (shift_a**2 + shift_b**2)/2
    rotation_term = ((2 * angle_a)**2 + (2 * angle_b)**2)/2

    return math.sqrt(distance_term + translation_term + rotation_term)
|
|
145
|
+
|
|
146
|
+
def best_rotation(A, center_A, B, center_B):
    """Least-squares rotation superposing the points ``B`` onto the points ``A``.

    Parameters
    ----------
    A, B : array-like, one point per row
        Corresponding points; row ``i`` of ``B`` is matched to row ``i`` of ``A``.
    center_A, center_B : array-like or scalar
        Values subtracted from ``A`` and ``B`` before fitting.

    Returns
    -------
    tuple
        ``(rotation, B2A, center_A, center_B, sum_square_error, RMSD)`` where
        ``rotation`` is meant to be applied to row vectors by right
        multiplication (``points @ rotation``) and
        ``B2A = (B - center_B) @ rotation + center_A``.
    """
    A_centered = np.asarray(A, dtype=float) - center_A
    B_centered = np.asarray(B, dtype=float) - center_B

    fitted, _ = Rotation.align_vectors(A_centered, B_centered)
    # as_matrix() gives R such that a ~= R @ b for column vectors.  Points are
    # stored as rows here, so the matrix to right-multiply with is R transposed.
    rotation = fitted.as_matrix().T

    B_rotated = B_centered @ rotation
    B2A = B_rotated + center_A
    # Residuals are measured in the centered frame.
    difference = A_centered - B_rotated
    sum_square_error = float(np.sum(difference ** 2))
    RMSD = float(np.sqrt(sum_square_error / len(A)))
    return rotation, B2A, center_A, center_B, sum_square_error, RMSD
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def best_mean_centered_rotation(A, B):
    """Rigid-body least-squares fit of two numpy arrays centered on their means.

    Each array is centered on the mean of its rows before the rotation is
    fitted with ``best_rotation``.

    Returns the tuple produced by ``best_rotation``:
        rotation : matrix
        B2A : B after the fit
        mean_A,
        mean_B,
        sum_square_error,
        RMSD
    """
    centroid_A, centroid_B = A.mean(axis=0), B.mean(axis=0)
    fit = best_rotation(A, centroid_A, B, centroid_B)
    return fit
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
from RNADiscrepancy import rna
|
|
2
|
+
|
|
3
|
+
"""
|
|
4
|
+
Position of idealized nucleotides heavy atoms with center at (0,0,0)
|
|
5
|
+
It follows FR3D standard orientation with the Watson-Crick edge in
|
|
6
|
+
the positive x and y quadrant.
|
|
7
|
+
"""
|
|
8
|
+
# Maps a base letter ('A', 'C', 'G', 'U') to its idealized rna.Nucleotide;
# filled by the loop at the bottom of this module.
nucleotide = {}

# Idealized coordinates, as [x, y, z], keyed by base letter then atom name.
# Every z is 0.0.  Each C1' entry reuses the x of the base's N9 (A, G) or
# N1 (C, U) and subtracts 1.48 from its y.
# NOTE(review): units are presumably angstroms -- confirm.
ref_heavy_atoms_positions = {
    'A' : {
        'N9' : ([-1.110515, -1.823319, 0.0] ),
        'C4' : ([0.007975, -1.020192, 0.0] ),
        'N3' : ([1.298514, -1.383716, 0.0] ),
        'N1' : ([1.754329, 1.001072, 0.0] ),
        'C6' : ([0.453129, 1.320188, 0.0] ),
        'N6' : ([0.092589, 2.623715, 0.0] ),
        'C8' : ([-2.200426, -0.989732, 0.0] ),
        'C5' : ([-0.504186, 0.282479, 0.0] ),
        'C2' : ([2.092069, -0.307357, 0.0] ),
        'N7' : ([-1.883477, 0.296861, 0.0] ),
        "C1'" :([ -1.110515, (-1.823319 - 1.48), 0.000000])
    },
    'C' : {
        'N1' : ([-0.380579, -1.484583, 0.0] ),
        'C2' : ([0.908756, -0.88533, 0.0] ),
        'O2' : ([1.888871, -1.605366, 0.0] ),
        'N3' : ([0.931558, 0.493192, 0.0] ),
        'C4' : ([-0.197209, 1.170043, 0.0] ),
        'N4' : ([-0.099223, 2.524619, 0.0] ),
        'C6' : ([-1.542632, -0.786228, 0.0] ),
        'C5' : ([-1.509542, 0.573654, 0.0] ),
        "C1'" : ( [ -0.380579, (-1.484583-1.48), 0.000000] )
    },
    'G' : {
        'N9' : ([-1.45668, -1.711888, 0.0] ),
        'C4' : ([-0.339093, -0.921183, 0.0] ),
        'N3' : ([0.947138, -1.370892, 0.0] ),
        'N1' : ([1.440727, 0.946157, 0.0] ),
        'C6' : ([0.110442, 1.47918, 0.0] ),
        'O6' : ([-0.059694, 2.682325, 0.0] ),
        'C8' : ([-2.54532, -0.870005, 0.0] ),
        'C5' : ([-0.831466, 0.385014, 0.0] ),
        'C2' : ([1.802732, -0.378447, 0.0] ),
        'N7' : ([-2.208072, 0.408244, 0.0] ),
        'N2' : ([3.139287, -0.648506, 0.0] ),
        "C1'" : ( [ -1.456680, (-1.711888-1.48), 0.000000])
    },
    'U' : {
        'N1' : ([-0.32642, -1.514422, 0.0] ),
        'C2' : ([0.933866, -0.925171, 0.0] ),
        'O2' : ([1.96607, -1.562816, 0.0] ),
        'N3' : ([0.868896, 0.45947, 0.0] ),
        'C4' : ([-0.266417, 1.293935, 0.0] ),
        'O4' : ([-0.149264, 2.505638, 0.0] ),
        'C6' : ([-1.504181, -0.804727, 0.0] ),
        'C5' : ([-1.52255, 0.548093, 0.0] ),
        "C1'" : ([ -0.326420, (-1.514422-1.48), 0.000000])
    }
}

# Build one rna.Nucleotide per base from the table above.
for nt_name, v in ref_heavy_atoms_positions.items():
    atoms = []
    for atom_name, pos in v.items():
        # The element passed to Atom is the first character of the atom name.
        atoms.append(rna.Atom(atom_name, atom_name[0], *pos) )
    nucleotide[nt_name] = rna.Nucleotide(nt_name, atoms)
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import math
|
|
2
|
+
|
|
3
|
+
class Atom:
    """A named atom with an element symbol and Cartesian coordinates.

    Two atoms compare equal when they have the same ``name``; the element
    and the coordinates are ignored by ``==``, ``!=`` and ``hash``.
    """

    def __init__(self, name, element, x, y, z):
        self.name = name
        self.element = element
        self.x = x
        self.y = y
        self.z = z

    @property
    def position(self):
        """Coordinates as an ``(x, y, z)`` tuple."""
        return self.x, self.y, self.z

    def distance(self, other):
        """Return the Euclidean distance to ``other`` (anything with ``position``)."""
        x, y, z = self.position
        x2, y2, z2 = other.position
        return math.sqrt((x - x2)**2 + (y - y2)**2 + (z - z2)**2)

    def __eq__(self, other):
        if isinstance(other, self.__class__):
            return self.name == other.name
        return False

    def __ne__(self, other):
        return not self.__eq__(other)

    # ``__neq__`` is not a special method and is never called by ``!=``; it is
    # kept as an alias so existing explicit calls keep working.
    __neq__ = __ne__

    def __hash__(self):
        # Defining __eq__ alone makes instances unhashable; hash on the same
        # key that equality uses.
        return hash(self.name)

    def __repr__(self):
        return f"Atom class: {self.name}, {self.element}, {self.x}, {self.y}, {self.z}"

    __str__ = __repr__
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class Nucleotide(dict):
    """Dictionary of atoms keyed by atom name, tagged with a base letter.

    ``name`` must be one of 'A', 'C', 'G', 'U'.  ``ribose_fulcrum`` is the
    'N1' atom for C and U and the 'N9' atom for A and G, or ``None`` when
    that atom was not supplied.
    """

    def __init__(self, name, atoms):
        self.name = name
        self.update((atom.name, atom) for atom in atoms)

        if name not in ('A', 'C', 'G', 'U'):
            raise Exception(f"WTF this should be ACGU not {name}")

        # Set fulcrum: N1 for C/U, N9 for A/G; None if the atom is missing.
        fulcrum_atom = 'N1' if name in ('C', 'U') else 'N9'
        self.ribose_fulcrum = self.get(fulcrum_atom)
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
import pickle
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
import measures
|
|
6
|
+
import rna
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def read_samples(path):
    """Load pickled base-pair samples and convert them to Nucleotide pairs.

    The pickle is expected to map ``(lw, nt1, nt2)`` keys to lists of
    instances; each instance holds two records with the per-atom columns
    'atoms_name', 'atoms_element', 'atoms_x', 'atoms_y' and 'atoms_z'.
    Entries whose bases are not both in A/C/G/U, and instances where a
    nucleotide has no ribose fulcrum, are skipped.

    Returns a ``defaultdict(list)`` mapping the same keys to lists of
    ``(nucleotide1, nucleotide2)`` tuples.

    NOTE: unpickling can execute arbitrary code; only load trusted files.
    """
    valid_bases = ('A', 'C', 'G', 'U')
    atom_fields = ('atoms_name', 'atoms_element', 'atoms_x', 'atoms_y', 'atoms_z')

    def build_nucleotide(base, record):
        # One row per atom: (name, element, x, y, z).
        columns = [record[field] for field in atom_fields]
        return rna.Nucleotide(base, [rna.Atom(*row) for row in zip(*columns)])

    with open(path, 'rb') as handle:
        raw = pickle.load(handle)

    loaded_data = defaultdict(list)
    for (lw, nt1, nt2), instances in raw.items():
        if nt1 not in valid_bases or nt2 not in valid_bases:
            continue
        for instance in instances:
            first = build_nucleotide(nt1, instance[0])
            second = build_nucleotide(nt2, instance[1])
            if first.ribose_fulcrum is None or second.ribose_fulcrum is None:
                continue
            loaded_data[lw, nt1, nt2].append((first, second))

    return loaded_data
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def compute_isodiscrepancy(pair1, pair2):
    """Return ``measures.isodiscrepancy`` for the two nucleotide pairs."""
    idi = measures.isodiscrepancy(pair1, pair2)
    return idi
|
|
38
|
+
|
|
39
|
+
def main():
    """Load the bundled samples and print a few isodiscrepancy values.

    Also dumps the atom names, elements and positions of the second
    nucleotide of the first TWW U-C sample as comma-separated lists.
    The sample path is relative to the current working directory.
    """
    samples = read_samples('../../test/10samples.pickle')

    first_ua = samples['CWW', 'U', 'A'][0]
    first_cg = samples['CWW', 'C', 'G'][0]
    print(first_ua[0].name, first_cg[1].name)
    print(compute_isodiscrepancy(first_ua, first_cg))
    print("\n")
    print(compute_isodiscrepancy(samples['CWW', 'A', 'U'][0], samples['TWW', 'G', 'G'][0]))

    _uracil, cytosine = samples['TWW', 'U', 'C'][0]
    # Quoted names, then quoted elements, each list on its own line.
    for attribute in ('name', 'element'):
        for atom in cytosine.values():
            print(f'"{getattr(atom, attribute)}"', end=",")
        print()
    # Positions are printed unquoted and without a trailing newline.
    for atom in cytosine.values():
        print(f"{atom.position}", end=",")


if __name__ == '__main__':
    main()
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
from RNADiscrepancy import rna
|
|
2
|
+
|
|
3
|
+
def rawdata_format_nucleotide(nucleotide_name, list_atoms_names, list_atoms_elements, list_positions):
    """Build an ``rna.Nucleotide`` from parallel lists describing its atoms.

    The i-th entries of the three lists give the name, the element and the
    ``(x, y, z)`` position of one atom.  The lists are combined with ``zip``,
    so entries beyond the shortest list are ignored.
    """
    rows = zip(list_atoms_names, list_atoms_elements, list_positions)
    atoms = [rna.Atom(label, symbol, *xyz) for label, symbol, xyz in rows]
    return rna.Nucleotide(nucleotide_name, atoms)
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
import pickle
|