modifinder 1.4b0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. modifinder-1.4b0/LICENSE.md +10 -0
  2. modifinder-1.4b0/PKG-INFO +77 -0
  3. modifinder-1.4b0/Readme.md +34 -0
  4. modifinder-1.4b0/modifinder/__init__.py +9 -0
  5. modifinder-1.4b0/modifinder/arguments.py +16 -0
  6. modifinder-1.4b0/modifinder/calculate_scores.py +274 -0
  7. modifinder-1.4b0/modifinder/classes/Compound.py +490 -0
  8. modifinder-1.4b0/modifinder/classes/EdgeDetail.py +89 -0
  9. modifinder-1.4b0/modifinder/classes/ModiFinder.py +521 -0
  10. modifinder-1.4b0/modifinder/classes/Spectrum.py +307 -0
  11. modifinder-1.4b0/modifinder/classes/__init__.py +4 -0
  12. modifinder-1.4b0/modifinder/classes/tests/__init__.py +0 -0
  13. modifinder-1.4b0/modifinder/classes/tests/test_Compound.py +66 -0
  14. modifinder-1.4b0/modifinder/classes/tests/test_ModiFinder.py +52 -0
  15. modifinder-1.4b0/modifinder/convert.py +205 -0
  16. modifinder-1.4b0/modifinder/engines/Abtracts.py +107 -0
  17. modifinder-1.4b0/modifinder/engines/__init__.py +0 -0
  18. modifinder-1.4b0/modifinder/engines/alignment/CosineAlignmentEngine.py +194 -0
  19. modifinder-1.4b0/modifinder/engines/alignment/__init__.py +0 -0
  20. modifinder-1.4b0/modifinder/engines/annotation/MAGMaAnnotationEngine.py +260 -0
  21. modifinder-1.4b0/modifinder/engines/annotation/__init__.py +0 -0
  22. modifinder-1.4b0/modifinder/engines/annotation/magma/fragmentation_py.py +253 -0
  23. modifinder-1.4b0/modifinder/engines/annotation/magma/rdkit_engine.py +148 -0
  24. modifinder-1.4b0/modifinder/engines/prediction/BasicPredictionEngine.py +210 -0
  25. modifinder-1.4b0/modifinder/engines/tests/test_CosineAlignmentEngine.py +42 -0
  26. modifinder-1.4b0/modifinder/engines/tests/test_MagmaAnnotationEngine.py +28 -0
  27. modifinder-1.4b0/modifinder/exceptions.py +38 -0
  28. modifinder-1.4b0/modifinder/main.py +65 -0
  29. modifinder-1.4b0/modifinder/samples/__init__.py +0 -0
  30. modifinder-1.4b0/modifinder/samples/caffeine.py +69 -0
  31. modifinder-1.4b0/modifinder/samples/theophylline.py +59 -0
  32. modifinder-1.4b0/modifinder/tests/__init__.py +0 -0
  33. modifinder-1.4b0/modifinder/tests/test_convert.py +131 -0
  34. modifinder-1.4b0/modifinder/tests/utils.py +42 -0
  35. modifinder-1.4b0/modifinder/utilities/__init__.py +0 -0
  36. modifinder-1.4b0/modifinder/utilities/general_utils.py +497 -0
  37. modifinder-1.4b0/modifinder/utilities/gnps_types.py +178 -0
  38. modifinder-1.4b0/modifinder/utilities/mol_utils.py +691 -0
  39. modifinder-1.4b0/modifinder/utilities/network.py +147 -0
  40. modifinder-1.4b0/modifinder/utilities/tests/__init__.py +0 -0
  41. modifinder-1.4b0/modifinder/utilities/tests/test_general_utils.py +23 -0
  42. modifinder-1.4b0/modifinder/utilities/tests/test_gnps_types.py +70 -0
  43. modifinder-1.4b0/modifinder/utilities/tests/test_network.py +41 -0
  44. modifinder-1.4b0/modifinder/utilities/visualizer.py +910 -0
  45. modifinder-1.4b0/modifinder.egg-info/PKG-INFO +77 -0
  46. modifinder-1.4b0/modifinder.egg-info/SOURCES.txt +49 -0
  47. modifinder-1.4b0/modifinder.egg-info/dependency_links.txt +1 -0
  48. modifinder-1.4b0/modifinder.egg-info/requires.txt +22 -0
  49. modifinder-1.4b0/modifinder.egg-info/top_level.txt +1 -0
  50. modifinder-1.4b0/pyproject.toml +43 -0
  51. modifinder-1.4b0/setup.cfg +4 -0
@@ -0,0 +1,10 @@
1
+ ::
2
+
3
+ Academic Software License: © 2024 UCR (“Institution”). Academic or nonprofit researchers are permitted to use this Software (as defined below) subject to Paragraphs 1-4:
4
+
5
+ 1. Institution hereby grants to you free of charge, so long as you are an academic or nonprofit researcher, a nonexclusive license under Institution’s copyright ownership interest in this software and any derivative works made by you thereof (collectively, the “Software”) to use, copy, and make derivative works of the Software solely for educational or academic research purposes, and to distribute such Software free of charge to other academic or nonprofit researchers for their educational or academic research purposes, in all cases subject to the terms of this Academic Software License. Except as granted herein, all rights are reserved by Institution, including the right to pursue patent protection of the Software.
6
+ 2. Any distribution of copies of this Software -- including any derivative works made by you thereof -- must include a copy (including the copyright notice above), and be made subject to the terms, of this Academic Software License; failure by you to adhere to the requirements in Paragraphs 1 and 2 will result in immediate termination of the license granted to you pursuant to this Academic Software License effective as of the date you first used the Software.
7
+ 3. IN NO EVENT WILL INSTITUTION BE LIABLE TO ANY ENTITY OR PERSON FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN IF INSTITUTION HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. INSTITUTION SPECIFICALLY DISCLAIMS ANY AND ALL WARRANTIES, EXPRESS AND IMPLIED, INCLUDING, BUT NOT LIMITED TO, ANY IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE IS PROVIDED “AS IS.” INSTITUTION HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS OF THIS SOFTWARE.
8
+ 4. Any academic or scholarly publication arising from the use of this Software or any derivative works thereof will include the following acknowledgment: The Software used in this research was created by [INSERT AUTHOR NAMES] of UC Riverside. © 2024 UCR.
9
+
10
+ Commercial entities: please contact mingxun.wang@cs.ucr.edu or tp@ucr.edu for licensing opportunities.
@@ -0,0 +1,77 @@
1
+ Metadata-Version: 2.2
2
+ Name: modifinder
3
+ Version: 1.4b0
4
+ Summary: ModiFinder package
5
+ Author-email: Reza Shahneh <mzare008@ucr.edu>
6
+ License: ::
7
+
8
+ Academic Software License: © 2024 UCR (“Institution”). Academic or nonprofit researchers are permitted to use this Software (as defined below) subject to Paragraphs 1-4:
9
+
10
+ 1. Institution hereby grants to you free of charge, so long as you are an academic or nonprofit researcher, a nonexclusive license under Institution’s copyright ownership interest in this software and any derivative works made by you thereof (collectively, the “Software”) to use, copy, and make derivative works of the Software solely for educational or academic research purposes, and to distribute such Software free of charge to other academic or nonprofit researchers for their educational or academic research purposes, in all cases subject to the terms of this Academic Software License. Except as granted herein, all rights are reserved by Institution, including the right to pursue patent protection of the Software.
11
+ 2. Any distribution of copies of this Software -- including any derivative works made by you thereof -- must include a copy (including the copyright notice above), and be made subject to the terms, of this Academic Software License; failure by you to adhere to the requirements in Paragraphs 1 and 2 will result in immediate termination of the license granted to you pursuant to this Academic Software License effective as of the date you first used the Software.
12
+ 3. IN NO EVENT WILL INSTITUTION BE LIABLE TO ANY ENTITY OR PERSON FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN IF INSTITUTION HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. INSTITUTION SPECIFICALLY DISCLAIMS ANY AND ALL WARRANTIES, EXPRESS AND IMPLIED, INCLUDING, BUT NOT LIMITED TO, ANY IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE IS PROVIDED “AS IS.” INSTITUTION HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS OF THIS SOFTWARE.
13
+ 4. Any academic or scholarly publication arising from the use of this Software or any derivative works thereof will include the following acknowledgment: The Software used in this research was created by [INSERT AUTHOR NAMES] of UC Riverside. © 2024 UCR.
14
+
15
+ Commercial entities: please contact mingxun.wang@cs.ucr.edu or tp@ucr.edu for licensing opportunities.
16
+
17
+ Project-URL: Documentation, https://wang-bioinformatics-lab.github.io/ModiFinder_base/
18
+ Project-URL: Repository, https://github.com/Wang-Bioinformatics-Lab/ModiFinder_base
19
+ Requires-Python: >=3.9
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE.md
22
+ Requires-Dist: cairosvg
23
+ Requires-Dist: rdkit
24
+ Requires-Dist: requests
25
+ Requires-Dist: xlsxwriter
26
+ Requires-Dist: prettytable
27
+ Requires-Dist: dash
28
+ Requires-Dist: numpy
29
+ Requires-Dist: tqdm
30
+ Requires-Dist: IPython
31
+ Requires-Dist: furl
32
+ Requires-Dist: pandas
33
+ Requires-Dist: dash-bootstrap-components
34
+ Requires-Dist: pillow
35
+ Requires-Dist: openpyxl
36
+ Requires-Dist: openpyxl-image-loader
37
+ Requires-Dist: msbuddy==0.2.3
38
+ Requires-Dist: seaborn
39
+ Requires-Dist: pyteomics
40
+ Requires-Dist: matplotlib
41
+ Requires-Dist: networkx
42
+ Provides-Extra: extras
43
+
44
+ Welcome to ModiFinder's documentation!
45
+ ======================================
46
+
47
+ **ModiFinder** is a tool for site localization of structural modifications using MS/MS data.
48
+
49
+
50
+ _This project is under active development._
51
+
52
+ The documentation is available at: [https://wang-bioinformatics-lab.github.io/ModiFinder_base/](https://wang-bioinformatics-lab.github.io/ModiFinder_base/)
53
+
54
+
55
+ Citing
56
+ ------
57
+
58
+ ModiFinder: Tandem Mass Spectral Alignment Enables Structural Modification Site Localization
59
+
60
+ Mohammad Reza Zare Shahneh, Michael Strobel, Giovanni Andrea Vitale, Christian Geibel, Yasin El Abiead, Neha Garg, Berenike Wagner, Karl Forchhammer, Allegra Aron, Vanessa V Phelan, Daniel Petras, and Mingxun Wang
61
+
62
+ Journal of the American Society for Mass Spectrometry 2024 35 (11), 2564-2578
63
+
64
+ DOI: 10.1021/jasms.4c00061
65
+
66
+
67
+ License
68
+ -------
69
+
70
+ Academic Software License: © 2024 UCR (“Institution”). Academic or nonprofit researchers are permitted to use this Software (as defined below) subject to Paragraphs 1-4:
71
+
72
+ 1. Institution hereby grants to you free of charge, so long as you are an academic or nonprofit researcher, a nonexclusive license under Institution’s copyright ownership interest in this software and any derivative works made by you thereof (collectively, the “Software”) to use, copy, and make derivative works of the Software solely for educational or academic research purposes, and to distribute such Software free of charge to other academic or nonprofit researchers for their educational or academic research purposes, in all cases subject to the terms of this Academic Software License. Except as granted herein, all rights are reserved by Institution, including the right to pursue patent protection of the Software.
73
+ 2. Any distribution of copies of this Software -- including any derivative works made by you thereof -- must include a copy (including the copyright notice above), and be made subject to the terms, of this Academic Software License; failure by you to adhere to the requirements in Paragraphs 1 and 2 will result in immediate termination of the license granted to you pursuant to this Academic Software License effective as of the date you first used the Software.
74
+ 3. IN NO EVENT WILL INSTITUTION BE LIABLE TO ANY ENTITY OR PERSON FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN IF INSTITUTION HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. INSTITUTION SPECIFICALLY DISCLAIMS ANY AND ALL WARRANTIES, EXPRESS AND IMPLIED, INCLUDING, BUT NOT LIMITED TO, ANY IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE IS PROVIDED “AS IS.” INSTITUTION HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS OF THIS SOFTWARE.
75
+ 4. Any academic or scholarly publication arising from the use of this Software or any derivative works thereof will include the following acknowledgment: The Software used in this research was created by [INSERT AUTHOR NAMES] of UC Riverside. © 2024 UCR.
76
+
77
+ Commercial entities: please contact mingxun.wang@cs.ucr.edu or tp@ucr.edu for licensing opportunities.
@@ -0,0 +1,34 @@
1
+ Welcome to ModiFinder's documentation!
2
+ ======================================
3
+
4
+ **ModiFinder** is a tool for site localization of structural modifications using MS/MS data.
5
+
6
+
7
+ _This project is under active development._
8
+
9
+ The documentation is available at: [https://wang-bioinformatics-lab.github.io/ModiFinder_base/](https://wang-bioinformatics-lab.github.io/ModiFinder_base/)
10
+
11
+
12
+ Citing
13
+ ------
14
+
15
+ ModiFinder: Tandem Mass Spectral Alignment Enables Structural Modification Site Localization
16
+
17
+ Mohammad Reza Zare Shahneh, Michael Strobel, Giovanni Andrea Vitale, Christian Geibel, Yasin El Abiead, Neha Garg, Berenike Wagner, Karl Forchhammer, Allegra Aron, Vanessa V Phelan, Daniel Petras, and Mingxun Wang
18
+
19
+ Journal of the American Society for Mass Spectrometry 2024 35 (11), 2564-2578
20
+
21
+ DOI: 10.1021/jasms.4c00061
22
+
23
+
24
+ License
25
+ -------
26
+
27
+ Academic Software License: © 2024 UCR (“Institution”). Academic or nonprofit researchers are permitted to use this Software (as defined below) subject to Paragraphs 1-4:
28
+
29
+ 1. Institution hereby grants to you free of charge, so long as you are an academic or nonprofit researcher, a nonexclusive license under Institution’s copyright ownership interest in this software and any derivative works made by you thereof (collectively, the “Software”) to use, copy, and make derivative works of the Software solely for educational or academic research purposes, and to distribute such Software free of charge to other academic or nonprofit researchers for their educational or academic research purposes, in all cases subject to the terms of this Academic Software License. Except as granted herein, all rights are reserved by Institution, including the right to pursue patent protection of the Software.
30
+ 2. Any distribution of copies of this Software -- including any derivative works made by you thereof -- must include a copy (including the copyright notice above), and be made subject to the terms, of this Academic Software License; failure by you to adhere to the requirements in Paragraphs 1 and 2 will result in immediate termination of the license granted to you pursuant to this Academic Software License effective as of the date you first used the Software.
31
+ 3. IN NO EVENT WILL INSTITUTION BE LIABLE TO ANY ENTITY OR PERSON FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN IF INSTITUTION HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. INSTITUTION SPECIFICALLY DISCLAIMS ANY AND ALL WARRANTIES, EXPRESS AND IMPLIED, INCLUDING, BUT NOT LIMITED TO, ANY IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE IS PROVIDED “AS IS.” INSTITUTION HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS OF THIS SOFTWARE.
32
+ 4. Any academic or scholarly publication arising from the use of this Software or any derivative works thereof will include the following acknowledgment: The Software used in this research was created by [INSERT AUTHOR NAMES] of UC Riverside. © 2024 UCR.
33
+
34
+ Commercial entities: please contact mingxun.wang@cs.ucr.edu or tp@ucr.edu for licensing opportunities.
@@ -0,0 +1,9 @@
1
+ """
2
+ ModiFinder
3
+ ==========
4
+
5
+ ModiFinder is a Python package for the identification of modifications in mass spectrometry data.
6
+ """
7
+
8
+ from modifinder.exceptions import *
9
+ from modifinder.classes import *
@@ -0,0 +1,16 @@
1
# Command-line/option table: one entry per option, keyed by the option name.
# Each entry records the option's full name, its short alias and its default.
# NOTE(review): the key is spelled "defult" (sic). It is a runtime dictionary
# key, so it is reproduced exactly; presumably readers elsewhere in the package
# look it up under this spelling -- confirm before renaming.
args = {
    name: {"name": name, "short": short, "defult": default}
    for name, short, default in (
        ("adduct", "add", "M+H"),
        ("USI1", "USI1", ""),
        ("USI2", "USI2", ""),
        ("SMILES1", "SMILES1", ""),
        ("SMILES2", "SMILES2", ""),
        ("mz_tolerance", "mz", 0.05),
        ("ppm", "ppm", 40),
        ("filter_peaks_method", "filter", "intensity"),
        ("filter_peaks_variable", "filter_var", 0.01),
        ("presence_only", "presence", False),
        ("consider_intensity", "intensity", True),
        ("shifted_only", "shifted", False),
        ("USIhelp", "USIhelp", None),
        ("SMILEShelp", "SMILEShelp", None),
    )
}
@@ -0,0 +1,274 @@
1
+
2
+ import numpy as np
3
+ from typing import List, Tuple
4
+ import copy
5
+ from . import utils as utils
6
+
7
def is_max(G, probabilities, true_index):
    """Score whether the true site holds the maximum probability.

    Args:
        G: Unused; accepted so every scoring function shares one signature.
        probabilities: Sequence of per-site scores.
        true_index: Index of the true site in ``probabilities``.

    Returns:
        ``1 / k`` when ``probabilities[true_index]`` equals the maximum, where
        ``k`` is the number of entries tied at that maximum; ``0`` otherwise.
    """
    # Compute the maximum once instead of once per loop iteration.
    peak = max(probabilities)
    if probabilities[true_index] != peak:
        return 0
    tied = sum(1 for value in probabilities if value == peak)
    return 1 / tied
18
+
19
def dist_from_max(G, probabilities, true_index):
    """Return the smallest G-distance from the true site to a top-scoring site.

    Args:
        G: 2-D array indexed as ``G[a, b]``; presumably pairwise graph
            distances between sites -- confirm against the caller.
        probabilities: Sequence of per-site scores.
        true_index: Index of the true site.

    Returns:
        ``float`` of the minimum ``G[true_index, i]`` over every ``i`` whose
        score equals the maximum, divided by the largest entry of ``G``.
    """
    peak = max(probabilities)
    diameter = np.amax(G)
    # 100000 is the original sentinel upper bound and stays first in the list.
    candidates = [100000]
    for idx, score in enumerate(probabilities):
        if score == peak:
            candidates.append(G[true_index, idx])
    nearest = min(candidates)
    return float(nearest / diameter)
28
+
29
def proximity(G, probabilities, true_index):
    """Return how close the true site is to the nearest top-scoring site.

    Args:
        G: 2-D array indexed as ``G[a, b]``; presumably pairwise graph
            distances between sites -- confirm against the caller.
        probabilities: Sequence of per-site scores.
        true_index: Index of the true site.

    Returns:
        ``(d_max - d) / d_max`` where ``d_max`` is the largest entry of ``G``
        and ``d`` is the minimum ``G[true_index, i]`` over every ``i`` whose
        score equals the maximum.
    """
    peak = max(probabilities)
    diameter = np.amax(G)
    # 100000 is the original sentinel upper bound and stays first in the list.
    candidates = [100000]
    for idx, score in enumerate(probabilities):
        if score == peak:
            candidates.append(G[true_index, idx])
    nearest = min(candidates)
    return (diameter - nearest) / diameter
38
+
39
def average_dist_from_max(G, probabilities, true_index):
    """Return the score-weighted mean distance to the top-scoring sites.

    Only sites whose score equals the maximum contribute. Each contributes
    ``G[true_index, i]`` divided by the largest entry of ``G``, weighted by
    its score.

    Args:
        G: 2-D array indexed as ``G[a, b]``; presumably pairwise graph
            distances between sites -- confirm against the caller.
        probabilities: Sequence of per-site scores.
        true_index: Index of the true site.

    Returns:
        ``float`` weighted average of the normalised distances.
    """
    peak = max(probabilities)
    diameter = np.amax(G)
    weighted_sum = 0
    total_weight = 0
    for idx, score in enumerate(probabilities):
        if score != peak:
            continue
        weighted_sum += (G[true_index, idx] / diameter) * score
        total_weight += score
    return float(weighted_sum / total_weight)
51
+
52
def average_dist(G, probabilities, true_index):
    """Return the score-weighted mean of ``exp(-G[true_index, i])``.

    Args:
        G: 2-D array indexed as ``G[a, b]``; presumably pairwise graph
            distances between sites -- confirm against the caller.
        probabilities: Sequence of per-site scores used as weights.
        true_index: Index of the true site.

    Returns:
        ``float`` weighted average over every site ``i``.
    """
    weighted_sum = 0
    total_weight = 0
    for idx, weight in enumerate(probabilities):
        closeness = np.exp(-G[true_index, idx])
        weighted_sum += closeness * weight
        total_weight += weight
    return float(weighted_sum / total_weight)
62
+
63
def average_dist_normalized(G, probabilities, true_index):
    """Return the score-weighted mean of a diameter-relative closeness term.

    For each site ``i`` the term is ``exp(-d / (d_max - d + eps))`` with
    ``d = G[true_index, i]`` and ``d_max`` the largest entry of ``G``.

    Args:
        G: 2-D array indexed as ``G[a, b]``; presumably pairwise graph
            distances between sites -- confirm against the caller.
        probabilities: Sequence of per-site scores used as weights.
        true_index: Index of the true site.

    Returns:
        ``float`` weighted average over every site.
    """
    # Keeps the denominator non-zero when a site sits at the maximum distance.
    eps = 0.000001
    diameter = np.amax(G)
    weighted_sum = 0
    total_weight = 0
    for idx, weight in enumerate(probabilities):
        distance = G[true_index, idx]
        closeness = np.exp(-distance / (diameter - distance + eps))
        weighted_sum += closeness * weight
        total_weight += weight
    return float(weighted_sum / total_weight)
74
+
75
def regulated_exp(G, probabilities, modificationSiteIdx):
    """Score a prediction by the mean distance from the true site to the top sites.

    Scores below half of the maximum are zeroed and the rest renormalised to
    sum to one. The result is ``exp(-m)``, where ``m`` is the mean of
    ``G[modificationSiteIdx, i] / d_max`` over every site ``i`` still tied at
    the maximum, and ``d_max`` is the largest entry of ``G``.

    Unlike the previous implementation, the caller's ``probabilities`` is left
    untouched, and integer arrays and plain lists are accepted.

    Args:
        G: 2-D array indexed as ``G[a, b]``; presumably pairwise graph
            distances between sites -- confirm against the caller.
        probabilities: 1-D sequence of per-site scores.
        modificationSiteIdx: Index of the true modification site.

    Returns:
        ``0`` when every score is zero (or no site matches the maximum),
        otherwise the NumPy float ``exp(-m)``.
    """
    # Work on a private float copy: the thresholding and renormalisation below
    # must never leak back into the caller's array.
    scores = np.array(probabilities, dtype=float)
    max_score = scores.max()
    if max_score == 0:
        return 0

    scores[scores < 0.5 * max_score] = 0
    scores = scores / scores.sum()
    max_score = scores.max()
    graph_diameter = np.amax(G)

    top_sites = np.flatnonzero(scores == max_score)
    if top_sites.size == 0:
        # Only reachable if the comparison fails for every entry (e.g. NaN).
        return 0

    total_distance = 0
    for site in top_sites:
        total_distance += G[modificationSiteIdx, site] / graph_diameter
    return np.exp(-(total_distance / top_sites.size))
108
+
109
def ranking_loss(G, probabilities, modificationSiteIdx):
    """Score the true site by its rank when sites are sorted by descending score.

    Args:
        G: Unused; accepted so every scoring function shares one signature.
        probabilities: 1-D sequence of per-site scores.
        modificationSiteIdx: Index of the true modification site.

    Returns:
        ``1 - rank / n``, where ``rank`` is the 0-based position of the true
        site in the reversed ``np.argsort`` order and ``n`` is the number of
        sites.
    """
    descending = np.argsort(probabilities)[::-1]
    hits = np.where(descending == modificationSiteIdx)
    rank = hits[0][0]
    return 1 - rank / len(probabilities)
122
+
123
+
124
def sorted_rank(G, probabilities, modificationSiteIdx):
    """Return the true site's normalised position in ascending score order.

    Args:
        G: Unused; accepted so every scoring function shares one signature.
        probabilities: 1-D sequence of per-site scores.
        modificationSiteIdx: Index of the true modification site.

    Returns:
        ``position / (n - 1)``, where ``position`` is the 0-based index of the
        true site in ``np.argsort(probabilities)`` and ``n`` is the number of
        sites.
    """
    ascending = np.argsort(probabilities)
    hits = np.where(ascending == modificationSiteIdx)
    position = hits[0][0]
    return position / (len(probabilities) - 1)
134
+
135
+
136
+
137
+
138
def entropy_distance(G, probabilities, modificationSiteIdx, alpha = 0.5, gamma=1.0):
    """Combine an entropy term and a distance term into a single score.

    The entropy term is ``(1 - H) ** alpha`` with ``H = utils.entropy(...)``,
    replaced by ``0`` when ``1 - H`` is not above ``1e-8``. The distance term
    is ``(1 - sum_i G[site, i] / d_max * p_i) ** gamma``, with ``d_max`` the
    largest entry of ``G``. The result is the square root of their product.

    Args:
        G: 2-D array indexed as ``G[a, b]``; presumably pairwise graph
            distances between sites -- confirm against the caller.
        probabilities: Sequence of per-site scores.
        modificationSiteIdx: Index of the true modification site.
        alpha: Exponent applied to the entropy term.
        gamma: Exponent applied to the distance term.

    Returns:
        ``sqrt(entropy_term * distance_term)``.
    """
    # NOTE(review): `utils` is bound by `from . import utils` at the top of
    # this module; confirm that module exists in the package and that
    # `entropy` is normalised to [0, 1], as `1 - H` below appears to assume.
    certainty = 1 - utils.entropy(probabilities)
    entropy_term = np.power(certainty, alpha) if certainty > 1e-8 else 0

    diameter = np.amax(G)
    expected_distance = 0
    for idx, weight in enumerate(probabilities):
        expected_distance += G[modificationSiteIdx, idx] / diameter * weight
    distance_term = np.power(1 - expected_distance, gamma)

    return np.sqrt(entropy_term * distance_term)
158
+
159
+
160
+ # def calculate_spanning_graph(G, probabilities):
161
+ # max_val = max(probabilities)
162
+ # graph = []
163
+ # for i in range(len(probabilities)):
164
+ # for j in range(len(probabilities)):
165
+ # if probabilities[i] == max_val and probabilities[j] == max_val:
166
+ # graph.append((i, j, G[i][j]))
167
+ # return graph
168
+
169
+ # def kruskal(graph: List[Tuple[int, int, int]]) -> List[Tuple[int, int, int]]:
170
+ # sorted_edges = sorted(graph, key=lambda x: x[2])
171
+ # components = {v: k for k, v in enumerate(set([u for u, _, _ in graph] + [v for _, v, _ in graph]))}
172
+ # min_spanning_tree = []
173
+ # for edge in sorted_edges:
174
+ # u, v, w = edge
175
+ # # If the endpoints of the edge belong to different connected components, add the edge to the minimum spanning tree
176
+ # if components[u] != components[v]:
177
+ # min_spanning_tree.append(edge)
178
+
179
+ # # Merge the connected components
180
+ # old_component = components[v]
181
+ # new_component = components[u]
182
+ # for vertex, component in components.items():
183
+ # if component == old_component:
184
+ # components[vertex] = new_component
185
+
186
+ # return min_spanning_tree
187
+
188
+ # def calculate_minimum_spanning_graph(G, probabilities):
189
+ # spanning_graph = calculate_spanning_graph(G, probabilities)
190
+
191
+ # # floyd-warshall algorithm
192
+ # for k in range(len(spanning_graph)):
193
+ # for i in range(len(spanning_graph)):
194
+ # for j in range(len(spanning_graph)):
195
+ # spanning_graph[i][j] = min(spanning_graph[i][j], spanning_graph[i][k] + spanning_graph[k][j])
196
+
197
+
198
def softmax(probabilities):
    """Rescale scores by their smallest positive value, then apply softmax.

    Negative inputs are first shifted so the minimum becomes zero. The scores
    are then divided by the smallest strictly positive entry and passed
    through a softmax.

    Unlike the previous implementation, the caller's array is left untouched,
    and integer arrays and plain lists are accepted.

    Args:
        probabilities: 1-D sequence of scores.

    Returns:
        A new float array. It is all zeros when every shifted score is zero;
        otherwise it holds the softmax weights, which sum to one.
    """
    # Private float copy: avoids mutating the argument and avoids the casting
    # error an in-place true division raises on integer arrays.
    scores = np.array(probabilities, dtype=float)
    lowest = scores.min()
    if lowest < 0:
        scores = scores - lowest
    if scores.max() == 0:
        return scores
    smallest_non_zero = scores[scores > 0].min()
    scores = scores / smallest_non_zero
    # Subtracting the maximum keeps the exponentials from overflowing.
    exp_scores = np.exp(scores - scores.max())
    return exp_scores / exp_scores.sum()
209
+
210
def linear(x):
    """Shift scores to be non-negative and scale them to sum to one.

    Args:
        x: Array-like of scores.

    Returns:
        ``x`` minus its minimum when that minimum is negative, then divided by
        its sum when that sum is non-zero. The input is returned as-is when
        neither step applies.
    """
    lowest = np.min(x)
    shifted = x - lowest if lowest < 0 else x
    total = np.sum(shifted)
    return shifted / total if total != 0 else shifted
216
+
217
def power_prob(probabilities):
    """Sharpen scores: drop the low half, raise to the fourth power, normalise.

    Args:
        probabilities: NumPy array of scores; it is deep-copied, so the
            caller's array is not modified.

    Returns:
        A new array. Negative inputs are shifted so the minimum is zero,
        entries below half of the maximum are zeroed, the rest are raised to
        the fourth power and divided by their sum. The array is returned
        un-normalised at either early exit (maximum is zero, or the powered
        values sum to zero).
    """
    # Operate on a copy so the original scores stay intact.
    work = copy.deepcopy(probabilities)
    lowest = min(work)
    if lowest < 0:
        work = work - lowest
    if max(work) == 0:
        return work

    # Zero anything less than half of the maximum value.
    cutoff = max(work) / 2
    work[work < cutoff] = 0

    work = np.power(work, 4)
    if sum(work) == 0:
        return work
    return work / work.sum()
232
+
233
+
234
def calculate(G, input_probabilities, true_modification_site, method, normalization_method = "linear"):
    """Evaluate a site-localisation prediction with the named scoring method.

    When the first element of ``input_probabilities`` is a list or a NumPy
    array, the input is treated as a collection of predictions: each is scored
    recursively and the mean is returned. Otherwise the scores are normalised
    with ``linear`` and passed to the scorer chosen by ``method``.

    Args:
        G: Matrix forwarded to the scorer as its first argument.
        input_probabilities: One prediction, or a collection of predictions.
        true_modification_site: Index of the true site.
        method: One of ``"is_max"``, ``"dist_from_max"``,
            ``"average_dist_from_max"``, ``"average_dist"``,
            ``"average_dist_normalized"``, ``"regulated_exp"``,
            ``"ranking_loss"``, ``"entropy_distance"``, ``"sorted_rank"``,
            ``"proximity"``.
        normalization_method: Accepted and forwarded on recursion, but not
            consulted; ``linear`` normalisation is always applied.

    Returns:
        The scorer's result, or ``0`` when the normalised scores have a
        maximum of zero (checked before ``method`` is validated).

    Raises:
        Exception: If ``method`` is not one of the names listed above.
    """
    # A nested list or a 2-D array means "many predictions": average them.
    if isinstance(input_probabilities[0], (list, np.ndarray)):
        total = 0
        for single_prediction in input_probabilities:
            total += calculate(
                G, single_prediction, true_modification_site, method, normalization_method
            )
        return total / len(input_probabilities)

    probabilities = linear(input_probabilities)
    if max(probabilities) == 0:
        return 0

    # Map each method name to the scoring function that implements it.
    scorers = {
        "is_max": is_max,
        "dist_from_max": dist_from_max,
        "average_dist_from_max": average_dist_from_max,
        "average_dist": average_dist,
        "average_dist_normalized": average_dist_normalized,
        "regulated_exp": regulated_exp,
        "ranking_loss": ranking_loss,
        "entropy_distance": entropy_distance,
        "sorted_rank": sorted_rank,
        "proximity": proximity,
    }
    scorer = scorers.get(method)
    if scorer is None:
        raise Exception("Method not found")
    return scorer(G, probabilities, true_modification_site)