PySAR 2.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/conf.py +53 -0
- pySAR/__init__.py +28 -0
- pySAR/descriptors.py +2893 -0
- pySAR/encoding.py +986 -0
- pySAR/evaluate.py +231 -0
- pySAR/globals_.py +21 -0
- pySAR/model.py +559 -0
- pySAR/plots.py +92 -0
- pySAR/py.typed +0 -0
- pySAR/pyDSP.py +582 -0
- pySAR/pySAR.py +962 -0
- pySAR/utils.py +283 -0
- pysar-2.5.0.dist-info/METADATA +740 -0
- pysar-2.5.0.dist-info/RECORD +17 -0
- pysar-2.5.0.dist-info/WHEEL +5 -0
- pysar-2.5.0.dist-info/licenses/LICENSE +21 -0
- pysar-2.5.0.dist-info/top_level.txt +2 -0
pySAR/evaluate.py
ADDED
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
################################################################################
|
|
2
|
+
################# Evaluate #################
|
|
3
|
+
################################################################################
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error, \
|
|
7
|
+
explained_variance_score, max_error
|
|
8
|
+
|
|
9
|
+
class Evaluate():
    """
    An instance of the Evaluate class will calculate various metric values for
    the inputted observed (Y_true) and predicted (Y_pred) arrays, storing the
    results in the class attributes. The class supports metrics: R2, RMSE, MSE,
    MAE, RPD, Explained Variance and Max Error.

    Both inputs are converted to numpy arrays and reshaped into a single
    column of shape (n, 1) before any metric is calculated.

    Parameters
    ==========
    :Y_true: np.ndarray
        array of observed activity/fitness values.
    :Y_pred: np.ndarray
        array of predicted activity/fitness values.

    Attributes
    ==========
    :r2: float
        R2 (coefficient of determination) score.
    :rmse: float
        Root Mean Square Error score.
    :mse: float
        Mean Square Error score.
    :mae: float
        Mean Absolute Error score.
    :rpd: float
        Ratio of Performance to Deviation. Returns np.inf if MSE is 0.
    :explained_var: float
        Explained Variance score.
    :max_error: float
        Maximum residual error.

    Methods
    =======
    r2_(multioutput='uniform_average'):
        calculate R2 score.
    rmse_(multioutput='uniform_average'):
        calculate RMSE value.
    mse_(multioutput='uniform_average'):
        calculate MSE value.
    mae_(multioutput='uniform_average'):
        calculate MAE value.
    rpd_():
        calculate ratio of performance to deviation.
    explained_var_(multioutput='uniform_average'):
        calculate explained variance.
    max_error_():
        calculate max error.

    Raises
    ======
    :ValueError:
        if Y_true or Y_pred contain NaN values.
    :ValueError:
        if Y_true or Y_pred contain infinite values.
    :ValueError:
        if Y_true and Y_pred are not the same shape after reshaping.
    """
    def __init__(self, Y_true, Y_pred):

        #convert input observed and predicted values into numpy arrays and
        #reshape each into a single column
        self.Y_true = np.array(Y_true).reshape((-1,1))
        self.Y_pred = np.array(Y_pred).reshape((-1,1))

        #validate inputs contain no NaN or infinite values
        if np.any(np.isnan(self.Y_true)) or np.any(np.isnan(self.Y_pred)):
            raise ValueError('Y_true and Y_pred must not contain NaN values.')
        if np.any(np.isinf(self.Y_true)) or np.any(np.isinf(self.Y_pred)):
            raise ValueError('Y_true and Y_pred must not contain infinite values.')

        #validate that predicted and observed input arrays are of the same length,
        #if not same shape then raise error
        if (self.Y_true.shape != self.Y_pred.shape):
            raise ValueError(f'Observed and predicted values must be of the same shape, '
                f'Y_true = {self.Y_true.shape} & Y_pred = {self.Y_pred.shape}.')

        #calculate all metric values for inputs
        self.r2 = self.r2_()
        self.rmse = self.rmse_()
        self.mse = self.mse_()
        self.mae = self.mae_()
        self.rpd = self.rpd_()
        self.explained_var = self.explained_var_()
        self.max_error = self.max_error_()

    def r2_(self, multioutput='uniform_average'):
        """
        Calculate R^2 (coefficient of determination) regression score function.

        Parameters
        ==========
        :multioutput: str (default='uniform_average')
            method that defines aggregating of multiple output scores. Default
            is recommended ('uniform_average'), available values:
            {'raw_values', 'uniform_average', 'variance_weighted'}.

        Returns
        =======
        :r2: float
            R2 (coefficient of determination) score for observed and predicted
            values, or an ndarray of scores if multioutput is 'raw_values'.
        """
        return r2_score(self.Y_true, self.Y_pred, multioutput=multioutput)

    def mse_(self, multioutput='uniform_average'):
        """
        Calculate MSE (mean square error) regression loss score for observed
        and predicted values.

        Parameters
        ==========
        :multioutput: str (default='uniform_average')
            method that defines aggregating of multiple output scores. Default
            is recommended ('uniform_average'), available values:
            {'raw_values', 'uniform_average'}. Unlike r2_ and explained_var_,
            'variance_weighted' is not accepted by the underlying function.

        Returns
        =======
        :mse: float
            MSE (mean square error) score for observed and predicted values,
            or an ndarray of scores if multioutput is 'raw_values'.
        """
        return mean_squared_error(self.Y_true, self.Y_pred, multioutput=multioutput)

    def rmse_(self, multioutput='uniform_average'):
        """
        Calculate the RMSE (root mean square error) regression loss score for
        inputted observed and predicted values. Calculated as the square root
        of the MSE returned by the same function used in mse_.

        Parameters
        ==========
        :multioutput: str (default='uniform_average')
            method that defines aggregating of multiple output scores. Default
            is recommended ('uniform_average'), available values:
            {'raw_values', 'uniform_average'}. Unlike r2_ and explained_var_,
            'variance_weighted' is not accepted by the underlying function.

        Returns
        =======
        :rmse: float
            RMSE score for observed and predicted values, or an ndarray of
            scores if multioutput is 'raw_values'.
        """
        return np.sqrt(mean_squared_error(self.Y_true, self.Y_pred, multioutput=multioutput))

    def mae_(self, multioutput='uniform_average'):
        """
        Calculate the Mean Absolute Error (MAE) regression loss for
        inputted observed and predicted values.

        Parameters
        ==========
        :multioutput: str (default='uniform_average')
            method that defines aggregating of multiple output scores. Default
            is recommended ('uniform_average'), available values:
            {'raw_values', 'uniform_average'}. Unlike r2_ and explained_var_,
            'variance_weighted' is not accepted by the underlying function.

        Returns
        =======
        :mae: float
            If multioutput is 'raw_values', then MAE is returned for each output
            separately. If multioutput is 'uniform_average' or an ndarray of
            weights, then the weighted average of all output errors is returned.
            The output is a non-negative floating point. The best value is 0.0.
        """
        return mean_absolute_error(self.Y_true, self.Y_pred, multioutput=multioutput)

    def rpd_(self):
        """
        Calculates the Ratio of Performance to Deviation (RPD). RPD is the ratio
        between the standard deviation of a variable and the standard error of
        prediction of that variable by a given model. Here it is calculated as
        the standard deviation of Y_true (numpy default, ddof=0) divided by the
        square root of the MSE.

        Parameters
        ==========
        None

        Returns
        =======
        :rpd: float
            the RPD score for the model, np.inf if the MSE is 0.
        """
        mse = self.mse_()
        #guard against division by zero when predictions match observations exactly
        return self.Y_true.std() / np.sqrt(mse) if mse > 0 else np.inf

    def explained_var_(self, multioutput='uniform_average'):
        """
        Calculates the Explained Variance regression score. Best possible score is 1.0,
        lower values are worse.

        Parameters
        ==========
        :multioutput: str (default='uniform_average')
            method that defines aggregating of multiple output scores. Default
            is recommended ('uniform_average'), available values:
            {'raw_values', 'uniform_average', 'variance_weighted'}.

        Returns
        =======
        :explained_var: float
            The explained variance or ndarray if 'multioutput' is 'raw_values'.
        """
        return explained_variance_score(self.Y_true, self.Y_pred, multioutput=multioutput)

    def max_error_(self):
        """
        Calculates the maximum residual error between observed and predicted values.

        Parameters
        ==========
        None

        Returns
        =======
        :max_error: float
            A positive floating point value of the maximal residual error
            (the best value is 0.0).
        """
        return float(max_error(self.Y_true, self.Y_pred))

    def __repr__(self):
        return f"<Evaluate(Y_true: {self.Y_true.shape} Y_pred: {self.Y_pred.shape})>."

    def __str__(self):
        #adjacent literals are joined at compile time, so no source indentation
        #or line-continuation whitespace ends up inside the returned string
        return (
            "Instance of Evaluate Class with attribute values: "
            f"R2: {self.r2}, RMSE: {self.rmse}, MSE: {self.mse}, MAE: {self.mae}, "
            f"RPD: {self.rpd}, Explained Variance: {self.explained_var}, "
            f"Max Error: {self.max_error}."
        )
|
pySAR/globals_.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
################################################################################
|
|
2
|
+
############## Global Variables ###############
|
|
3
|
+
################################################################################
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
|
|
8
|
+
#timestamp taken once when this module is first imported, so every constant
#derived from it below shares the same value
NOW = datetime.now()

#output dir is the default directory used to store all outputs generated
OUTPUT_DIR = 'outputs'

#current datetime appended to output assets & directories to uniquely identify them
CURRENT_DATETIME = NOW.strftime('%Y-%m-%d_%H-%M-%S')

#output folder is the default folder within the OUTPUT_DIR used to store all
#outputs generated from one run of the program.
OUTPUT_FOLDER = os.path.join(OUTPUT_DIR, f'model_output_{CURRENT_DATETIME}')
|