PySAR 2.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pySAR/evaluate.py ADDED
@@ -0,0 +1,231 @@
1
+ ################################################################################
2
+ ################# Evaluate #################
3
+ ################################################################################
4
+
5
+ import numpy as np
6
+ from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error, \
7
+ explained_variance_score, max_error
8
+
9
class Evaluate():
    """
    Calculate a set of regression metrics for the inputted observed (Y_true)
    and predicted (Y_pred) arrays, storing the results in the class attributes.
    The class supports metrics: R2, RMSE, MSE, MAE, RPD, Explained Variance
    and Max Error.

    Both inputs are converted to numpy arrays and reshaped into a single
    column of shape (n, 1) before validation, so every metric is computed on
    a single output.

    Parameters
    ==========
    :Y_true: np.ndarray
        array of observed activity/fitness values.
    :Y_pred: np.ndarray
        array of predicted activity/fitness values.

    Attributes
    ==========
    :r2: float
        R2 (coefficient of determination) score.
    :rmse: float
        Root Mean Square Error score.
    :mse: float
        Mean Square Error score.
    :mae: float
        Mean Absolute Error score.
    :rpd: float
        Ratio of Performance to Deviation. Returns np.inf if MSE is 0.
    :explained_var: float
        Explained Variance score.
    :max_error: float
        Maximum residual error.

    Methods
    =======
    r2_(multioutput='uniform_average'):
        calculate R2 score.
    rmse_(multioutput='uniform_average'):
        calculate RMSE value.
    mse_(multioutput='uniform_average'):
        calculate MSE value.
    mae_(multioutput='uniform_average'):
        calculate MAE value.
    rpd_():
        calculate ratio of performance to deviation.
    explained_var_(multioutput='uniform_average'):
        calculate explained variance.
    max_error_():
        calculate max error.

    Raises
    ======
    :ValueError:
        if Y_true or Y_pred contain NaN values.
    :ValueError:
        if Y_true or Y_pred contain infinite values.
    :ValueError:
        if Y_true and Y_pred are not the same shape after reshaping.
    """
    def __init__(self, Y_true, Y_pred):

        #convert input observed and predicted values into numpy column vectors
        self.Y_true = np.array(Y_true).reshape((-1, 1))
        self.Y_pred = np.array(Y_pred).reshape((-1, 1))

        #validate inputs contain no NaN or infinite values; the two checks are
        #kept separate so each failure gets its own error message
        if np.any(np.isnan(self.Y_true)) or np.any(np.isnan(self.Y_pred)):
            raise ValueError('Y_true and Y_pred must not contain NaN values.')
        if np.any(np.isinf(self.Y_true)) or np.any(np.isinf(self.Y_pred)):
            raise ValueError('Y_true and Y_pred must not contain infinite values.')

        #after reshaping, a shape mismatch means the inputs differ in length
        if self.Y_true.shape != self.Y_pred.shape:
            raise ValueError('Observed and predicted values must be of the same shape, '
                f'Y_true = {self.Y_true.shape} & Y_pred = {self.Y_pred.shape}.')

        #calculate all metric values for inputs
        self.r2 = self.r2_()
        self.rmse = self.rmse_()
        self.mse = self.mse_()
        self.mae = self.mae_()
        self.rpd = self.rpd_()
        self.explained_var = self.explained_var_()
        self.max_error = self.max_error_()

    def r2_(self, multioutput='uniform_average'):
        """
        Calculate R^2 (coefficient of determination) regression score function.

        Parameters
        ==========
        :multioutput: str (default='uniform_average')
            method that defines aggregating of multiple output scores. Default
            is recommended ('uniform_average'), available values:
            {'raw_values', 'uniform_average', 'variance_weighted'}.

        Returns
        =======
        :r2: float
            R2 (coefficient of determination) score for observed and predicted values.
        """
        return r2_score(self.Y_true, self.Y_pred, multioutput=multioutput)

    def mse_(self, multioutput='uniform_average'):
        """
        Calculate MSE (mean square error) regression loss score for observed
        and predicted values.

        Parameters
        ==========
        :multioutput: str (default='uniform_average')
            method that defines aggregating of multiple output scores. Default
            is recommended ('uniform_average'), available values:
            {'raw_values', 'uniform_average'}. Unlike the R2 and explained
            variance scores, 'variance_weighted' is not accepted by the
            underlying sklearn error metric.

        Returns
        =======
        :mse: float
            MSE (mean square error) score for observed and predicted values.
        """
        return mean_squared_error(self.Y_true, self.Y_pred, multioutput=multioutput)

    def rmse_(self, multioutput='uniform_average'):
        """
        Calculate the RMSE (root mean square error) regression loss score for
        inputted observed and predicted values. Computed as the square root of
        the MSE returned by sklearn's mean_squared_error.

        Parameters
        ==========
        :multioutput: str (default='uniform_average')
            method that defines aggregating of multiple output scores. Default
            is recommended ('uniform_average'), available values:
            {'raw_values', 'uniform_average'}. Unlike the R2 and explained
            variance scores, 'variance_weighted' is not accepted by the
            underlying sklearn error metric.

        Returns
        =======
        :rmse: float
            RMSE score for observed and predicted values.
        """
        return np.sqrt(mean_squared_error(self.Y_true, self.Y_pred, multioutput=multioutput))

    def mae_(self, multioutput='uniform_average'):
        """
        Calculate the Mean Absolute Error (MAE) regression loss for
        inputted observed and predicted values.

        Parameters
        ==========
        :multioutput: str (default='uniform_average')
            method that defines aggregating of multiple output scores. Default
            is recommended ('uniform_average'), available values:
            {'raw_values', 'uniform_average'}. Unlike the R2 and explained
            variance scores, 'variance_weighted' is not accepted by the
            underlying sklearn error metric.

        Returns
        =======
        :mae: float
            If multioutput is 'raw_values', then MAE is returned for each output
            separately. If multioutput is 'uniform_average' or an ndarray of
            weights, then the weighted average of all output errors is returned.
            The output is a non-negative floating point. The best value is 0.0.
        """
        return mean_absolute_error(self.Y_true, self.Y_pred, multioutput=multioutput)

    def rpd_(self):
        """
        Calculates the Ratio of Performance to Deviation (RPD). RPD is the ratio
        between the standard deviation of a variable and the standard error of
        prediction of that variable by a given model. Here it is computed as the
        standard deviation of Y_true (numpy default, ddof=0) divided by the RMSE.

        Parameters
        ==========
        None

        Returns
        =======
        :rpd: float
            the RPD score for the model, or np.inf when the MSE is 0 (which
            avoids a division by zero for a perfect prediction).
        """
        mse = self.mse_()
        return self.Y_true.std() / np.sqrt(mse) if mse > 0 else np.inf

    def explained_var_(self, multioutput='uniform_average'):
        """
        Calculates the Explained Variance regression score. Best possible score is 1.0,
        lower values are worse.

        Parameters
        ==========
        :multioutput: str (default='uniform_average')
            method that defines aggregating of multiple output scores. Default
            is recommended ('uniform_average'), available values:
            {'raw_values', 'uniform_average', 'variance_weighted'}.

        Returns
        =======
        :explained_var: float
            The explained variance or ndarray if 'multioutput' is 'raw_values'.
        """
        return explained_variance_score(self.Y_true, self.Y_pred, multioutput=multioutput)

    def max_error_(self):
        """
        Calculates the maximum residual error between observed and predicted values.

        Parameters
        ==========
        None

        Returns
        =======
        :max_error: float
            A positive floating point value of the maximal residual error
            (the best value is 0.0).
        """
        return float(max_error(self.Y_true, self.Y_pred))

    def __repr__(self):
        return f"<Evaluate(Y_true: {self.Y_true.shape} Y_pred: {self.Y_pred.shape})>."

    def __str__(self):
        #adjacent string literals are concatenated at compile time; unlike a
        #backslash continuation inside one literal, this keeps the source
        #indentation out of the returned text
        return (
            "Instance of Evaluate Class with attribute values: "
            f"R2: {self.r2}, RMSE: {self.rmse}, MSE: {self.mse}, MAE: {self.mae}, "
            f"RPD: {self.rpd}, Explained Variance: {self.explained_var}, "
            f"Max Error: {self.max_error}."
        )
pySAR/globals_.py ADDED
@@ -0,0 +1,21 @@
1
+ ################################################################################
2
+ ############## Global Variables ###############
3
+ ################################################################################
4
+
5
+ import os
6
+ from datetime import datetime
7
+
8
#timestamp captured once, when the module is first imported, so that every
#name derived from it below refers to the same moment
NOW = datetime.now()

#output dir is the default directory used to store all outputs generated
OUTPUT_DIR = 'outputs'

#current datetime appended to output assets & directories to uniquely identify them
CURRENT_DATETIME = NOW.strftime('%Y-%m-%d_%H-%M-%S')

#output folder is the default folder within the OUTPUT_DIR used to store all
#outputs generated from one run of the program.
OUTPUT_FOLDER = os.path.join(OUTPUT_DIR, f'model_output_{CURRENT_DATETIME}')