Moral88 0.8.0__tar.gz → 0.9.0__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- Moral88-0.9.0/Moral88/regression.py +419 -0
- {Moral88-0.8.0 → Moral88-0.9.0/Moral88.egg-info}/PKG-INFO +1 -1
- {Moral88-0.8.0/Moral88.egg-info → Moral88-0.9.0}/PKG-INFO +1 -1
- {Moral88-0.8.0 → Moral88-0.9.0}/setup.py +1 -1
- Moral88-0.8.0/Moral88/regression.py +0 -421
- {Moral88-0.8.0 → Moral88-0.9.0}/LICENSE +0 -0
- {Moral88-0.8.0 → Moral88-0.9.0}/Moral88/__init__.py +0 -0
- {Moral88-0.8.0 → Moral88-0.9.0}/Moral88/segmentation.py +0 -0
- {Moral88-0.8.0 → Moral88-0.9.0}/Moral88.egg-info/SOURCES.txt +0 -0
- {Moral88-0.8.0 → Moral88-0.9.0}/Moral88.egg-info/dependency_links.txt +0 -0
- {Moral88-0.8.0 → Moral88-0.9.0}/Moral88.egg-info/requires.txt +0 -0
- {Moral88-0.8.0 → Moral88-0.9.0}/Moral88.egg-info/top_level.txt +0 -0
- {Moral88-0.8.0 → Moral88-0.9.0}/README.md +0 -0
- {Moral88-0.8.0 → Moral88-0.9.0}/setup.cfg +0 -0
@@ -0,0 +1,419 @@
|
|
1
|
+
import numpy as np
|
2
|
+
import warnings
|
3
|
+
from typing import Union, List, Tuple
|
4
|
+
from scipy import sparse
|
5
|
+
|
6
|
+
class DataValidator:
    """Validation helpers for the array-like inputs of the regression metrics."""

    def __init__(self):
        pass

    def check_device_cpu(self, device):
        """Raise ``ValueError`` unless ``device`` is ``"cpu"`` or ``None``."""
        if device not in {"cpu", None}:
            raise ValueError(f"Unsupported device: {device!r}. Only 'cpu' is supported.")

    def is_1d_array(self, array: Union[np.ndarray, list], warn: bool = False) -> np.ndarray:
        """
        Ensures input is a 1D array.

        A 2D column of shape (n, 1) is flattened (emitting a ``UserWarning``
        when ``warn`` is true). Any other shape raises ``ValueError``.
        """
        array = np.asarray(array)

        if array.ndim == 1:
            return array
        if array.ndim == 2 and array.shape[1] == 1:
            if warn:
                warnings.warn("Input is 2D but will be converted to 1D.", UserWarning)
            return array.ravel()
        raise ValueError(f"Input must be 1D. Found shape {array.shape}.")

    def check_samples(self, array: Union[np.ndarray, list]) -> int:
        """
        Returns the number of samples (length of the first axis) in the array.

        Raises ``TypeError`` when the input converts to a 0-dimensional array.
        """
        array = np.asarray(array)
        if array.ndim == 0:
            raise TypeError("Input must be an array-like object with at least one dimension.")
        return array.shape[0]

    def check_consistent_length(self, *arrays: Union[np.ndarray, list]):
        """
        Ensures all input arrays have the same length; raises ``ValueError`` otherwise.
        """
        lengths = [self.check_samples(arr) for arr in arrays]
        if len(set(lengths)) > 1:
            raise ValueError(f"Inconsistent lengths: {lengths}")

    def validate_regression_targets(self, y_true, y_pred, dtype=np.float64):
        """
        Converts both targets to ``dtype`` and checks that their shapes match.

        Returns the converted ``(y_true, y_pred)`` pair; raises ``ValueError``
        when the shapes differ.
        """
        y_true = np.asarray(y_true, dtype=dtype)
        y_pred = np.asarray(y_pred, dtype=dtype)

        if y_true.shape != y_pred.shape:
            raise ValueError(f"Shapes of y_true {y_true.shape} and y_pred {y_pred.shape} do not match.")

        return y_true, y_pred

    def check_array(self, array, ensure_2d: bool = True, dtype=np.float64, allow_nan: bool = False):
        """
        Validates input array and converts it to specified dtype.

        With ``ensure_2d`` a 1D input is reshaped to a single column. Unless
        ``allow_nan`` is true, any NaN raises ``ValueError``.
        """
        array = np.asarray(array, dtype=dtype)

        if ensure_2d and array.ndim == 1:
            array = array.reshape(-1, 1)

        if not allow_nan and np.isnan(array).any():
            raise ValueError("Input contains NaN values, which are not allowed.")

        return array

    def check_sparse(self, array, accept_sparse: Tuple[str, ...] = ('csr', 'csc')):
        """
        Validates sparse matrices and converts to an acceptable format.

        ``accept_sparse`` lists the allowed formats; a matrix in any other
        format is converted to the first one. A single format may be given as
        a plain string. Raises ``ValueError`` for non-sparse input or when no
        format is named.
        """
        if not sparse.issparse(array):
            raise ValueError("Input is not a sparse matrix.")

        # A bare string would otherwise be searched as a substring and indexed
        # character by character.
        if isinstance(accept_sparse, str):
            accept_sparse = (accept_sparse,)
        if not accept_sparse:
            raise ValueError("accept_sparse must name at least one sparse format.")

        if array.format not in accept_sparse:
            return array.asformat(accept_sparse[0])
        return array

    def validate_r2_score_inputs(self, y_true, y_pred, sample_weight=None):
        """
        Ensures inputs for R2 score computation are valid.

        Returns ``(y_true, y_pred, sample_weight)``; a given ``sample_weight``
        is coerced to 1D, ``None`` is passed through.
        """
        y_true, y_pred = self.validate_regression_targets(y_true, y_pred)
        if sample_weight is not None:
            sample_weight = self.is_1d_array(sample_weight)
        return y_true, y_pred, sample_weight

    def validate_mae_mse_inputs(self, y_true, y_pred, library=None):
        """
        Ensures inputs for MAE and MSE computation are valid.

        Raises ``ValueError`` when ``library`` is not one of ``None``,
        'Moral88', 'sklearn', 'torch' or 'tensorflow'.
        """
        y_true, y_pred = self.validate_regression_targets(y_true, y_pred)
        if library not in {None, 'sklearn', 'torch', 'tensorflow', 'Moral88'}:
            raise ValueError(f"Invalid library: {library}. Choose from {{'Moral88', 'sklearn', 'torch', 'tensorflow'}}.")
        return y_true, y_pred
|
102
|
+
|
103
|
+
|
104
|
+
|
105
|
+
class Metrics:
    """
    Regression metrics computed with NumPy by default, or delegated to an
    optional backend chosen through each method's ``library`` argument.

    Backend packages (sklearn, torch, tensorflow, statsmodels) are imported
    lazily, only inside the branch that uses them.
    """

    def __init__(self):
        self.validator = DataValidator()

    def _error_metric(self, y_true, y_pred, threshold, method, library, flatten, squared):
        """Shared MAE/MSE implementation; ``squared`` picks squared over absolute errors."""
        y_true, y_pred = self.validator.validate_mae_mse_inputs(y_true, y_pred, library)

        if flatten:
            y_true = y_true.ravel()
            y_pred = y_pred.ravel()

        # The validator accepts library=None, so treat it as the native backend
        # instead of falling through every branch and returning None.
        if library is None or library == 'Moral88':
            if threshold is not None:
                y_pred = np.clip(y_pred, threshold[0], threshold[1])

            difference = y_true - y_pred
            errors = difference ** 2 if squared else np.abs(difference)

            if method == 'mean':
                return np.mean(errors)
            if method == 'sum':
                return np.sum(errors)
            if method == 'none':
                return errors
            raise ValueError("Invalid method. Choose from {'mean', 'sum', 'none'}.")

        if library == 'sklearn':
            from sklearn import metrics as sklearn_metrics
            if squared:
                return sklearn_metrics.mean_squared_error(y_true, y_pred)
            return sklearn_metrics.mean_absolute_error(y_true, y_pred)

        if library == 'torch':
            import torch
            y_true_tensor = torch.tensor(y_true, dtype=torch.float32)
            y_pred_tensor = torch.tensor(y_pred, dtype=torch.float32)
            difference = y_true_tensor - y_pred_tensor
            errors = difference ** 2 if squared else torch.abs(difference)
            return torch.mean(errors).item()

        # 'tensorflow' is the only value left that the validator lets through.
        import tensorflow as tf
        y_true_tensor = tf.convert_to_tensor(y_true, dtype=tf.float32)
        y_pred_tensor = tf.convert_to_tensor(y_pred, dtype=tf.float32)
        difference = y_true_tensor - y_pred_tensor
        errors = tf.square(difference) if squared else tf.abs(difference)
        return tf.reduce_mean(errors).numpy()

    def mean_bias_deviation(self, y_true, y_pred, library=None, flatten=True):
        """
        Computes Mean Bias Deviation (MBD): the mean of ``y_pred - y_true``.

        Raises ``NotImplementedError`` for ``library='sklearn'``.
        """
        y_true, y_pred = self.validator.validate_mae_mse_inputs(y_true, y_pred, library)

        if flatten and y_true.ndim > 1:
            y_true = y_true.flatten()
            y_pred = y_pred.flatten()

        if library == 'sklearn':
            # Sklearn does not have a direct implementation for MBD
            raise NotImplementedError("Mean Bias Deviation is not implemented in sklearn.")

        if library == 'torch':
            import torch
            y_true_tensor = torch.tensor(y_true, dtype=torch.float32)
            y_pred_tensor = torch.tensor(y_pred, dtype=torch.float32)
            return torch.mean(y_pred_tensor - y_true_tensor).item()

        if library == 'tensorflow':
            import tensorflow as tf
            y_true_tensor = tf.convert_to_tensor(y_true, dtype=tf.float32)
            y_pred_tensor = tf.convert_to_tensor(y_pred, dtype=tf.float32)
            return tf.reduce_mean(y_pred_tensor - y_true_tensor).numpy()

        # Default implementation
        return np.mean(y_pred - y_true)

    def r2_score(self, y_true, y_pred, sample_weight=None, library=None, flatten=True):
        """
        Computes R2 score.

        ``library`` may be 'sklearn', 'statsmodels' or ``None`` (NumPy).
        ``sample_weight`` is honoured by the sklearn and NumPy paths; the
        statsmodels path does not use it. The NumPy path returns 0.0 when
        ``y_true`` has zero (weighted) variance.

        Raises ``ValueError`` on the NumPy path when ``sample_weight`` does
        not have one weight per sample.
        """
        y_true, y_pred, sample_weight = self.validator.validate_r2_score_inputs(y_true, y_pred, sample_weight)

        if flatten and y_true.ndim > 1:
            y_true = y_true.flatten()
            y_pred = y_pred.flatten()

        if library == 'sklearn':
            from sklearn.metrics import r2_score as sklearn_r2
            return sklearn_r2(y_true, y_pred, sample_weight=sample_weight)

        if library == 'statsmodels':
            import statsmodels.api as sm
            model = sm.OLS(y_true, sm.add_constant(y_pred)).fit()
            return model.rsquared

        squared_residuals = (y_true - y_pred) ** 2
        if sample_weight is None:
            numerator = np.sum(squared_residuals)
            denominator = np.sum((y_true - np.mean(y_true)) ** 2)
        else:
            weights = np.asarray(sample_weight, dtype=np.float64)
            if y_true.ndim == 0 or weights.shape[0] != y_true.shape[0]:
                raise ValueError(
                    f"sample_weight has shape {weights.shape}, which does not match "
                    f"y_true of shape {y_true.shape}."
                )
            # Spread the per-sample weights over any trailing output axes.
            weights = np.broadcast_to(
                weights.reshape((-1,) + (1,) * (y_true.ndim - 1)), y_true.shape
            )
            weighted_mean = np.average(y_true, weights=weights)
            numerator = np.sum(weights * squared_residuals)
            denominator = np.sum(weights * (y_true - weighted_mean) ** 2)

        if denominator == 0:
            return 0.0
        return 1 - (numerator / denominator)

    def mean_absolute_error(self, y_true, y_pred, normalize=True, threshold=None, method='mean', library='Moral88', flatten=True):
        """
        Computes Mean Absolute Error (MAE).

        ``threshold`` is an optional ``(low, high)`` pair used to clip
        ``y_pred``, and ``method`` is 'mean', 'sum' or 'none' (element-wise
        errors); both apply to the native backend only. ``library=None`` is
        treated as 'Moral88'. ``normalize`` is accepted but has no effect.
        """
        return self._error_metric(y_true, y_pred, threshold, method, library, flatten, squared=False)

    def mean_squared_error(self, y_true, y_pred, normalize=True, threshold=None, method='mean', library='Moral88', flatten=True):
        """
        Computes Mean Squared Error (MSE).

        ``threshold`` is an optional ``(low, high)`` pair used to clip
        ``y_pred``, and ``method`` is 'mean', 'sum' or 'none' (element-wise
        errors); both apply to the native backend only. ``library=None`` is
        treated as 'Moral88'. ``normalize`` is accepted but has no effect.
        """
        return self._error_metric(y_true, y_pred, threshold, method, library, flatten, squared=True)

    def root_mean_squared_error(self, y_true, y_pred, library=None):
        """
        Computes Root Mean Squared Error (RMSE) as the square root of
        ``mean_squared_error`` evaluated with the same ``library``.
        """
        return np.sqrt(self.mean_squared_error(y_true, y_pred, library=library))

    def mean_absolute_percentage_error(self, y_true, y_pred, library=None):
        """
        Computes Mean Absolute Percentage Error (MAPE), in percent.

        Targets whose magnitude is below 1e-8 are replaced by 1e-8 to avoid
        division by zero; all other targets, including negative ones, are
        used unchanged.
        """
        y_true, y_pred = self.validator.validate_mae_mse_inputs(y_true, y_pred, library)

        # Floor the magnitude only. Clipping the signed values would turn every
        # negative target into 1e-8 and blow the percentage up.
        epsilon = 1e-8
        y_true = np.where(np.abs(y_true) < epsilon, epsilon, y_true)

        if library == 'sklearn':
            from sklearn.metrics import mean_absolute_percentage_error as sklearn_mape
            return sklearn_mape(y_true, y_pred) * 100

        if library == 'torch':
            import torch
            y_true_tensor = torch.tensor(y_true, dtype=torch.float32)
            y_pred_tensor = torch.tensor(y_pred, dtype=torch.float32)
            return torch.mean(torch.abs((y_true_tensor - y_pred_tensor) / y_true_tensor)).item() * 100

        if library == 'tensorflow':
            import tensorflow as tf
            y_true_tensor = tf.convert_to_tensor(y_true, dtype=tf.float32)
            y_pred_tensor = tf.convert_to_tensor(y_pred, dtype=tf.float32)
            return tf.reduce_mean(tf.abs((y_true_tensor - y_pred_tensor) / y_true_tensor)).numpy() * 100

        return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

    def explained_variance_score(self, y_true, y_pred, library=None, flatten=True):
        """
        Computes Explained Variance Score: ``1 - var(y_true - y_pred) / var(y_true)``.

        Returns 0 when ``y_true`` has zero variance (torch, tensorflow and
        NumPy paths). The torch and tensorflow paths return a scalar tensor.
        ``flatten`` is accepted but not used.
        """
        y_true, y_pred = self.validator.validate_mae_mse_inputs(y_true, y_pred, library)

        if library == 'sklearn':
            from sklearn.metrics import explained_variance_score as sklearn_evs
            return sklearn_evs(y_true, y_pred)

        if library == 'torch':
            import torch
            y_true_tensor = torch.tensor(y_true, dtype=torch.float32)
            y_pred_tensor = torch.tensor(y_pred, dtype=torch.float32)
            variance_residual = torch.var(y_true_tensor - y_pred_tensor)
            variance_y = torch.var(y_true_tensor)
            return 1 - variance_residual / variance_y if variance_y != 0 else 0

        if library == 'tensorflow':
            import tensorflow as tf
            y_true_tensor = tf.convert_to_tensor(y_true, dtype=tf.float32)
            y_pred_tensor = tf.convert_to_tensor(y_pred, dtype=tf.float32)
            variance_residual = tf.math.reduce_variance(y_true_tensor - y_pred_tensor)
            variance_y = tf.math.reduce_variance(y_true_tensor)
            return 1 - variance_residual / variance_y if variance_y != 0 else 0

        numerator = np.var(y_true - y_pred)
        denominator = np.var(y_true)
        return 1 - numerator / denominator if denominator != 0 else 0

    def adjusted_r2_score(self, y_true, y_pred, n_features, library=None, flatten=True):
        """
        Computes Adjusted R-Squared Score.

        Parameters:
        y_true: array-like of shape (n_samples,)
            Ground truth (correct) target values.

        y_pred: array-like of shape (n_samples,)
            Estimated target values.

        n_features: int
            Number of independent features in the model.

        library: str, optional (default=None)
            Library to use for computation. Supports {'sklearn', 'statsmodels', None}.

        flatten: bool, optional (default=True)
            If True, flattens multidimensional arrays before computation.

        Raises:
        ValueError
            If the number of samples is not greater than ``n_features + 1``.
        """
        # Validate inputs
        y_true, y_pred, _ = self.validator.validate_r2_score_inputs(y_true, y_pred)

        # Promote scalars so that the sample count below is well defined
        if y_true.ndim == 0 or y_pred.ndim == 0:
            y_true = np.array([y_true])
            y_pred = np.array([y_pred])

        if flatten and y_true.ndim > 1:
            y_true = y_true.flatten()
            y_pred = y_pred.flatten()

        n_samples = len(y_true)
        if n_samples <= n_features + 1:
            raise ValueError("Number of samples must be greater than number of features plus one for adjusted R-squared computation.")

        # Same three backends as r2_score, so reuse it instead of repeating them.
        r2 = self.r2_score(y_true, y_pred, library=library, flatten=flatten)

        return 1 - (1 - r2) * (n_samples - 1) / (n_samples - n_features - 1)


# Backward-compatible alias: release 0.8.0 exposed this class as ``metrics``.
metrics = Metrics
|
396
|
+
|
397
|
+
if __name__ == '__main__':
    # Example usage: exercises the validator and every metric on a tiny
    # data set and prints the results.
    validator = DataValidator()
    metrics = Metrics()

    # Test validation
    arr = [[1], [2], [3]]
    print("1D array:", validator.is_1d_array(arr))
    print("Samples:", validator.check_samples(arr))

    # Test MAE, MSE, R2, MBD, EV, MAPE, RMSE
    y_true = [3, -0.5, 2, 7]
    y_pred = [2.5, 0.0, 2, 8]

    print("Mean Absolute Error:", metrics.mean_absolute_error(y_true, y_pred))
    print("Mean Squared Error:", metrics.mean_squared_error(y_true, y_pred))
    print("R2 Score:", metrics.r2_score(y_true, y_pred))
    print("Mean Bias Deviation: ", metrics.mean_bias_deviation(y_true, y_pred))
    print("Explained Variance Score: ", metrics.explained_variance_score(y_true, y_pred))
    print("Mean Absolute Percentage Error: ", metrics.mean_absolute_percentage_error(y_true, y_pred))
    print("Root Mean Squared Error: ", metrics.root_mean_squared_error(y_true, y_pred))
    print("adjusted_r2_score: ", metrics.adjusted_r2_score(y_true, y_pred, 2))
|
419
|
+
|
@@ -1,421 +0,0 @@
|
|
1
|
-
import numpy as np
|
2
|
-
import warnings
|
3
|
-
from typing import Union, List, Tuple
|
4
|
-
from scipy import sparse
|
5
|
-
|
6
|
-
class DataValidator:
    """Validation helpers for the array-like inputs of the regression metrics."""

    def __init__(self):
        pass

    def check_device_cpu(self, device):
        """Raise ``ValueError`` unless ``device`` is ``"cpu"`` or ``None``."""
        if device not in {"cpu", None}:
            raise ValueError(f"Unsupported device: {device!r}. Only 'cpu' is supported.")

    def is_1d_array(self, array: Union[np.ndarray, list], warn: bool = False) -> np.ndarray:
        """
        Ensures input is a 1D array.

        A 2D column of shape (n, 1) is flattened (emitting a ``UserWarning``
        when ``warn`` is true). Any other shape raises ``ValueError``.
        """
        array = np.asarray(array)

        if array.ndim == 1:
            return array
        if array.ndim == 2 and array.shape[1] == 1:
            if warn:
                warnings.warn("Input is 2D but will be converted to 1D.", UserWarning)
            return array.ravel()
        raise ValueError(f"Input must be 1D. Found shape {array.shape}.")

    def check_samples(self, array: Union[np.ndarray, list]) -> int:
        """
        Returns the number of samples (length of the first axis) in the array.

        Raises ``TypeError`` when the input converts to a 0-dimensional array.
        """
        array = np.asarray(array)
        if array.ndim == 0:
            raise TypeError("Input must be an array-like object with at least one dimension.")
        return array.shape[0]

    def check_consistent_length(self, *arrays: Union[np.ndarray, list]):
        """
        Ensures all input arrays have the same length; raises ``ValueError`` otherwise.
        """
        lengths = [self.check_samples(arr) for arr in arrays]
        if len(set(lengths)) > 1:
            raise ValueError(f"Inconsistent lengths: {lengths}")

    def validate_regression_targets(self, y_true, y_pred, dtype=np.float64):
        """
        Converts both targets to ``dtype`` and checks that their shapes match.

        Returns the converted ``(y_true, y_pred)`` pair; raises ``ValueError``
        when the shapes differ.
        """
        y_true = np.asarray(y_true, dtype=dtype)
        y_pred = np.asarray(y_pred, dtype=dtype)

        if y_true.shape != y_pred.shape:
            raise ValueError(f"Shapes of y_true {y_true.shape} and y_pred {y_pred.shape} do not match.")

        return y_true, y_pred

    def check_array(self, array, ensure_2d: bool = True, dtype=np.float64, allow_nan: bool = False):
        """
        Validates input array and converts it to specified dtype.

        With ``ensure_2d`` a 1D input is reshaped to a single column. Unless
        ``allow_nan`` is true, any NaN raises ``ValueError``.
        """
        array = np.asarray(array, dtype=dtype)

        if ensure_2d and array.ndim == 1:
            array = array.reshape(-1, 1)

        if not allow_nan and np.isnan(array).any():
            raise ValueError("Input contains NaN values, which are not allowed.")

        return array

    def check_sparse(self, array, accept_sparse: Tuple[str, ...] = ('csr', 'csc')):
        """
        Validates sparse matrices and converts to an acceptable format.

        ``accept_sparse`` lists the allowed formats; a matrix in any other
        format is converted to the first one. A single format may be given as
        a plain string. Raises ``ValueError`` for non-sparse input or when no
        format is named.
        """
        if not sparse.issparse(array):
            raise ValueError("Input is not a sparse matrix.")

        # A bare string would otherwise be searched as a substring and indexed
        # character by character.
        if isinstance(accept_sparse, str):
            accept_sparse = (accept_sparse,)
        if not accept_sparse:
            raise ValueError("accept_sparse must name at least one sparse format.")

        if array.format not in accept_sparse:
            return array.asformat(accept_sparse[0])
        return array

    def validate_r2_score_inputs(self, y_true, y_pred, sample_weight=None):
        """
        Ensures inputs for R2 score computation are valid.

        Returns ``(y_true, y_pred, sample_weight)``; a given ``sample_weight``
        is coerced to 1D, ``None`` is passed through.
        """
        y_true, y_pred = self.validate_regression_targets(y_true, y_pred)
        if sample_weight is not None:
            sample_weight = self.is_1d_array(sample_weight)
        return y_true, y_pred, sample_weight

    def validate_mae_mse_inputs(self, y_true, y_pred, library=None):
        """
        Ensures inputs for MAE and MSE computation are valid.

        Raises ``ValueError`` when ``library`` is not one of ``None``,
        'Moral88', 'sklearn', 'torch' or 'tensorflow'.
        """
        y_true, y_pred = self.validate_regression_targets(y_true, y_pred)
        if library not in {None, 'sklearn', 'torch', 'tensorflow', 'Moral88'}:
            raise ValueError(f"Invalid library: {library}. Choose from {{'Moral88', 'sklearn', 'torch', 'tensorflow'}}.")
        return y_true, y_pred
|
102
|
-
|
103
|
-
|
104
|
-
class metrics:
|
105
|
-
def __init__(self):
|
106
|
-
pass
|
107
|
-
def mean_bias_deviation(self, y_true, y_pred, library=None, flatten=True):
|
108
|
-
"""
|
109
|
-
Computes Mean Bias Deviation (MBD).
|
110
|
-
"""
|
111
|
-
y_true, y_pred = self.validator.validate_mae_mse_inputs(y_true, y_pred, library)
|
112
|
-
|
113
|
-
if flatten and y_true.ndim > 1:
|
114
|
-
y_true = y_true.flatten()
|
115
|
-
y_pred = y_pred.flatten()
|
116
|
-
|
117
|
-
if library == 'sklearn':
|
118
|
-
# Sklearn does not have a direct implementation for MBD
|
119
|
-
raise NotImplementedError("Mean Bias Deviation is not implemented in sklearn.")
|
120
|
-
|
121
|
-
if library == 'torch':
|
122
|
-
import torch
|
123
|
-
y_true_tensor = torch.tensor(y_true, dtype=torch.float32)
|
124
|
-
y_pred_tensor = torch.tensor(y_pred, dtype=torch.float32)
|
125
|
-
bias = torch.mean(y_pred_tensor - y_true_tensor).item()
|
126
|
-
return bias
|
127
|
-
|
128
|
-
if library == 'tensorflow':
|
129
|
-
import tensorflow as tf
|
130
|
-
y_true_tensor = tf.convert_to_tensor(y_true, dtype=tf.float32)
|
131
|
-
y_pred_tensor = tf.convert_to_tensor(y_pred, dtype=tf.float32)
|
132
|
-
bias = tf.reduce_mean(y_pred_tensor - y_true_tensor).numpy()
|
133
|
-
return bias
|
134
|
-
|
135
|
-
# Default implementation
|
136
|
-
return np.mean(y_pred - y_true)
|
137
|
-
def __init__(self):
|
138
|
-
self.validator = DataValidator()
|
139
|
-
|
140
|
-
def r2_score(self, y_true, y_pred, sample_weight=None, library=None, flatten=True):
|
141
|
-
"""
|
142
|
-
Computes R2 score.
|
143
|
-
"""
|
144
|
-
y_true, y_pred, sample_weight = self.validator.validate_r2_score_inputs(y_true, y_pred, sample_weight)
|
145
|
-
|
146
|
-
if flatten and y_true.ndim > 1:
|
147
|
-
y_true = y_true.flatten()
|
148
|
-
y_pred = y_pred.flatten()
|
149
|
-
|
150
|
-
if library == 'sklearn':
|
151
|
-
from sklearn.metrics import r2_score as sklearn_r2
|
152
|
-
return sklearn_r2(y_true, y_pred, sample_weight=sample_weight)
|
153
|
-
|
154
|
-
if library == 'statsmodels':
|
155
|
-
import statsmodels.api as sm
|
156
|
-
model = sm.OLS(y_true, sm.add_constant(y_pred)).fit()
|
157
|
-
return model.rsquared
|
158
|
-
|
159
|
-
numerator = np.sum((y_true - y_pred) ** 2)
|
160
|
-
denominator = np.sum((y_true - np.mean(y_true)) ** 2)
|
161
|
-
|
162
|
-
if denominator == 0:
|
163
|
-
return 0.0
|
164
|
-
return 1 - (numerator / denominator)
|
165
|
-
|
166
|
-
def mean_absolute_error(self, y_true, y_pred, normalize=True, threshold=None, method='mean', library='Moral88', flatten=True):
|
167
|
-
"""
|
168
|
-
Computes Mean Absolute Error (MAE).
|
169
|
-
"""
|
170
|
-
y_true, y_pred = self.validator.validate_mae_mse_inputs(y_true, y_pred, library)
|
171
|
-
|
172
|
-
if flatten:
|
173
|
-
y_true = y_true.ravel()
|
174
|
-
y_pred = y_pred.ravel()
|
175
|
-
|
176
|
-
if library == 'Moral88':
|
177
|
-
if threshold is not None:
|
178
|
-
y_pred = np.clip(y_pred, threshold[0], threshold[1])
|
179
|
-
|
180
|
-
if y_true.ndim > 1 and flatten:
|
181
|
-
y_true = y_true.flatten()
|
182
|
-
y_pred = y_pred.flatten()
|
183
|
-
absolute_errors = np.abs(y_true - y_pred)
|
184
|
-
|
185
|
-
if method == 'mean':
|
186
|
-
result = np.mean(absolute_errors)
|
187
|
-
elif method == 'sum':
|
188
|
-
result = np.sum(absolute_errors)
|
189
|
-
elif method == 'none':
|
190
|
-
result = absolute_errors
|
191
|
-
else:
|
192
|
-
raise ValueError("Invalid method. Choose from {'mean', 'sum', 'none'}.")
|
193
|
-
|
194
|
-
# if normalize and method != 'none':
|
195
|
-
# range_y = np.ptp(y_true)
|
196
|
-
# result = result / max(abs(range_y), 1)
|
197
|
-
|
198
|
-
return result
|
199
|
-
|
200
|
-
elif library == 'sklearn':
|
201
|
-
from sklearn.metrics import mean_absolute_error as sklearn_mae
|
202
|
-
return sklearn_mae(y_true, y_pred)
|
203
|
-
|
204
|
-
elif library == 'torch':
|
205
|
-
import torch
|
206
|
-
y_true_tensor = torch.tensor(y_true, dtype=torch.float32)
|
207
|
-
y_pred_tensor = torch.tensor(y_pred, dtype=torch.float32)
|
208
|
-
return torch.mean(torch.abs(y_true_tensor - y_pred_tensor)).item()
|
209
|
-
|
210
|
-
elif library == 'tensorflow':
|
211
|
-
import tensorflow as tf
|
212
|
-
y_true_tensor = tf.convert_to_tensor(y_true, dtype=tf.float32)
|
213
|
-
y_pred_tensor = tf.convert_to_tensor(y_pred, dtype=tf.float32)
|
214
|
-
return tf.reduce_mean(tf.abs(y_true_tensor - y_pred_tensor)).numpy()
|
215
|
-
|
216
|
-
def mean_squared_error(self, y_true, y_pred, normalize=True, threshold=None, method='mean', library='Moral88', flatten=True):
|
217
|
-
"""
|
218
|
-
Computes Mean Squared Error (MSE).
|
219
|
-
"""
|
220
|
-
y_true, y_pred = self.validator.validate_mae_mse_inputs(y_true, y_pred, library)
|
221
|
-
|
222
|
-
if flatten:
|
223
|
-
y_true = y_true.ravel()
|
224
|
-
y_pred = y_pred.ravel()
|
225
|
-
|
226
|
-
if library == 'Moral88':
|
227
|
-
if threshold is not None:
|
228
|
-
y_pred = np.clip(y_pred, threshold[0], threshold[1])
|
229
|
-
|
230
|
-
if y_true.ndim > 1 and flatten:
|
231
|
-
y_true = y_true.flatten()
|
232
|
-
y_pred = y_pred.flatten()
|
233
|
-
squared_errors = (y_true - y_pred) ** 2
|
234
|
-
|
235
|
-
if method == 'mean':
|
236
|
-
result = np.mean(squared_errors)
|
237
|
-
elif method == 'sum':
|
238
|
-
result = np.sum(squared_errors)
|
239
|
-
elif method == 'none':
|
240
|
-
result = squared_errors
|
241
|
-
else:
|
242
|
-
raise ValueError("Invalid method. Choose from {'mean', 'sum', 'none'}.")
|
243
|
-
|
244
|
-
# if normalize and method != 'none':
|
245
|
-
# range_y = np.ptp(y_true)
|
246
|
-
# result = result / max(abs(range_y), 1)
|
247
|
-
|
248
|
-
return result
|
249
|
-
|
250
|
-
elif library == 'sklearn':
|
251
|
-
from sklearn.metrics import mean_squared_error as sklearn_mse
|
252
|
-
return sklearn_mse(y_true, y_pred)
|
253
|
-
|
254
|
-
elif library == 'torch':
|
255
|
-
import torch
|
256
|
-
y_true_tensor = torch.tensor(y_true, dtype=torch.float32)
|
257
|
-
y_pred_tensor = torch.tensor(y_pred, dtype=torch.float32)
|
258
|
-
return torch.mean((y_true_tensor - y_pred_tensor) ** 2).item()
|
259
|
-
|
260
|
-
elif library == 'tensorflow':
|
261
|
-
import tensorflow as tf
|
262
|
-
y_true_tensor = tf.convert_to_tensor(y_true, dtype=tf.float32)
|
263
|
-
y_pred_tensor = tf.convert_to_tensor(y_pred, dtype=tf.float32)
|
264
|
-
return tf.reduce_mean(tf.square(y_true_tensor - y_pred_tensor)).numpy()
|
265
|
-
|
266
|
-
def root_mean_squared_error(self, y_true, y_pred, library=None):
|
267
|
-
"""
|
268
|
-
Computes Root Mean Squared Error (RMSE).
|
269
|
-
"""
|
270
|
-
y_true, y_pred = self.validator.validate_mae_mse_inputs(y_true, y_pred, library)
|
271
|
-
|
272
|
-
if library == 'sklearn':
|
273
|
-
from sklearn.metrics import mean_squared_error as sklearn_mse
|
274
|
-
return np.sqrt(sklearn_mse(y_true, y_pred))
|
275
|
-
|
276
|
-
if library == 'torch':
|
277
|
-
import torch
|
278
|
-
y_true_tensor = torch.tensor(y_true, dtype=torch.float32)
|
279
|
-
y_pred_tensor = torch.tensor(y_pred, dtype=torch.float32)
|
280
|
-
return torch.sqrt(torch.mean((y_true_tensor - y_pred_tensor) ** 2)).item()
|
281
|
-
|
282
|
-
if library == 'tensorflow':
|
283
|
-
import tensorflow as tf
|
284
|
-
y_true_tensor = tf.convert_to_tensor(y_true, dtype=tf.float32)
|
285
|
-
y_pred_tensor = tf.convert_to_tensor(y_pred, dtype=tf.float32)
|
286
|
-
return tf.sqrt(tf.reduce_mean(tf.square(y_true_tensor - y_pred_tensor))).numpy()
|
287
|
-
|
288
|
-
mse = self.mean_squared_error(y_true, y_pred)
|
289
|
-
return np.sqrt(mse)
|
290
|
-
|
291
|
-
def mean_absolute_percentage_error(self, y_true, y_pred, library=None):
|
292
|
-
"""
|
293
|
-
Computes Mean Absolute Percentage Error (MAPE).
|
294
|
-
"""
|
295
|
-
y_true, y_pred = self.validator.validate_regression_targets(y_true, y_pred)
|
296
|
-
y_true, y_pred = self.validator.validate_mae_mse_inputs(y_true, y_pred, library)
|
297
|
-
y_true = np.clip(y_true, 1e-8, None)
|
298
|
-
|
299
|
-
if library == 'sklearn':
|
300
|
-
from sklearn.metrics import mean_absolute_percentage_error as sklearn_mape
|
301
|
-
return sklearn_mape(y_true, y_pred) * 100
|
302
|
-
|
303
|
-
if library == 'torch':
|
304
|
-
import torch
|
305
|
-
y_true_tensor = torch.tensor(y_true, dtype=torch.float32)
|
306
|
-
y_pred_tensor = torch.tensor(y_pred, dtype=torch.float32)
|
307
|
-
return torch.mean(torch.abs((y_true_tensor - y_pred_tensor) / torch.clamp(y_true_tensor, min=1e-8))).item() * 100
|
308
|
-
|
309
|
-
if library == 'tensorflow':
|
310
|
-
import tensorflow as tf
|
311
|
-
y_true_tensor = tf.convert_to_tensor(y_true, dtype=tf.float32)
|
312
|
-
y_pred_tensor = tf.convert_to_tensor(y_pred, dtype=tf.float32)
|
313
|
-
return tf.reduce_mean(tf.abs((y_true_tensor - y_pred_tensor) / tf.clip_by_value(y_true_tensor, 1e-8, tf.float32.max))).numpy() * 100
|
314
|
-
|
315
|
-
return np.mean(np.abs((y_true - y_pred) / np.clip(np.abs(y_true), 1e-8, None))) * 100
|
316
|
-
|
317
|
-
def explained_variance_score(self, y_true, y_pred, library=None, flatten=True):
    """
    Computes Explained Variance Score: 1 - Var(y_true - y_pred) / Var(y_true).

    Parameters:
    y_true: array-like
        Ground truth (correct) target values.

    y_pred: array-like
        Estimated target values.

    library: str, optional (default=None)
        Backend used for the computation: 'sklearn', 'torch' or
        'tensorflow'. Any other value uses the NumPy implementation.

    flatten: bool, optional (default=True)
        Accepted for interface compatibility; this method does not use it.

    Returns:
    float
        The explained variance score. When y_true has zero variance the
        result is 1.0 if the residuals also have zero variance (perfect
        predictions up to a constant offset) and 0.0 otherwise. The
        sklearn backend returns whatever
        sklearn.metrics.explained_variance_score returns.
    """
    y_true, y_pred = self.validator.validate_mae_mse_inputs(y_true, y_pred, library)

    if library == 'sklearn':
        from sklearn.metrics import explained_variance_score as sklearn_evs
        return sklearn_evs(y_true, y_pred)

    if library == 'torch':
        import torch
        y_true_tensor = torch.tensor(y_true, dtype=torch.float32)
        y_pred_tensor = torch.tensor(y_pred, dtype=torch.float32)
        residual = y_true_tensor - y_pred_tensor
        # Population variance (divide by N) so every backend agrees, including
        # on a single sample; .item() yields a plain float, not a tensor.
        variance_residual = torch.mean((residual - torch.mean(residual)) ** 2).item()
        variance_y = torch.mean((y_true_tensor - torch.mean(y_true_tensor)) ** 2).item()
    elif library == 'tensorflow':
        import tensorflow as tf
        y_true_tensor = tf.convert_to_tensor(y_true, dtype=tf.float32)
        y_pred_tensor = tf.convert_to_tensor(y_pred, dtype=tf.float32)
        variance_residual = float(tf.math.reduce_variance(y_true_tensor - y_pred_tensor).numpy())
        variance_y = float(tf.math.reduce_variance(y_true_tensor).numpy())
    else:
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)
        variance_residual = float(np.var(y_true - y_pred))
        variance_y = float(np.var(y_true))

    if variance_y == 0:
        # Constant targets: the ratio is undefined, so report a finite score
        # the same way the sklearn backend does.
        return 1.0 if variance_residual == 0 else 0.0
    return 1.0 - variance_residual / variance_y
|
346
|
-
|
347
|
-
def adjusted_r2_score(self, y_true, y_pred, n_features, library=None, flatten=True):
    """
    Computes Adjusted R-Squared Score.

    Parameters:
    y_true: array-like of shape (n_samples,)
        Ground truth (correct) target values.

    y_pred: array-like of shape (n_samples,)
        Estimated target values.

    n_features: int
        Number of independent features in the model. Must be non-negative.

    library: str, optional (default=None)
        Library to use for computation. Supports {'sklearn', 'statsmodels', None}.
        'statsmodels' fits an OLS regression of y_true on y_pred (with an
        intercept) and uses that model's R-squared. Any other value uses
        the NumPy implementation.

    flatten: bool, optional (default=True)
        If True, flattens multidimensional arrays before computation.

    Returns:
    float
        1 - (1 - R2) * (n_samples - 1) / (n_samples - n_features - 1)

    Raises:
    ValueError
        If n_features is negative, or if n_samples <= n_features + 1.
    """
    # Validate inputs
    y_true, y_pred, _ = self.validator.validate_r2_score_inputs(y_true, y_pred)

    # Promote scalars to 1D without adding an extra axis to an input that
    # is already an array.
    y_true = np.atleast_1d(y_true)
    y_pred = np.atleast_1d(y_pred)

    if flatten and y_true.ndim > 1:
        y_true = y_true.flatten()
        y_pred = y_pred.flatten()

    # Check preconditions before any optional import or model fitting, so
    # invalid calls fail fast with the intended error.
    if n_features < 0:
        raise ValueError("n_features must be non-negative.")
    n_samples = len(y_true)
    if n_samples <= n_features + 1:
        raise ValueError("Number of samples must be greater than number of features plus one for adjusted R-squared computation.")

    if library == 'sklearn':
        from sklearn.metrics import r2_score
        r2 = r2_score(y_true, y_pred)
    elif library == 'statsmodels':
        import statsmodels.api as sm
        X = sm.add_constant(y_pred)
        model = sm.OLS(y_true, X).fit()
        r2 = model.rsquared
    else:
        numerator = np.sum((y_true - y_pred) ** 2)
        denominator = np.sum((y_true - np.mean(y_true)) ** 2)
        r2 = 1 - (numerator / denominator) if denominator != 0 else 0.0

    adjusted_r2 = 1 - (1 - r2) * (n_samples - 1) / (n_samples - n_features - 1)
    return adjusted_r2
|
398
|
-
|
399
|
-
if __name__ == '__main__':
    # Demo run: exercise the validator helpers and each metric once.
    validator = DataValidator()
    metrics = Metrics()

    # Validation helpers on a column-shaped nested list.
    column = [[1], [2], [3]]
    print("1D array:", validator.is_1d_array(column))
    print("Samples:", validator.check_samples(column))

    # Shared sample data for every metric below.
    y_true = [3, -0.5, 2, 7]
    y_pred = [2.5, 0.0, 2, 8]

    # (label, metric) pairs for the metrics that take only (y_true, y_pred);
    # labels are kept exactly as printed before, including trailing spaces.
    two_arg_reports = (
        ("Mean Absolute Error:", metrics.mean_absolute_error),
        ("Mean Squared Error:", metrics.mean_squared_error),
        ("R2 Score:", metrics.r2_score),
        ("Mean Bias Deviation: ", metrics.mean_bias_deviation),
        ("Explained Variance Score: ", metrics.explained_variance_score),
        ("Mean Absolute Percentage Error: ", metrics.mean_absolute_percentage_error),
        ("Root Mean Squared Error: ", metrics.root_mean_squared_error),
    )
    for label, metric in two_arg_reports:
        print(label, metric(y_true, y_pred))

    # Adjusted R2 additionally needs the number of model features.
    print("adjusted_r2_score: ", metrics.adjusted_r2_score(y_true, y_pred, 2))
|
421
|
-
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|