Moral88 0.10.0__tar.gz → 0.13.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,123 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from Moral88.utils import DataValidator
4
+
5
+ validator = DataValidator()
6
def accuracy(y_true, y_pred):
    """Compute classification accuracy.

    Args:
        y_true: Ground-truth labels (array-like).
        y_pred: Predicted labels, same length as ``y_true``.

    Returns:
        Fraction of positions at which ``y_pred`` equals ``y_true``.
    """
    validator.validate_all(y_true, y_pred)
    # Compare as arrays: ``list == list`` collapses to a single bool, and a
    # pandas Series comparison depends on index labels rather than position.
    true_arr = np.asarray(y_true)
    pred_arr = np.asarray(y_pred)
    return np.sum(true_arr == pred_arr) / len(true_arr)
10
+
11
def _binary_auc(is_positive, scores):
    """Return the tie-aware ROC AUC of ``scores`` for one boolean target."""
    n_pos = int(np.sum(is_positive))
    n_neg = is_positive.size - n_pos
    if n_pos == 0 or n_neg == 0:
        # AUC is undefined without both outcomes; keep the historical 0.
        return 0

    # Mann-Whitney U via average ranks, so tied scores count as half a win
    # instead of depending on the arbitrary order argsort leaves them in.
    order = np.argsort(scores, kind="mergesort")
    _, first, counts = np.unique(scores[order], return_index=True, return_counts=True)
    ranks = np.empty(scores.size, dtype=float)
    ranks[order] = np.repeat(first + (counts + 1) / 2.0, counts)
    rank_sum = np.sum(ranks[is_positive])
    return (rank_sum - n_pos * (n_pos + 1) / 2.0) / (n_pos * n_neg)


def auc_roc(y_true, y_scores, average='macro'):
    """Compute the one-vs-rest AUC-ROC score.

    Args:
        y_true: Ground-truth class labels.
        y_scores: 2-D array-like of per-class scores, one column per class.
            When the column count equals the number of distinct labels in
            ``y_true`` the columns are matched to the sorted labels by
            position; otherwise each label is used as its own column index.
        average: ``'macro'`` for the unweighted mean, ``'weighted'`` for the
            mean weighted by class frequency. Any other value returns the
            list of per-class AUCs.

    Returns:
        A float for ``'macro'``/``'weighted'``, otherwise a list of per-class
        AUC values ordered by sorted class label.

    Raises:
        ValueError: If ``y_scores`` is not two-dimensional.
    """
    validator.validate_all(y_true, np.round(y_scores))
    labels = np.asarray(y_true)
    scores = np.asarray(y_scores, dtype=float)
    if scores.ndim != 2:
        raise ValueError("y_scores must be a 2-D array with one column per class")

    unique_classes, class_counts = np.unique(labels, return_counts=True)
    if scores.shape[1] == unique_classes.size:
        columns = range(unique_classes.size)
    else:
        # Some classes are absent from y_true: labels double as column indices.
        columns = [int(cls) for cls in unique_classes]

    aucs = [
        _binary_auc(labels == cls, scores[:, column])
        for cls, column in zip(unique_classes, columns)
    ]

    if average == 'macro':
        return np.mean(aucs)
    elif average == 'weighted':
        # Weight by the count of each class itself; np.bincount(y_true)[i]
        # is only right when the labels happen to be exactly 0..k-1.
        return np.sum(np.asarray(aucs) * class_counts) / len(labels)
    return aucs
32
+
33
def precision(y_true, y_pred, average='binary'):
    """Compute the precision score.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels, same length as ``y_true``.
        average: ``'macro'``, ``'micro'`` or ``'weighted'``. Any other value,
            including the default ``'binary'``, returns the per-class list.

    Returns:
        A float for the three averaging modes, otherwise a list with one
        precision per class found in ``y_true`` (sorted label order). A
        class that is never predicted contributes 0.
    """
    validator.validate_all(y_true, y_pred)
    # Work on arrays so comparisons are elementwise for lists and Series.
    true_arr = np.asarray(y_true)
    pred_arr = np.asarray(y_pred)
    unique_classes, class_counts = np.unique(true_arr, return_counts=True)

    precisions = []
    for cls in unique_classes:
        predicted_cls = pred_arr == cls
        tp = np.sum(predicted_cls & (true_arr == cls))
        n_predicted = np.sum(predicted_cls)  # tp + fp
        precisions.append(tp / n_predicted if n_predicted > 0 else 0)

    if average == 'macro':
        return np.mean(precisions)
    elif average == 'micro':
        # Every sample is either a hit or a false positive for its prediction.
        n_samples = len(true_arr)
        return np.sum(true_arr == pred_arr) / n_samples if n_samples > 0 else 0
    elif average == 'weighted':
        # Use the counts that belong to each class; np.bincount(y_true)[i]
        # only lines up when the labels are exactly 0..k-1.
        return np.sum(np.asarray(precisions) * class_counts) / len(true_arr)
    return precisions
54
+
55
def recall(y_true, y_pred, average='binary'):
    """Compute the recall score.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels, same length as ``y_true``.
        average: ``'macro'``, ``'micro'`` or ``'weighted'``. Any other value,
            including the default ``'binary'``, returns the per-class list.

    Returns:
        A float for the three averaging modes, otherwise a list with one
        recall per class found in ``y_true`` (sorted label order).
    """
    validator.validate_all(y_true, y_pred)
    # Work on arrays so comparisons are elementwise for lists and Series.
    true_arr = np.asarray(y_true)
    pred_arr = np.asarray(y_pred)
    unique_classes, class_counts = np.unique(true_arr, return_counts=True)

    recalls = []
    for cls, support in zip(unique_classes, class_counts):
        tp = np.sum((true_arr == cls) & (pred_arr == cls))
        # support == tp + fn: every true member is either found or missed.
        recalls.append(tp / support if support > 0 else 0)

    if average == 'macro':
        return np.mean(recalls)
    elif average == 'micro':
        n_samples = len(true_arr)
        return np.sum(true_arr == pred_arr) / n_samples if n_samples > 0 else 0
    elif average == 'weighted':
        # Use the counts that belong to each class; np.bincount(y_true)[i]
        # only lines up when the labels are exactly 0..k-1.
        return np.sum(np.asarray(recalls) * class_counts) / len(true_arr)
    return recalls
76
def balanced_accuracy(y_true, y_pred):
    """Compute balanced accuracy (the unweighted mean of per-class recall).

    Args:
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels, same length as ``y_true``.

    Returns:
        Mean recall over the classes present in ``y_true``.
    """
    validator.validate_all(y_true, y_pred)
    # Work on arrays so comparisons are elementwise for lists and Series.
    true_arr = np.asarray(y_true)
    pred_arr = np.asarray(y_pred)

    recalls = []
    for cls in np.unique(true_arr):
        members = true_arr == cls
        # Classes come from y_true, so each has at least one member.
        recalls.append(np.sum(members & (pred_arr == cls)) / np.sum(members))

    return np.mean(recalls)
88
+
89
def matthews_correlation_coefficient(y_true, y_pred):
    """Compute the Matthews Correlation Coefficient (MCC).

    Uses the multiclass generalisation computed from the confusion matrix,
    which reduces to the familiar binary formula for two classes.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels, same length as ``y_true``.

    Returns:
        MCC in ``[-1, 1]``; 0 when the coefficient is undefined (for example
        when every prediction falls in a single class).
    """
    validator.validate_all(y_true, y_pred)
    true_arr = np.asarray(y_true).ravel()
    pred_arr = np.asarray(y_pred).ravel()

    # Encode over the union of labels so a predicted class that never occurs
    # in y_true still counts as an error instead of being silently dropped.
    classes, encoded = np.unique(np.concatenate([true_arr, pred_arr]), return_inverse=True)
    encoded = encoded.ravel()
    true_idx, pred_idx = encoded[:true_arr.size], encoded[true_arr.size:]

    # Float counts: the products below reach n**2 and would overflow int64
    # silently for large inputs if kept as integers.
    confusion_mat = np.zeros((classes.size, classes.size), dtype=float)
    np.add.at(confusion_mat, (true_idx, pred_idx), 1)

    total = confusion_mat.sum()
    true_totals = confusion_mat.sum(axis=1)
    pred_totals = confusion_mat.sum(axis=0)

    covariance = np.trace(confusion_mat) * total - np.dot(true_totals, pred_totals)
    var_true = total ** 2 - np.dot(true_totals, true_totals)
    var_pred = total ** 2 - np.dot(pred_totals, pred_totals)
    denominator = np.sqrt(var_true * var_pred)

    return covariance / denominator if denominator > 0 else 0
108
+
109
def cohens_kappa(y_true, y_pred):
    """Compute Cohen's Kappa score.

    Args:
        y_true: Ground-truth class labels (first rater).
        y_pred: Predicted class labels (second rater), same length.

    Returns:
        ``(po - pe) / (1 - pe)`` where ``po`` is the observed agreement and
        ``pe`` the agreement expected by chance; 0 when ``pe`` equals 1.
    """
    validator.validate_all(y_true, y_pred)
    true_arr = np.asarray(y_true).ravel()
    pred_arr = np.asarray(y_pred).ravel()

    # Encode over the union of labels; building the matrix from y_true's
    # labels alone drops every sample predicted as an unseen class and
    # shrinks ``total`` below the real sample count.
    classes, encoded = np.unique(np.concatenate([true_arr, pred_arr]), return_inverse=True)
    encoded = encoded.ravel()
    true_idx, pred_idx = encoded[:true_arr.size], encoded[true_arr.size:]

    confusion_mat = np.zeros((classes.size, classes.size), dtype=float)
    np.add.at(confusion_mat, (true_idx, pred_idx), 1)

    total = confusion_mat.sum()
    po = np.trace(confusion_mat) / total
    pe = np.dot(confusion_mat.sum(axis=0), confusion_mat.sum(axis=1)) / (total ** 2)

    return (po - pe) / (1 - pe) if (1 - pe) > 0 else 0
@@ -0,0 +1,129 @@
1
+ import numpy as np
2
+ from Moral88.utils import DataValidator
3
+
4
+ validator = DataValidator()
5
+
6
+
7
def adjusted_rand_index(labels_true, labels_pred):
    """Compute the Adjusted Rand Index (ARI).

    Args:
        labels_true: Reference cluster labels.
        labels_pred: Predicted cluster labels, same length.

    Returns:
        ARI; 1 when the two partitions are identical up to relabelling or
        when the index is degenerate (fewer than two samples, or maximum
        index equal to the expected index).
    """
    validator.validate_all(labels_true, labels_pred)
    # Map arbitrary label values to 0..k-1; indexing the matrix with the raw
    # labels fails (or hits the wrong cell) unless they already are 0..k-1.
    _, true_idx = np.unique(np.asarray(labels_true).ravel(), return_inverse=True)
    _, pred_idx = np.unique(np.asarray(labels_pred).ravel(), return_inverse=True)
    true_idx, pred_idx = true_idx.ravel(), pred_idx.ravel()
    n = true_idx.size

    total_pairs = n * (n - 1) / 2
    if total_pairs == 0:
        return 1

    contingency_matrix = np.zeros((true_idx.max() + 1, pred_idx.max() + 1))
    np.add.at(contingency_matrix, (true_idx, pred_idx), 1)

    row_totals = contingency_matrix.sum(axis=1)
    col_totals = contingency_matrix.sum(axis=0)
    sum_comb_c = np.sum(row_totals * (row_totals - 1)) / 2
    sum_comb_k = np.sum(col_totals * (col_totals - 1)) / 2
    sum_comb = np.sum(contingency_matrix * (contingency_matrix - 1)) / 2

    expected_index = (sum_comb_c * sum_comb_k) / total_pairs
    max_index = (sum_comb_c + sum_comb_k) / 2

    return (sum_comb - expected_index) / (max_index - expected_index) if max_index != expected_index else 1
24
+
25
def normalized_mutual_info(labels_true, labels_pred, epsilon=1e-10):
    """Compute Normalized Mutual Information (NMI).

    Args:
        labels_true: Reference cluster labels.
        labels_pred: Predicted cluster labels, same length.
        epsilon: Placeholder fed to ``log2`` for empty contingency cells so
            the logarithm never sees zero; those cells contribute nothing.

    Returns:
        Mutual information divided by ``sqrt(H(true) * H(pred))``; 0 when
        either entropy is zero.
    """
    validator.validate_all(labels_true, labels_pred)
    # return_inverse maps arbitrary label values to 0..k-1; indexing with
    # the raw labels breaks unless they already are 0..k-1.
    _, true_idx, counts_true = np.unique(
        np.asarray(labels_true).ravel(), return_inverse=True, return_counts=True)
    _, pred_idx, counts_pred = np.unique(
        np.asarray(labels_pred).ravel(), return_inverse=True, return_counts=True)
    true_idx, pred_idx = true_idx.ravel(), pred_idx.ravel()
    n_samples = true_idx.size

    contingency_matrix = np.zeros((counts_true.size, counts_pred.size))
    np.add.at(contingency_matrix, (true_idx, pred_idx), 1)

    p_true = counts_true / n_samples
    p_pred = counts_pred / n_samples
    # Counts from np.unique are always positive, so the logs are finite.
    h_true = -np.sum(p_true * np.log2(p_true))
    h_pred = -np.sum(p_pred * np.log2(p_pred))

    joint_prob = contingency_matrix / n_samples
    independent = p_true[:, None] * p_pred[None, :]
    ratio = np.where(joint_prob > 0, joint_prob / independent, epsilon)
    mutual_info = np.sum(joint_prob * np.log2(ratio))

    return mutual_info / np.sqrt(h_true * h_pred) if h_true * h_pred > 0 else 0
42
+
43
def silhouette_score(X, labels):
    """Compute the mean silhouette coefficient using Euclidean distance.

    Args:
        X: 2-D array-like of samples (rows) by features (columns).
        labels: Cluster label of each sample.

    Returns:
        Mean of ``(b - a) / max(a, b)`` over all samples, where ``a`` is the
        mean distance to the other members of the sample's cluster and ``b``
        the smallest mean distance to any other cluster. Samples in singleton
        clusters score 0.
    """
    validator.validate_all(labels, labels)
    # Arrays are required for the boolean masks below; with a plain list
    # ``labels == label`` is a single bool, not a mask.
    points = np.asarray(X, dtype=float)
    label_arr = np.asarray(labels)
    unique_labels = np.unique(label_arr)
    masks = [label_arr == label for label in unique_labels]

    scores = np.zeros(len(points))
    for i, label in enumerate(label_arr):
        distances = np.linalg.norm(points - points[i], axis=1)
        own = label_arr == label
        own_size = np.sum(own)
        other_means = [
            np.mean(distances[mask])
            for other_label, mask in zip(unique_labels, masks)
            if other_label != label
        ]
        if own_size <= 1 or not other_means:
            continue  # silhouette is defined as 0 here

        # Exclude the point itself: its zero self-distance must not be
        # counted in the intra-cluster mean.
        a = np.sum(distances[own]) / (own_size - 1)
        b = np.min(other_means)
        scale = max(a, b)
        scores[i] = (b - a) / scale if scale > 0 else 0

    return np.mean(scores)
59
+
60
def calinski_harabasz_index(X, labels):
    """Compute the Calinski-Harabasz index (variance ratio criterion).

    Args:
        X: 2-D array-like of samples (rows) by features (columns).
        labels: Cluster label of each sample.

    Returns:
        Between-cluster dispersion over within-cluster dispersion, scaled by
        ``(n_samples - n_clusters) / (n_clusters - 1)``. Returns 1.0 when the
        within-cluster dispersion is zero.
    """
    validator.validate_all(labels, labels)
    # Arrays are required for boolean masking (lists/DataFrames misbehave).
    points = np.asarray(X, dtype=float)
    label_arr = np.asarray(labels)
    unique_labels = np.unique(label_arr)
    n_clusters = len(unique_labels)
    n_samples = len(points)
    overall_mean = np.mean(points, axis=0)

    between_group_dispersion = 0.0
    within_group_dispersion = 0.0
    for label in unique_labels:
        members = points[label_arr == label]
        centroid = np.mean(members, axis=0)
        between_group_dispersion += len(members) * np.sum((centroid - overall_mean) ** 2)
        within_group_dispersion += np.sum((members - centroid) ** 2)

    if within_group_dispersion == 0:
        # Every point sits on its centroid; avoid dividing by zero.
        return 1.0

    return (between_group_dispersion / within_group_dispersion) * ((n_samples - n_clusters) / (n_clusters - 1))
72
+
73
def dunn_index(X, labels):
    """Compute a centroid-based Dunn index.

    This variant measures separation as the smallest distance between two
    cluster centroids and compactness as the largest distance from any
    sample to its own centroid.

    Args:
        X: 2-D array-like of samples (rows) by features (columns).
        labels: Cluster label of each sample.

    Returns:
        Minimum inter-centroid distance divided by the maximum
        sample-to-centroid distance.
    """
    validator.validate_all(labels, labels)
    # Arrays are required for boolean masking (lists/DataFrames misbehave).
    points = np.asarray(X, dtype=float)
    label_arr = np.asarray(labels)

    clusters = [points[label_arr == label] for label in np.unique(label_arr)]
    centroids = [np.mean(members, axis=0) for members in clusters]

    intra_distances = [
        np.max(np.linalg.norm(members - centroid, axis=1))
        for members, centroid in zip(clusters, centroids)
    ]
    inter_distances = [
        np.linalg.norm(centroids[i] - centroids[j])
        for i in range(len(centroids))
        for j in range(i + 1, len(centroids))
    ]
    return np.min(inter_distances) / np.max(intra_distances)
81
+
82
def inertia(X, labels):
    """Compute inertia: the sum of squared distances to cluster centroids.

    Args:
        X: 2-D array-like of samples (rows) by features (columns).
        labels: Cluster label of each sample.

    Returns:
        Total squared deviation of every sample from the mean of its cluster.
    """
    validator.validate_all(labels, labels)
    # Arrays are required for boolean masking (lists/DataFrames misbehave).
    points = np.asarray(X, dtype=float)
    label_arr = np.asarray(labels)

    total = 0.0
    for label in np.unique(label_arr):
        members = points[label_arr == label]
        total += np.sum((members - np.mean(members, axis=0)) ** 2)
    return total
88
+
89
def homogeneity_score(labels_true, labels_pred, epsilon=1e-10):
    """Compute the homogeneity score.

    Homogeneity is the mutual information between the two labelings divided
    by the entropy of the reference labels.

    Args:
        labels_true: Reference class labels.
        labels_pred: Predicted cluster labels, same length.
        epsilon: Placeholder fed to ``log2`` for empty contingency cells so
            the logarithm never sees zero; those cells contribute nothing.

    Returns:
        Score in ``[0, 1]``; 1.0 when the reference labels have zero entropy.
    """
    validator.validate_all(labels_true, labels_pred)
    # return_inverse maps arbitrary label values to 0..k-1; indexing with
    # the raw labels breaks unless they already are 0..k-1.
    _, true_idx, counts_true = np.unique(
        np.asarray(labels_true).ravel(), return_inverse=True, return_counts=True)
    _, pred_idx, counts_pred = np.unique(
        np.asarray(labels_pred).ravel(), return_inverse=True, return_counts=True)
    true_idx, pred_idx = true_idx.ravel(), pred_idx.ravel()
    n_samples = true_idx.size

    contingency_matrix = np.zeros((counts_true.size, counts_pred.size))
    np.add.at(contingency_matrix, (true_idx, pred_idx), 1)

    p_true = counts_true / n_samples
    p_pred = counts_pred / n_samples
    entropy_true = -np.sum(p_true * np.log2(p_true))

    # Mutual information needs joint *probabilities*; using raw counts here
    # mixes units and inflates the score with the sample size.
    joint_prob = contingency_matrix / n_samples
    independent = p_true[:, None] * p_pred[None, :]
    ratio = np.where(joint_prob > 0, joint_prob / independent, epsilon)
    mutual_info = np.sum(joint_prob * np.log2(ratio))

    return mutual_info / entropy_true if entropy_true > 0 else 1.0
103
+
104
+
105
def completeness_score(labels_true, labels_pred, epsilon=1e-10):
    """Compute the completeness score.

    Completeness is the mutual information between the two labelings divided
    by the entropy of the predicted labels.

    Args:
        labels_true: Reference class labels.
        labels_pred: Predicted cluster labels, same length.
        epsilon: Placeholder fed to ``log2`` for empty contingency cells so
            the logarithm never sees zero; those cells contribute nothing.

    Returns:
        Score in ``[0, 1]``; 1.0 when the predicted labels have zero entropy
        (a single cluster trivially keeps every class together).
    """
    validator.validate_all(labels_true, labels_pred)
    # return_inverse maps arbitrary label values to 0..k-1; indexing with
    # the raw labels breaks unless they already are 0..k-1.
    _, true_idx, counts_true = np.unique(
        np.asarray(labels_true).ravel(), return_inverse=True, return_counts=True)
    _, pred_idx, counts_pred = np.unique(
        np.asarray(labels_pred).ravel(), return_inverse=True, return_counts=True)
    true_idx, pred_idx = true_idx.ravel(), pred_idx.ravel()
    n_samples = true_idx.size

    contingency_matrix = np.zeros((counts_true.size, counts_pred.size))
    np.add.at(contingency_matrix, (true_idx, pred_idx), 1)

    p_true = counts_true / n_samples
    p_pred = counts_pred / n_samples
    entropy_pred = -np.sum(p_pred * np.log2(p_pred))

    # Mutual information needs joint *probabilities*; using raw counts here
    # mixes units and inflates the score with the sample size.
    joint_prob = contingency_matrix / n_samples
    independent = p_true[:, None] * p_pred[None, :]
    ratio = np.where(joint_prob > 0, joint_prob / independent, epsilon)
    mutual_info = np.sum(joint_prob * np.log2(ratio))

    return mutual_info / entropy_pred if entropy_pred > 0 else 1.0
119
+
120
def davies_bouldin_index(X, labels):
    """Compute the Davies-Bouldin index.

    Args:
        X: 2-D array-like of samples (rows) by features (columns).
        labels: Cluster label of each sample.

    Returns:
        Mean, over clusters, of the worst-case ratio
        ``(s_i + s_j) / d(c_i, c_j)`` where ``s`` is a cluster's mean
        distance to its centroid and ``d`` the distance between centroids.
    """
    validator.validate_all(labels, labels)
    # Arrays are required for boolean masking (lists/DataFrames misbehave).
    points = np.asarray(X, dtype=float)
    label_arr = np.asarray(labels)
    unique_labels = np.unique(label_arr)
    n_clusters = len(unique_labels)

    clusters = [points[label_arr == label] for label in unique_labels]
    cluster_means = np.array([np.mean(members, axis=0) for members in clusters])
    dispersions = np.array([
        np.mean(np.linalg.norm(members - centroid, axis=1))
        for members, centroid in zip(clusters, cluster_means)
    ])

    worst_ratios = []
    for i in range(n_clusters):
        ratios = [
            (dispersions[i] + dispersions[j]) / np.linalg.norm(cluster_means[i] - cluster_means[j])
            for j in range(n_clusters)
            if i != j
        ]
        worst_ratios.append(max(ratios))

    db_index = np.mean(worst_ratios)
    return db_index
@@ -0,0 +1,271 @@
1
+ import numpy as np
2
+ from Moral88.utils import DataValidator
3
+
4
+ validator = DataValidator()
5
+
6
+
7
def mean_absolute_error(y_true, y_pred, sample_weights=None, normalize=False, method='mean'):
    """Compute Mean Absolute Error (MAE).

    Args:
        y_true: Observed values.
        y_pred: Predicted values, same length as ``y_true``.
        sample_weights: Optional per-sample multipliers applied to each
            absolute error. The result is not renormalised by their sum.
        normalize: If true, divide every error by the mean of ``y_true``.
        method: ``'mean'``, ``'sum'`` or ``'none'`` (return per-sample errors).

    Returns:
        A scalar for ``'mean'``/``'sum'``, or the array of errors for ``'none'``.

    Raises:
        ValueError: If ``sample_weights`` has the wrong length or ``method``
            is not recognised.
    """
    validator.validate_all(y_true, y_pred)
    # Float arrays: lists cannot be subtracted, Series would align on index,
    # and in-place scaling of an integer array by float weights raises.
    true_arr = np.asarray(y_true, dtype=float)
    pred_arr = np.asarray(y_pred, dtype=float)
    errors = np.abs(true_arr - pred_arr)

    if sample_weights is not None:
        sample_weights = np.asarray(sample_weights, dtype=float)
        if len(sample_weights) != len(true_arr):
            raise ValueError("sample_weights must have the same length as y_true and y_pred")
        errors = errors * sample_weights

    if normalize:
        errors = errors / np.mean(true_arr)

    if method == 'mean':
        return np.mean(errors)
    elif method == 'sum':
        return np.sum(errors)
    elif method == 'none':
        return errors
    else:
        raise ValueError("Invalid method. Choose from 'mean', 'sum', or 'none'")
29
+
30
def mean_squared_error(y_true, y_pred, sample_weights=None, squared=True, method='mean'):
    """Compute Mean Squared Error (MSE) or its square root (RMSE).

    Args:
        y_true: Observed values.
        y_pred: Predicted values, same length as ``y_true``.
        sample_weights: Optional per-sample multipliers applied to each
            squared error. The result is not renormalised by their sum.
        squared: If false, return the square root of the aggregated value.
            Ignored when ``method='none'``.
        method: ``'mean'``, ``'sum'`` or ``'none'`` (return per-sample
            squared errors).

    Returns:
        A scalar for ``'mean'``/``'sum'``, or the array of squared errors
        for ``'none'``.

    Raises:
        ValueError: If ``sample_weights`` has the wrong length or ``method``
            is not recognised.
    """
    validator.validate_all(y_true, y_pred)
    # Float arrays: lists cannot be subtracted, Series would align on index,
    # and in-place scaling of an integer array by float weights raises.
    true_arr = np.asarray(y_true, dtype=float)
    pred_arr = np.asarray(y_pred, dtype=float)
    errors = (true_arr - pred_arr) ** 2

    if sample_weights is not None:
        sample_weights = np.asarray(sample_weights, dtype=float)
        if len(sample_weights) != len(true_arr):
            raise ValueError("sample_weights must have the same length as y_true and y_pred")
        errors = errors * sample_weights

    if method == 'mean':
        result = np.mean(errors)
    elif method == 'sum':
        result = np.sum(errors)
    elif method == 'none':
        return errors
    else:
        raise ValueError("Invalid method. Choose from 'mean', 'sum', or 'none'")

    return result if squared else np.sqrt(result)
51
+
52
def root_mean_squared_error(y_true, y_pred, sample_weights=None, method='mean'):
    """Compute Root Mean Squared Error (RMSE).

    Args:
        y_true: Observed values.
        y_pred: Predicted values, same length as ``y_true``.
        sample_weights: Optional per-sample multipliers applied to each
            squared error before aggregation.
        method: ``'mean'`` or ``'sum'`` aggregate the squared errors before
            the square root; ``'none'`` returns the square root of each
            (weighted) squared error.

    Returns:
        A scalar for ``'mean'``/``'sum'``, or an array for ``'none'``.

    Raises:
        ValueError: If ``sample_weights`` has the wrong length or ``method``
            is not recognised.
    """
    validator.validate_all(y_true, y_pred)
    # Float arrays: lists cannot be subtracted, Series would align on index,
    # and in-place scaling of an integer array by float weights raises.
    true_arr = np.asarray(y_true, dtype=float)
    pred_arr = np.asarray(y_pred, dtype=float)
    errors = (true_arr - pred_arr) ** 2

    if sample_weights is not None:
        sample_weights = np.asarray(sample_weights, dtype=float)
        if len(sample_weights) != len(true_arr):
            raise ValueError("sample_weights must have the same length as y_true and y_pred")
        errors = errors * sample_weights

    if method == 'mean':
        result = np.mean(errors)
    elif method == 'sum':
        result = np.sum(errors)
    elif method == 'none':
        return np.sqrt(errors)
    else:
        raise ValueError("Invalid method. Choose from 'mean', 'sum', or 'none'")

    return np.sqrt(result)
73
+
74
def mean_bias_deviation(y_true, y_pred, sample_weights=None, method='mean'):
    """Compute Mean Bias Deviation (MBD).

    Args:
        y_true: Observed values.
        y_pred: Predicted values, same length as ``y_true``.
        sample_weights: Optional per-sample multipliers applied to each
            signed error. The result is not renormalised by their sum.
        method: ``'mean'``, ``'sum'`` or ``'none'`` (return per-sample errors).

    Returns:
        Aggregated signed error ``y_true - y_pred`` (positive when the model
        under-predicts), or the array of signed errors for ``'none'``.

    Raises:
        ValueError: If ``sample_weights`` has the wrong length or ``method``
            is not recognised.
    """
    validator.validate_all(y_true, y_pred)
    # Float arrays: lists cannot be subtracted, Series would align on index,
    # and in-place scaling of an integer array by float weights raises.
    true_arr = np.asarray(y_true, dtype=float)
    pred_arr = np.asarray(y_pred, dtype=float)
    errors = true_arr - pred_arr

    if sample_weights is not None:
        sample_weights = np.asarray(sample_weights, dtype=float)
        if len(sample_weights) != len(true_arr):
            raise ValueError("sample_weights must have the same length as y_true and y_pred")
        errors = errors * sample_weights

    if method == 'mean':
        return np.mean(errors)
    elif method == 'sum':
        return np.sum(errors)
    elif method == 'none':
        return errors
    else:
        raise ValueError("Invalid method. Choose from 'mean', 'sum', or 'none'")
93
+
94
def r_squared(y_true, y_pred, adjusted=False, n_features=None):
    """Compute R-squared (R²), optionally adjusted for the feature count.

    Args:
        y_true: Observed values.
        y_pred: Predicted values, same length as ``y_true``.
        adjusted: If true, return the adjusted R².
        n_features: Number of predictors; required when ``adjusted`` is true.

    Returns:
        ``1 - SS_res / SS_tot``, or its adjusted form
        ``1 - (1 - R²)(n - 1) / (n - n_features - 1)``.

    Raises:
        ValueError: If ``adjusted`` is true and ``n_features`` is missing or
            leaves no residual degrees of freedom.
    """
    validator.validate_all(y_true, y_pred)
    # Float arrays: lists cannot be subtracted and Series align on index.
    true_arr = np.asarray(y_true, dtype=float)
    pred_arr = np.asarray(y_pred, dtype=float)

    ss_total = np.sum((true_arr - np.mean(true_arr)) ** 2)
    ss_residual = np.sum((true_arr - pred_arr) ** 2)
    r2 = 1 - (ss_residual / ss_total)

    if not adjusted:
        return r2

    if n_features is None:
        raise ValueError("n_features must be provided for adjusted R² calculation")
    n = len(true_arr)
    residual_dof = n - n_features - 1
    if residual_dof <= 0:
        # Dividing by zero or a negative count gives a meaningless value.
        raise ValueError("Number of features must be less than number of samples minus one")
    return 1 - ((1 - r2) * (n - 1) / residual_dof)
108
+
109
def adjusted_r_squared(y_true, y_pred, n_features):
    """Compute adjusted R-squared.

    Args:
        y_true: Observed values.
        y_pred: Predicted values, same length as ``y_true``.
        n_features: Number of predictors used by the model.

    Returns:
        ``1 - (1 - R²)(n - 1) / (n - n_features - 1)``.

    Raises:
        ValueError: If ``n_features`` leaves no residual degrees of freedom,
            i.e. ``n_features >= n - 1``.
    """
    validator.validate_all(y_true, y_pred)

    n = len(y_true)
    # The denominator is n - n_features - 1, so n_features == n - 1 must be
    # rejected as well; the previous ``>= n`` guard let it divide by zero.
    if n_features >= n - 1:
        raise ValueError("Number of features must be less than number of samples minus one")

    # Float arrays: lists cannot be subtracted and Series align on index.
    true_arr = np.asarray(y_true, dtype=float)
    pred_arr = np.asarray(y_pred, dtype=float)
    ss_total = np.sum((true_arr - np.mean(true_arr)) ** 2)
    ss_residual = np.sum((true_arr - pred_arr) ** 2)
    r2 = 1 - (ss_residual / ss_total)

    return 1 - ((1 - r2) * (n - 1) / (n - n_features - 1))
122
+
123
def mean_absolute_percentage_error(y_true, y_pred, sample_weights=None, method='mean'):
    """Compute Mean Absolute Percentage Error (MAPE), in percent.

    Args:
        y_true: Observed values; must not contain zeros.
        y_pred: Predicted values, same length as ``y_true``.
        sample_weights: Optional per-sample multipliers applied to each
            percentage error. The result is not renormalised by their sum.
        method: ``'mean'``, ``'sum'`` or ``'none'`` (return per-sample errors).

    Returns:
        A scalar for ``'mean'``/``'sum'``, or the array of errors for ``'none'``.

    Raises:
        ValueError: If ``y_true`` contains a zero, ``sample_weights`` has the
            wrong length, or ``method`` is not recognised.
    """
    # Hand arrays to the validator: its range check compares with ``<`` and
    # fails with TypeError when given a plain list.
    true_arr = np.asarray(y_true)
    pred_arr = np.asarray(y_pred)
    validator.validate_all(true_arr, pred_arr, mape_based=True)
    true_arr = true_arr.astype(float)
    pred_arr = pred_arr.astype(float)

    if np.any(true_arr == 0):
        # A zero target makes the percentage error infinite or NaN.
        raise ValueError("y_true must not contain zeros for MAPE")

    errors = np.abs((true_arr - pred_arr) / true_arr) * 100

    if sample_weights is not None:
        sample_weights = np.asarray(sample_weights, dtype=float)
        if len(sample_weights) != len(true_arr):
            raise ValueError("sample_weights must have the same length as y_true and y_pred")
        errors = errors * sample_weights

    if method == 'mean':
        return np.mean(errors)
    elif method == 'sum':
        return np.sum(errors)
    elif method == 'none':
        return errors
    else:
        raise ValueError("Invalid method. Choose from 'mean', 'sum', or 'none'")
142
+
143
def symmetric_mean_absolute_percentage_error(y_true, y_pred, sample_weights=None, method='mean'):
    """Compute Symmetric Mean Absolute Percentage Error (sMAPE), in percent.

    Each error is ``200 * |y_true - y_pred| / (|y_true| + |y_pred|)``; a pair
    where both values are zero is a perfect prediction and contributes 0.

    Args:
        y_true: Observed values.
        y_pred: Predicted values, same length as ``y_true``.
        sample_weights: Optional per-sample multipliers applied to each
            error. The result is not renormalised by their sum.
        method: ``'mean'``, ``'sum'`` or ``'none'`` (return per-sample errors).

    Returns:
        A scalar for ``'mean'``/``'sum'``, or the array of errors for ``'none'``.

    Raises:
        ValueError: If ``sample_weights`` has the wrong length or ``method``
            is not recognised.
    """
    # Hand arrays to the validator: its range check compares with ``<`` and
    # fails with TypeError when given a plain list.
    true_arr = np.asarray(y_true)
    pred_arr = np.asarray(y_pred)
    validator.validate_all(true_arr, pred_arr, mape_based=True)
    true_arr = true_arr.astype(float)
    pred_arr = pred_arr.astype(float)

    scale = np.abs(true_arr) + np.abs(pred_arr)
    errors = np.zeros_like(scale)
    # Skip 0/0 positions so they stay at 0 instead of becoming NaN.
    np.divide(200 * np.abs(true_arr - pred_arr), scale, out=errors, where=scale != 0)

    if sample_weights is not None:
        sample_weights = np.asarray(sample_weights, dtype=float)
        if len(sample_weights) != len(true_arr):
            raise ValueError("sample_weights must have the same length as y_true and y_pred")
        errors = errors * sample_weights

    if method == 'mean':
        return np.mean(errors)
    elif method == 'sum':
        return np.sum(errors)
    elif method == 'none':
        return errors
    else:
        raise ValueError("Invalid method. Choose from 'mean', 'sum', or 'none'")
162
+
163
def huber_loss(y_true, y_pred, delta=1.0, sample_weights=None, method='mean'):
    """Compute the Huber loss.

    The loss is quadratic (``0.5 * e**2``) for residuals with ``|e| <= delta``
    and linear (``delta * (|e| - 0.5 * delta)``) beyond that.

    Args:
        y_true: Observed values.
        y_pred: Predicted values, same length as ``y_true``.
        delta: Residual magnitude at which the loss switches to linear.
        sample_weights: Optional per-sample multipliers applied to each loss.
        method: ``'mean'``, ``'sum'`` or ``'none'`` (return per-sample losses).

    Returns:
        A scalar for ``'mean'``/``'sum'``, or the array of losses for ``'none'``.

    Raises:
        ValueError: If ``sample_weights`` has the wrong length or ``method``
            is not recognised.
    """
    validator.validate_all(y_true, y_pred)
    # Float arrays: lists cannot be subtracted and Series align on index.
    residual = np.asarray(y_true, dtype=float) - np.asarray(y_pred, dtype=float)
    magnitude = np.abs(residual)

    loss = np.where(magnitude <= delta, 0.5 * residual ** 2, delta * (magnitude - 0.5 * delta))

    if sample_weights is not None:
        sample_weights = np.asarray(sample_weights, dtype=float)
        if len(sample_weights) != len(residual):
            raise ValueError("sample_weights must have the same length as y_true and y_pred")
        loss = loss * sample_weights

    if method == 'mean':
        return np.mean(loss)
    elif method == 'sum':
        return np.sum(loss)
    elif method == 'none':
        return loss
    else:
        raise ValueError("Invalid method. Choose from 'mean', 'sum', or 'none'")
184
+
185
def relative_squared_error(y_true, y_pred):
    """Compute Relative Squared Error (RSE).

    Args:
        y_true: Observed values.
        y_pred: Predicted values, same length as ``y_true``.

    Returns:
        Sum of squared residuals divided by the sum of squared deviations of
        ``y_true`` from its mean.
    """
    validator.validate_all(y_true, y_pred)
    # Float arrays: lists cannot be subtracted and Series align on index.
    true_arr = np.asarray(y_true, dtype=float)
    pred_arr = np.asarray(y_pred, dtype=float)
    residual_ss = np.sum((true_arr - pred_arr) ** 2)
    baseline_ss = np.sum((true_arr - np.mean(true_arr)) ** 2)
    return residual_ss / baseline_ss
189
+
190
def mean_squared_log_error(y_true, y_pred, sample_weights=None, method='mean', squared=True):
    """Compute Mean Squared Log Error (MSLE) or its square root (RMSLE).

    Args:
        y_true: Observed values, each greater than -1.
        y_pred: Predicted values, each greater than -1.
        sample_weights: Optional per-sample multipliers applied to each
            squared log error.
        method: ``'mean'``, ``'sum'`` or ``'none'`` (return per-sample errors).
        squared: If false, return the square root of the aggregated value.
            Ignored when ``method='none'``.

    Returns:
        A scalar for ``'mean'``/``'sum'``, or the array of squared log errors
        for ``'none'``.

    Raises:
        ValueError: If any value is not greater than -1, ``sample_weights``
            has the wrong length, or ``method`` is not recognised.
    """
    # Hand arrays to the validator: its range check compares with ``<`` and
    # fails with TypeError when given a plain list.
    true_arr = np.asarray(y_true)
    pred_arr = np.asarray(y_pred)
    validator.validate_all(true_arr, pred_arr, log_based=True)
    true_arr = true_arr.astype(float)
    pred_arr = pred_arr.astype(float)

    if np.any(true_arr <= -1) or np.any(pred_arr <= -1):
        # log1p(-1) is -inf, which would poison the aggregate.
        raise ValueError("y_true and y_pred must be greater than -1 for log-based metrics")

    errors = (np.log1p(true_arr) - np.log1p(pred_arr)) ** 2

    if sample_weights is not None:
        sample_weights = np.asarray(sample_weights, dtype=float)
        if len(sample_weights) != len(true_arr):
            raise ValueError("sample_weights must have the same length as y_true and y_pred")
        errors = errors * sample_weights

    if method == 'mean':
        result = np.mean(errors)
    elif method == 'sum':
        result = np.sum(errors)
    elif method == 'none':
        return errors
    else:
        raise ValueError("Invalid method. Choose from 'mean', 'sum', or 'none'")

    return result if squared else np.sqrt(result)
211
+
212
def root_mean_squared_log_error(y_true, y_pred, sample_weights=None, method='mean'):
    """Compute Root Mean Squared Logarithmic Error (RMSLE).

    Args:
        y_true: Observed values, each greater than -1.
        y_pred: Predicted values, each greater than -1.
        sample_weights: Optional per-sample multipliers applied to each
            squared log error before aggregation.
        method: ``'mean'`` or ``'sum'`` aggregate before the square root;
            ``'none'`` returns the square root of each (weighted) squared
            log error.

    Returns:
        A scalar for ``'mean'``/``'sum'``, or an array for ``'none'``.

    Raises:
        ValueError: If any value is not greater than -1, ``sample_weights``
            has the wrong length, or ``method`` is not recognised.
    """
    # Hand arrays to the validator: its range check compares with ``<`` and
    # fails with TypeError when given a plain list.
    true_arr = np.asarray(y_true)
    pred_arr = np.asarray(y_pred)
    validator.validate_all(true_arr, pred_arr, log_based=True)
    true_arr = true_arr.astype(float)
    pred_arr = pred_arr.astype(float)

    if np.any(true_arr <= -1) or np.any(pred_arr <= -1):
        # log1p(-1) is -inf, which would poison the aggregate.
        raise ValueError("y_true and y_pred must be greater than -1 for log-based metrics")

    errors = (np.log1p(true_arr) - np.log1p(pred_arr)) ** 2

    if sample_weights is not None:
        sample_weights = np.asarray(sample_weights, dtype=float)
        if len(sample_weights) != len(true_arr):
            raise ValueError("sample_weights must have the same length as y_true and y_pred")
        errors = errors * sample_weights

    if method == 'mean':
        return np.sqrt(np.mean(errors))
    elif method == 'sum':
        return np.sqrt(np.sum(errors))
    elif method == 'none':
        return np.sqrt(errors)
    else:
        raise ValueError("Invalid method. Choose from 'mean', 'sum', or 'none'")
231
+
232
def log_cosh_loss(y_true, y_pred, sample_weights=None, method='mean'):
    """Compute the Log-Cosh loss, ``log(cosh(y_pred - y_true))``.

    Args:
        y_true: Observed values.
        y_pred: Predicted values, same length as ``y_true``.
        sample_weights: Optional per-sample multipliers applied to each loss.
        method: ``'mean'``, ``'sum'`` or ``'none'`` (return per-sample losses).

    Returns:
        A scalar for ``'mean'``/``'sum'``, or the array of losses for ``'none'``.

    Raises:
        ValueError: If ``sample_weights`` has the wrong length or ``method``
            is not recognised.
    """
    validator.validate_all(y_true, y_pred)
    # Float arrays: lists cannot be subtracted and Series align on index.
    residual = np.abs(np.asarray(y_pred, dtype=float) - np.asarray(y_true, dtype=float))

    # log(cosh(x)) == |x| + log1p(exp(-2|x|)) - log(2). This form stays
    # finite for large residuals, where cosh itself overflows to inf.
    errors = residual + np.log1p(np.exp(-2.0 * residual)) - np.log(2.0)

    if sample_weights is not None:
        sample_weights = np.asarray(sample_weights, dtype=float)
        if len(sample_weights) != len(residual):
            raise ValueError("sample_weights must have the same length as y_true and y_pred")
        errors = errors * sample_weights

    if method == 'mean':
        return np.mean(errors)
    elif method == 'sum':
        return np.sum(errors)
    elif method == 'none':
        return errors
    else:
        raise ValueError("Invalid method. Choose from 'mean', 'sum', or 'none'")
251
+
252
def explained_variance(y_true, y_pred):
    """Compute the explained variance score.

    Args:
        y_true: Observed values.
        y_pred: Predicted values, same length as ``y_true``.

    Returns:
        ``1 - Var(y_true - y_pred) / Var(y_true)``, or 0 when ``y_true`` has
        zero variance.
    """
    validator.validate_all(y_true, y_pred)
    # Float arrays: lists cannot be subtracted and Series align on index.
    true_arr = np.asarray(y_true, dtype=float)
    pred_arr = np.asarray(y_pred, dtype=float)

    variance_y_true = np.var(true_arr)
    if variance_y_true == 0:
        return 0
    return 1 - (np.var(true_arr - pred_arr) / variance_y_true)
257
+
258
def median_absolute_error(y_true, y_pred, sample_weights=None):
    """Compute the median absolute error.

    Args:
        y_true: Observed values.
        y_pred: Predicted values, same length as ``y_true``.
        sample_weights: Optional per-sample multipliers applied to each
            absolute error before taking the median (this scales the errors;
            it is not a weighted median).

    Returns:
        Median of the (optionally scaled) absolute errors.

    Raises:
        ValueError: If ``sample_weights`` has the wrong length.
    """
    # Uses the shared module-level validator like every other metric here,
    # rather than building a throwaway DataValidator on each call.
    validator.validate_all(y_true, y_pred)
    # Float arrays: lists cannot be subtracted, Series would align on index,
    # and in-place scaling of an integer array by float weights raises.
    true_arr = np.asarray(y_true, dtype=float)
    pred_arr = np.asarray(y_pred, dtype=float)
    errors = np.abs(true_arr - pred_arr)

    if sample_weights is not None:
        sample_weights = np.asarray(sample_weights, dtype=float)
        if len(sample_weights) != len(true_arr):
            raise ValueError("sample_weights must have the same length as y_true and y_pred")
        errors = errors * sample_weights

    return np.median(errors)
271
+
@@ -0,0 +1,71 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
class DataValidator:
    """Validate paired target/prediction inputs before a metric is computed.

    Each ``check_*`` method raises on failure and returns ``None`` otherwise;
    ``validate_all`` runs them in sequence.
    """

    def __init__(self, raise_warning=True):
        """Initialize the validator.

        Args:
            raise_warning: When true, ``check_multicollinearity`` prints a
                warning if it is given something other than a DataFrame.
        """
        self.raise_warning = raise_warning

    def check_data_type(self, y_true, y_pred):
        """Raise TypeError unless both inputs are an ndarray, Series, DataFrame or list."""
        valid_types = (np.ndarray, pd.Series, pd.DataFrame, list)
        if not isinstance(y_true, valid_types) or not isinstance(y_pred, valid_types):
            raise TypeError("y_true and y_pred must be numpy array, pandas series, or list")

    def check_missing_values(self, y_true, y_pred):
        """Raise ValueError if either input contains a missing value."""
        if np.any(pd.isnull(y_true)) or np.any(pd.isnull(y_pred)):
            raise ValueError("Missing values (NaN) detected in data")

    def check_inf_values(self, y_true, y_pred):
        """Raise ValueError if either input contains an infinite value."""
        if np.any(np.isinf(y_true)) or np.any(np.isinf(y_pred)):
            raise ValueError("Infinite values (inf) detected in data")

    def check_lengths(self, y_true, y_pred):
        """Raise ValueError unless both inputs have the same length."""
        if len(y_true) != len(y_pred):
            raise ValueError("y_true and y_pred must have the same length")

    def check_numeric_values(self, y_true, y_pred):
        """Raise TypeError unless both inputs convert to a numeric dtype."""
        true_is_numeric = np.issubdtype(np.array(y_true).dtype, np.number)
        pred_is_numeric = np.issubdtype(np.array(y_pred).dtype, np.number)
        if not (true_is_numeric and pred_is_numeric):
            raise TypeError("y_true and y_pred must contain numeric values")

    def check_variance(self, y_true, y_pred):
        """Raise ValueError if ``y_true`` has zero variance.

        ``y_pred`` is accepted for a uniform signature but is not inspected.
        """
        if np.var(y_true) == 0:
            raise ValueError("Variance of y_true is zero. R-squared may not be meaningful")

    def check_non_negative(self, y_true, y_pred):
        """Raise ValueError if any value in either input is below -1.

        Despite the method name, the enforced lower bound is -1, not 0.
        """
        # Compare as arrays: ``list < int`` is a TypeError in Python 3.
        true_arr = np.asarray(y_true)
        pred_arr = np.asarray(y_pred)
        if np.any(true_arr < -1) or np.any(pred_arr < -1):
            raise ValueError("y_true and y_pred must be greater than or equal to -1 for log-based metrics")

    def check_multicollinearity(self, X, threshold=0.9):
        """Raise ValueError if any two columns of ``X`` are highly correlated.

        Args:
            X: Feature table. Only a pandas DataFrame is checked; anything
                else is skipped (with a printed warning when
                ``raise_warning`` is true).
            threshold: Absolute correlation above which a pair is rejected.
        """
        if not isinstance(X, pd.DataFrame):
            if self.raise_warning:
                print("Warning: Multicollinearity check requires a pandas DataFrame")
            return

        corr_matrix = X.corr().abs().to_numpy()
        # Look only at distinct column pairs. Counting every cell above the
        # threshold and subtracting the column count assumes each diagonal
        # entry qualifies, which fails for constant columns (NaN diagonal).
        pairwise = corr_matrix[np.triu_indices_from(corr_matrix, k=1)]
        with np.errstate(invalid="ignore"):
            if np.any(pairwise > threshold):
                raise ValueError("High multicollinearity detected in input features")

    def validate_all(self, y_true, y_pred, log_based=False, mape_based=False, require_variance=True):
        """Run the validation checks in sequence.

        Args:
            y_true: Observed values or labels.
            y_pred: Predicted values or labels.
            log_based: Also apply the lower-bound check used by log metrics.
            mape_based: Also apply the same lower-bound check for MAPE-style
                metrics.
            require_variance: When false, skip the zero-variance check so
                metrics that do not divide by the variance of ``y_true``
                can accept constant targets.

        Returns:
            True if every check passes.

        Raises:
            TypeError: On an unsupported container or non-numeric content.
            ValueError: On missing or infinite values, mismatched lengths,
                zero variance, or values below -1 when that check applies.
        """
        self.check_data_type(y_true, y_pred)
        self.check_missing_values(y_true, y_pred)
        # Numeric check first: np.isinf raises its own obscure TypeError on
        # non-numeric data, hiding the clearer message from this check.
        self.check_numeric_values(y_true, y_pred)
        self.check_inf_values(y_true, y_pred)
        self.check_lengths(y_true, y_pred)
        if require_variance:
            self.check_variance(y_true, y_pred)
        if log_based or mape_based:
            self.check_non_negative(y_true, y_pred)
        return True
67
+
68
+
69
+ # Example usage
70
+ if __name__ == "__main__":
71
+ pass
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: Moral88
3
- Version: 0.10.0
3
+ Version: 0.13.0
4
4
  Summary: A library for regression evaluation metrics.
5
5
  Author: Morteza Alizadeh
6
6
  Author-email: alizadeh.c2m@gmail.com
@@ -2,6 +2,8 @@ LICENSE
2
2
  README.md
3
3
  setup.py
4
4
  Moral88/__init__.py
5
+ Moral88/classification.py
6
+ Moral88/clustering.py
5
7
  Moral88/regression.py
6
8
  Moral88/utils.py
7
9
  Moral88.egg-info/PKG-INFO
@@ -9,5 +11,10 @@ Moral88.egg-info/SOURCES.txt
9
11
  Moral88.egg-info/dependency_links.txt
10
12
  Moral88.egg-info/requires.txt
11
13
  Moral88.egg-info/top_level.txt
12
- tests/__init__.py
13
- tests/test_regression.py
14
+ Test/__init__.py
15
+ Test/test_classification.py
16
+ Test/test_clustering.py
17
+ Test/test_regression.py
18
+ test/test_classification.py
19
+ test/test_clustering.py
20
+ test/test_regression.py
@@ -1,2 +1,2 @@
1
1
  Moral88
2
- tests
2
+ Test
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: Moral88
3
- Version: 0.10.0
3
+ Version: 0.13.0
4
4
  Summary: A library for regression evaluation metrics.
5
5
  Author: Morteza Alizadeh
6
6
  Author-email: alizadeh.c2m@gmail.com