balancr-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,442 @@
+ """Metrics for evaluating imbalanced classification performance."""
+
+ import logging
+ import time
+ from typing import Dict
+ import numpy as np
+ from sklearn.metrics import (
+     accuracy_score,
+     precision_score,
+     recall_score,
+     f1_score,
+     roc_auc_score,
+     average_precision_score,
+     confusion_matrix,
+ )
+ from sklearn.model_selection import learning_curve
+
+
+ def format_time(seconds):
+     """Format time in seconds to minutes and seconds"""
+     minutes = int(seconds // 60)
+     remaining_seconds = seconds % 60
+     return f"{minutes}mins, {remaining_seconds:.2f}secs"
+
+
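# Illustrative sketch (not part of the packaged file): how format_time renders a
# duration, assuming the function defined above is in scope. It is used below when
# timing learning-curve generation.
elapsed = 75.5
print(format_time(elapsed))  # prints "1mins, 15.50secs"
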
+ def get_metrics(
+     classifier,
+     X_test: np.ndarray,
+     y_test: np.ndarray,
+ ) -> Dict[str, float]:
+     """
+     Calculate metrics specifically suited for imbalanced classification.
+     Works with both binary and multiclass problems.
+
+     Args:
+         classifier: Pre-fitted classifier instance to evaluate
+         X_test: Test features
+         y_test: Test labels
+
+     Returns:
+         Dictionary containing various metric scores
+     """
+     # Get predictions
+     y_pred = classifier.predict(X_test)
+
+     metrics = {}
+
+     # For ROC AUC, we need probability predictions
+     if hasattr(classifier, "predict_proba"):
+         try:
+             y_pred_proba = classifier.predict_proba(X_test)
+             # For multiclass problems, we'll use different methods below
+         except (AttributeError, IndexError):
+             # Log warning and set probability metrics to NaN
+             logging.warning(
+                 f"Failed to get probability predictions from classifier {classifier.__class__.__name__}. "
+                 "ROC-AUC and average_precision metrics will be set to NaN."
+             )
+             metrics["roc_auc"] = float("nan")
+             metrics["average_precision"] = float("nan")
+             # Skip the rest of probability-based calculations
+             y_pred_proba = None
+     else:
+         # Log warning and set probability metrics to NaN
+         logging.warning(
+             f"Classifier {classifier.__class__.__name__} does not support predict_proba. "
+             "ROC-AUC and average_precision metrics will be set to NaN."
+         )
+         metrics["roc_auc"] = float("nan")
+         metrics["average_precision"] = float("nan")
+         # Skip the rest of probability-based calculations
+         y_pred_proba = None
+
+     # Calculate confusion matrix
+     cm = confusion_matrix(y_test, y_pred)
+     n_classes = len(np.unique(y_test))
+
+     # Calculate metrics based on number of classes
+     if n_classes == 2:  # Binary classification
+         # Unpack binary confusion matrix
+         tn, fp, fn, tp = cm.ravel()
+
+         # Calculate binary metrics
+         specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
+         g_mean = np.sqrt(recall_score(y_test, y_pred) * specificity)
+
+         # Create metrics dictionary for binary case
+         metrics = {
+             "accuracy": accuracy_score(y_test, y_pred),
+             "precision": precision_score(y_test, y_pred),
+             "recall": recall_score(y_test, y_pred),
+             "specificity": specificity,
+             "f1": f1_score(y_test, y_pred),
+             "g_mean": g_mean,
+         }
+
+         # Add probability-based metrics if possible
+         try:
+             # For binary classification, we need the probability of the positive class
+             if (
+                 isinstance(y_pred_proba, np.ndarray)
+                 and y_pred_proba.ndim > 1
+                 and y_pred_proba.shape[1] > 1
+             ):
+                 y_pred_proba_pos = y_pred_proba[:, 1]
+             else:
+                 y_pred_proba_pos = y_pred_proba
+
+             metrics["roc_auc"] = roc_auc_score(y_test, y_pred_proba_pos)
+             metrics["average_precision"] = average_precision_score(
+                 y_test, y_pred_proba_pos
+             )
+         except (ValueError, TypeError):
+             # Skip these metrics if they can't be calculated
+             metrics["roc_auc"] = float("nan")
+             metrics["average_precision"] = float("nan")
+
+     else:  # Multiclass classification
+         # Calculate per-class specificity and g-mean
+         specificities = []
+         recalls = []
+
+         for cls in np.unique(y_test):
+             # For each class, treat it as positive and all others as negative
+             y_true_binary = (y_test == cls).astype(int)
+             y_pred_binary = (y_pred == cls).astype(int)
+
+             # Calculate per-class CM values
+             cm_i = confusion_matrix(y_true_binary, y_pred_binary)
+             if cm_i.shape[0] < 2:  # Handle edge case
+                 specificities.append(0)
+                 recalls.append(0)
+                 continue
+
+             tn_i, fp_i, fn_i, tp_i = cm_i.ravel()
+
+             # Calculate specificity and recall for this class
+             spec_i = tn_i / (tn_i + fp_i) if (tn_i + fp_i) > 0 else 0
+             rec_i = tp_i / (tp_i + fn_i) if (tp_i + fn_i) > 0 else 0
+
+             specificities.append(spec_i)
+             recalls.append(rec_i)
+
+         # Calculate macro-averaged metrics
+         macro_specificity = np.mean(specificities)
+         macro_recall = np.mean(recalls)
+         g_mean = np.sqrt(macro_recall * macro_specificity)
+
+         # Create metrics dictionary for multiclass case
+         metrics = {
+             "accuracy": accuracy_score(y_test, y_pred),
+             "precision": precision_score(y_test, y_pred, average="macro"),
+             "recall": recall_score(y_test, y_pred, average="macro"),
+             "specificity": macro_specificity,
+             "f1": f1_score(y_test, y_pred, average="macro"),
+             "g_mean": g_mean,
+         }
+
+         # Add multiclass ROC AUC if possible
+         try:
+             # For multiclass, use roc_auc_score with the multi_class parameter
+             if hasattr(classifier, "predict_proba") and y_pred_proba is not None:
+                 metrics["roc_auc"] = roc_auc_score(
+                     y_test, y_pred_proba, multi_class="ovr", average="macro"
+                 )
+
+                 # Macro-averaged precision is used as a stand-in for average precision here
+                 metrics["average_precision"] = precision_score(
+                     y_test, y_pred, average="macro"
+                 )
+             else:
+                 metrics["roc_auc"] = float("nan")
+                 metrics["average_precision"] = float("nan")
+         except (ValueError, TypeError):
+             # Skip these metrics if they can't be calculated
+             metrics["roc_auc"] = float("nan")
+             metrics["average_precision"] = float("nan")
+
+     return metrics
+
+
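# A minimal, hypothetical usage sketch of get_metrics (not part of the packaged
# module): evaluate a pre-fitted classifier on an imbalanced binary test set.
# The dataset, classifier, and parameter values are illustrative assumptions,
# and get_metrics is assumed to be in scope from the definition above.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Build a small, imbalanced binary dataset (roughly a 90/10 class split)
X, y = make_classification(
    n_samples=1000, n_classes=2, weights=[0.9, 0.1], random_state=42
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)

scores = get_metrics(clf, X_test, y_test)
# Expect keys such as "accuracy", "precision", "recall", "specificity",
# "f1", "g_mean", "roc_auc", and "average_precision"
print({name: round(value, 3) for name, value in scores.items()})
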
+ def get_cv_scores(
+     classifier,
+     X_balanced: np.ndarray,
+     y_balanced: np.ndarray,
+     n_folds: int = 5,
+ ) -> Dict[str, float]:
+     """
+     Perform cross-validation and return average scores.
+     Automatically handles multiclass data by using macro averaging.
+
+     Args:
+         classifier: Classifier instance to evaluate
+         X_balanced: Balanced feature matrix
+         y_balanced: Balanced target vector
+         n_folds: Number of cross-validation folds
+
+     Returns:
+         Dictionary containing average metric scores
+     """
+     from sklearn.model_selection import cross_val_score, cross_val_predict
+
+     # Determine if we're dealing with multiclass data
+     unique_classes = np.unique(y_balanced)
+     is_multiclass = len(unique_classes) > 2
+
+     # Initialise metrics dictionary
+     metrics = {}
+
+     # Use scikit-learn's cross_val_score for standard metrics
+     # Accuracy doesn't need special handling for multiclass
+     scores = cross_val_score(
+         classifier, X_balanced, y_balanced, cv=n_folds, scoring="accuracy"
+     )
+     metrics["cv_accuracy_mean"] = scores.mean()
+     metrics["cv_accuracy_std"] = scores.std()
+
+     # For metrics that need proper multiclass handling
+     for metric in ["precision", "recall", "f1"]:
+         # Use macro averaging for multiclass problems
+         if is_multiclass:
+             scoring = f"{metric}_macro"
+         else:
+             scoring = metric
+
+         scores = cross_val_score(
+             classifier, X_balanced, y_balanced, cv=n_folds, scoring=scoring
+         )
+         metrics[f"cv_{metric}_mean"] = scores.mean()
+         metrics[f"cv_{metric}_std"] = scores.std()
+
+     # For ROC-AUC and G-mean, we need the predictions
+     try:
+         # Get cross-validated predictions
+         y_pred = cross_val_predict(classifier, X_balanced, y_balanced, cv=n_folds)
+
+         # Calculate G-mean
+         if is_multiclass:
+             # Calculate per-class specificities and recalls for each class
+             specificities = []
+             recalls = []
+
+             for i in unique_classes:
+                 # For each class, treat it as positive and all others as negative
+                 y_true_binary = (y_balanced == i).astype(int)
+                 y_pred_binary = (y_pred == i).astype(int)
+
+                 # Calculate confusion matrix
+                 cm = confusion_matrix(y_true_binary, y_pred_binary)
+
+                 # Ensure the confusion matrix has the right shape
+                 if cm.shape == (2, 2):
+                     tn, fp, fn, tp = cm.ravel()
+
+                     # Calculate specificity and recall
+                     specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
+                     recall = tp / (tp + fn) if (tp + fn) > 0 else 0
+
+                     specificities.append(specificity)
+                     recalls.append(recall)
+
+             # Calculate macro-averaged G-mean
+             if specificities and recalls:  # Check if lists are not empty
+                 macro_specificity = np.mean(specificities)
+                 macro_recall = np.mean(recalls)
+                 g_mean = np.sqrt(macro_specificity * macro_recall)
+
+                 metrics["cv_g_mean_mean"] = g_mean
+                 metrics["cv_g_mean_std"] = (
+                     0.0  # Cannot calculate std from a single value
+                 )
+             else:
+                 logging.warning(
+                     f"Could not calculate G-mean for classifier {classifier.__class__.__name__}. "
+                     "Some classes may not have been predicted correctly."
+                 )
+                 # We don't set the metrics here, letting the absence indicate an issue
+         else:
+             # Binary case
+             cm = confusion_matrix(y_balanced, y_pred)
+
+             if cm.shape == (2, 2):
+                 tn, fp, fn, tp = cm.ravel()
+
+                 specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
+                 sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
+
+                 g_mean = np.sqrt(specificity * sensitivity)
+                 metrics["cv_g_mean_mean"] = g_mean
+             else:
+                 logging.warning(
+                     f"Could not calculate G-mean for classifier {classifier.__class__.__name__}. "
+                     "Unexpected confusion matrix shape."
+                 )
+                 # Metrics not set
+     except Exception as e:
+         logging.warning(
+             f"Could not calculate G-mean for classifier {classifier.__class__.__name__}. "
+             f"Error: {str(e)}"
+         )
+         # Metrics not set
+
+     # ROC-AUC calculation - separate try block to ensure G-mean calculation happens even if ROC-AUC fails
+     try:
+         if hasattr(classifier, "predict_proba"):
+             # Get probability predictions
+             if is_multiclass:
+                 y_proba = cross_val_predict(
+                     classifier,
+                     X_balanced,
+                     y_balanced,
+                     cv=n_folds,
+                     method="predict_proba",
+                 )
+                 roc_auc = roc_auc_score(
+                     y_balanced, y_proba, multi_class="ovr", average="macro"
+                 )
+             else:
+                 y_proba = cross_val_predict(
+                     classifier,
+                     X_balanced,
+                     y_balanced,
+                     cv=n_folds,
+                     method="predict_proba",
+                 )
+                 # Use second column for positive class probability
+                 if y_proba.shape[1] > 1:
+                     roc_auc = roc_auc_score(y_balanced, y_proba[:, 1])
+                 else:
+                     roc_auc = roc_auc_score(y_balanced, y_proba)
+
+             metrics["cv_roc_auc_mean"] = roc_auc
+         else:
+             logging.warning(
+                 f"Classifier {classifier.__class__.__name__} does not support predict_proba. "
+                 "ROC-AUC cannot be calculated."
+             )
+             # Metrics not set
+     except Exception as e:
+         logging.warning(
+             f"Could not calculate ROC-AUC for classifier {classifier.__class__.__name__}. "
+             f"Error: {str(e)}"
+         )
+         # Metrics not set
+
+     return metrics
+
+
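# A minimal, hypothetical usage sketch of get_cv_scores (not part of the packaged
# module): cross-validate an unfitted classifier on arrays that are assumed to have
# already been balanced (in balancr these would come from a balancing technique).
# The dataset and classifier are illustrative assumptions.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

# Stand-in for a balanced dataset
X_balanced, y_balanced = make_classification(
    n_samples=600, n_classes=2, weights=[0.5, 0.5], random_state=0
)

cv_results = get_cv_scores(
    RandomForestClassifier(random_state=0), X_balanced, y_balanced, n_folds=5
)
# Expect keys such as "cv_accuracy_mean", "cv_f1_std", "cv_g_mean_mean", "cv_roc_auc_mean"
print(cv_results)
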
+ def get_learning_curve_data(
+     classifier,
+     X: np.ndarray,
+     y: np.ndarray,
+     train_sizes: np.ndarray = np.linspace(0.1, 1.0, 10),
+     n_folds: int = 5,
+ ) -> Dict[str, np.ndarray]:
+     """
+     Compute data for plotting learning curves.
+
+     Args:
+         classifier: Classifier instance to evaluate
+         X: Feature matrix
+         y: Target vector
+         train_sizes: Relative or absolute sizes of the training dataset
+         n_folds: Number of cross-validation folds
+
+     Returns:
+         Dictionary containing training sizes, training scores, and validation scores
+     """
+     train_sizes_abs, train_scores, val_scores = learning_curve(
+         estimator=classifier,
+         X=X,
+         y=y,
+         train_sizes=train_sizes,
+         cv=n_folds,
+         scoring="accuracy",  # Default metric is accuracy
+         shuffle=True,
+     )
+
+     return {
+         "train_sizes": train_sizes_abs,
+         "train_scores": train_scores,
+         "val_scores": val_scores,
+     }
+
+
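# A minimal, hypothetical usage sketch of get_learning_curve_data (not part of the
# packaged module): compute learning-curve points and reduce the per-fold scores to
# means for plotting. The dataset and classifier are illustrative assumptions.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_samples=500, random_state=0)
curve = get_learning_curve_data(DecisionTreeClassifier(random_state=0), X, y, n_folds=5)

# train_scores and val_scores have shape (n_train_sizes, n_folds);
# average over the folds axis to get one point per training size
mean_train = curve["train_scores"].mean(axis=1)
mean_val = curve["val_scores"].mean(axis=1)
print(list(zip(curve["train_sizes"], mean_val.round(3))))
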
+ def get_learning_curve_data_multiple_techniques(
+     classifier_name: str,
+     classifier,
+     techniques_data: Dict[str, Dict[str, np.ndarray]],
+     train_sizes: np.ndarray = np.linspace(0.1, 1.0, 10),
+     n_folds: int = 5,
+ ) -> Dict[str, Dict[str, np.ndarray]]:
+     """
+     Compute data for plotting learning curves for multiple techniques.
+
+     Args:
+         classifier_name: Name of the classifier, used in log messages
+         classifier: Classifier instance to evaluate
+         techniques_data: A dictionary where keys are technique names and values are dictionaries
+             containing 'X_balanced' and 'y_balanced' for each technique
+         train_sizes: Relative or absolute sizes of the training dataset
+         n_folds: Number of cross-validation folds
+
+     Returns:
+         Dictionary containing training sizes, training scores, and validation scores for each technique
+     """
+     learning_curve_data = {}
+
+     # Loop through each technique's data
+     for technique_name, data in techniques_data.items():
+         X_balanced = data["X_balanced"]
+         y_balanced = data["y_balanced"]
+
+         start_time = time.time()
+         logging.info(
+             f"Generating learning curve for {classifier_name} trained on data "
+             f"balanced by {technique_name}..."
+         )
+         train_sizes_abs, train_scores, val_scores = learning_curve(
+             estimator=classifier,
+             X=X_balanced,
+             y=y_balanced,
+             train_sizes=train_sizes,
+             cv=n_folds,
+             scoring="accuracy",  # Default metric is accuracy
+             shuffle=True,
+         )
+         curve_generating_time = time.time() - start_time
+         logging.info(
+             f"Generated learning curve for {classifier_name} trained on data "
+             f"balanced by {technique_name} (Time Taken: {format_time(curve_generating_time)})"
+         )
+
+         learning_curve_data[technique_name] = {
+             "train_sizes": train_sizes_abs,
+             "train_scores": train_scores,
+             "val_scores": val_scores,
+         }
+
+     return learning_curve_data
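
# A minimal, hypothetical usage sketch of get_learning_curve_data_multiple_techniques
# (not part of the packaged module): compare learning curves for one classifier trained
# on datasets balanced by two different techniques. The technique names and datasets
# below are illustrative assumptions; in balancr they would be the outputs of real
# balancing techniques.
import logging
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

logging.basicConfig(level=logging.INFO)

# Stand-ins for data balanced by two hypothetical techniques
X_a, y_a = make_classification(n_samples=400, random_state=1)
X_b, y_b = make_classification(n_samples=400, random_state=2)
techniques_data = {
    "TechniqueA": {"X_balanced": X_a, "y_balanced": y_a},
    "TechniqueB": {"X_balanced": X_b, "y_balanced": y_b},
}

curves = get_learning_curve_data_multiple_techniques(
    classifier_name="LogisticRegression",
    classifier=LogisticRegression(max_iter=1000),
    techniques_data=techniques_data,
)
print(curves["TechniqueA"]["train_sizes"])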