balancr 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- balancr/__init__.py +13 -0
- balancr/base.py +14 -0
- balancr/classifier_registry.py +300 -0
- balancr/cli/__init__.py +0 -0
- balancr/cli/commands.py +1838 -0
- balancr/cli/config.py +165 -0
- balancr/cli/main.py +778 -0
- balancr/cli/utils.py +101 -0
- balancr/data/__init__.py +5 -0
- balancr/data/loader.py +59 -0
- balancr/data/preprocessor.py +556 -0
- balancr/evaluation/__init__.py +19 -0
- balancr/evaluation/metrics.py +442 -0
- balancr/evaluation/visualisation.py +660 -0
- balancr/imbalance_analyser.py +677 -0
- balancr/technique_registry.py +284 -0
- balancr/techniques/__init__.py +4 -0
- balancr/techniques/custom/__init__.py +0 -0
- balancr/techniques/custom/example_custom_technique.py +27 -0
- balancr-0.1.0.dist-info/LICENSE +21 -0
- balancr-0.1.0.dist-info/METADATA +536 -0
- balancr-0.1.0.dist-info/RECORD +25 -0
- balancr-0.1.0.dist-info/WHEEL +5 -0
- balancr-0.1.0.dist-info/entry_points.txt +2 -0
- balancr-0.1.0.dist-info/top_level.txt +1 -0
balancr/evaluation/metrics.py
@@ -0,0 +1,442 @@
"""Metrics for evaluating imbalanced classification performance."""

import logging
import time
from typing import Dict
import numpy as np
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    average_precision_score,
    confusion_matrix,
)
from sklearn.model_selection import learning_curve


def format_time(seconds):
    """Format time in seconds to minutes and seconds"""
    minutes = int(seconds // 60)
    remaining_seconds = seconds % 60
    return f"{minutes}mins, {remaining_seconds:.2f}secs"


def get_metrics(
    classifier,
    X_test: np.ndarray,
    y_test: np.ndarray,
) -> Dict[str, float]:
    """
    Calculate metrics specifically suited for imbalanced classification.
    Works with both binary and multiclass problems.

    Args:
        classifier: Pre-fitted classifier instance to evaluate
        X_test: Test features
        y_test: Test labels

    Returns:
        Dictionary containing various metric scores
    """
    # Get predictions
    y_pred = classifier.predict(X_test)

    metrics = {}

    # For ROC AUC, we need probability predictions
    if hasattr(classifier, "predict_proba"):
        try:
            y_pred_proba = classifier.predict_proba(X_test)
            # For multiclass problems, we'll use different methods below
        except (AttributeError, IndexError):
            # Log warning and set probability metrics to NaN
            logging.warning(
                f"Failed to get probability predictions from classifier {classifier.__class__.__name__}. "
                "ROC-AUC and average_precision metrics will be set to NaN."
            )
            metrics["roc_auc"] = float("nan")
            metrics["average_precision"] = float("nan")
            # Skip the rest of probability-based calculations
            y_pred_proba = None
    else:
        # Log warning and set probability metrics to NaN
        logging.warning(
            f"Classifier {classifier.__class__.__name__} does not support predict_proba. "
            "ROC-AUC and average_precision metrics will be set to NaN."
        )
        metrics["roc_auc"] = float("nan")
        metrics["average_precision"] = float("nan")
        # Skip the rest of probability-based calculations
        y_pred_proba = None

    # Calculate confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    n_classes = len(np.unique(y_test))

    # Calculate metrics based on number of classes
    if n_classes == 2:  # Binary classification
        # Unpack binary confusion matrix
        tn, fp, fn, tp = cm.ravel()

        # Calculate binary metrics
        specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
        g_mean = np.sqrt(recall_score(y_test, y_pred) * specificity)

        # Create metrics dictionary for binary case
        metrics = {
            "accuracy": accuracy_score(y_test, y_pred),
            "precision": precision_score(y_test, y_pred),
            "recall": recall_score(y_test, y_pred),
            "specificity": specificity,
            "f1": f1_score(y_test, y_pred),
            "g_mean": g_mean,
        }

        # Add probability-based metrics if possible
        try:
            # For binary classification, we need the probability of the positive class
            if (
                isinstance(y_pred_proba, np.ndarray)
                and y_pred_proba.ndim > 1
                and y_pred_proba.shape[1] > 1
            ):
                y_pred_proba_pos = y_pred_proba[:, 1]
            else:
                y_pred_proba_pos = y_pred_proba

            metrics["roc_auc"] = roc_auc_score(y_test, y_pred_proba_pos)
            metrics["average_precision"] = average_precision_score(
                y_test, y_pred_proba_pos
            )
        except (ValueError, TypeError):
            # Skip these metrics if they can't be calculated
            metrics["roc_auc"] = float("nan")
            metrics["average_precision"] = float("nan")

    else:  # Multiclass classification
        # Calculate per-class specificity and g-mean
        specificities = []
        recalls = []

        for i in range(n_classes):
            # For each class, treat it as positive and all others as negative
            y_true_binary = (y_test == i).astype(int)
            y_pred_binary = (y_pred == i).astype(int)

            # Calculate per-class CM values
            cm_i = confusion_matrix(y_true_binary, y_pred_binary)
            if cm_i.shape[0] < 2:  # Handle edge case
                specificities.append(0)
                recalls.append(0)
                continue

            tn_i, fp_i, fn_i, tp_i = cm_i.ravel()

            # Calculate specificity and recall for this class
            spec_i = tn_i / (tn_i + fp_i) if (tn_i + fp_i) > 0 else 0
            rec_i = tp_i / (tp_i + fn_i) if (tp_i + fn_i) > 0 else 0

            specificities.append(spec_i)
            recalls.append(rec_i)

        # Calculate macro-averaged metrics
        macro_specificity = np.mean(specificities)
        macro_recall = np.mean(recalls)
        g_mean = np.sqrt(macro_recall * macro_specificity)

        # Create metrics dictionary for multiclass case
        metrics = {
            "accuracy": accuracy_score(y_test, y_pred),
            "precision": precision_score(y_test, y_pred, average="macro"),
            "recall": recall_score(y_test, y_pred, average="macro"),
            "specificity": macro_specificity,
            "f1": f1_score(y_test, y_pred, average="macro"),
            "g_mean": g_mean,
        }

        # Add multiclass ROC AUC if possible
        try:
            # For multiclass, use roc_auc_score with multi_class parameter
            if hasattr(classifier, "predict_proba"):
                metrics["roc_auc"] = roc_auc_score(
                    y_test, y_pred_proba, multi_class="ovr", average="macro"
                )

                # Use macro-averaged precision as the "average_precision" value for multiclass
                metrics["average_precision"] = precision_score(
                    y_test, y_pred, average="macro"
                )
            else:
                metrics["roc_auc"] = float("nan")
                metrics["average_precision"] = float("nan")
        except (ValueError, TypeError):
            # Skip these metrics if they can't be calculated
            metrics["roc_auc"] = float("nan")
            metrics["average_precision"] = float("nan")

    return metrics
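To illustrate how get_metrics is typically called, here is a minimal usage sketch. It is not part of the packaged file; the RandomForestClassifier, the make_classification toy dataset, and the balancr.evaluation.metrics import path are assumptions made only for this example.

# Illustrative only: evaluate a pre-fitted classifier on a held-out test set.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from balancr.evaluation.metrics import get_metrics

# Imbalanced toy dataset (roughly 90% / 10% class split).
X, y = make_classification(n_samples=500, weights=[0.9, 0.1], random_state=0)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=0
)

clf = RandomForestClassifier(random_state=0).fit(X_train, y_train)
scores = get_metrics(clf, X_test, y_test)
print(scores["f1"], scores["g_mean"], scores["roc_auc"])

The classifier must already be fitted: get_metrics only predicts and scores, and it falls back to NaN for the probability-based metrics when predict_proba is unavailable.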


def get_cv_scores(
    classifier,
    X_balanced: np.ndarray,
    y_balanced: np.ndarray,
    n_folds: int = 5,
) -> Dict[str, float]:
    """
    Perform cross-validation and return average scores.
    Automatically handles multiclass data by using macro averaging.

    Args:
        classifier: Classifier instance to evaluate
        X_balanced: Balanced feature matrix
        y_balanced: Balanced target vector
        n_folds: Number of cross-validation folds

    Returns:
        Dictionary containing average metric scores
    """
    import logging
    from sklearn.model_selection import cross_val_score, cross_val_predict
    from sklearn.metrics import roc_auc_score, confusion_matrix
    import numpy as np

    # Determine if we're dealing with multiclass data
    unique_classes = np.unique(y_balanced)
    is_multiclass = len(unique_classes) > 2

    # Initialise metrics dictionary
    metrics = {}

    # Use scikit-learn's cross_val_score for standard metrics
    # Accuracy doesn't need special handling for multiclass
    scores = cross_val_score(
        classifier, X_balanced, y_balanced, cv=n_folds, scoring="accuracy"
    )
    metrics["cv_accuracy_mean"] = scores.mean()
    metrics["cv_accuracy_std"] = scores.std()

    # For metrics that need proper multiclass handling
    for metric in ["precision", "recall", "f1"]:
        # Use macro averaging for multiclass problems
        if is_multiclass:
            scoring = f"{metric}_macro"
        else:
            scoring = metric

        scores = cross_val_score(
            classifier, X_balanced, y_balanced, cv=n_folds, scoring=scoring
        )
        metrics[f"cv_{metric}_mean"] = scores.mean()
        metrics[f"cv_{metric}_std"] = scores.std()

    # For ROC-AUC and G-mean, we need the predictions
    try:
        # Get cross-validated predictions
        y_pred = cross_val_predict(classifier, X_balanced, y_balanced, cv=n_folds)

        # Calculate G-mean
        if is_multiclass:
            # Calculate per-class specificities and recalls for each class
            specificities = []
            recalls = []

            for i in unique_classes:
                # For each class, treat it as positive and all others as negative
                y_true_binary = (y_balanced == i).astype(int)
                y_pred_binary = (y_pred == i).astype(int)

                # Calculate confusion matrix
                cm = confusion_matrix(y_true_binary, y_pred_binary)

                # Ensure the confusion matrix has the right shape
                if cm.shape == (2, 2):
                    tn, fp, fn, tp = cm.ravel()

                    # Calculate specificity and recall
                    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
                    recall = tp / (tp + fn) if (tp + fn) > 0 else 0

                    specificities.append(specificity)
                    recalls.append(recall)

            # Calculate macro-averaged G-mean
            if specificities and recalls:  # Check if lists are not empty
                macro_specificity = np.mean(specificities)
                macro_recall = np.mean(recalls)
                g_mean = np.sqrt(macro_specificity * macro_recall)

                metrics["cv_g_mean_mean"] = g_mean
                metrics["cv_g_mean_std"] = (
                    0.0  # Cannot calculate std from a single value
                )
            else:
                logging.warning(
                    f"Could not calculate G-mean for classifier {classifier.__class__.__name__}. "
                    "Some classes may not have been predicted correctly."
                )
                # We don't set the metrics here, letting the absence indicate an issue
        else:
            # Binary case
            cm = confusion_matrix(y_balanced, y_pred)

            if cm.shape == (2, 2):
                tn, fp, fn, tp = cm.ravel()

                specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
                sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0

                g_mean = np.sqrt(specificity * sensitivity)
                metrics["cv_g_mean_mean"] = g_mean
            else:
                logging.warning(
                    f"Could not calculate G-mean for classifier {classifier.__class__.__name__}. "
                    "Unexpected confusion matrix shape."
                )
                # Metrics not set
    except Exception as e:
        logging.warning(
            f"Could not calculate G-mean for classifier {classifier.__class__.__name__}. "
            f"Error: {str(e)}"
        )
        # Metrics not set

    # ROC-AUC calculation - separate try block to ensure G-mean calculation happens even if ROC-AUC fails
    try:
        if hasattr(classifier, "predict_proba"):
            # Get probability predictions
            if is_multiclass:
                y_proba = cross_val_predict(
                    classifier,
                    X_balanced,
                    y_balanced,
                    cv=n_folds,
                    method="predict_proba",
                )
                roc_auc = roc_auc_score(
                    y_balanced, y_proba, multi_class="ovr", average="macro"
                )
            else:
                y_proba = cross_val_predict(
                    classifier,
                    X_balanced,
                    y_balanced,
                    cv=n_folds,
                    method="predict_proba",
                )
                # Use second column for positive class probability
                if y_proba.shape[1] > 1:
                    roc_auc = roc_auc_score(y_balanced, y_proba[:, 1])
                else:
                    roc_auc = roc_auc_score(y_balanced, y_proba)

            metrics["cv_roc_auc_mean"] = roc_auc
        else:
            logging.warning(
                f"Classifier {classifier.__class__.__name__} does not support predict_proba. "
                "ROC-AUC cannot be calculated."
            )
            # Metrics not set
    except Exception as e:
        logging.warning(
            f"Could not calculate ROC-AUC for classifier {classifier.__class__.__name__}. "
            f"Error: {str(e)}"
        )
        # Metrics not set

    return metrics
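A minimal usage sketch for get_cv_scores follows. It assumes the data has already been balanced by some resampling technique (make_classification stands in for that output) and that LogisticRegression is a suitable estimator; neither assumption comes from the package itself.

# Illustrative only: cross-validated scores on an already-balanced dataset.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

from balancr.evaluation.metrics import get_cv_scores

# Stand-in for the output of a balancing technique.
X_balanced, y_balanced = make_classification(
    n_samples=400, weights=[0.5, 0.5], random_state=0
)

cv_scores = get_cv_scores(
    LogisticRegression(max_iter=1000), X_balanced, y_balanced, n_folds=5
)
print(cv_scores["cv_accuracy_mean"], cv_scores.get("cv_g_mean_mean"))

Note that cv_g_mean_mean and cv_roc_auc_mean are only added when they can be computed, hence the .get lookup above.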


def get_learning_curve_data(
    classifier,
    X: np.ndarray,
    y: np.ndarray,
    train_sizes: np.ndarray = np.linspace(0.1, 1.0, 10),
    n_folds: int = 5,
) -> Dict[str, np.ndarray]:
    """
    Compute data for plotting learning curves.

    Args:
        classifier: Classifier instance to evaluate
        X: Feature matrix
        y: Target vector
        train_sizes: Relative or absolute sizes of the training dataset
        n_folds: Number of cross-validation folds

    Returns:
        Dictionary containing training sizes, training scores, and validation scores
    """
    train_sizes_abs, train_scores, val_scores = learning_curve(
        estimator=classifier,
        X=X,
        y=y,
        train_sizes=train_sizes,
        cv=n_folds,
        scoring="accuracy",  # Default metric is accuracy
        shuffle=True,
    )

    return {
        "train_sizes": train_sizes_abs,
        "train_scores": train_scores,
        "val_scores": val_scores,
    }
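A short sketch of plotting the returned arrays. matplotlib, DecisionTreeClassifier, and the toy dataset are assumptions made for illustration; the package's own visualisation module is not used here.

# Illustrative only: plot mean train/validation accuracy per training size.
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.tree import DecisionTreeClassifier

from balancr.evaluation.metrics import get_learning_curve_data

X, y = make_classification(n_samples=600, random_state=0)
curve = get_learning_curve_data(DecisionTreeClassifier(random_state=0), X, y, n_folds=5)

plt.plot(curve["train_sizes"], curve["train_scores"].mean(axis=1), label="train")
plt.plot(curve["train_sizes"], curve["val_scores"].mean(axis=1), label="validation")
plt.xlabel("Training set size")
plt.ylabel("Accuracy")
plt.legend()
plt.show()

learning_curve returns one column of scores per fold, so the mean over axis 1 gives the averaged curve at each training size.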


def get_learning_curve_data_multiple_techniques(
    classifier_name: str,
    classifier,
    techniques_data: Dict[str, Dict[str, np.ndarray]],
    train_sizes: np.ndarray = np.linspace(0.1, 1.0, 10),
    n_folds: int = 5,
) -> Dict[str, Dict[str, np.ndarray]]:
    """
    Compute data for plotting learning curves for multiple techniques.

    Args:
        classifier_name: Name of the classifier, used in log messages
        classifier: Classifier instance to evaluate
        techniques_data: A dictionary where keys are technique names and values are dictionaries
            containing 'X_balanced' and 'y_balanced' for each technique
        train_sizes: Relative or absolute sizes of the training dataset
        n_folds: Number of cross-validation folds

    Returns:
        Dictionary containing training sizes, training scores, and validation scores for each technique
    """
    learning_curve_data = {}

    # Loop through each technique's data
    for technique_name, data in techniques_data.items():
        X_balanced = data["X_balanced"]
        y_balanced = data["y_balanced"]

        start_time = time.time()
        logging.info(
            f"Generating learning curve for {classifier_name} trained on data "
            f"balanced by {technique_name}..."
        )
        train_sizes_abs, train_scores, val_scores = learning_curve(
            estimator=classifier,
            X=X_balanced,
            y=y_balanced,
            train_sizes=train_sizes,
            cv=n_folds,
            scoring="accuracy",  # Default metric is accuracy
            shuffle=True,
        )
        curve_generating_time = time.time() - start_time
        logging.info(
            f"Generated learning curve for {classifier_name} trained on data "
            f"balanced by {technique_name} (Time Taken: {format_time(curve_generating_time)})"
        )

        learning_curve_data[technique_name] = {
            "train_sizes": train_sizes_abs,
            "train_scores": train_scores,
            "val_scores": val_scores,
        }

    return learning_curve_data
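Finally, a sketch of the techniques_data mapping that get_learning_curve_data_multiple_techniques expects. The technique names and the use of make_classification as stand-ins for real resampled data are assumptions made only for this example.

# Illustrative only: one learning curve per balancing technique.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

from balancr.evaluation.metrics import get_learning_curve_data_multiple_techniques

# Stand-ins for the balanced outputs of two different techniques.
X_a, y_a = make_classification(n_samples=300, random_state=0)
X_b, y_b = make_classification(n_samples=300, random_state=1)

techniques_data = {
    "technique_a": {"X_balanced": X_a, "y_balanced": y_a},
    "technique_b": {"X_balanced": X_b, "y_balanced": y_b},
}

curves = get_learning_curve_data_multiple_techniques(
    classifier_name="RandomForestClassifier",
    classifier=RandomForestClassifier(random_state=0),
    techniques_data=techniques_data,
    n_folds=5,
)
print(curves["technique_a"]["train_sizes"])

The returned dictionary is keyed by technique name, mirroring the structure of techniques_data.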