balancr 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- balancr/__init__.py +13 -0
- balancr/base.py +14 -0
- balancr/classifier_registry.py +300 -0
- balancr/cli/__init__.py +0 -0
- balancr/cli/commands.py +1838 -0
- balancr/cli/config.py +165 -0
- balancr/cli/main.py +778 -0
- balancr/cli/utils.py +101 -0
- balancr/data/__init__.py +5 -0
- balancr/data/loader.py +59 -0
- balancr/data/preprocessor.py +556 -0
- balancr/evaluation/__init__.py +19 -0
- balancr/evaluation/metrics.py +442 -0
- balancr/evaluation/visualisation.py +660 -0
- balancr/imbalance_analyser.py +677 -0
- balancr/technique_registry.py +284 -0
- balancr/techniques/__init__.py +4 -0
- balancr/techniques/custom/__init__.py +0 -0
- balancr/techniques/custom/example_custom_technique.py +27 -0
- balancr-0.1.0.dist-info/LICENSE +21 -0
- balancr-0.1.0.dist-info/METADATA +536 -0
- balancr-0.1.0.dist-info/RECORD +25 -0
- balancr-0.1.0.dist-info/WHEEL +5 -0
- balancr-0.1.0.dist-info/entry_points.txt +2 -0
- balancr-0.1.0.dist-info/top_level.txt +1 -0
balancr/cli/utils.py
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
"""
|
2
|
+
utils.py - Utility functions for the balancr CLI.
|
3
|
+
|
4
|
+
This module provides helper functions for logging, file handling,
|
5
|
+
and other common tasks used across the CLI application.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import logging
|
9
|
+
import sys
|
10
|
+
|
11
|
+
|
12
|
+
# Define a filter to exclude the matplotlib 'findfont' messages
|
13
|
+
class FontMessageFilter(logging.Filter):
    """Drop log records whose rendered message mentions 'findfont'."""

    def filter(self, record):
        """Return False for records containing 'findfont', True otherwise."""
        # getMessage() applies the record's %-style args, so the check runs
        # against the final text rather than the raw format string.
        rendered = record.getMessage()
        return rendered.find("findfont") == -1
|
19
|
+
|
20
|
+
|
21
|
+
def setup_logging(log_level="default"):
    """
    Set up logging with colour and the specified verbosity level.

    Every handler currently attached to the root logger is removed and
    replaced by a single handler writing to ``sys.stdout``. When ``colorama``
    can be imported the level name is coloured; otherwise plain
    ``logging.basicConfig`` output is used. In both cases records containing
    'findfont' are filtered out, and the "matplotlib" and "PIL" loggers are
    raised to WARNING.

    Args:
        log_level: "verbose", "default", or "quiet". Any other value is
            treated like "default" (INFO).
    """
    # Reset any existing handlers. Iterate over a copy because
    # removeHandler() mutates the underlying list.
    root_logger = logging.getLogger()
    for handler in root_logger.handlers[:]:
        root_logger.removeHandler(handler)

    level_map = {
        "verbose": logging.DEBUG,
        "default": logging.INFO,
        "quiet": logging.WARNING,
    }

    # Get the log level, defaulting to INFO
    log_level_value = level_map.get(log_level, logging.INFO)

    # Root logger level
    root_logger.setLevel(log_level_value)

    # Coloured logging. Only the import is guarded so that an unrelated
    # failure further down is not mistaken for "colorama is missing".
    try:
        import colorama
    except ImportError:
        # Fall back to standard logging if colorama is not available
        logging.basicConfig(
            level=log_level_value,
            format="%(levelname)s: %(message)s",
            handlers=[logging.StreamHandler(sys.stdout)],
        )

        # Add filter for font messages to the root logger's handlers
        for handler in root_logger.handlers:
            handler.addFilter(FontMessageFilter())
    else:
        colorama.init()

        # Define colour codes
        COLORS = {
            "DEBUG": colorama.Fore.BLUE,
            "INFO": colorama.Fore.GREEN,
            "WARNING": colorama.Fore.YELLOW,
            "ERROR": colorama.Fore.RED,
            "CRITICAL": colorama.Fore.RED + colorama.Style.BRIGHT,
        }

        # Create custom formatter with colours
        class ColoredFormatter(logging.Formatter):
            def format(self, record):
                plain_levelname = record.levelname
                colour = COLORS.get(plain_levelname)
                if colour is None:
                    return super().format(record)

                # The same LogRecord object is handed to every handler, so
                # the coloured name is applied only for the duration of this
                # format() call and then restored; otherwise ANSI codes would
                # leak into other handlers' output.
                record.levelname = (
                    colour + plain_levelname + colorama.Style.RESET_ALL
                )
                try:
                    return super().format(record)
                finally:
                    record.levelname = plain_levelname

        # Create console handler with the custom formatter
        console = logging.StreamHandler(sys.stdout)
        console.setFormatter(ColoredFormatter("%(levelname)s: %(message)s"))

        # Add filter for font messages
        console.addFilter(FontMessageFilter())

        root_logger.addHandler(console)

    # Set higher levels for some third-party libraries to reduce noise
    logging.getLogger("matplotlib").setLevel(logging.WARNING)
    logging.getLogger("PIL").setLevel(logging.WARNING)

    # Log the configured level
    if log_level == "verbose":
        logging.debug("Verbose logging enabled")
    elif log_level == "quiet":
        logging.warning("Quiet logging mode - only showing warnings and errors")
|
balancr/data/__init__.py
ADDED
balancr/data/loader.py
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
import pandas as pd
|
2
|
+
import numpy as np
|
3
|
+
from typing import Tuple, Union, Optional
|
4
|
+
from pathlib import Path
|
5
|
+
|
6
|
+
|
7
|
+
class DataLoader:
    """Handles loading data from various file formats"""

    @staticmethod
    def load_data(
        file_path: Union[str, Path],
        target_column: str,
        feature_columns: Optional[list] = None,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Load data from various file formats (CSV, Excel)

        The format is chosen from the file extension (case-insensitive):
        ".csv" is read with ``pandas.read_csv``; ".xlsx" and ".xls" are read
        with ``pandas.read_excel``.

        Args:
            file_path: Path to the data file
            target_column: Name of the target column
            feature_columns: List of feature columns to use (optional).
                When omitted, every column except the target is used.

        Returns:
            X: Feature matrix
            y: Target vector

        Raises:
            ModuleNotFoundError: If the optional package pandas needs to read
                the Excel file cannot be imported.
            ValueError: If the file extension is unsupported, the target
                column is absent, or any requested feature column is absent.
        """
        file_path = Path(file_path)
        suffix = file_path.suffix.lower()

        if suffix == ".csv":
            data = pd.read_csv(file_path)
        elif suffix in (".xlsx", ".xls"):
            try:
                data = pd.read_excel(file_path)
            except ImportError as err:
                # pandas reports a missing optional engine as ImportError,
                # which `except ModuleNotFoundError` does not catch.
                # ModuleNotFoundError is still raised so callers catching
                # either type keep working. pandas reads legacy .xls files
                # with xlrd and .xlsx files with openpyxl.
                package = "xlrd" if suffix == ".xls" else "openpyxl"
                raise ModuleNotFoundError(
                    f"The {package} package is required to read Excel files. "
                    f"Please install it using: pip install {package}"
                ) from err
        else:
            raise ValueError(f"Unsupported file format: {file_path.suffix}")

        # Extract target variable
        if target_column not in data.columns:
            raise ValueError(f"Target column '{target_column}' not found in data")
        y = data[target_column].values

        # Extract features
        if feature_columns is None:
            # Use all columns except target
            feature_columns = [col for col in data.columns if col != target_column]
        else:
            # Normalise to a list: a tuple would be interpreted by pandas as
            # a single column key rather than a selection of columns.
            feature_columns = list(feature_columns)

            # Verify all specified feature columns exist
            missing_cols = [col for col in feature_columns if col not in data.columns]
            if missing_cols:
                raise ValueError(f"Feature columns not found: {missing_cols}")

        X = data[feature_columns].values
        return X, y
|