balancr 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
balancr/cli/utils.py ADDED
@@ -0,0 +1,101 @@
1
+ """
2
+ utils.py - Utility functions for the balancr CLI.
3
+
4
+ This module provides helper functions for logging, file handling,
5
+ and other common tasks used across the CLI application.
6
+ """
7
+
8
+ import logging
9
+ import sys
10
+
11
+
12
+ # Define a filter to exclude the matplotlib 'findfont' messages
13
class FontMessageFilter(logging.Filter):
    """Logging filter that suppresses records mentioning 'findfont'."""

    def filter(self, record):
        """Return False for records whose rendered message contains 'findfont'."""
        rendered = record.getMessage()
        if "findfont" in rendered:
            return False
        return True
19
+
20
+
21
def setup_logging(log_level="default"):
    """
    Set up logging with colour and the specified verbosity level.

    All handlers currently attached to the root logger are removed and a
    single stdout handler is installed in their place. When ``colorama`` is
    importable the level name is colourised; otherwise plain text is used.
    Records whose message contains 'findfont' are filtered out, and the
    ``matplotlib`` and ``PIL`` loggers are raised to WARNING.

    Args:
        log_level: "verbose", "default", or "quiet". Any other value is
            treated like "default" (INFO).
    """
    # Reset any existing handlers so repeated calls do not duplicate output
    root_logger = logging.getLogger()
    for handler in root_logger.handlers[:]:
        root_logger.removeHandler(handler)

    level_map = {
        "verbose": logging.DEBUG,
        "default": logging.INFO,
        "quiet": logging.WARNING,
    }

    # Get the log level, defaulting to INFO
    log_level_value = level_map.get(log_level, logging.INFO)

    # Root logger level
    root_logger.setLevel(log_level_value)

    log_format = "%(levelname)s: %(message)s"

    # Only the import itself can legitimately fail; keep the try minimal.
    try:
        import colorama
    except ImportError:
        # Fall back to standard logging if colorama is not available
        formatter = logging.Formatter(log_format)
    else:
        colorama.init()

        # Define colour codes
        COLORS = {
            "DEBUG": colorama.Fore.BLUE,
            "INFO": colorama.Fore.GREEN,
            "WARNING": colorama.Fore.YELLOW,
            "ERROR": colorama.Fore.RED,
            "CRITICAL": colorama.Fore.RED + colorama.Style.BRIGHT,
        }

        # Create custom formatter with colours
        class ColoredFormatter(logging.Formatter):
            def format(self, record):
                plain_levelname = record.levelname
                color = COLORS.get(plain_levelname)
                if color is not None:
                    record.levelname = (
                        color + plain_levelname + colorama.Style.RESET_ALL
                    )
                try:
                    return super().format(record)
                finally:
                    # The same LogRecord is handed to every handler; restore
                    # the level name so escape codes do not leak elsewhere.
                    record.levelname = plain_levelname

        formatter = ColoredFormatter(log_format)

    # Create console handler with the chosen formatter
    console = logging.StreamHandler(sys.stdout)
    console.setFormatter(formatter)

    # Add filter for font messages
    console.addFilter(FontMessageFilter())

    root_logger.addHandler(console)

    # Set higher levels for some third-party libraries to reduce noise
    logging.getLogger("matplotlib").setLevel(logging.WARNING)
    logging.getLogger("PIL").setLevel(logging.WARNING)

    # Log the configured level
    if log_level == "verbose":
        logging.debug("Verbose logging enabled")
    elif log_level == "quiet":
        logging.warning("Quiet logging mode - only showing warnings and errors")
@@ -0,0 +1,5 @@
1
+ # src/balancr/data/__init__.py
2
+ # flake8: noqa
3
+
4
+ from .loader import DataLoader
5
+ from .preprocessor import DataPreprocessor
balancr/data/loader.py ADDED
@@ -0,0 +1,59 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ from typing import Tuple, Union, Optional
4
+ from pathlib import Path
5
+
6
+
7
class DataLoader:
    """Handles loading data from various file formats"""

    @staticmethod
    def load_data(
        file_path: Union[str, Path],
        target_column: str,
        feature_columns: Optional[list] = None,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Load data from various file formats (CSV, Excel)

        Args:
            file_path: Path to the data file (.csv, .xlsx or .xls)
            target_column: Name of the target column
            feature_columns: List of feature columns to use (optional).
                When omitted, every column except the target is used.

        Returns:
            X: Feature matrix
            y: Target vector

        Raises:
            ValueError: If the file extension is unsupported, the target
                column is absent, or a requested feature column is absent.
            ModuleNotFoundError: If the Excel engine needed for the file
                is not installed.
        """
        file_path = Path(file_path)
        suffix = file_path.suffix.lower()

        if suffix == ".csv":
            data = pd.read_csv(file_path)
        elif suffix in (".xlsx", ".xls"):
            try:
                data = pd.read_excel(file_path)
            except ImportError as err:
                # pandas reports a missing optional engine as ImportError
                # (ModuleNotFoundError is only a subclass of it), and the
                # engine differs by format: xlrd for .xls, openpyxl for .xlsx.
                package = "xlrd" if suffix == ".xls" else "openpyxl"
                raise ModuleNotFoundError(
                    f"The {package} package is required to read Excel files. "
                    f"Please install it using: pip install {package}"
                ) from err
        else:
            raise ValueError(f"Unsupported file format: {file_path.suffix}")

        # Extract target variable
        if target_column not in data.columns:
            raise ValueError(f"Target column '{target_column}' not found in data")
        y = data[target_column].values

        # Extract features
        if feature_columns is None:
            # Use all columns except target
            feature_columns = [col for col in data.columns if col != target_column]
        else:
            # Normalise to a list so a tuple is not treated as a single key
            feature_columns = list(feature_columns)
            # Verify all specified feature columns exist
            missing_cols = [col for col in feature_columns if col not in data.columns]
            if missing_cols:
                raise ValueError(f"Feature columns not found: {missing_cols}")

        X = data[feature_columns].values
        return X, y