balancr 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- balancr/__init__.py +13 -0
- balancr/base.py +14 -0
- balancr/classifier_registry.py +300 -0
- balancr/cli/__init__.py +0 -0
- balancr/cli/commands.py +1838 -0
- balancr/cli/config.py +165 -0
- balancr/cli/main.py +778 -0
- balancr/cli/utils.py +101 -0
- balancr/data/__init__.py +5 -0
- balancr/data/loader.py +59 -0
- balancr/data/preprocessor.py +556 -0
- balancr/evaluation/__init__.py +19 -0
- balancr/evaluation/metrics.py +442 -0
- balancr/evaluation/visualisation.py +660 -0
- balancr/imbalance_analyser.py +677 -0
- balancr/technique_registry.py +284 -0
- balancr/techniques/__init__.py +4 -0
- balancr/techniques/custom/__init__.py +0 -0
- balancr/techniques/custom/example_custom_technique.py +27 -0
- balancr-0.1.0.dist-info/LICENSE +21 -0
- balancr-0.1.0.dist-info/METADATA +536 -0
- balancr-0.1.0.dist-info/RECORD +25 -0
- balancr-0.1.0.dist-info/WHEEL +5 -0
- balancr-0.1.0.dist-info/entry_points.txt +2 -0
- balancr-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,284 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
from typing import Dict, Type, Optional, Any
|
3
|
+
import importlib
|
4
|
+
import inspect
|
5
|
+
import logging
|
6
|
+
import json
|
7
|
+
from .base import BaseBalancer
|
8
|
+
|
9
|
+
|
10
|
+
class TechniqueRegistry:
    """Registry for managing balancing techniques from various sources.

    Two sources are supported:

    * samplers discovered in the imblearn modules listed in
      ``IMBLEARN_MODULES`` (any class exposing ``fit_resample``), and
    * custom ``BaseBalancer`` subclasses, either registered at runtime via
      ``register_custom_technique`` or loaded from the metadata file in the
      user's ``~/.balancr/custom_techniques`` directory.

    Technique names may carry a variation suffix separated by ``_`` or ``-``
    (for example ``SMOTE_v2``); lookups fall back to the part before the
    delimiter when the full name is not registered.
    """

    # Define the modules to check for techniques
    IMBLEARN_MODULES = [
        "imblearn.over_sampling",
        "imblearn.under_sampling",
        "imblearn.combine",
    ]

    # Delimiters separating a base technique name from a variation suffix,
    # tried in this order.
    _SUFFIX_DELIMITERS = ("_", "-")

    def __init__(self):
        """Create the registry and populate it from imblearn and user files."""
        self.custom_techniques: Dict[str, Type[BaseBalancer]] = {}
        # Maps class name -> (module path, class object)
        self._cached_imblearn_techniques: Dict[str, tuple] = {}
        self._discover_imblearn_techniques()
        self._load_custom_techniques()

    def _discover_imblearn_techniques(self) -> None:
        """Dynamically discover all available techniques in imblearn.

        Every class found in the ``IMBLEARN_MODULES`` modules that has a
        ``fit_resample`` attribute is cached under its class name. A module
        that cannot be imported is logged and skipped.
        """
        for module_path in self.IMBLEARN_MODULES:
            try:
                module = importlib.import_module(module_path)
            except ImportError as e:
                logging.warning(f"Could not import {module_path}: {str(e)}")
                continue

            for name, obj in inspect.getmembers(module, inspect.isclass):
                # A sampler is recognised by its fit_resample method
                if hasattr(obj, "fit_resample"):
                    self._cached_imblearn_techniques[name] = (module_path, obj)

    @classmethod
    def _base_name_candidates(cls, technique_name: str) -> list:
        """Return the possible base names of a suffixed technique name.

        For each delimiter in ``_SUFFIX_DELIMITERS`` that occurs in the name,
        the non-empty text before its first occurrence is a candidate.
        Candidates keep the delimiter order and contain no duplicates.
        """
        if not isinstance(technique_name, str):
            return []

        candidates = []
        for delimiter in cls._SUFFIX_DELIMITERS:
            base, found, _ = technique_name.partition(delimiter)
            if found and base and base not in candidates:
                candidates.append(base)
        return candidates

    def _find_registered_class(self, technique_name: str) -> Optional[tuple]:
        """Locate the raw class registered for a name or one of its base names.

        The exact name is tried first, then each base-name candidate. For
        every name, custom techniques take precedence over imblearn ones.

        Returns:
            ``(technique_class, is_imblearn)`` for the first match, or None
            when nothing matches.
        """
        names = [technique_name, *self._base_name_candidates(technique_name)]
        for candidate in names:
            if candidate in self.custom_techniques:
                return self.custom_techniques[candidate], False
            if candidate in self._cached_imblearn_techniques:
                _, technique_class = self._cached_imblearn_techniques[candidate]
                return technique_class, True
        return None

    def get_technique_class(self, technique_name: str) -> Optional[Type[BaseBalancer]]:
        """Get the technique class by name, handling suffixed variations.

        Args:
            technique_name: Registered name, optionally followed by a
                ``_``/``-`` separated suffix

        Returns:
            The custom technique class as registered, or a ``BaseBalancer``
            wrapper around the matching imblearn sampler; None if the name
            cannot be resolved.
        """
        match = self._find_registered_class(technique_name)
        if match is None:
            # Not found: refresh the imblearn cache once and retry
            self._discover_imblearn_techniques()
            match = self._find_registered_class(technique_name)
        if match is None:
            return None

        technique_class, is_imblearn = match
        if is_imblearn:
            return self._wrap_imblearn_technique(technique_class)
        return technique_class

    def get_technique_default_params(self, technique_name: str) -> Dict[str, Any]:
        """
        Extract default parameters from a technique, handling suffixed variations.

        Args:
            technique_name: Name of the technique to extract parameters from

        Returns:
            Dictionary of parameter names and their default values; empty if
            the technique is unknown (a warning is logged in that case)
        """
        match = self._find_registered_class(technique_name)
        if match is None:
            logging.warning(
                f"Technique '{technique_name}' not found. Cannot extract parameters."
            )
            return {}

        technique_class, _ = match
        return self._extract_params_from_class(technique_class)

    def _extract_params_from_class(self, cls) -> Dict[str, Any]:
        """
        Extract default parameters from a class's __init__ method.

        ``self`` and variadic parameters (``*args`` / ``**kwargs``) are
        skipped: the latter have no default and would otherwise show up as
        bogus entries, notably for classes that inherit ``object.__init__``.

        Args:
            cls: The class to extract parameters from

        Returns:
            Dictionary of parameter names and their default values. Defaults
            that are not None, numbers, strings, booleans, lists or dicts are
            converted with ``str``; parameters without a default map to None.
        """
        params = {}
        variadic_kinds = (
            inspect.Parameter.VAR_POSITIONAL,
            inspect.Parameter.VAR_KEYWORD,
        )

        try:
            sig = inspect.signature(cls.__init__)

            for name, param in sig.parameters.items():
                if name == "self" or param.kind in variadic_kinds:
                    continue

                default = param.default
                if default is inspect.Parameter.empty:
                    # For parameters without defaults, use None
                    params[name] = None
                elif default is None or isinstance(
                    default, (bool, int, float, str, list, dict)
                ):
                    # Directly representable in JSON (None becomes null)
                    params[name] = default
                else:
                    # Convert non-JSON-serialisable defaults to string representation
                    params[name] = str(default)

        except Exception as e:
            # Best effort: return whatever was collected before the failure
            logging.warning(f"Error extracting parameters from {cls.__name__}: {e}")

        return params

    def list_available_techniques(self) -> Dict[str, list]:
        """List all available techniques grouped by source.

        Returns:
            Dictionary with the keys ``"custom"`` and ``"imblearn"``, each
            mapping to a list of technique names
        """
        # Rediscover techniques in case new ones were added
        self._discover_imblearn_techniques()

        return {
            "custom": list(self.custom_techniques.keys()),
            "imblearn": list(self._cached_imblearn_techniques.keys()),
        }

    def register_custom_technique(
        self, name: str, technique_class: Type[BaseBalancer]
    ) -> None:
        """
        Register a custom balancing technique.

        Args:
            name: Name of the technique
            technique_class: Class implementing the balancing technique

        Raises:
            TypeError: If technique_class is None or doesn't inherit from BaseBalancer
            ValueError: If name is empty or not a string
        """
        if not isinstance(name, str) or not name.strip():
            raise ValueError("Technique name must be a non-empty string")

        if technique_class is None:
            raise TypeError("Technique class cannot be None")

        if not isinstance(technique_class, type) or not issubclass(
            technique_class, BaseBalancer
        ):
            raise TypeError("Technique class must inherit from BaseBalancer")

        self.custom_techniques[name] = technique_class

    def _load_custom_techniques(self) -> None:
        """Load registered custom techniques from the custom techniques directory.

        Reads ``~/.balancr/custom_techniques/techniques_metadata.json`` and
        loads every entry independently, so a broken entry only produces a
        warning and does not prevent the remaining techniques from loading.
        Does nothing when the directory or the metadata file is missing.
        """
        custom_dir = Path.home() / ".balancr" / "custom_techniques"
        metadata_file = custom_dir / "techniques_metadata.json"
        if not custom_dir.exists() or not metadata_file.exists():
            return

        try:
            with open(metadata_file, "r", encoding="utf-8") as f:
                metadata = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers both JSON syntax and decoding errors
            logging.warning(f"Error loading custom techniques metadata: {e}")
            return

        if not isinstance(metadata, dict):
            logging.warning(
                "Error loading custom techniques metadata: "
                "expected a JSON object mapping technique names to entries"
            )
            return

        for technique_name, info in metadata.items():
            try:
                self._load_custom_technique(technique_name, info)
            except Exception as e:
                # User code may fail in arbitrary ways; keep loading the rest
                logging.warning(
                    f"Error loading custom technique {technique_name}: {e}"
                )

    def _load_custom_technique(self, technique_name: str, info: Dict[str, Any]) -> None:
        """Import one custom technique described by a metadata entry.

        Args:
            technique_name: Name under which the technique is registered
            info: Metadata entry providing ``"file"`` (path of the Python
                file) and ``"class_name"`` (class to load from that file)

        Raises:
            Exception: Whatever the metadata access or the executed module
                raises; the caller logs it and moves on.
        """
        # `import importlib` alone does not guarantee that the `util`
        # submodule is loaded, so import it explicitly where it is used.
        import importlib.util

        file_path = Path(info["file"])
        class_name = info["class_name"]

        if not file_path.exists():
            logging.warning(f"Custom technique file not found: {file_path}")
            return

        spec = importlib.util.spec_from_file_location(file_path.stem, file_path)
        if spec is None or spec.loader is None:
            logging.warning(f"Could not load module from {file_path}")
            return

        # NOTE: this executes the file's top-level code; the path comes from
        # the user's own metadata file.
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)

        candidate = getattr(module, class_name, None)
        if inspect.isclass(candidate) and issubclass(candidate, BaseBalancer):
            self.custom_techniques[technique_name] = candidate
            logging.debug(f"Loaded custom technique: {technique_name}")
        else:
            logging.warning(f"Class {class_name} not found in {file_path}")

    def _wrap_imblearn_technique(self, technique_class: type) -> Type[BaseBalancer]:
        """Wrap imblearn technique to conform to our BaseBalancer interface.

        Args:
            technique_class: Sampler class providing ``fit_resample``

        Returns:
            A new ``BaseBalancer`` subclass whose constructor forwards its
            keyword arguments to ``technique_class`` and whose ``balance``
            delegates to the sampler's ``fit_resample``
        """

        class WrappedTechnique(BaseBalancer):
            def __init__(self, **kwargs):
                super().__init__()
                self.technique = technique_class(**kwargs)

            def balance(self, X, y):
                return self.technique.fit_resample(X, y)

        return WrappedTechnique
|
File without changes
|
@@ -0,0 +1,27 @@
|
|
1
|
+
from typing import Tuple
|
2
|
+
import numpy as np
|
3
|
+
from balancr.base import BaseBalancer
|
4
|
+
|
5
|
+
|
6
|
+
class ExampleCustomBalancer(BaseBalancer):
    """
    Minimal reference implementation of the BaseBalancer interface.

    It performs no resampling: the data handed to ``balance`` is returned
    untouched, which makes the class a convenient starting template for
    new custom techniques.
    """

    def __init__(self):
        """Set up the balancer; only the base-class initialiser runs."""
        super().__init__()

    def balance(self, X: np.ndarray, y: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Pass the dataset through without modifying it.

        Args:
            X: Feature matrix
            y: Target labels

        Returns:
            A tuple holding the same X and y objects that were passed in
        """
        untouched = (X, y)
        return untouched
|
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2025 Conor Doherty
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|