balancr 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,284 @@
1
+ from pathlib import Path
2
+ from typing import Dict, Type, Optional, Any
3
+ import importlib
4
+ import inspect
5
+ import logging
6
+ import json
7
+ from .base import BaseBalancer
8
+
9
+
10
class TechniqueRegistry:
    """Registry for managing balancing techniques from various sources.

    Two sources are tracked:

    * ``custom_techniques`` -- classes registered through
      ``register_custom_technique`` or loaded from the metadata file under
      ``~/.balancr/custom_techniques``.
    * ``_cached_imblearn_techniques`` -- classes found in the modules listed in
      ``IMBLEARN_MODULES`` that expose a ``fit_resample`` attribute, stored as
      ``(module_path, class)`` tuples keyed by class name.
    """

    # Define the modules to check for techniques
    IMBLEARN_MODULES = [
        "imblearn.over_sampling",
        "imblearn.under_sampling",
        "imblearn.combine",
    ]

    # Delimiters separating a base technique name from a variation suffix,
    # tried in this order (e.g. "SMOTE_v2" or "SMOTE-v2" -> "SMOTE").
    _SUFFIX_DELIMITERS = ("_", "-")

    def __init__(self):
        """Create the registry, then discover imblearn and custom techniques."""
        self.custom_techniques: Dict[str, Type[BaseBalancer]] = {}
        self._cached_imblearn_techniques: Dict[str, tuple] = {}
        self._discover_imblearn_techniques()
        self._load_custom_techniques()

    def _discover_imblearn_techniques(self) -> None:
        """Dynamically discover all available techniques in imblearn.

        Every class in each ``IMBLEARN_MODULES`` entry that has a
        ``fit_resample`` attribute is cached under its class name. A module
        that cannot be imported is logged and skipped.
        """
        for module_path in self.IMBLEARN_MODULES:
            try:
                module = importlib.import_module(module_path)
                # Get all classes from the module
                for name, obj in inspect.getmembers(module, inspect.isclass):
                    # Check if it's a sampler (has fit_resample method)
                    if hasattr(obj, "fit_resample"):
                        self._cached_imblearn_techniques[name] = (module_path, obj)
            except ImportError as e:
                logging.warning(f"Could not import {module_path}: {str(e)}")

    @classmethod
    def _base_name_of(cls, technique_name: Any) -> Optional[str]:
        """Return the part of ``technique_name`` before its first suffix delimiter.

        Delimiters are tried in ``_SUFFIX_DELIMITERS`` order; the first one
        that is present and preceded by a non-empty prefix wins. Returns
        ``None`` when the name is not a string or has no usable prefix.
        """
        if not isinstance(technique_name, str):
            return None
        for delimiter in cls._SUFFIX_DELIMITERS:
            prefix, found, _ = technique_name.partition(delimiter)
            if found and prefix:
                return prefix
        return None

    def _resolve(self, technique_name: str) -> Optional[tuple]:
        """Find the unwrapped class registered for ``technique_name``.

        Lookup order: exact name (custom, then imblearn), then base name
        (custom, then imblearn). If nothing matches, imblearn discovery is
        re-run and the imblearn cache is checked once more.

        Returns:
            ``(is_custom, technique_class)`` or ``None`` when nothing matches.
        """
        base_name = self._base_name_of(technique_name)
        candidates = (technique_name,) if base_name is None else (technique_name, base_name)

        for candidate in candidates:
            if candidate in self.custom_techniques:
                return True, self.custom_techniques[candidate]
            if candidate in self._cached_imblearn_techniques:
                return False, self._cached_imblearn_techniques[candidate][1]

        # If still not found, try to discover new techniques (in case imblearn was updated)
        self._discover_imblearn_techniques()
        for candidate in candidates:
            if candidate in self._cached_imblearn_techniques:
                return False, self._cached_imblearn_techniques[candidate][1]

        return None

    def get_technique_class(self, technique_name: str) -> Optional["Type[BaseBalancer]"]:
        """Get the technique class by name, handling suffixed variations.

        Args:
            technique_name: Exact technique name, or a name with a ``_``/``-``
                suffix whose prefix is a known technique.

        Returns:
            The registered custom class as-is, an imblearn class wrapped by
            ``_wrap_imblearn_technique``, or ``None`` if nothing matches.
        """
        resolved = self._resolve(technique_name)
        if resolved is None:
            return None
        is_custom, technique_class = resolved
        if is_custom:
            return technique_class
        return self._wrap_imblearn_technique(technique_class)

    def get_technique_default_params(self, technique_name: str) -> Dict[str, Any]:
        """
        Extract default parameters from a technique, handling suffixed variations.

        Uses the same name resolution as ``get_technique_class`` so the two
        methods always agree on whether a technique exists. Parameters are
        read from the unwrapped class.

        Args:
            technique_name: Name of the technique to extract parameters from

        Returns:
            Dictionary of parameter names and their default values; empty if
            the technique is unknown.
        """
        resolved = self._resolve(technique_name)
        if resolved is None:
            logging.warning(
                f"Technique '{technique_name}' not found. Cannot extract parameters."
            )
            return {}
        return self._extract_params_from_class(resolved[1])

    def _extract_params_from_class(self, cls) -> Dict[str, Any]:
        """
        Extract default parameters from a class's __init__ method.

        ``self`` and variadic parameters (``*args`` / ``**kwargs``) are
        skipped because they are not named settings. Parameters without a
        default map to ``None``. Defaults that are not ``None`` or an
        int/float/str/bool/list/dict are stored as ``str(default)``.

        Args:
            cls: The class to extract parameters from

        Returns:
            Dictionary of parameter names and their default values. Any error
            while inspecting is logged and the values gathered so far are
            returned.
        """
        params: Dict[str, Any] = {}
        variadic_kinds = (
            inspect.Parameter.VAR_POSITIONAL,
            inspect.Parameter.VAR_KEYWORD,
        )

        try:
            # Get the signature of the __init__ method
            sig = inspect.signature(cls.__init__)

            for name, param in sig.parameters.items():
                if name == "self" or param.kind in variadic_kinds:
                    continue

                default = param.default
                if default is inspect.Parameter.empty or default is None:
                    # No default, or an explicit None (JSON null)
                    params[name] = None
                elif isinstance(default, (int, float, str, bool, list, dict)):
                    # Types that can be serialised to JSON
                    params[name] = default
                else:
                    # Convert non-JSON-serialisable defaults to string representation
                    params[name] = str(default)

        except Exception as e:
            # getattr keeps the log call itself from failing on objects
            # that have no __name__.
            logging.warning(
                f"Error extracting parameters from {getattr(cls, '__name__', cls)}: {e}"
            )

        return params

    def list_available_techniques(self) -> Dict[str, list]:
        """List all available techniques grouped by source.

        Returns:
            ``{"custom": [...], "imblearn": [...]}`` with technique names.
        """
        # Rediscover techniques in case new ones were added
        self._discover_imblearn_techniques()

        return {
            "custom": list(self.custom_techniques.keys()),
            "imblearn": list(self._cached_imblearn_techniques.keys()),
        }

    def register_custom_technique(
        self, name: str, technique_class: "Type[BaseBalancer]"
    ) -> None:
        """
        Register a custom balancing technique.

        Args:
            name: Name of the technique
            technique_class: Class implementing the balancing technique

        Raises:
            TypeError: If technique_class is None or doesn't inherit from BaseBalancer
            ValueError: If name is empty or not a string
        """
        if not isinstance(name, str) or not name.strip():
            raise ValueError("Technique name must be a non-empty string")

        if technique_class is None:
            raise TypeError("Technique class cannot be None")

        if not isinstance(technique_class, type) or not issubclass(
            technique_class, BaseBalancer
        ):
            raise TypeError("Technique class must inherit from BaseBalancer")
        self.custom_techniques[name] = technique_class

    def _load_custom_techniques(self) -> None:
        """Load registered custom techniques from the custom techniques directory.

        Reads ``~/.balancr/custom_techniques/techniques_metadata.json``, whose
        entries map a technique name to ``{"file": ..., "class_name": ...}``.
        Each entry is handled independently: a malformed or failing entry is
        logged and skipped without affecting the others.
        """
        # ``import importlib`` alone does not guarantee that the ``util``
        # submodule is loaded, so import it explicitly where it is used.
        import importlib.util

        custom_dir = Path.home() / ".balancr" / "custom_techniques"
        metadata_file = custom_dir / "techniques_metadata.json"
        # A missing directory implies a missing file, so one check covers both.
        if not metadata_file.exists():
            return

        try:
            with open(metadata_file, "r", encoding="utf-8") as f:
                metadata = json.load(f)
        except Exception as e:
            logging.warning(f"Error loading custom techniques metadata: {e}")
            return

        if not isinstance(metadata, dict):
            logging.warning(
                "Error loading custom techniques metadata: expected a JSON object"
            )
            return

        for technique_name, info in metadata.items():
            try:
                file_path = Path(info["file"])
                class_name = info["class_name"]

                if not file_path.exists():
                    logging.warning(f"Custom technique file not found: {file_path}")
                    continue

                # Import the module dynamically.
                # NOTE(review): this executes the registered file's top-level
                # code; the metadata file must only reference trusted sources.
                spec = importlib.util.spec_from_file_location(
                    file_path.stem, file_path
                )
                if spec is None or spec.loader is None:
                    logging.warning(f"Could not load module from {file_path}")
                    continue

                module = importlib.util.module_from_spec(spec)
                spec.loader.exec_module(module)

                # Find the specific class
                candidate = getattr(module, class_name, None)
                if inspect.isclass(candidate) and issubclass(candidate, BaseBalancer):
                    self.custom_techniques[technique_name] = candidate
                    logging.debug(f"Loaded custom technique: {technique_name}")
                else:
                    logging.warning(f"Class {class_name} not found in {file_path}")

            except Exception as e:
                logging.warning(
                    f"Error loading custom technique {technique_name}: {e}"
                )

    def _wrap_imblearn_technique(self, technique_class: type) -> "Type[BaseBalancer]":
        """Wrap imblearn technique to conform to our BaseBalancer interface.

        Args:
            technique_class: Class whose instances provide ``fit_resample``.

        Returns:
            A new ``BaseBalancer`` subclass (created on every call) whose
            constructor forwards keyword arguments to ``technique_class`` and
            whose ``balance(X, y)`` returns ``fit_resample(X, y)``.
        """

        class WrappedTechnique(BaseBalancer):
            def __init__(self, **kwargs):
                super().__init__()
                self.technique = technique_class(**kwargs)

            def balance(self, X, y):
                return self.technique.fit_resample(X, y)

        return WrappedTechnique
@@ -0,0 +1,4 @@
1
+ # src/balancr/techniques/__init__.py
2
+ # flake8: noqa
3
+
4
+ from .custom.example_custom_technique import ExampleCustomBalancer
File without changes
@@ -0,0 +1,27 @@
1
+ from typing import Tuple
2
+ import numpy as np
3
+ from balancr.base import BaseBalancer
4
+
5
+
6
class ExampleCustomBalancer(BaseBalancer):
    """
    Identity balancer: hands back exactly the data it was given.

    Intended as the smallest possible illustration of how to implement the
    BaseBalancer interface; it performs no resampling at all.
    """

    def __init__(self):
        """Run the base-class initialiser; this example keeps no state of its own."""
        super().__init__()

    def balance(self, X: np.ndarray, y: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Return the inputs untouched.

        Args:
            X: Feature matrix
            y: Target labels

        Returns:
            A ``(X, y)`` tuple holding the very same objects that were passed in
        """
        unchanged = (X, y)
        return unchanged
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Conor Doherty
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.