balancr 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
balancr/__init__.py ADDED
@@ -0,0 +1,13 @@
1
+ # src/balancr/__init__.py
2
+ # flake8: noqa
3
+
4
+ from .base import BaseBalancer
5
+
6
+ from .technique_registry import TechniqueRegistry
7
+
8
+ from .classifier_registry import ClassifierRegistry
9
+
10
+ from .imbalance_analyser import (
11
+ BalancingFramework,
12
+ format_time,
13
+ )
balancr/base.py ADDED
@@ -0,0 +1,14 @@
1
+ from abc import ABC, abstractmethod
2
+ import numpy as np
3
+
4
+
5
class BaseBalancer(ABC):
    """Abstract parent class shared by all balancing techniques.

    Concrete techniques subclass this and implement :meth:`balance`.
    Because ``balance`` is abstract, ``BaseBalancer`` itself cannot be
    instantiated.
    """

    def __init__(self):
        # Record the concrete subclass name on the instance.
        self.name = type(self).__name__

    @abstractmethod
    def balance(self, X: np.ndarray, y: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        """Balance the dataset.

        Args:
            X: Input array (presumably the feature matrix -- confirm with
                concrete implementations).
            y: Input array (presumably the target labels -- confirm with
                concrete implementations).

        Returns:
            A ``(X, y)`` tuple of arrays, per the annotated return type.
        """
balancr/classifier_registry.py ADDED
@@ -0,0 +1,300 @@
1
+ from pathlib import Path
2
+ from typing import Dict, Type, Optional, List
3
+ import importlib
4
+ import inspect
5
+ import logging
6
+ import json
7
+ from sklearn.base import BaseEstimator
8
+
9
+
10
class ClassifierRegistry:
    """Registry for managing classification algorithms from various sources.

    Two sources are supported:

    * scikit-learn estimators discovered by scanning ``SKLEARN_MODULES``;
    * custom classifiers, either registered at runtime through
      :meth:`register_custom_classifier` or loaded from
      ``~/.balancr/custom_classifiers/classifiers_metadata.json``.
    """

    # List of scikit-learn modules where we'll look for classifiers
    SKLEARN_MODULES = [
        "sklearn.ensemble",
        "sklearn.linear_model",
        "sklearn.tree",
        "sklearn.svm",
        "sklearn.neighbors",
        "sklearn.naive_bayes",
        "sklearn.neural_network",
        "sklearn.discriminant_analysis",
    ]

    def __init__(self):
        # Storage for custom classifiers: registered name -> class
        self.custom_classifiers: Dict[str, Type[BaseEstimator]] = {}

        # Cache of sklearn classifiers, organised by short module name:
        # {"ensemble": {"RandomForestClassifier": ("sklearn.ensemble", cls)}}
        self._cached_sklearn_classifiers: Dict[str, Dict[str, tuple]] = {}

        # Find all available classifiers when initialised
        self._discover_sklearn_classifiers()

        self._load_custom_classifiers()

    def _discover_sklearn_classifiers(self) -> None:
        """Look through scikit-learn modules to find usable classifier classes.

        Every public class that subclasses ``BaseEstimator`` and exposes both
        ``fit`` and ``predict`` is cached under its short module name.
        Modules that cannot be imported are skipped with a warning.

        NOTE(review): the check is duck-typed, so any estimator exposing
        ``fit``/``predict`` in these modules is accepted, not only
        classifiers.
        """
        for module_path in self.SKLEARN_MODULES:
            try:
                module = importlib.import_module(module_path)

                # Just the module name (e.g. 'ensemble' from 'sklearn.ensemble')
                module_name = module_path.split(".")[-1]
                module_cache = self._cached_sklearn_classifiers.setdefault(
                    module_name, {}
                )

                for name, obj in inspect.getmembers(module, inspect.isclass):
                    # Skip abstract base classes and internal classes
                    if name.startswith(("Base", "_")):
                        continue

                    if (
                        hasattr(obj, "fit")
                        and hasattr(obj, "predict")
                        and issubclass(obj, BaseEstimator)
                    ):
                        module_cache[name] = (module_path, obj)

            except ImportError as e:
                logging.warning(f"Couldn't import {module_path}: {str(e)}")

    def _lookup_sklearn_classifier(self, name: str, module_name: Optional[str] = None):
        """Return the cached scikit-learn class called ``name``, or None.

        Args:
            name: Exact class name to look for.
            module_name: If given, only that short module name is searched;
                otherwise every cached module is searched in cache order.
        """
        cache = self._cached_sklearn_classifiers
        if module_name is not None:
            candidates = [cache.get(module_name, {})]
        else:
            candidates = cache.values()

        for module_dict in candidates:
            entry = module_dict.get(name)
            if entry is not None:
                _, classifier_class = entry
                return classifier_class
        return None

    @staticmethod
    def _extract_base_name(classifier_name: str) -> Optional[str]:
        """Return the part of ``classifier_name`` before a ``_``/``-`` suffix.

        ``_`` is tried before ``-``; the first delimiter that yields a
        non-empty prefix wins. Returns None when no such prefix exists.
        """
        for delimiter in ("_", "-"):
            base, sep, _ = classifier_name.partition(delimiter)
            if sep and base:
                return base
        return None

    def get_classifier_class(
        self, classifier_name: str, module_name: Optional[str] = None
    ) -> "Optional[Type[BaseEstimator]]":
        """
        Find a classifier class by its name, handling suffixed variations.

        Lookup order:
            1. exact name among custom classifiers;
            2. exact name among cached scikit-learn classifiers;
            3. base name (text before the first ``_`` or ``-``) among custom
               classifiers, then among scikit-learn classifiers;
            4. steps 2 and 3 (scikit-learn only) again after re-running
               discovery.

        Args:
            classifier_name: Name of the classifier (e.g., 'RandomForestClassifier')
            module_name: Optional module to look in (e.g., 'ensemble', 'linear_model').
                Only restricts the scikit-learn lookups.

        Returns:
            The classifier class if found, None otherwise
        """
        # Custom classifiers take precedence over scikit-learn ones
        if classifier_name in self.custom_classifiers:
            return self.custom_classifiers[classifier_name]

        found = self._lookup_sklearn_classifier(classifier_name, module_name)
        if found is not None:
            return found

        # No exact match: fall back to the base name of a suffixed variation
        base_name = self._extract_base_name(classifier_name)
        if base_name:
            if base_name in self.custom_classifiers:
                return self.custom_classifiers[base_name]

            found = self._lookup_sklearn_classifier(base_name, module_name)
            if found is not None:
                return found

        # Still not found: refresh the cache and retry the sklearn lookups
        self._discover_sklearn_classifiers()

        found = self._lookup_sklearn_classifier(classifier_name, module_name)
        if found is not None:
            return found

        if base_name:
            return self._lookup_sklearn_classifier(base_name, module_name)

        return None

    def list_available_classifiers(self) -> Dict[str, Dict[str, List[str]]]:
        """
        Get a hierarchical list of all available classifiers.

        Returns:
            Dictionary organised by source -> module -> classifier names.
            Custom classifiers, when present, are listed under
            ``result["custom"]["general"]``.
        """
        # Refresh cache before listing
        self._discover_sklearn_classifiers()

        result = {"custom": {}, "sklearn": self._get_sklearn_classifiers_by_module()}

        # Add custom classifiers if there are any
        if self.custom_classifiers:
            result["custom"] = {"general": list(self.custom_classifiers.keys())}

        return result

    def _get_sklearn_classifiers_by_module(self) -> Dict[str, List[str]]:
        """Organise scikit-learn classifiers by their module for a cleaner display.

        Modules whose cache is empty are omitted.
        """
        return {
            module_name: list(classifiers)
            for module_name, classifiers in self._cached_sklearn_classifiers.items()
            if classifiers
        }

    def register_custom_classifier(
        self, name: str, classifier_class: "Type[BaseEstimator]"
    ) -> None:
        """
        Register a custom classifier for use in the framework.

        An existing registration under the same name is overwritten.

        Args:
            name: Name to register the classifier under
            classifier_class: The classifier class itself

        Raises:
            TypeError: If the classifier doesn't meet requirements
            ValueError: If the name is invalid
        """
        if not isinstance(name, str) or not name.strip():
            raise ValueError("Classifier name must be a non-empty string")

        if classifier_class is None:
            raise TypeError("Classifier class cannot be None")

        if not isinstance(classifier_class, type) or not issubclass(
            classifier_class, BaseEstimator
        ):
            raise TypeError(
                "Classifier class must inherit from sklearn.base.BaseEstimator"
            )

        # Make sure it has the required methods
        if not hasattr(classifier_class, "fit") or not hasattr(
            classifier_class, "predict"
        ):
            raise TypeError(
                "Classifier class must implement 'fit' and 'predict' methods"
            )

        self.custom_classifiers[name] = classifier_class

    def _load_custom_classifiers(self) -> None:
        """Load registered custom classifiers from the custom classifiers directory.

        Reads ``~/.balancr/custom_classifiers/classifiers_metadata.json``,
        which maps a classifier name to ``{"file": ..., "class_name": ...}``,
        imports each listed file and stores the named class in
        ``self.custom_classifiers``. Loading is best-effort: every failure is
        logged as a warning and never raised, and a bad entry does not
        prevent the remaining entries from loading.

        NOTE: this executes the Python files listed in the metadata, so the
        directory must only contain trusted code.
        """
        # `import importlib` alone does not guarantee that the `util`
        # submodule is bound, so import it explicitly before using it.
        import importlib.util

        custom_dir = Path.home() / ".balancr" / "custom_classifiers"
        metadata_file = custom_dir / "classifiers_metadata.json"
        # Also covers a missing directory: the file cannot exist without it
        if not metadata_file.exists():
            return

        try:
            with open(metadata_file, "r") as f:
                metadata = json.load(f)

            for classifier_name, info in metadata.items():
                try:
                    # Read inside the per-entry handler so that a malformed
                    # entry is skipped instead of aborting the whole load
                    file_path = Path(info["file"])
                    class_name = info["class_name"]

                    if not file_path.exists():
                        logging.warning(
                            f"Custom classifier file not found: {file_path}"
                        )
                        continue

                    # Import the module dynamically
                    spec = importlib.util.spec_from_file_location(
                        file_path.stem, file_path
                    )
                    if spec is None or spec.loader is None:
                        logging.warning(f"Could not load module from {file_path}")
                        continue

                    module = importlib.util.module_from_spec(spec)
                    spec.loader.exec_module(module)

                    # Find the specific class; it must look like a classifier
                    candidate = getattr(module, class_name, None)
                    if (
                        inspect.isclass(candidate)
                        and hasattr(candidate, "fit")
                        and hasattr(candidate, "predict")
                    ):
                        self.custom_classifiers[classifier_name] = candidate
                        logging.debug(f"Loaded custom classifier: {classifier_name}")
                    else:
                        logging.warning(f"Class {class_name} not found in {file_path}")

                except Exception as e:
                    # User-supplied modules can raise anything on import
                    logging.warning(
                        f"Error loading custom classifier {classifier_name}: {e}"
                    )

        except Exception as e:
            # Unreadable or invalid metadata must not break registry creation
            logging.warning(f"Error loading custom classifiers metadata: {e}")
File without changes