autoclass-lite 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autoclass_lite-0.1.0/PKG-INFO +214 -0
- autoclass_lite-0.1.0/README.md +200 -0
- autoclass_lite-0.1.0/autoclass_lite/__init__.py +4 -0
- autoclass_lite-0.1.0/autoclass_lite/automl/__init__.py +0 -0
- autoclass_lite-0.1.0/autoclass_lite/automl/factory.py +33 -0
- autoclass_lite-0.1.0/autoclass_lite/automl/observers.py +29 -0
- autoclass_lite-0.1.0/autoclass_lite/automl/orchestrator.py +317 -0
- autoclass_lite-0.1.0/autoclass_lite/cv/__init__.py +0 -0
- autoclass_lite-0.1.0/autoclass_lite/cv/splitter.py +40 -0
- autoclass_lite-0.1.0/autoclass_lite/cv/validator.py +33 -0
- autoclass_lite-0.1.0/autoclass_lite/metrics/__init__.py +0 -0
- autoclass_lite-0.1.0/autoclass_lite/metrics/classification.py +53 -0
- autoclass_lite-0.1.0/autoclass_lite/models/__init__.py +0 -0
- autoclass_lite-0.1.0/autoclass_lite/models/base.py +55 -0
- autoclass_lite-0.1.0/autoclass_lite/models/knn.py +52 -0
- autoclass_lite-0.1.0/autoclass_lite/models/logistic.py +153 -0
- autoclass_lite-0.1.0/autoclass_lite/models/naive_bayes.py +59 -0
- autoclass_lite-0.1.0/autoclass_lite/models/tree.py +99 -0
- autoclass_lite-0.1.0/autoclass_lite.egg-info/PKG-INFO +214 -0
- autoclass_lite-0.1.0/autoclass_lite.egg-info/SOURCES.txt +28 -0
- autoclass_lite-0.1.0/autoclass_lite.egg-info/dependency_links.txt +1 -0
- autoclass_lite-0.1.0/autoclass_lite.egg-info/requires.txt +4 -0
- autoclass_lite-0.1.0/autoclass_lite.egg-info/top_level.txt +1 -0
- autoclass_lite-0.1.0/pyproject.toml +20 -0
- autoclass_lite-0.1.0/setup.cfg +4 -0
- autoclass_lite-0.1.0/tests/test_automl.py +226 -0
- autoclass_lite-0.1.0/tests/test_cv.py +113 -0
- autoclass_lite-0.1.0/tests/test_factory.py +53 -0
- autoclass_lite-0.1.0/tests/test_metrics.py +58 -0
- autoclass_lite-0.1.0/tests/test_models.py +76 -0
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: autoclass-lite
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A lightweight AutoML library for classification, built from scratch.
|
|
5
|
+
Keywords: machine learning,automl,classification
|
|
6
|
+
Classifier: Programming Language :: Python :: 3
|
|
7
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
8
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
9
|
+
Requires-Python: >=3.10
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
Requires-Dist: numpy>=1.24
|
|
12
|
+
Provides-Extra: dev
|
|
13
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
14
|
+
|
|
15
|
+
# autoclass-lite
|
|
16
|
+
|
|
17
|
+
A lightweight AutoML library for classification, built from scratch using only NumPy. It trains multiple models with cross-validation, ranks them by performance, and returns the best one — all in a single `fit()` call.
|
|
18
|
+
|
|
19
|
+
## Installation
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
pip install -e .
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
```python
|
|
28
|
+
from autoclass_lite import SimpleAutoML, GridAutoML
|
|
29
|
+
|
|
30
|
+
# Train all models and get a ranked leaderboard
|
|
31
|
+
automl = SimpleAutoML()
|
|
32
|
+
automl.fit(X_train, y_train)
|
|
33
|
+
automl.summary()
|
|
34
|
+
|
|
35
|
+
# Predict using the best model
|
|
36
|
+
predictions = automl.predict(X_test)
|
|
37
|
+
|
|
38
|
+
# Grid search over hyperparameters
|
|
39
|
+
grid = GridAutoML()
|
|
40
|
+
grid.fit(X_train, y_train)
|
|
41
|
+
grid.summary()
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Features
|
|
45
|
+
|
|
46
|
+
- 4 classifiers implemented from scratch: KNN, Naive Bayes, Logistic Regression, Decision Tree
|
|
47
|
+
- K-Fold cross-validation with parallel fold evaluation
|
|
48
|
+
- Hyperparameter grid search with memoization cache (dynamic programming)
|
|
49
|
+
- Observer-based progress reporting
|
|
50
|
+
- Accepts NumPy arrays, Python lists, and Pandas DataFrames
|
|
51
|
+
|
|
52
|
+
## Usage Examples
|
|
53
|
+
|
|
54
|
+
### SimpleAutoML
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
from autoclass_lite import SimpleAutoML
|
|
58
|
+
|
|
59
|
+
automl = SimpleAutoML(
|
|
60
|
+
cv_splits=5, # number of cross-validation folds
|
|
61
|
+
metric="accuracy", # metric used to rank models
|
|
62
|
+
random_state=42
|
|
63
|
+
)
|
|
64
|
+
automl.fit(X_train, y_train)
|
|
65
|
+
automl.summary()
|
|
66
|
+
preds = automl.predict(X_test)
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### GridAutoML
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
from autoclass_lite import GridAutoML
|
|
73
|
+
|
|
74
|
+
param_grid = {
|
|
75
|
+
"logistic_regression": [
|
|
76
|
+
{"learning_rate": 0.01},
|
|
77
|
+
{"learning_rate": 0.1},
|
|
78
|
+
],
|
|
79
|
+
"knn": [{"k": 3}, {"k": 5}, {"k": 7}],
|
|
80
|
+
"decision_tree": [{"max_depth": 3}, {"max_depth": 5}],
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
grid = GridAutoML(param_grid=param_grid, metric="f1_score")
|
|
84
|
+
grid.fit(X_train, y_train)
|
|
85
|
+
grid.summary()
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Logistic Regression
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
from autoclass_lite.models.logistic import LogisticRegression
|
|
92
|
+
|
|
93
|
+
model = LogisticRegression(learning_rate=0.01, n_iterations=1000)
|
|
94
|
+
model.fit(X_train, y_train)
|
|
95
|
+
predictions = model.predict(X_test)
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### Metrics
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
from autoclass_lite.metrics.classification import accuracy, precision, recall, f1_score, evaluate
|
|
102
|
+
|
|
103
|
+
acc = accuracy(y_true, y_pred)
|
|
104
|
+
|
|
105
|
+
results = evaluate(y_true, y_pred, metrics=[accuracy, precision, recall, f1_score])
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### Observer (progress reporting)
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
from autoclass_lite.automl.observers import ConsoleObserver
|
|
112
|
+
|
|
113
|
+
automl = SimpleAutoML()
|
|
114
|
+
automl.add_observer(ConsoleObserver())
|
|
115
|
+
automl.fit(X_train, y_train)
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
## Project Structure
|
|
119
|
+
|
|
120
|
+
```
|
|
121
|
+
autoclass_lite/
|
|
122
|
+
├── __init__.py # Public API: SimpleAutoML, GridAutoML
|
|
123
|
+
├── automl/
|
|
124
|
+
│ ├── orchestrator.py # SimpleAutoML, GridAutoML (Facade pattern)
|
|
125
|
+
│ ├── factory.py # ModelFactory (Factory pattern)
|
|
126
|
+
│ └── observers.py # Observer, ConsoleObserver (Observer pattern)
|
|
127
|
+
├── models/
|
|
128
|
+
│ ├── base.py # BaseModel ABC (Strategy + Template Method)
|
|
129
|
+
│ ├── logistic.py # LogisticRegression
|
|
130
|
+
│ ├── knn.py # KNNClassifier
|
|
131
|
+
│ ├── naive_bayes.py # GaussianNaiveBayes
|
|
132
|
+
│ └── tree.py # DecisionTreeClassifier (recursive)
|
|
133
|
+
├── cv/
|
|
134
|
+
│ ├── splitter.py # KFoldSplitter
|
|
135
|
+
│ └── validator.py # CrossValidator (ThreadPoolExecutor)
|
|
136
|
+
└── metrics/
|
|
137
|
+
└── classification.py # Pure functions + evaluate() HOF
|
|
138
|
+
tests/ # pytest test suite
|
|
139
|
+
pyproject.toml
|
|
140
|
+
README.md
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
---
|
|
144
|
+
|
|
145
|
+
## Learning Outcomes
|
|
146
|
+
|
|
147
|
+
### 1. Object-Oriented Programming (OOP)
|
|
148
|
+
|
|
149
|
+
- **Encapsulation:** All internal state uses private attributes (`_weights`, `_biases`, `_cache`, etc.). Users interact only through the public API.
|
|
150
|
+
- **Abstract Base Class:** `BaseModel` in [autoclass_lite/models/base.py](autoclass_lite/models/base.py) defines the interface all classifiers must implement.
|
|
151
|
+
- **Inheritance:** `KNNClassifier`, `GaussianNaiveBayes`, `LogisticRegression`, `DecisionTreeClassifier` all extend `BaseModel`. `GridAutoML` extends `SimpleAutoML`.
|
|
152
|
+
- **Polymorphism:** `CrossValidator` calls `.fit()` and `.predict()` on any `BaseModel` subclass without knowing its concrete type.
|
|
153
|
+
|
|
154
|
+
### 2. Functional Programming
|
|
155
|
+
|
|
156
|
+
- **Pure functions:** `accuracy`, `precision`, `recall`, `f1_score` in [autoclass_lite/metrics/classification.py](autoclass_lite/metrics/classification.py) take arrays and return a value with no side effects.
|
|
157
|
+
- **Higher-order function:** `evaluate(y_true, y_pred, metrics: list)` takes a list of metric functions and applies them, returning a results dict.
|
|
158
|
+
- **Lambdas as data:** `KNNClassifier._DISTANCES` is a class-level dict mapping distance names to lambda functions, selected at runtime.
|
|
159
|
+
|
|
160
|
+
### 3. Concurrency
|
|
161
|
+
|
|
162
|
+
`CrossValidator` in [autoclass_lite/cv/validator.py](autoclass_lite/cv/validator.py) uses `ThreadPoolExecutor` to evaluate all K folds in parallel. Each fold gets a `copy.deepcopy` of the model to ensure thread safety.
|
|
163
|
+
|
|
164
|
+
```python
|
|
165
|
+
with ThreadPoolExecutor() as executor:
|
|
166
|
+
futures = [executor.submit(self._evaluate_fold, ...) for fold in folds]
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
### 4. Recursion / Dynamic Programming
|
|
170
|
+
|
|
171
|
+
- **Recursion:** `DecisionTreeClassifier` in [autoclass_lite/models/tree.py](autoclass_lite/models/tree.py) uses `_grow_tree()` to recursively split nodes and `_traverse()` to recursively walk the tree during prediction.
|
|
172
|
+
- **Dynamic Programming (Memoization):** `GridAutoML` in [autoclass_lite/automl/orchestrator.py](autoclass_lite/automl/orchestrator.py) maintains a `_cache` dict. Every `(model_name, params)` combination is evaluated at most once — repeated configurations are looked up from cache instead of re-running cross-validation.
|
|
173
|
+
|
|
174
|
+
```python
|
|
175
|
+
cache_key = (name, frozenset(params.items()))
|
|
176
|
+
if cache_key in self._cache:
|
|
177
|
+
scores = self._cache[cache_key]
|
|
178
|
+
else:
|
|
179
|
+
scores = validator.validate(model, X, y)
|
|
180
|
+
self._cache[cache_key] = scores
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
### 5. SOLID Principles
|
|
184
|
+
|
|
185
|
+
- **Single Responsibility:** Each class does exactly one thing. `KFoldSplitter` only splits indices. `CrossValidator` only runs folds. `ModelFactory` only creates models.
|
|
186
|
+
- **Open/Closed:** New models can be added by extending `BaseModel` and registering in `ModelFactory` without touching existing code. `SimpleAutoML.DEFAULT_CONFIGS` allows adding new default configurations without modifying `fit()`.
|
|
187
|
+
- **Liskov Substitution:** Any `BaseModel` subclass can replace another anywhere in the system.
|
|
188
|
+
- **Interface Segregation:** `BaseModel` only requires `fit` and `predict`. Optional `get_params` has a default implementation in the base class.
|
|
189
|
+
- **Dependency Inversion:** `CrossValidator` depends on `BaseModel` (abstraction), not any concrete classifier.
|
|
190
|
+
|
|
191
|
+
### 6. Architectural & Design Patterns
|
|
192
|
+
|
|
193
|
+
**Architecture:** Layered pipeline — models → cross-validation → metrics → AutoML orchestration. Each layer depends only on the layer below it.
|
|
194
|
+
|
|
195
|
+
**Factory Pattern** — [autoclass_lite/automl/factory.py](autoclass_lite/automl/factory.py)
|
|
196
|
+
|
|
197
|
+
`ModelFactory.create(name, **kwargs)` instantiates any registered model by name. Adding a new model requires only one line in the registry dict.
|
|
198
|
+
|
|
199
|
+
**Strategy Pattern** — [autoclass_lite/models/base.py](autoclass_lite/models/base.py), [autoclass_lite/models/knn.py](autoclass_lite/models/knn.py)
|
|
200
|
+
|
|
201
|
+
- `BaseModel` defines the classifier interface; each model is a concrete strategy swappable at runtime by `ModelFactory`.
|
|
202
|
+
- `KNNClassifier._DISTANCES` selects the distance function at construction time — `euclidean` or `manhattan` — without any `if/else` in the prediction loop.
|
|
203
|
+
|
|
204
|
+
**Observer Pattern** — [autoclass_lite/automl/observers.py](autoclass_lite/automl/observers.py)
|
|
205
|
+
|
|
206
|
+
`SimpleAutoML` notifies all registered observers on `fit_start`, `model_done`, and `fit_done` events without coupling to any specific logger or UI.
|
|
207
|
+
|
|
208
|
+
**Template Method Pattern** — [autoclass_lite/models/base.py](autoclass_lite/models/base.py)
|
|
209
|
+
|
|
210
|
+
`BaseModel.fit_predict()` defines the skeleton (fit then predict). Subclasses implement the steps; the sequence is inherited and never duplicated.
|
|
211
|
+
|
|
212
|
+
**Facade Pattern** — [autoclass_lite/automl/orchestrator.py](autoclass_lite/automl/orchestrator.py)
|
|
213
|
+
|
|
214
|
+
`SimpleAutoML` exposes a single `fit() / predict() / summary()` interface that internally coordinates model creation, cross-validation, and ranking.
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
# autoclass-lite
|
|
2
|
+
|
|
3
|
+
A lightweight AutoML library for classification, built from scratch using only NumPy. It trains multiple models with cross-validation, ranks them by performance, and returns the best one — all in a single `fit()` call.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install -e .
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Quick Start
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
from autoclass_lite import SimpleAutoML, GridAutoML
|
|
15
|
+
|
|
16
|
+
# Train all models and get a ranked leaderboard
|
|
17
|
+
automl = SimpleAutoML()
|
|
18
|
+
automl.fit(X_train, y_train)
|
|
19
|
+
automl.summary()
|
|
20
|
+
|
|
21
|
+
# Predict using the best model
|
|
22
|
+
predictions = automl.predict(X_test)
|
|
23
|
+
|
|
24
|
+
# Grid search over hyperparameters
|
|
25
|
+
grid = GridAutoML()
|
|
26
|
+
grid.fit(X_train, y_train)
|
|
27
|
+
grid.summary()
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Features
|
|
31
|
+
|
|
32
|
+
- 4 classifiers implemented from scratch: KNN, Naive Bayes, Logistic Regression, Decision Tree
|
|
33
|
+
- K-Fold cross-validation with parallel fold evaluation
|
|
34
|
+
- Hyperparameter grid search with memoization cache (dynamic programming)
|
|
35
|
+
- Observer-based progress reporting
|
|
36
|
+
- Accepts NumPy arrays, Python lists, and Pandas DataFrames
|
|
37
|
+
|
|
38
|
+
## Usage Examples
|
|
39
|
+
|
|
40
|
+
### SimpleAutoML
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
from autoclass_lite import SimpleAutoML
|
|
44
|
+
|
|
45
|
+
automl = SimpleAutoML(
|
|
46
|
+
cv_splits=5, # number of cross-validation folds
|
|
47
|
+
metric="accuracy", # metric used to rank models
|
|
48
|
+
random_state=42
|
|
49
|
+
)
|
|
50
|
+
automl.fit(X_train, y_train)
|
|
51
|
+
automl.summary()
|
|
52
|
+
preds = automl.predict(X_test)
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### GridAutoML
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
from autoclass_lite import GridAutoML
|
|
59
|
+
|
|
60
|
+
param_grid = {
|
|
61
|
+
"logistic_regression": [
|
|
62
|
+
{"learning_rate": 0.01},
|
|
63
|
+
{"learning_rate": 0.1},
|
|
64
|
+
],
|
|
65
|
+
"knn": [{"k": 3}, {"k": 5}, {"k": 7}],
|
|
66
|
+
"decision_tree": [{"max_depth": 3}, {"max_depth": 5}],
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
grid = GridAutoML(param_grid=param_grid, metric="f1_score")
|
|
70
|
+
grid.fit(X_train, y_train)
|
|
71
|
+
grid.summary()
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Logistic Regression
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
from autoclass_lite.models.logistic import LogisticRegression
|
|
78
|
+
|
|
79
|
+
model = LogisticRegression(learning_rate=0.01, n_iterations=1000)
|
|
80
|
+
model.fit(X_train, y_train)
|
|
81
|
+
predictions = model.predict(X_test)
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### Metrics
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
from autoclass_lite.metrics.classification import accuracy, precision, recall, f1_score, evaluate
|
|
88
|
+
|
|
89
|
+
acc = accuracy(y_true, y_pred)
|
|
90
|
+
|
|
91
|
+
results = evaluate(y_true, y_pred, metrics=[accuracy, precision, recall, f1_score])
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
### Observer (progress reporting)
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
from autoclass_lite.automl.observers import ConsoleObserver
|
|
98
|
+
|
|
99
|
+
automl = SimpleAutoML()
|
|
100
|
+
automl.add_observer(ConsoleObserver())
|
|
101
|
+
automl.fit(X_train, y_train)
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## Project Structure
|
|
105
|
+
|
|
106
|
+
```
|
|
107
|
+
autoclass_lite/
|
|
108
|
+
├── __init__.py # Public API: SimpleAutoML, GridAutoML
|
|
109
|
+
├── automl/
|
|
110
|
+
│ ├── orchestrator.py # SimpleAutoML, GridAutoML (Facade pattern)
|
|
111
|
+
│ ├── factory.py # ModelFactory (Factory pattern)
|
|
112
|
+
│ └── observers.py # Observer, ConsoleObserver (Observer pattern)
|
|
113
|
+
├── models/
|
|
114
|
+
│ ├── base.py # BaseModel ABC (Strategy + Template Method)
|
|
115
|
+
│ ├── logistic.py # LogisticRegression
|
|
116
|
+
│ ├── knn.py # KNNClassifier
|
|
117
|
+
│ ├── naive_bayes.py # GaussianNaiveBayes
|
|
118
|
+
│ └── tree.py # DecisionTreeClassifier (recursive)
|
|
119
|
+
├── cv/
|
|
120
|
+
│ ├── splitter.py # KFoldSplitter
|
|
121
|
+
│ └── validator.py # CrossValidator (ThreadPoolExecutor)
|
|
122
|
+
└── metrics/
|
|
123
|
+
└── classification.py # Pure functions + evaluate() HOF
|
|
124
|
+
tests/ # pytest test suite
|
|
125
|
+
pyproject.toml
|
|
126
|
+
README.md
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
## Learning Outcomes
|
|
132
|
+
|
|
133
|
+
### 1. Object-Oriented Programming (OOP)
|
|
134
|
+
|
|
135
|
+
- **Encapsulation:** All internal state uses private attributes (`_weights`, `_biases`, `_cache`, etc.). Users interact only through the public API.
|
|
136
|
+
- **Abstract Base Class:** `BaseModel` in [autoclass_lite/models/base.py](autoclass_lite/models/base.py) defines the interface all classifiers must implement.
|
|
137
|
+
- **Inheritance:** `KNNClassifier`, `GaussianNaiveBayes`, `LogisticRegression`, `DecisionTreeClassifier` all extend `BaseModel`. `GridAutoML` extends `SimpleAutoML`.
|
|
138
|
+
- **Polymorphism:** `CrossValidator` calls `.fit()` and `.predict()` on any `BaseModel` subclass without knowing its concrete type.
|
|
139
|
+
|
|
140
|
+
### 2. Functional Programming
|
|
141
|
+
|
|
142
|
+
- **Pure functions:** `accuracy`, `precision`, `recall`, `f1_score` in [autoclass_lite/metrics/classification.py](autoclass_lite/metrics/classification.py) take arrays and return a value with no side effects.
|
|
143
|
+
- **Higher-order function:** `evaluate(y_true, y_pred, metrics: list)` takes a list of metric functions and applies them, returning a results dict.
|
|
144
|
+
- **Lambdas as data:** `KNNClassifier._DISTANCES` is a class-level dict mapping distance names to lambda functions, selected at runtime.
|
|
145
|
+
|
|
146
|
+
### 3. Concurrency
|
|
147
|
+
|
|
148
|
+
`CrossValidator` in [autoclass_lite/cv/validator.py](autoclass_lite/cv/validator.py) uses `ThreadPoolExecutor` to evaluate all K folds in parallel. Each fold gets a `copy.deepcopy` of the model to ensure thread safety.
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
with ThreadPoolExecutor() as executor:
|
|
152
|
+
futures = [executor.submit(self._evaluate_fold, ...) for fold in folds]
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
### 4. Recursion / Dynamic Programming
|
|
156
|
+
|
|
157
|
+
- **Recursion:** `DecisionTreeClassifier` in [autoclass_lite/models/tree.py](autoclass_lite/models/tree.py) uses `_grow_tree()` to recursively split nodes and `_traverse()` to recursively walk the tree during prediction.
|
|
158
|
+
- **Dynamic Programming (Memoization):** `GridAutoML` in [autoclass_lite/automl/orchestrator.py](autoclass_lite/automl/orchestrator.py) maintains a `_cache` dict. Every `(model_name, params)` combination is evaluated at most once — repeated configurations are looked up from cache instead of re-running cross-validation.
|
|
159
|
+
|
|
160
|
+
```python
|
|
161
|
+
cache_key = (name, frozenset(params.items()))
|
|
162
|
+
if cache_key in self._cache:
|
|
163
|
+
scores = self._cache[cache_key]
|
|
164
|
+
else:
|
|
165
|
+
scores = validator.validate(model, X, y)
|
|
166
|
+
self._cache[cache_key] = scores
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
### 5. SOLID Principles
|
|
170
|
+
|
|
171
|
+
- **Single Responsibility:** Each class does exactly one thing. `KFoldSplitter` only splits indices. `CrossValidator` only runs folds. `ModelFactory` only creates models.
|
|
172
|
+
- **Open/Closed:** New models can be added by extending `BaseModel` and registering in `ModelFactory` without touching existing code. `SimpleAutoML.DEFAULT_CONFIGS` allows adding new default configurations without modifying `fit()`.
|
|
173
|
+
- **Liskov Substitution:** Any `BaseModel` subclass can replace another anywhere in the system.
|
|
174
|
+
- **Interface Segregation:** `BaseModel` only requires `fit` and `predict`. Optional `get_params` has a default implementation in the base class.
|
|
175
|
+
- **Dependency Inversion:** `CrossValidator` depends on `BaseModel` (abstraction), not any concrete classifier.
|
|
176
|
+
|
|
177
|
+
### 6. Architectural & Design Patterns
|
|
178
|
+
|
|
179
|
+
**Architecture:** Layered pipeline — models → cross-validation → metrics → AutoML orchestration. Each layer depends only on the layer below it.
|
|
180
|
+
|
|
181
|
+
**Factory Pattern** — [autoclass_lite/automl/factory.py](autoclass_lite/automl/factory.py)
|
|
182
|
+
|
|
183
|
+
`ModelFactory.create(name, **kwargs)` instantiates any registered model by name. Adding a new model requires only one line in the registry dict.
|
|
184
|
+
|
|
185
|
+
**Strategy Pattern** — [autoclass_lite/models/base.py](autoclass_lite/models/base.py), [autoclass_lite/models/knn.py](autoclass_lite/models/knn.py)
|
|
186
|
+
|
|
187
|
+
- `BaseModel` defines the classifier interface; each model is a concrete strategy swappable at runtime by `ModelFactory`.
|
|
188
|
+
- `KNNClassifier._DISTANCES` selects the distance function at construction time — `euclidean` or `manhattan` — without any `if/else` in the prediction loop.
|
|
189
|
+
|
|
190
|
+
**Observer Pattern** — [autoclass_lite/automl/observers.py](autoclass_lite/automl/observers.py)
|
|
191
|
+
|
|
192
|
+
`SimpleAutoML` notifies all registered observers on `fit_start`, `model_done`, and `fit_done` events without coupling to any specific logger or UI.
|
|
193
|
+
|
|
194
|
+
**Template Method Pattern** — [autoclass_lite/models/base.py](autoclass_lite/models/base.py)
|
|
195
|
+
|
|
196
|
+
`BaseModel.fit_predict()` defines the skeleton (fit then predict). Subclasses implement the steps; the sequence is inherited and never duplicated.
|
|
197
|
+
|
|
198
|
+
**Facade Pattern** — [autoclass_lite/automl/orchestrator.py](autoclass_lite/automl/orchestrator.py)
|
|
199
|
+
|
|
200
|
+
`SimpleAutoML` exposes a single `fit() / predict() / summary()` interface that internally coordinates model creation, cross-validation, and ranking.
|
|
File without changes
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from ..models.logistic import LogisticRegression
|
|
2
|
+
from ..models.base import BaseModel
|
|
3
|
+
from ..models.knn import KNNClassifier
|
|
4
|
+
from ..models.naive_bayes import GaussianNaiveBayes
|
|
5
|
+
from ..models.tree import DecisionTreeClassifier
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ModelFactory:
    """
    Build classifier instances from their string names (Factory pattern).

    The class-level registry maps each supported name to the class that
    implements it; both public methods are classmethods, so no instance of
    the factory is needed.
    """

    # Public model name -> implementing class. Insertion order is the order
    # reported by ``available_models``.
    _registry = dict(
        knn=KNNClassifier,
        logistic_regression=LogisticRegression,
        naive_bayes=GaussianNaiveBayes,
        decision_tree=DecisionTreeClassifier,
    )

    @classmethod
    def create(cls, name: str, **kwargs) -> BaseModel:
        """
        Return a new model of the kind registered under ``name``.

        Any keyword arguments are forwarded unchanged to the model's
        constructor.

        Raises:
            ValueError: if ``name`` is not a registered model name.
        """
        model_cls = cls._registry.get(name)
        if model_cls is None:
            known = [*cls._registry]
            raise ValueError(f"Unknown model '{name}'. Choose from {known}")
        return model_cls(**kwargs)

    @classmethod
    def available_models(cls) -> list:
        """
        List every model name the factory can create, in registry order.
        """
        return [*cls._registry]
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class Observer(ABC):
    """
    Interface for objects that listen to AutoML events (Observer pattern).

    Concrete subclasses must override ``update``; the class cannot be
    instantiated directly because ``update`` is abstract.
    """

    @abstractmethod
    def update(self, event: str, data: dict) -> None:
        """
        React to a single event fired by the subject.

        Args:
            event: name of the event being reported.
            data: payload dict that accompanies the event.
        """
        ...
|
|
14
|
+
|
|
15
|
+
class ConsoleObserver(Observer):
    """
    Observer that reports training progress by printing to standard output.
    """

    def update(self, event: str, data: dict) -> None:
        """
        Print one line for a ``fit_start``, ``model_done`` or ``fit_done`` event.

        ``model_done`` reads ``data['model']`` and ``data['scores']``;
        ``fit_done`` reads ``data['best_model']``. Any other event name is
        ignored without output.
        """
        if event == "fit_start":
            print("Starting AutoML...")
            return

        if event == "model_done":
            model_name, model_scores = data['model'], data['scores']
            print(f" [{model_name}] {model_scores}")
            return

        if event == "fit_done":
            winner = data['best_model']
            print(f"Done. Best model: {winner}")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
|