mlbuddy-learn 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlbuddy_learn-0.1.0/.gitignore +30 -0
- mlbuddy_learn-0.1.0/LICENSE +21 -0
- mlbuddy_learn-0.1.0/PKG-INFO +57 -0
- mlbuddy_learn-0.1.0/README.md +37 -0
- mlbuddy_learn-0.1.0/mlpilot/__init__.py +5 -0
- mlbuddy_learn-0.1.0/mlpilot/auto/__init__.py +2 -0
- mlbuddy_learn-0.1.0/mlpilot/auto/data.py +69 -0
- mlbuddy_learn-0.1.0/mlpilot/auto/trainer.py +93 -0
- mlbuddy_learn-0.1.0/mlpilot/explain/__init__.py +2 -0
- mlbuddy_learn-0.1.0/mlpilot/explain/visualizer.py +51 -0
- mlbuddy_learn-0.1.0/mlpilot/guide/__init__.py +2 -0
- mlbuddy_learn-0.1.0/mlpilot/guide/suggest.py +43 -0
- mlbuddy_learn-0.1.0/pyproject.toml +33 -0
- mlbuddy_learn-0.1.0/tests/test_data.py +60 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Virtual environments
|
|
2
|
+
venv/
|
|
3
|
+
env/
|
|
4
|
+
ENV/
|
|
5
|
+
.venv
|
|
6
|
+
|
|
7
|
+
# Python cache
|
|
8
|
+
__pycache__/
|
|
9
|
+
*.py[cod]
|
|
10
|
+
*$py.class
|
|
11
|
+
*.so
|
|
12
|
+
.Python
|
|
13
|
+
|
|
14
|
+
# pytest cache
|
|
15
|
+
.pytest_cache/
|
|
16
|
+
.coverage
|
|
17
|
+
|
|
18
|
+
# IDE
|
|
19
|
+
.vscode/
|
|
20
|
+
.idea/
|
|
21
|
+
*.swp
|
|
22
|
+
*.swo
|
|
23
|
+
|
|
24
|
+
# OS
|
|
25
|
+
.DS_Store
|
|
26
|
+
Thumbs.db
|
|
27
|
+
|
|
28
|
+
# Generated files
|
|
29
|
+
feature_importance.png
|
|
30
|
+
*.png
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Mohammed Jaasir
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mlbuddy-learn
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: An ML library that guides beginners step by step
|
|
5
|
+
Project-URL: Homepage, https://github.com/Mohammedjaasir/mlbuddy-learn
|
|
6
|
+
Author-email: Mohammed Jaasir <jaasir@example.com>
|
|
7
|
+
License: MIT
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Keywords: AI,beginners,education,machine learning
|
|
10
|
+
Classifier: Intended Audience :: Education
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
14
|
+
Requires-Python: >=3.8
|
|
15
|
+
Requires-Dist: matplotlib
|
|
16
|
+
Requires-Dist: numpy
|
|
17
|
+
Requires-Dist: pandas
|
|
18
|
+
Requires-Dist: scikit-learn
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
|
|
21
|
+
# MLPilot
|
|
22
|
+
|
|
23
|
+
Machine Learning automation and guidance system.
|
|
24
|
+
|
|
25
|
+
## Project Structure
|
|
26
|
+
|
|
27
|
+
- **mlpilot/auto/**: Automated machine learning tasks
|
|
28
|
+
- `data.py`: Data handling and preprocessing
|
|
29
|
+
- `trainer.py`: Model training utilities
|
|
30
|
+
|
|
31
|
+
- **mlpilot/guide/**: ML guidance and suggestions
|
|
32
|
+
- `suggest.py`: Suggestions engine for ML workflows
|
|
33
|
+
|
|
34
|
+
- **mlpilot/explain/**: Model interpretation and visualization
|
|
35
|
+
- `visualizer.py`: Model visualization tools
|
|
36
|
+
|
|
37
|
+
- **tests/**: Test suite
|
|
38
|
+
|
|
39
|
+
## Installation
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
pip install -e .
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Development
|
|
46
|
+
|
|
47
|
+
Install development dependencies:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
pip install -e . pytest
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Run tests:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
pytest
|
|
57
|
+
```
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# MLPilot
|
|
2
|
+
|
|
3
|
+
Machine Learning automation and guidance system.
|
|
4
|
+
|
|
5
|
+
## Project Structure
|
|
6
|
+
|
|
7
|
+
- **mlpilot/auto/**: Automated machine learning tasks
|
|
8
|
+
- `data.py`: Data handling and preprocessing
|
|
9
|
+
- `trainer.py`: Model training utilities
|
|
10
|
+
|
|
11
|
+
- **mlpilot/guide/**: ML guidance and suggestions
|
|
12
|
+
- `suggest.py`: Suggestions engine for ML workflows
|
|
13
|
+
|
|
14
|
+
- **mlpilot/explain/**: Model interpretation and visualization
|
|
15
|
+
- `visualizer.py`: Model visualization tools
|
|
16
|
+
|
|
17
|
+
- **tests/**: Test suite
|
|
18
|
+
|
|
19
|
+
## Installation
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
pip install -e .
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Development
|
|
26
|
+
|
|
27
|
+
Install development dependencies:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
pip install -e . pytest
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Run tests:
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
pytest
|
|
37
|
+
```
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from sklearn.model_selection import train_test_split
|
|
3
|
+
from sklearn.preprocessing import StandardScaler
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def load(X, y, test_size=0.2, scale=True, random_state=42):
    """Split features and labels into train/test sets, optionally scaling X.

    Progress messages are printed at every step.

    Args:
        X: Feature data. Anything ``np.array`` accepts; a 1-D input is
            treated as a single feature column.
        y: Target labels, one per row of ``X``.
        test_size: Forwarded to ``train_test_split``.
        scale: When true, fit a ``StandardScaler`` on the training split and
            apply it to both splits.
        random_state: Forwarded to ``train_test_split``.

    Returns:
        Tuple of (X_train, X_test, y_train, y_test).

    Raises:
        ValueError: If ``X`` and ``y`` differ in their first dimension.
    """
    # NOTE(review): the symbols in the messages below look mis-decoded
    # (probably UTF-8 emoji read with a single-byte code page) - confirm
    # against the original file before shipping.
    print("\nš mlpilot: Loading your data...\n")

    if not isinstance(X, np.ndarray):
        X = np.array(X)
        print(" ā Converted X to numpy array")

    if not isinstance(y, np.ndarray):
        y = np.array(y)
        print(" ā Converted y to numpy array")

    # A flat sequence is one feature per sample; the scaler and the
    # ``shape[1]`` lookup below both need a 2-D array.
    if X.ndim == 1:
        X = X.reshape(-1, 1)

    if X.shape[0] != y.shape[0]:
        raise ValueError(
            f"\nā Mismatch: X has {X.shape[0]} rows but y has {y.shape[0]} values.\n"
            " ā Make sure X and y have the same number of samples."
        )

    if X.shape[0] < 10:
        print(" ā Warning: Less than 10 samples. ML works better with more data.")

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    print(f" ā Split: {len(X_train)} training samples, {len(X_test)} test samples")

    if scale:
        # Fit on the training split only, then reuse those statistics for
        # the test split.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        print(" ā Features scaled with StandardScaler (mean=0, std=1)")

    print(f"\n š¦ Data shape: {X_train.shape[1]} features, {len(set(y))} classes")
    print("\nā Data ready! Pass X_train, X_test, y_train, y_test to ml.train()\n")

    return X_train, X_test, y_train, y_test
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def load_csv(filepath, target_column, test_size=0.2, scale=True, random_state=42):
    """Load a CSV file and split it into train/test sets via ``load``.

    Args:
        filepath: Path to the CSV file.
        target_column: Name of the column containing the target labels.
        test_size: Forwarded to ``load``.
        scale: Forwarded to ``load``.
        random_state: Forwarded to ``load``.

    Returns:
        Tuple of (X_train, X_test, y_train, y_test).

    Raises:
        ValueError: If ``target_column`` is not a column of the file.
    """
    import pandas as pd

    print(f"\nš Loading CSV from: {filepath}\n")
    df = pd.read_csv(filepath)

    print(f" ā Loaded {len(df)} rows and {len(df.columns)} columns")

    if target_column not in df.columns:
        raise ValueError(
            f"ā Target column '{target_column}' not found.\n"
            f" Available columns: {list(df.columns)}"
        )

    y = df[target_column].values
    X = df.drop(columns=[target_column]).values

    # Pass the split/scaling options through instead of silently using
    # load()'s defaults.
    return load(X, y, test_size=test_size, scale=scale, random_state=random_state)
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
from sklearn.linear_model import LogisticRegression
|
|
2
|
+
from sklearn.tree import DecisionTreeClassifier
|
|
3
|
+
from sklearn.ensemble import RandomForestClassifier
|
|
4
|
+
from sklearn.svm import SVC
|
|
5
|
+
from sklearn.metrics import accuracy_score, classification_report
|
|
6
|
+
|
|
7
|
+
from ..guide.suggest import suggest
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Registry of the model names accepted by train() and iterated by compare().
# Each value is a single estimator instance created once, when this module is
# imported.
MODELS = {
    "logistic": LogisticRegression(max_iter=1000),
    "tree": DecisionTreeClassifier(),
    "random_forest": RandomForestClassifier(),
    "svm": SVC(),
}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class GuidedModel:
    """A fitted model bundled with its test data and test-set accuracy."""

    def __init__(self, model, X_test, y_test, model_name):
        """Store the model and score it once on the supplied test data.

        Args:
            model: Fitted estimator exposing ``predict``.
            X_test: Test features passed to ``model.predict``.
            y_test: True labels for ``X_test``.
            model_name: Name the model was requested under.
        """
        predictions = model.predict(X_test)

        self.model = model
        self.model_name = model_name
        self.X_test = X_test
        self.y_test = y_test
        self.y_pred = predictions
        self.accuracy = accuracy_score(y_test, predictions)

    def predict(self, X):
        """Return the wrapped model's predictions for ``X``."""
        wrapped = self.model
        return wrapped.predict(X)

    def explain(self):
        """Run ``explain_model`` on the wrapped model."""
        # Imported here rather than at module level, as in the original.
        from ..explain.visualizer import explain_model

        explain_model(
            model=self.model,
            X_test=self.X_test,
            y_test=self.y_test,
            model_name=self.model_name,
        )

    def suggest(self):
        """Print improvement advice for this model."""
        # Inside a method body the bare name resolves to the module-level
        # ``suggest`` imported from ..guide.suggest, not to this method.
        suggest(self)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def train(X_train, X_test, y_train, y_test, model="logistic"):
    """Fit one of the registered models and report its test performance.

    Prints the accuracy and a classification report for the test split.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.
        model: Key into ``MODELS`` selecting the estimator to fit.

    Returns:
        GuidedModel wrapping the freshly fitted estimator.

    Raises:
        ValueError: If ``model`` is not a key of ``MODELS``.
    """
    from sklearn.base import clone

    if model not in MODELS:
        available = ", ".join(MODELS)
        raise ValueError(
            f"\nā Unknown model '{model}'.\n"
            f" ā Available models: {available}"
        )

    print(f"\nš¤ mlpilot: Training '{model}' model...\n")

    # Fit a fresh copy: the instances in MODELS are shared module-level
    # objects, so fitting them directly would refit the estimator held by
    # any GuidedModel returned from an earlier call.
    clf = clone(MODELS[model])
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    acc = accuracy_score(y_test, y_pred)

    print(" ā Training complete!")
    print(f" š Accuracy: {acc * 100:.1f}%")
    print()
    print(classification_report(y_test, y_pred))
    print("š” Tip: Call model.explain() to see what the model learned.")
    print("š” Tip: Call model.suggest() to get advice on improving it.\n")

    return GuidedModel(clf, X_test, y_test, model)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def compare(X_train, X_test, y_train, y_test):
    """Fit every registered model and report which scores best.

    Prints one accuracy line per model, then the best one.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.

    Returns:
        Dictionary mapping each model name to its test accuracy.
    """
    from sklearn.base import clone

    print("\nš¬ mlpilot: Comparing all models...\n")

    results = {}
    for name, prototype in MODELS.items():
        # Fit a copy so the shared instances in MODELS are left untouched.
        clf = clone(prototype)
        clf.fit(X_train, y_train)
        acc = accuracy_score(y_test, clf.predict(X_test))
        results[name] = acc
        print(f" {name:15s} ā {acc * 100:6.1f}% accuracy")

    print()
    # On a tie, max() keeps the first model in MODELS order.
    best_model = max(results, key=results.get)
    best_acc = results[best_model]

    print(f" š Best: {best_model} with {best_acc * 100:.1f}% accuracy\n")
    print(f"š” Tip: Use ml.train(..., model='{best_model}') to train the winner.\n")

    return results
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import matplotlib.pyplot as plt
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def explain_model(model, X_test, y_test, model_name):
    """Plot what a fitted model exposes about its features.

    Models with ``feature_importances_`` get an importance chart. Otherwise
    models with ``coef_`` get a coefficient chart; only the first row of
    ``coef_`` is plotted. Anything else gets a printed hint.

    Args:
        model: Fitted estimator to inspect.
        X_test: Not used by this function.
        y_test: Not used by this function.
        model_name: Name shown in messages and chart titles.
    """
    print(f"\nš mlpilot: Explaining your '{model_name}' model...\n")

    if hasattr(model, "feature_importances_"):
        _plot_feature_importance(model.feature_importances_, model_name)
        return

    if hasattr(model, "coef_"):
        _plot_coefficients(model.coef_[0], model_name)
        return

    # Neither attribute is available: nothing to draw.
    print(" ā¹ This model doesn't expose internals directly.")
    print(" ā Try model='logistic' or model='random_forest'.\n")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _plot_feature_importance(importances, model_name):
    """Draw, save and show a bar chart of importances, largest first.

    The chart is written to ``feature_importance.png`` in the current
    working directory.

    Args:
        importances: Array of per-feature scores; it is indexed with an
            index array, so a plain list will not work.
        model_name: Name appended to the chart title.
    """
    count = len(importances)
    positions = range(count)
    # Descending order of importance.
    order = np.argsort(importances)[::-1]
    # Labels keep each feature's original 1-based position.
    labels = [f"Feature {idx + 1}" for idx in order]

    plt.figure(figsize=(8, 4))
    plt.bar(positions, importances[order], color="steelblue", alpha=0.8)
    plt.xticks(positions, labels, rotation=45, ha="right")
    plt.title(f"Feature Importance ā {model_name}", fontsize=13, fontweight="bold")
    plt.ylabel("Importance Score")
    plt.tight_layout()
    plt.savefig("feature_importance.png", dpi=120)
    plt.show()

    print(" ā Saved as 'feature_importance.png'")
    print(" ā Features on the left matter most to your model.\n")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _plot_coefficients(coefs, model_name):
    """Draw, save and show a bar chart of model coefficients.

    Negative coefficients are drawn in crimson, all others in steelblue.
    The chart is written to ``coefficients.png`` in the current working
    directory.

    Args:
        coefs: Sequence of per-feature coefficients.
        model_name: Name appended to the chart title.
    """
    count = len(coefs)
    positions = range(count)
    labels = [f"Feature {pos + 1}" for pos in positions]
    bar_colors = ["crimson" if value < 0 else "steelblue" for value in coefs]

    plt.figure(figsize=(8, 4))
    plt.bar(positions, coefs, color=bar_colors, alpha=0.8)
    plt.xticks(positions, labels, rotation=45, ha="right")
    # Dashed zero line separates positive from negative bars.
    plt.axhline(0, color="black", linewidth=0.8, linestyle="--")
    plt.title(f"Model Coefficients ā {model_name}", fontsize=13, fontweight="bold")
    plt.ylabel("Coefficient Value")
    plt.tight_layout()
    plt.savefig("coefficients.png", dpi=120)
    plt.show()

    print(" ā Saved as 'coefficients.png'")
    print(" ā Blue = pushes toward class 1. Red = pushes toward class 0.\n")
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from sklearn.metrics import accuracy_score
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def suggest(guided_model):
    """Print improvement advice chosen by the model's test accuracy.

    Advice comes in four bands: below 60%, below 80%, below 90%, and 90%
    or more. Where a tip recommends switching model, it never recommends
    the model that is already in use.

    Args:
        guided_model: Object with an ``accuracy`` attribute (a fraction,
            compared against 0.60 / 0.80 / 0.90) and a ``model_name``
            attribute.
    """
    # NOTE(review): the symbols in the messages below look mis-decoded
    # (probably UTF-8 emoji read with a single-byte code page) - confirm
    # against the original file before shipping.
    acc = guided_model.accuracy

    print("\nš¬ mlpilot suggestions:\n")
    print(f" Your model accuracy: {acc * 100:.1f}%\n")

    if acc < 0.60:
        print(" ā Accuracy is quite low. Here's what to try:\n")
        print(" 1. Get more training data.")
        print(" 2. Check your labels ā are they correct?")
        if guided_model.model_name != "random_forest":
            print(" 3. Try model='random_forest' ā often works better.")
        else:
            print(" 3. Try model='svm' or model='logistic' instead.")
        print(" 4. Your features might not be informative enough.")

    elif acc < 0.80:
        print(" š” Decent start! Here's how to improve:\n")
        # Same rule as the low-accuracy band: do not recommend the model
        # the user is already running.
        if guided_model.model_name != "random_forest":
            print(" 1. Try model='random_forest'.")
        else:
            print(" 1. Try model='svm' or model='logistic'.")
        print(" 2. Add more features if you have them.")
        print(" 3. Check for class imbalance.")
        print(" 4. Try tuning hyperparameters.")

    elif acc < 0.90:
        print(" š¢ Good accuracy! To push further:\n")
        if guided_model.model_name != "svm":
            print(" 1. Try model='svm'.")
        else:
            print(" 1. Try model='random_forest'.")
        print(" 2. Engineer new features from existing ones.")
        print(" 3. Collect more diverse training data.")

    else:
        print(" ā Excellent accuracy! A few things to check:\n")
        print(" 1. Make sure you're not overfitting.")
        print(" 2. Check for duplicated rows in your dataset.")
        print(" 3. If all looks good ā you're ready to deploy!")

    print()
    print(" š Quick guide:")
    print(" - Accuracy < 60% ā data quality or quantity problem")
    print(" - Accuracy 60-80% ā try different models or more features")
    print(" - Accuracy > 90% ā check for overfitting\n")
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "mlbuddy-learn"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "An ML library that guides beginners step by step"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Mohammed Jaasir", email = "jaasir@example.com" }
|
|
14
|
+
]
|
|
15
|
+
keywords = ["machine learning", "beginners", "education", "AI"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"License :: OSI Approved :: MIT License",
|
|
19
|
+
"Intended Audience :: Education",
|
|
20
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
21
|
+
]
|
|
22
|
+
dependencies = [
|
|
23
|
+
"numpy",
|
|
24
|
+
"scikit-learn",
|
|
25
|
+
"matplotlib",
|
|
26
|
+
"pandas",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
[project.urls]
|
|
30
|
+
Homepage = "https://github.com/Mohammedjaasir/mlbuddy-learn"
|
|
31
|
+
|
|
32
|
+
[tool.hatch.build.targets.wheel]
|
|
33
|
+
packages = ["mlpilot"]
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pytest
|
|
3
|
+
from mlpilot.auto.data import load
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_load_returns_four_splits():
    """load() returns exactly four objects."""
    features = np.random.rand(100, 4)
    labels = np.random.randint(0, 2, 100)

    splits = load(features, labels)

    assert len(splits) == 4
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_load_correct_sizes():
    """With test_size=0.2, 100 rows split into 80 train and 20 test."""
    features = np.random.rand(100, 4)
    labels = np.random.randint(0, 2, 100)

    train_features, test_features, _, _ = load(features, labels, test_size=0.2)

    assert len(train_features) == 80
    assert len(test_features) == 20
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def test_load_raises_on_shape_mismatch():
    """load() raises ValueError when X and y have different lengths."""
    features = np.random.rand(100, 4)
    # Deliberately the wrong size - should raise an error.
    labels = np.random.randint(0, 2, 90)

    with pytest.raises(ValueError):
        load(features, labels)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _plot_feature_importance(importances, model_name):
    """Draw, save and show a bar chart of importances, largest first.

    The chart is written to ``feature_importance.png`` in the current
    working directory.

    Args:
        importances: Array of per-feature scores; it is indexed with an
            index array, so a plain list will not work.
        model_name: Name appended to the chart title.
    """
    # NOTE(review): this looks like a stray copy of the helper in
    # mlpilot/explain/visualizer.py. This test module does not import
    # ``plt`` and no test calls this function - confirm it can be removed.
    count = len(importances)
    positions = range(count)
    # Descending order of importance.
    order = np.argsort(importances)[::-1]
    # Labels keep each feature's original 1-based position.
    labels = [f"Feature {idx + 1}" for idx in order]

    plt.figure(figsize=(8, 4))
    plt.bar(positions, importances[order], color="steelblue", alpha=0.8)
    plt.xticks(positions, labels, rotation=45, ha="right")
    plt.title(f"Feature Importance ā {model_name}", fontsize=13, fontweight="bold")
    plt.ylabel("Importance Score")
    plt.tight_layout()
    plt.savefig("feature_importance.png", dpi=120)
    plt.show()

    print(" ā Saved as 'feature_importance.png'")
    print(" ā Features on the left matter most to your model.\n")
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _plot_coefficients(coefs, model_name):
    """Draw, save and show a bar chart of model coefficients.

    Negative coefficients are drawn in crimson, all others in steelblue.
    The chart is written to ``coefficients.png`` in the current working
    directory.

    Args:
        coefs: Sequence of per-feature coefficients.
        model_name: Name appended to the chart title.
    """
    # NOTE(review): this looks like a stray copy of the helper in
    # mlpilot/explain/visualizer.py. This test module does not import
    # ``plt`` and no test calls this function - confirm it can be removed.
    count = len(coefs)
    positions = range(count)
    labels = [f"Feature {pos + 1}" for pos in positions]
    bar_colors = ["crimson" if value < 0 else "steelblue" for value in coefs]

    plt.figure(figsize=(8, 4))
    plt.bar(positions, coefs, color=bar_colors, alpha=0.8)
    plt.xticks(positions, labels, rotation=45, ha="right")
    # Dashed zero line separates positive from negative bars.
    plt.axhline(0, color="black", linewidth=0.8, linestyle="--")
    plt.title(f"Model Coefficients ā {model_name}", fontsize=13, fontweight="bold")
    plt.ylabel("Coefficient Value")
    plt.tight_layout()
    plt.savefig("coefficients.png", dpi=120)
    plt.show()

    print(" ā Saved as 'coefficients.png'")
    print(" ā Blue = pushes toward class 1. Red = pushes toward class 0.\n")
|