mlbuddy-learn 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,30 @@
1
+ # Virtual environments
2
+ venv/
3
+ env/
4
+ ENV/
5
+ .venv
6
+
7
+ # Python cache
8
+ __pycache__/
9
+ *.py[cod]
10
+ *$py.class
11
+ *.so
12
+ .Python
13
+
14
+ # pytest cache
15
+ .pytest_cache/
16
+ .coverage
17
+
18
+ # IDE
19
+ .vscode/
20
+ .idea/
21
+ *.swp
22
+ *.swo
23
+
24
+ # OS
25
+ .DS_Store
26
+ Thumbs.db
27
+
28
+ # Generated files
29
+ feature_importance.png
30
+ *.png
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Mohammed Jaasir
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,57 @@
1
+ Metadata-Version: 2.4
2
+ Name: mlbuddy-learn
3
+ Version: 0.1.0
4
+ Summary: An ML library that guides beginners step by step
5
+ Project-URL: Homepage, https://github.com/Mohammedjaasir/mlbuddy-learn
6
+ Author-email: Mohammed Jaasir <jaasir@example.com>
7
+ License: MIT
8
+ License-File: LICENSE
9
+ Keywords: AI,beginners,education,machine learning
10
+ Classifier: Intended Audience :: Education
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
14
+ Requires-Python: >=3.8
15
+ Requires-Dist: matplotlib
16
+ Requires-Dist: numpy
17
+ Requires-Dist: pandas
18
+ Requires-Dist: scikit-learn
19
+ Description-Content-Type: text/markdown
20
+
21
+ # MLPilot
22
+
23
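+ Machine Learning automation and guidance system for beginners. The distribution is published as `mlbuddy-learn`; the importable package is `mlpilot`.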
24
+
25
+ ## Project Structure
26
+
27
+ - **mlpilot/auto/**: Automated machine learning tasks
28
+ - `data.py`: Data handling and preprocessing
29
+ - `trainer.py`: Model training utilities
30
+
31
+ - **mlpilot/guide/**: ML guidance and suggestions
32
+ - `suggest.py`: Suggestions engine for ML workflows
33
+
34
+ - **mlpilot/explain/**: Model interpretation and visualization
35
+ - `visualizer.py`: Model visualization tools
36
+
37
+ - **tests/**: Test suite
38
+
39
+ ## Installation
40
+
41
+ ```bash
42
+ pip install -e .
43
+ ```
44
+
45
+ ## Development
46
+
47
+ Install the package in editable mode together with pytest (the project metadata does not define a `dev` extra):
48
+
49
+ ```bash
50
+ pip install -e . pytest
51
+ ```
52
+
53
+ Run tests:
54
+
55
+ ```bash
56
+ pytest
57
+ ```
@@ -0,0 +1,37 @@
1
+ # MLPilot
2
+
3
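+ Machine Learning automation and guidance system for beginners. The distribution is published as `mlbuddy-learn`; the importable package is `mlpilot`.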
4
+
5
+ ## Project Structure
6
+
7
+ - **mlpilot/auto/**: Automated machine learning tasks
8
+ - `data.py`: Data handling and preprocessing
9
+ - `trainer.py`: Model training utilities
10
+
11
+ - **mlpilot/guide/**: ML guidance and suggestions
12
+ - `suggest.py`: Suggestions engine for ML workflows
13
+
14
+ - **mlpilot/explain/**: Model interpretation and visualization
15
+ - `visualizer.py`: Model visualization tools
16
+
17
+ - **tests/**: Test suite
18
+
19
+ ## Installation
20
+
21
+ ```bash
22
+ pip install -e .
23
+ ```
24
+
25
+ ## Development
26
+
27
+ Install the package in editable mode together with pytest (the project metadata does not define a `dev` extra):
28
+
29
+ ```bash
30
+ pip install -e . pytest
31
+ ```
32
+
33
+ Run tests:
34
+
35
+ ```bash
36
+ pytest
37
+ ```
@@ -0,0 +1,5 @@
1
+ from .auto.data import load, load_csv
2
+ from .auto.trainer import train, compare
3
+
4
# Version string exposed by the package at import time.
__version__ = "0.1.0"
# Names exported by `from <package> import *`; all are imported above.
__all__ = ["load", "load_csv", "train", "compare"]
@@ -0,0 +1,2 @@
1
+ from .data import load
2
+ from .trainer import train
@@ -0,0 +1,69 @@
1
+ import numpy as np
2
+ from sklearn.model_selection import train_test_split
3
+ from sklearn.preprocessing import StandardScaler
4
+
5
+
6
def load(X, y, test_size=0.2, scale=True, random_state=42):
    """Split features/labels into train and test sets, optionally scaling.

    Progress messages are printed to stdout at every step.

    Args:
        X: Feature matrix (array-like); converted to a numpy array if needed.
        y: Target values (array-like); converted to a numpy array if needed.
        test_size: Forwarded to ``train_test_split``.
        scale: When true, fit a ``StandardScaler`` on the training split only
            and apply it to both splits.
        random_state: Forwarded to ``train_test_split``.

    Returns:
        Tuple of (X_train, X_test, y_train, y_test).

    Raises:
        ValueError: If X and y differ in length along the first axis.
    """
    print("\nšŸ” mlpilot: Loading your data...\n")

    if not isinstance(X, np.ndarray):
        X = np.array(X)
        print(" āœ“ Converted X to numpy array")

    if not isinstance(y, np.ndarray):
        y = np.array(y)
        print(" āœ“ Converted y to numpy array")

    if X.shape[0] != y.shape[0]:
        raise ValueError(
            f"\nāœ— Mismatch: X has {X.shape[0]} rows but y has {y.shape[0]} values.\n"
            f" → Make sure X and y have the same number of samples."
        )

    if X.shape[0] < 10:
        print(" ⚠ Warning: Less than 10 samples. ML works better with more data.")

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    print(f" āœ“ Split: {len(X_train)} training samples, {len(X_test)} test samples")

    if scale:
        # Fit on the training split only so test statistics never leak in.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        print(" āœ“ Features scaled with StandardScaler (mean=0, std=1)")

    # Count distinct labels over flattened Python scalars: iterating a 2-D y
    # (e.g. an (n, 1) column) yields ndarray rows, which are unhashable and
    # made the previous ``set(y)`` raise TypeError.
    n_classes = len(set(y.ravel().tolist()))
    print(f"\n šŸ“¦ Data shape: {X_train.shape[1]} features, {n_classes} classes")
    print("\nāœ… Data ready! Pass X_train, X_test, y_train, y_test to ml.train()\n")

    return X_train, X_test, y_train, y_test
41
+
42
+
43
def load_csv(filepath, target_column, test_size=0.2, scale=True, random_state=42):
    """Load a CSV file and split it into train/test sets via ``load``.

    Args:
        filepath: Path to the CSV file.
        target_column: Name of the column containing the target labels.
        test_size: Forwarded to ``load`` (default matches ``load``).
        scale: Forwarded to ``load`` (default matches ``load``).
        random_state: Forwarded to ``load`` (default matches ``load``).

    Returns:
        Tuple of (X_train, X_test, y_train, y_test).

    Raises:
        ValueError: If ``target_column`` is not a column of the CSV.
    """
    import pandas as pd

    print(f"\nšŸ“‚ Loading CSV from: {filepath}\n")
    df = pd.read_csv(filepath)

    print(f" āœ“ Loaded {len(df)} rows and {len(df.columns)} columns")

    if target_column not in df.columns:
        raise ValueError(
            f"āœ— Target column '{target_column}' not found.\n"
            f" Available columns: {list(df.columns)}"
        )

    y = df[target_column].values
    X = df.drop(columns=[target_column]).values

    # Pass the split/scale options through instead of silently forcing the
    # defaults, so CSV users get the same control as direct ``load`` callers.
    return load(X, y, test_size=test_size, scale=scale, random_state=random_state)
@@ -0,0 +1,93 @@
1
+ from sklearn.linear_model import LogisticRegression
2
+ from sklearn.tree import DecisionTreeClassifier
3
+ from sklearn.ensemble import RandomForestClassifier
4
+ from sklearn.svm import SVC
5
+ from sklearn.metrics import accuracy_score, classification_report
6
+
7
+ from ..guide.suggest import suggest
8
+
9
+
10
# Registry mapping each supported model name to a scikit-learn classifier
# instance. The instances are created once, when this module is imported, and
# use library defaults except LogisticRegression, which gets max_iter=1000.
MODELS = {
    "logistic": LogisticRegression(max_iter=1000),
    "tree": DecisionTreeClassifier(),
    "random_forest": RandomForestClassifier(),
    "svm": SVC(),
}
16
+
17
+
18
class GuidedModel:
    """Bundle an estimator with its test data, predictions and accuracy.

    The constructor immediately predicts on ``X_test`` and stores both the
    predictions (``y_pred``) and the resulting accuracy (``accuracy``).
    """

    def __init__(self, model, X_test, y_test, model_name):
        self.model, self.model_name = model, model_name
        self.X_test, self.y_test = X_test, y_test

        # Evaluate once up front; the results are kept as attributes.
        self.y_pred = model.predict(X_test)
        self.accuracy = accuracy_score(y_test, self.y_pred)

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``."""
        predictions = self.model.predict(X)
        return predictions

    def explain(self):
        """Show the explanation plot/message for the wrapped estimator."""
        # Imported here rather than at module import time.
        from ..explain.visualizer import explain_model

        explain_model(self.model, self.X_test, self.y_test, self.model_name)

    def suggest(self):
        """Print improvement suggestions based on this model's accuracy."""
        # Inside a method body the bare name resolves to the module-level
        # ``suggest`` function, not to this method.
        suggest(self)
38
+
39
+
40
def train(X_train, X_test, y_train, y_test, model="logistic"):
    """Fit one of the registered models and report its test accuracy.

    Prints the accuracy and a classification report to stdout.

    Args:
        X_train, y_train: Data used to fit the model.
        X_test, y_test: Data used to evaluate the fitted model.
        model: Key into ``MODELS`` selecting which estimator to train.

    Returns:
        A ``GuidedModel`` wrapping the freshly fitted estimator.

    Raises:
        ValueError: If ``model`` is not a key of ``MODELS``.
    """
    from sklearn.base import clone

    if model not in MODELS:
        available = ", ".join(MODELS)
        raise ValueError(
            f"\nāœ— Unknown model '{model}'.\n"
            f" → Available models: {available}"
        )

    print(f"\nšŸ¤– mlpilot: Training '{model}' model...\n")

    # Fit an unfitted copy rather than the shared registry instance;
    # otherwise a later train()/compare() call would refit the estimator
    # held by a previously returned GuidedModel.
    clf = clone(MODELS[model])
    clf.fit(X_train, y_train)

    # GuidedModel already predicts on X_test and scores itself, so reuse its
    # results instead of predicting a second time.
    guided = GuidedModel(clf, X_test, y_test, model)
    acc = guided.accuracy

    print(" āœ“ Training complete!")
    print(f" šŸ“Š Accuracy: {acc * 100:.1f}%")
    print()
    print(classification_report(y_test, guided.y_pred))
    print("šŸ’” Tip: Call model.explain() to see what the model learned.")
    print("šŸ’” Tip: Call model.suggest() to get advice on improving it.\n")

    return guided
64
+
65
+
66
def compare(X_train, X_test, y_train, y_test):
    """Try all available models and show which one performs best.

    This is the quickest way to find the best model for your data. Each
    registered model is fitted on the training data, scored on the test data,
    and a ranking line is printed per model.

    Args:
        X_train, X_test, y_train, y_test: Training and test data

    Returns:
        Dictionary with model names and their accuracies
    """
    from sklearn.base import clone

    print("\nšŸ”¬ mlpilot: Comparing all models...\n")

    results = {}
    for name, prototype in MODELS.items():
        # Fit an unfitted copy so the shared registry instances (and any
        # GuidedModel that may hold one) are never refitted by a comparison.
        clf = clone(prototype)
        clf.fit(X_train, y_train)
        acc = accuracy_score(y_test, clf.predict(X_test))
        results[name] = acc
        print(f" {name:15s} → {acc * 100:6.1f}% accuracy")

    print()
    best_model = max(results, key=results.get)
    best_acc = results[best_model]

    print(f" šŸ† Best: {best_model} with {best_acc * 100:.1f}% accuracy\n")
    print(f"šŸ’” Tip: Use ml.train(..., model='{best_model}') to train the winner.\n")

    return results
@@ -0,0 +1,2 @@
1
+ """Explain module for model interpretation and visualization."""
2
+ from .visualizer import explain_model
@@ -0,0 +1,51 @@
1
+ import numpy as np
2
+ import matplotlib.pyplot as plt
3
+
4
+
5
def explain_model(model, X_test, y_test, model_name):
    """Plot what ``model`` learned, or say that it cannot be inspected.

    Uses ``feature_importances_`` when the model has it, otherwise the first
    row of ``coef_``; if neither attribute exists a hint is printed instead.
    ``X_test`` and ``y_test`` are accepted but not used here.
    """
    print(f"\nšŸ“Š mlpilot: Explaining your '{model_name}' model...\n")

    # Importances take precedence over coefficients.
    if hasattr(model, "feature_importances_"):
        _plot_feature_importance(model.feature_importances_, model_name)
        return

    if hasattr(model, "coef_"):
        first_row = model.coef_[0]
        _plot_coefficients(first_row, model_name)
        return

    print(" ℹ This model doesn't expose internals directly.")
    print(" → Try model='logistic' or model='random_forest'.\n")
17
+
18
+
19
def _plot_feature_importance(importances, model_name):
    """Draw a bar chart of importances, largest first, and save it as a PNG.

    Bars are labelled "Feature k", where k is the 1-based position of the
    value in ``importances``. The chart is written to
    'feature_importance.png' in the current directory and then shown.
    """
    order = np.argsort(importances)[::-1]  # indices from largest to smallest
    slots = range(len(importances))
    tick_labels = [f"Feature {idx + 1}" for idx in order]

    plt.figure(figsize=(8, 4))
    plt.bar(slots, importances[order], color="steelblue", alpha=0.8)
    plt.xticks(slots, tick_labels, rotation=45, ha="right")
    plt.title(f"Feature Importance — {model_name}", fontsize=13, fontweight="bold")
    plt.ylabel("Importance Score")
    plt.tight_layout()
    plt.savefig("feature_importance.png", dpi=120)
    plt.show()

    print(" āœ“ Saved as 'feature_importance.png'")
    print(" → Features on the left matter most to your model.\n")
34
+
35
+
36
def _plot_coefficients(coefs, model_name):
    """Draw a signed bar chart of coefficients and save it as a PNG.

    Negative coefficients are drawn in crimson, the rest in steel blue. The
    chart is written to 'coefficients.png' in the current directory and then
    shown.
    """
    slots = range(len(coefs))
    tick_labels = [f"Feature {pos + 1}" for pos in slots]
    bar_colors = ["steelblue" if not value < 0 else "crimson" for value in coefs]

    plt.figure(figsize=(8, 4))
    plt.bar(slots, coefs, color=bar_colors, alpha=0.8)
    plt.xticks(slots, tick_labels, rotation=45, ha="right")
    # Dashed zero line separates positive from negative bars.
    plt.axhline(0, color="black", linewidth=0.8, linestyle="--")
    plt.title(f"Model Coefficients — {model_name}", fontsize=13, fontweight="bold")
    plt.ylabel("Coefficient Value")
    plt.tight_layout()
    plt.savefig("coefficients.png", dpi=120)
    plt.show()

    print(" āœ“ Saved as 'coefficients.png'")
    print(" → Blue = pushes toward class 1. Red = pushes toward class 0.\n")
@@ -0,0 +1,2 @@
1
+ """Guide module for providing ML suggestions and guidance."""
2
+ from .suggest import suggest
@@ -0,0 +1,43 @@
1
+ from sklearn.metrics import accuracy_score
2
+
3
+
4
def suggest(guided_model):
    """Print accuracy-based advice for improving a trained model.

    The advice is chosen by accuracy band (<60%, 60-80%, 80-90%, >=90%).
    Whenever a band recommends switching models, the recommendation skips the
    model that is already in use.

    Args:
        guided_model: Object exposing ``accuracy`` (a fraction between 0 and
            1) and ``model_name`` (the name the model was trained under).

    Returns:
        None. All output goes to stdout.
    """
    acc = guided_model.accuracy

    print("\nšŸ’¬ mlpilot suggestions:\n")
    print(f" Your model accuracy: {acc * 100:.1f}%\n")

    if acc < 0.60:
        print(" ⚠ Accuracy is quite low. Here's what to try:\n")
        print(" 1. Get more training data.")
        print(" 2. Check your labels — are they correct?")
        if guided_model.model_name != "random_forest":
            print(" 3. Try model='random_forest' — often works better.")
        else:
            print(" 3. Try model='svm' or model='logistic' instead.")
        print(" 4. Your features might not be informative enough.")

    elif acc < 0.80:
        print(" 🟔 Decent start! Here's how to improve:\n")
        # Don't recommend the model the user is already on.
        if guided_model.model_name != "random_forest":
            print(" 1. Try model='random_forest'.")
        else:
            print(" 1. Try model='svm' or model='logistic' instead.")
        print(" 2. Add more features if you have them.")
        print(" 3. Check for class imbalance.")
        print(" 4. Try tuning hyperparameters.")

    elif acc < 0.90:
        print(" 🟢 Good accuracy! To push further:\n")
        # Same guard as above: point svm users at a different model.
        if guided_model.model_name != "svm":
            print(" 1. Try model='svm'.")
        else:
            print(" 1. Try model='random_forest'.")
        print(" 2. Engineer new features from existing ones.")
        print(" 3. Collect more diverse training data.")

    else:
        print(" āœ… Excellent accuracy! A few things to check:\n")
        print(" 1. Make sure you're not overfitting.")
        print(" 2. Check for duplicated rows in your dataset.")
        print(" 3. If all looks good — you're ready to deploy!")

    print()
    print(" šŸ“˜ Quick guide:")
    print(" - Accuracy < 60% → data quality or quantity problem")
    print(" - Accuracy 60-80% → try different models or more features")
    print(" - Accuracy > 90% → check for overfitting\n")
@@ -0,0 +1,33 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "mlbuddy-learn"
7
+ version = "0.1.0"
8
+ description = "An ML library that guides beginners step by step"
9
+ readme = "README.md"
10
+ requires-python = ">=3.8"
11
+ license = { text = "MIT" }
12
+ authors = [
13
+ { name = "Mohammed Jaasir", email = "jaasir@example.com" }
14
+ ]
15
+ keywords = ["machine learning", "beginners", "education", "AI"]
16
+ classifiers = [
17
+ "Programming Language :: Python :: 3",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Intended Audience :: Education",
20
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
21
+ ]
22
+ dependencies = [
23
+ "numpy",
24
+ "scikit-learn",
25
+ "matplotlib",
26
+ "pandas",
27
+ ]
28
+
29
+ [project.urls]
30
+ Homepage = "https://github.com/Mohammedjaasir/mlbuddy-learn"
31
+
32
+ [tool.hatch.build.targets.wheel]
33
+ packages = ["mlpilot"]
@@ -0,0 +1,60 @@
1
+ import numpy as np
2
+ import pytest
3
+ from mlpilot.auto.data import load
4
+
5
+
6
def test_load_returns_four_splits():
    """load() hands back exactly four objects."""
    features = np.random.rand(100, 4)
    labels = np.random.randint(0, 2, 100)
    splits = load(features, labels)
    assert len(splits) == 4
11
+
12
+
13
def test_load_correct_sizes():
    """A 0.2 test fraction of 100 rows gives an 80/20 split."""
    features = np.random.rand(100, 4)
    labels = np.random.randint(0, 2, 100)
    train_part, test_part, _, _ = load(features, labels, test_size=0.2)
    assert len(train_part) == 80
    assert len(test_part) == 20
19
+
20
+
21
def test_load_raises_on_shape_mismatch():
    """load() rejects X and y with different sample counts."""
    features = np.random.rand(100, 4)
    # Ten fewer labels than feature rows, so load() must raise.
    short_labels = np.random.randint(0, 2, 90)
    with pytest.raises(ValueError):
        load(features, short_labels)
26
+
27
+
28
def _plot_feature_importance(importances, model_name):
    """Draw a bar chart of importances, largest first, and save it as a PNG.

    NOTE(review): this is not a test and nothing in this module calls it; it
    looks like an accidental copy of a plotting helper — consider deleting
    it. Until then it is kept callable: the module never imports ``plt``, so
    the import is done locally to avoid a NameError.

    Args:
        importances: Numpy array of per-feature importance scores.
        model_name: Name shown in the chart title.
    """
    import matplotlib.pyplot as plt

    n = len(importances)
    features = [f"Feature {i+1}" for i in range(n)]
    indices = np.argsort(importances)[::-1]  # largest importance first

    plt.figure(figsize=(8, 4))
    plt.bar(range(n), importances[indices], color="steelblue", alpha=0.8)
    plt.xticks(range(n), [features[i] for i in indices], rotation=45, ha="right")
    plt.title(f"Feature Importance — {model_name}", fontsize=13, fontweight="bold")
    plt.ylabel("Importance Score")
    plt.tight_layout()
    plt.savefig("feature_importance.png", dpi=120)
    plt.show()
    print(" āœ“ Saved as 'feature_importance.png'")
    print(" → Features on the left matter most to your model.\n")
43
+
44
+
45
def _plot_coefficients(coefs, model_name):
    """Draw a signed bar chart of coefficients and save it as a PNG.

    NOTE(review): this is not a test and nothing in this module calls it; it
    looks like an accidental copy of a plotting helper — consider deleting
    it. Until then it is kept callable: the module never imports ``plt``, so
    the import is done locally to avoid a NameError.

    Args:
        coefs: Sequence of coefficient values, one per feature.
        model_name: Name shown in the chart title.
    """
    import matplotlib.pyplot as plt

    n = len(coefs)
    features = [f"Feature {i+1}" for i in range(n)]
    # Negative coefficients in crimson, the rest in steel blue.
    colors = ["crimson" if c < 0 else "steelblue" for c in coefs]

    plt.figure(figsize=(8, 4))
    plt.bar(range(n), coefs, color=colors, alpha=0.8)
    plt.xticks(range(n), features, rotation=45, ha="right")
    plt.axhline(0, color="black", linewidth=0.8, linestyle="--")
    plt.title(f"Model Coefficients — {model_name}", fontsize=13, fontweight="bold")
    plt.ylabel("Coefficient Value")
    plt.tight_layout()
    plt.savefig("coefficients.png", dpi=120)
    plt.show()
    print(" āœ“ Saved as 'coefficients.png'")
    print(" → Blue = pushes toward class 1. Red = pushes toward class 0.\n")