evolveml 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,134 @@
1
+ Metadata-Version: 2.4
2
+ Name: evolveml
3
+ Version: 0.2.0
4
+ Summary: A Python ML library that evolves with your data. Batch + Real-time Learning, AutoML, XAI, NLP, RL, Anomaly Detection & more.
5
+ Author: SAPPA VAMSI
6
+ License: MIT
7
+ Project-URL: Homepage, https://pypi.org/project/evolveml
8
+ Keywords: machine learning,online learning,automl,explainable ai,reinforcement learning,nlp,anomaly detection
9
+ Requires-Python: >=3.8
10
+ Description-Content-Type: text/markdown
11
+ Requires-Dist: numpy
12
+
13
+ # evolveml šŸš€
14
+
15
+ > A Python ML library that **evolves with your data.**
16
+ > Batch + Real-time Learning + Latest 2026 AI Trends — no sklearn needed!
17
+
18
+ **Author: SAPPA VAMSI**
19
+
20
+ ---
21
+
22
+ ## Install
23
+
24
+ ```bash
25
+ pip install evolveml
26
+ ```
27
+
28
+ ---
29
+
30
+ ## What's Inside (v0.2.0)
31
+
32
+ ### 🧠 Core Models
33
+ | Module | Description |
34
+ |---|---|
35
+ | `DecisionTreeClassifier` | Decision Tree from scratch |
36
+ | `LinearRegressionModel` | Linear Regression |
37
+ | `LogisticRegressionModel` | Logistic Regression + online updates |
38
+ | `NeuralNetwork` | Neural Network from scratch |
39
+
40
+ ### šŸ”„ Real-time / Online Learning
41
+ | Module | Description |
42
+ |---|---|
43
+ | `StreamLearner` | Learns one sample at a time in real-time |
44
+
45
+ ### šŸŽÆ Ready-to-use Tasks
46
+ | Module | Description |
47
+ |---|---|
48
+ | `FraudDetector` | Real-time bank fraud detection |
49
+ | `ImageClassifier` | Image classification |
50
+ | `SpamDetector` | Email spam detection |
51
+ | `StockPredictor` | Stock price prediction |
52
+
53
+ ### šŸ”„ Latest 2026 Trending Modules
54
+ | Module | Trend | Description |
55
+ |---|---|---|
56
+ | `AutoFeatureSelector` | **AutoML** | Auto-selects best features |
57
+ | `AnomalyDetector` | **Edge AI / IoT** | Detects anomalies in streams |
58
+ | `ConceptDriftDetector` | **Adaptive ML** | Detects data distribution changes |
59
+ | `ExplainableModel` | **XAI** | Explains WHY model predicted |
60
+ | `ReinforcementAgent` | **Agentic AI** | Q-Learning agent |
61
+ | `SentimentAnalyzer` | **NLP** | Real-time text sentiment |
62
+ | `TransferLearner` | **Transfer Learning** | Reuse knowledge across tasks |
63
+
64
+ ---
65
+
66
+ ## Quick Examples
67
+
68
+ ### šŸ¤– AutoML - Auto Feature Selection
69
+ ```python
70
+ from evolveml import AutoFeatureSelector
71
+ selector = AutoFeatureSelector(top_k=5)
72
+ X_best = selector.fit_transform(X_train, y_train)
73
+ selector.report()
74
+ ```
75
+
76
+ ### 🚨 Anomaly Detection (IoT/Edge)
77
+ ```python
78
+ from evolveml import AnomalyDetector
79
+ detector = AnomalyDetector(threshold=2.5)
80
+ detector.fit(normal_data)
81
+ result = detector.detect(new_sensor_reading)
82
+ print(result) # {'is_anomaly': True, 'status': '🚨 ANOMALY'}
83
+ ```
84
+
85
+ ### šŸ“‰ Concept Drift Detection
86
+ ```python
87
+ from evolveml import ConceptDriftDetector
88
+ drift = ConceptDriftDetector()
89
+ for pred, actual in prediction_stream:
90
+ status = drift.update(pred, actual)
91
+ if status['drift_detected']:
92
+ print("āš ļø Retrain your model!")
93
+ ```
94
+
95
+ ### šŸ” Explainable AI
96
+ ```python
97
+ from evolveml import ExplainableModel, DecisionTreeClassifier
98
+ model = DecisionTreeClassifier()
99
+ model.fit(X_train, y_train)
100
+ xai = ExplainableModel(model, feature_names=['age', 'amount', 'hour'])
101
+ xai.fit(X_train, y_train)
102
+ xai.explain(X_test[0])
103
+ ```
104
+
105
+ ### šŸŽ® Reinforcement Learning Agent
106
+ ```python
107
+ from evolveml import ReinforcementAgent
108
+ agent = ReinforcementAgent(n_states=100, n_actions=4)
109
+ action = agent.act(state)
110
+ agent.learn(state, action, reward=+1, next_state=next_state)
111
+ ```
112
+
113
+ ### šŸ’¬ Sentiment Analysis
114
+ ```python
115
+ from evolveml import SentimentAnalyzer
116
+ analyzer = SentimentAnalyzer()
117
+ result = analyzer.analyze("This product is absolutely amazing!")
118
+ print(result) # {'sentiment': 'POSITIVE 😊', 'confidence': 0.87}
119
+ analyzer.learn("brilliant", label='positive') # teach new words
120
+ ```
121
+
122
+ ### šŸ” Transfer Learning
123
+ ```python
124
+ from evolveml import TransferLearner, DecisionTreeClassifier
125
+ source = DecisionTreeClassifier()
126
+ source.fit(X_source, y_source)
127
+ transfer = TransferLearner(source)
128
+ transfer.fit(X_target, y_target) # learns faster with less data!
129
+ ```
130
+
131
+ ---
132
+
133
+ ## License
134
+ MIT — Free to use!
@@ -0,0 +1,122 @@
1
+ # evolveml šŸš€
2
+
3
+ > A Python ML library that **evolves with your data.**
4
+ > Batch + Real-time Learning + Latest 2026 AI Trends — no sklearn needed!
5
+
6
+ **Author: SAPPA VAMSI**
7
+
8
+ ---
9
+
10
+ ## Install
11
+
12
+ ```bash
13
+ pip install evolveml
14
+ ```
15
+
16
+ ---
17
+
18
+ ## What's Inside (v0.2.0)
19
+
20
+ ### 🧠 Core Models
21
+ | Module | Description |
22
+ |---|---|
23
+ | `DecisionTreeClassifier` | Decision Tree from scratch |
24
+ | `LinearRegressionModel` | Linear Regression |
25
+ | `LogisticRegressionModel` | Logistic Regression + online updates |
26
+ | `NeuralNetwork` | Neural Network from scratch |
27
+
28
+ ### šŸ”„ Real-time / Online Learning
29
+ | Module | Description |
30
+ |---|---|
31
+ | `StreamLearner` | Learns one sample at a time in real-time |
32
+
33
+ ### šŸŽÆ Ready-to-use Tasks
34
+ | Module | Description |
35
+ |---|---|
36
+ | `FraudDetector` | Real-time bank fraud detection |
37
+ | `ImageClassifier` | Image classification |
38
+ | `SpamDetector` | Email spam detection |
39
+ | `StockPredictor` | Stock price prediction |
40
+
41
+ ### šŸ”„ Latest 2026 Trending Modules
42
+ | Module | Trend | Description |
43
+ |---|---|---|
44
+ | `AutoFeatureSelector` | **AutoML** | Auto-selects best features |
45
+ | `AnomalyDetector` | **Edge AI / IoT** | Detects anomalies in streams |
46
+ | `ConceptDriftDetector` | **Adaptive ML** | Detects data distribution changes |
47
+ | `ExplainableModel` | **XAI** | Explains WHY model predicted |
48
+ | `ReinforcementAgent` | **Agentic AI** | Q-Learning agent |
49
+ | `SentimentAnalyzer` | **NLP** | Real-time text sentiment |
50
+ | `TransferLearner` | **Transfer Learning** | Reuse knowledge across tasks |
51
+
52
+ ---
53
+
54
+ ## Quick Examples
55
+
56
+ ### šŸ¤– AutoML - Auto Feature Selection
57
+ ```python
58
+ from evolveml import AutoFeatureSelector
59
+ selector = AutoFeatureSelector(top_k=5)
60
+ X_best = selector.fit_transform(X_train, y_train)
61
+ selector.report()
62
+ ```
63
+
64
+ ### 🚨 Anomaly Detection (IoT/Edge)
65
+ ```python
66
+ from evolveml import AnomalyDetector
67
+ detector = AnomalyDetector(threshold=2.5)
68
+ detector.fit(normal_data)
69
+ result = detector.detect(new_sensor_reading)
70
+ print(result) # {'is_anomaly': True, 'status': '🚨 ANOMALY'}
71
+ ```
72
+
73
+ ### šŸ“‰ Concept Drift Detection
74
+ ```python
75
+ from evolveml import ConceptDriftDetector
76
+ drift = ConceptDriftDetector()
77
+ for pred, actual in prediction_stream:
78
+ status = drift.update(pred, actual)
79
+ if status['drift_detected']:
80
+ print("āš ļø Retrain your model!")
81
+ ```
82
+
83
+ ### šŸ” Explainable AI
84
+ ```python
85
+ from evolveml import ExplainableModel, DecisionTreeClassifier
86
+ model = DecisionTreeClassifier()
87
+ model.fit(X_train, y_train)
88
+ xai = ExplainableModel(model, feature_names=['age', 'amount', 'hour'])
89
+ xai.fit(X_train, y_train)
90
+ xai.explain(X_test[0])
91
+ ```
92
+
93
+ ### šŸŽ® Reinforcement Learning Agent
94
+ ```python
95
+ from evolveml import ReinforcementAgent
96
+ agent = ReinforcementAgent(n_states=100, n_actions=4)
97
+ action = agent.act(state)
98
+ agent.learn(state, action, reward=+1, next_state=next_state)
99
+ ```
100
+
101
+ ### šŸ’¬ Sentiment Analysis
102
+ ```python
103
+ from evolveml import SentimentAnalyzer
104
+ analyzer = SentimentAnalyzer()
105
+ result = analyzer.analyze("This product is absolutely amazing!")
106
+ print(result) # {'sentiment': 'POSITIVE 😊', 'confidence': 0.87}
107
+ analyzer.learn("brilliant", label='positive') # teach new words
108
+ ```
109
+
110
+ ### šŸ” Transfer Learning
111
+ ```python
112
+ from evolveml import TransferLearner, DecisionTreeClassifier
113
+ source = DecisionTreeClassifier()
114
+ source.fit(X_source, y_source)
115
+ transfer = TransferLearner(source)
116
+ transfer.fit(X_target, y_target) # learns faster with less data!
117
+ ```
118
+
119
+ ---
120
+
121
+ ## License
122
+ MIT — Free to use!
@@ -0,0 +1,53 @@
1
+ import numpy as np
2
+
3
class AnomalyDetector:
    """Z-score based real-time anomaly detector for IoT / Edge streams.

    Fit once on a batch of "normal" samples, then score incoming samples one
    at a time: a sample whose mean absolute z-score across features exceeds
    ``threshold`` is flagged as anomalous.

    Usage:
        from evolveml.anomaly import AnomalyDetector
        detector = AnomalyDetector(threshold=2.5)
        detector.fit(normal_data)
        result = detector.detect(new_sample)
    """
    def __init__(self, threshold=2.5):
        # Mean absolute z-score above which a sample is flagged anomalous.
        self.threshold = threshold
        # Per-feature statistics of the "normal" data; populated by fit().
        self.mean_ = None
        self.std_ = None
        # Running log of every detect() call: {'score', 'anomaly'} dicts.
        self.history = []

    def fit(self, X):
        """Learn per-feature mean/std from a batch of normal samples."""
        data = np.array(X)
        self.mean_ = data.mean(axis=0)
        self.std_ = data.std(axis=0)
        # Guard constant features so z-scores never divide by zero.
        self.std_[self.std_ == 0] = 1e-10
        print(f"āœ… AnomalyDetector trained on {len(data)} samples")
        return self

    def _z_score(self, x):
        # Mean absolute z-score over all features of a single sample.
        deviations = (np.array(x) - self.mean_) / self.std_
        return np.abs(deviations).mean()

    def detect(self, x):
        """Detect if a single sample is anomalous"""
        score = self._z_score(x)
        flagged = score > self.threshold
        self.history.append({'score': score, 'anomaly': flagged})
        status = '🚨 ANOMALY' if flagged else 'āœ… NORMAL'
        return {
            'is_anomaly': bool(flagged),
            'score': float(score),
            'threshold': self.threshold,
            'status': status,
        }

    def detect_batch(self, X):
        """Detect anomalies in batch"""
        results = []
        for sample in np.array(X):
            results.append(self.detect(sample))
        return results

    def update(self, x):
        """Online update - adapt to new normal patterns"""
        sample = np.array(x)
        alpha = 0.05  # learning rate for mean/std update
        self.mean_ = (1 - alpha) * self.mean_ + alpha * sample
        self.std_ = (1 - alpha) * self.std_ + alpha * np.abs(sample - self.mean_)
        self.std_[self.std_ == 0] = 1e-10
@@ -0,0 +1,47 @@
1
+ import numpy as np
2
+
3
class AutoFeatureSelector:
    """AutoML feature selection via absolute Pearson correlation with y.

    Scores every column of X by |corr(column, y)| and keeps the ``top_k``
    highest-scoring columns. Constant columns (zero variance) and undefined
    correlations score 0.

    Usage:
        from evolveml.automl import AutoFeatureSelector
        selector = AutoFeatureSelector(top_k=10)
        selector.fit(X_train, y_train)
        X_selected = selector.transform(X_train)
    """
    def __init__(self, top_k=10):
        self.top_k = top_k             # how many features to keep
        self.selected_indices_ = None  # kept column indices, best first
        self.scores_ = None            # |correlation| score per column

    def _correlation_score(self, X, y):
        # One |Pearson r| per column; 0 for constant/undefined correlations.
        n_cols = X.shape[1]
        scores = np.zeros(n_cols)
        for j in range(n_cols):
            column = X[:, j]
            if column.std() == 0:
                continue  # constant feature carries no signal
            r = np.corrcoef(column, y)[0, 1]
            if not np.isnan(r):
                scores[j] = abs(r)
        return scores

    def fit(self, X, y):
        """Score all features and record the indices of the top_k best."""
        X_arr, y_arr = np.array(X), np.array(y)
        self.scores_ = self._correlation_score(X_arr, y_arr)
        keep = min(self.top_k, X_arr.shape[1])
        # argsort ascending, reversed -> best-first ordering, truncated to k.
        self.selected_indices_ = np.argsort(self.scores_)[::-1][:keep]
        print(f"āœ… AutoFeatureSelector: Selected top {keep} features out of {X_arr.shape[1]}")
        return self

    def transform(self, X):
        """Project X onto the selected feature columns."""
        return np.array(X)[:, self.selected_indices_]

    def fit_transform(self, X, y):
        """Convenience wrapper: fit on (X, y), then transform X."""
        self.fit(X, y)
        return self.transform(X)

    def report(self):
        """Print the selected features ranked by importance score."""
        print("\nšŸ“Š Feature Importance Scores:")
        for rank, idx in enumerate(self.selected_indices_):
            print(f" Rank {rank+1}: Feature[{idx}] → Score: {self.scores_[idx]:.4f}")
@@ -0,0 +1,63 @@
1
+ import numpy as np
2
+
3
class ConceptDriftDetector:
    """Sliding-window concept-drift detector over a prediction error stream.

    Each (predicted, actual) pair becomes a 0/1 error. Once a full window of
    errors exists, the most recent window's error rate is compared against
    the rate of the very first window (the frozen baseline). A change above
    ``warning_threshold`` raises a warning; above ``drift_threshold`` it
    reports drift.

    Usage:
        from evolveml.drift import ConceptDriftDetector
        detector = ConceptDriftDetector()
        for prediction, actual in stream:
            status = detector.update(prediction, actual)
            if status['drift_detected']:
                print("āš ļø Drift detected! Retrain model.")
    """
    def __init__(self, window_size=30, warning_threshold=0.1, drift_threshold=0.2):
        self.window_size = window_size
        self.warning_threshold = warning_threshold
        self.drift_threshold = drift_threshold
        self.errors = []        # 0/1 per sample; 1 means misprediction
        self.drift_points = []  # sample counts at which drift was reported
        self.n_samples = 0

    def update(self, predicted, actual):
        """Feed one prediction and check for drift"""
        self.errors.append(int(predicted != actual))
        self.n_samples += 1

        # Not enough history yet for a full-window comparison.
        if len(self.errors) < self.window_size:
            return {'drift_detected': False, 'warning': False, 'error_rate': None}

        window = self.errors[-self.window_size:]
        current_rate = np.mean(window)
        # Baseline is frozen at the first full window's error rate.
        reference_rate = np.mean(self.errors[:self.window_size])
        shift = abs(current_rate - reference_rate)

        drift = shift > self.drift_threshold
        warning = shift > self.warning_threshold
        if drift:
            self.drift_points.append(self.n_samples)

        if drift:
            label = '🚨 DRIFT!'
        elif warning:
            label = 'āš ļø WARNING'
        else:
            label = 'āœ… STABLE'

        return {
            'drift_detected': bool(drift),
            'warning': bool(warning),
            'error_rate': float(current_rate),
            'baseline_error': float(reference_rate),
            'change': float(shift),
            'status': label,
        }

    def reset(self):
        """Reset after model retraining"""
        self.errors = []
        print("šŸ”„ Drift detector reset after retraining")

    @property
    def n_drifts(self):
        # Total drift events reported so far (deliberately survives reset()).
        return len(self.drift_points)
@@ -0,0 +1,75 @@
1
+ import numpy as np
2
+
3
class ExplainableModel:
    """
    Explainable AI (XAI) - Shows WHY your model made a prediction.
    Wraps any evolveml model and adds explanations.

    Usage:
        from evolveml.explain import ExplainableModel
        from evolveml import DecisionTreeClassifier

        base_model = DecisionTreeClassifier()
        base_model.fit(X_train, y_train)

        xai = ExplainableModel(base_model, feature_names=['age', 'amount', 'hour'])
        xai.fit(X_train, y_train)
        explanation = xai.explain(X_test[0])
        print(explanation)
    """
    def __init__(self, model, feature_names=None):
        # Wrapped model; only a predict(X) -> array-like interface is used here.
        self.model = model
        # Optional human-readable names, one per feature column.
        self.feature_names = feature_names
        self.X_train = None
        self.y_train = None

    def fit(self, X, y):
        """Store training data (used as the perturbation baseline) and, if the
        wrapped model looks unfitted, fit it too."""
        self.X_train = np.array(X)
        self.y_train = np.array(y)
        # NOTE(review): treats the mere presence of a 'weights' or 'root'
        # attribute as "already fitted". A model that defines the attribute as
        # None in __init__ (e.g. DecisionTreeClassifier sets self.root = None)
        # will never be auto-fitted here — confirm this is intended.
        if not hasattr(self.model, 'weights') and not hasattr(self.model, 'root'):
            self.model.fit(X, y)
        return self

    def _feature_importance(self, x):
        """Estimate feature importance using perturbation"""
        x = np.array(x).flatten()
        base_pred = self.model.predict(x.reshape(1, -1))[0]
        importances = []
        # Replace one feature at a time with its training-set mean; the absolute
        # change in the prediction is that feature's importance.
        for i in range(len(x)):
            x_perturbed = x.copy()
            x_perturbed[i] = self.X_train[:, i].mean() if self.X_train is not None else 0
            perturbed_pred = self.model.predict(x_perturbed.reshape(1, -1))[0]
            importances.append(abs(float(base_pred) - float(perturbed_pred)))
        return np.array(importances)

    def explain(self, x):
        """Explain a single prediction"""
        x = np.array(x).flatten()
        prediction = self.model.predict(x.reshape(1, -1))[0]
        importances = self._feature_importance(x)
        # Epsilon keeps the percentage division safe when all importances are 0.
        total = importances.sum() + 1e-10
        names = self.feature_names or [f'Feature[{i}]' for i in range(len(x))]

        # Rank features by importance, highest impact first.
        ranked = sorted(zip(names, importances, x),
                        key=lambda t: t[1], reverse=True)

        explanation = {
            'prediction': int(prediction),
            'top_reasons': [
                {
                    'feature': name,
                    'value': float(val),
                    'importance': float(imp),
                    'contribution': f"{imp/total*100:.1f}%"
                }
                # Only the 5 most influential features are reported.
                for name, imp, val in ranked[:5]
            ]
        }

        print(f"\nšŸ” EXPLANATION")
        print(f" Prediction : {prediction}")
        print(f" Top Reasons:")
        for r in explanation['top_reasons']:
            print(f" → {r['feature']} = {r['value']:.3f} | Impact: {r['contribution']}")

        return explanation
@@ -0,0 +1,26 @@
1
+ """
2
+ evolveml - A Python library that evolves with your data.
3
+ Supports Batch + Real-time Online Learning + Latest 2026 ML Trends.
4
+ Author: SAPPA VAMSI
5
+ """
6
+
7
+ from .models.decision_tree import DecisionTreeClassifier
8
+ from .models.linear import LinearRegressionModel, LogisticRegressionModel
9
+ from .models.neural_net import NeuralNetwork
10
+ from .stream_learner import StreamLearner
11
+ from .tasks.fraud_detection import FraudDetector
12
+ from .tasks.image_classifier import ImageClassifier
13
+ from .tasks.spam_detector import SpamDetector
14
+ from .tasks.stock_predictor import StockPredictor
15
+ from .automl import AutoFeatureSelector
16
+ from .anomaly import AnomalyDetector
17
+ from .drift import ConceptDriftDetector
18
+ from .explain import ExplainableModel
19
+ from .rl import ReinforcementAgent
20
+ from .nlp import SentimentAnalyzer
21
+ from .transfer import TransferLearner
22
+ from .metrics import accuracy, mse, rmse, f1_score
23
+ from .utils import normalize, train_test_split
24
+
25
+ __version__ = "0.26.0"
26
+ __author__ = "SAPPA VAMSI"
@@ -0,0 +1,23 @@
1
+ import numpy as np
2
+
3
def accuracy(y_true, y_pred):
    """Fraction of predictions that exactly match the true labels."""
    matches = np.array(y_true) == np.array(y_pred)
    return float(matches.mean())
6
+
7
def mse(y_true, y_pred):
    """Mean Squared Error"""
    diff = np.array(y_true) - np.array(y_pred)
    return float(np.mean(diff * diff))
10
+
11
def rmse(y_true, y_pred):
    """Root Mean Squared Error"""
    # Inlined squared-error mean so this metric stands alone.
    squared_error = np.mean((np.array(y_true) - np.array(y_pred)) ** 2)
    return float(np.sqrt(squared_error))
14
+
15
def f1_score(y_true, y_pred):
    """F1 Score for binary classification"""
    truth = np.array(y_true)
    pred = np.array(y_pred)
    # Confusion-matrix counts for the positive class (label 1).
    true_pos = np.sum((pred == 1) & (truth == 1))
    false_pos = np.sum((pred == 1) & (truth == 0))
    false_neg = np.sum((pred == 0) & (truth == 1))
    # Tiny epsilon keeps the divisions safe on degenerate inputs.
    eps = 1e-10
    precision = true_pos / (true_pos + false_pos + eps)
    recall = true_pos / (true_pos + false_neg + eps)
    return float(2 * precision * recall / (precision + recall + eps))
@@ -0,0 +1,82 @@
1
+ import numpy as np
2
+
3
class DecisionNode:
    """A single node of a decision tree.

    Internal nodes carry a (feature, threshold) split plus left/right
    children; leaf nodes carry only a predicted class label in ``value``.
    """
    def __init__(self, feature=None, threshold=None, left=None, right=None, value=None):
        # Column index used for the split (internal nodes only).
        self.feature = feature
        # Split point: samples with x[feature] <= threshold go left.
        self.threshold = threshold
        # Left / right child subtrees (None on leaf nodes).
        self.left = left
        self.right = right
        # Predicted class label (leaf nodes only; None on internal nodes).
        self.value = value
10
+
11
class DecisionTreeClassifier:
    """
    Decision Tree Classifier - Batch Learning

    Usage:
        from evolveml import DecisionTreeClassifier
        model = DecisionTreeClassifier(max_depth=10)
        model.fit(X_train, y_train)
        predictions = model.predict(X_test)
        print(model.score(X_test, y_test))
    """
    def __init__(self, max_depth=10, min_samples_split=2):
        self.max_depth = max_depth                  # cap on recursion depth
        self.min_samples_split = min_samples_split  # min samples to keep splitting
        self.root = None                            # tree root, built by fit()

    def fit(self, X, y):
        """Grow the tree on (X, y). Labels must be non-negative integers."""
        X, y = np.array(X), np.array(y)
        self.classes_ = np.unique(y)
        self.root = self._build_tree(X, y, depth=0)
        return self

    def _build_tree(self, X, y, depth):
        # Recursively grow the tree; returns a DecisionNode.
        n_samples = X.shape[0]
        n_features = X.shape[1]
        pure = len(np.unique(y)) == 1
        # Stop on depth limit, too few samples, or a pure node -> leaf.
        if depth >= self.max_depth or n_samples < self.min_samples_split or pure:
            return DecisionNode(value=self._most_common(y))
        feature, threshold = self._best_split(X, y, n_features)
        if feature is None:
            return DecisionNode(value=self._most_common(y))
        goes_left = X[:, feature] <= threshold
        return DecisionNode(
            feature=feature,
            threshold=threshold,
            left=self._build_tree(X[goes_left], y[goes_left], depth + 1),
            right=self._build_tree(X[~goes_left], y[~goes_left], depth + 1),
        )

    def _best_split(self, X, y, n_features):
        # Greedy search over a random subset of at most 20 features and the
        # first 10 unique values per feature as candidate thresholds.
        best = (-1, None, None)  # (gain, feature, threshold)
        candidates = np.random.choice(n_features, min(n_features, 20), replace=False)
        for feature in candidates:
            for threshold in np.unique(X[:, feature])[:10]:
                gain = self._info_gain(y, X[:, feature], threshold)
                if gain > best[0]:
                    best = (gain, feature, threshold)
        return best[1], best[2]

    def _info_gain(self, y, col, threshold):
        # Entropy reduction achieved by splitting col at threshold.
        parent_entropy = self._entropy(y)
        left_labels = y[col <= threshold]
        right_labels = y[col > threshold]
        if len(left_labels) == 0 or len(right_labels) == 0:
            return 0  # degenerate split: no information gained
        total = len(y)
        weighted_child_entropy = (
            len(left_labels) / total * self._entropy(left_labels)
            + len(right_labels) / total * self._entropy(right_labels)
        )
        return parent_entropy - weighted_child_entropy

    def _entropy(self, y):
        # Shannon entropy (in bits) of the label distribution.
        counts = np.bincount(y.astype(int))
        probs = counts[counts > 0] / len(y)
        return -np.sum(probs * np.log2(probs))

    def _most_common(self, y):
        # Majority label; used as a leaf's prediction.
        return np.bincount(y.astype(int)).argmax()

    def predict(self, X):
        """Predict a class label for every row of X."""
        return np.array([self._traverse(row, self.root) for row in np.array(X)])

    def _traverse(self, x, node):
        # Walk from node down to a leaf following the split rules.
        # Internal nodes have value=None; leaves carry the class label.
        while node.value is None:
            node = node.left if x[node.feature] <= node.threshold else node.right
        return node.value

    def score(self, X, y):
        """Mean accuracy of predict(X) against the true labels y."""
        return float(np.mean(self.predict(X) == np.array(y)))