adamops-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. adamops/__init__.py +40 -0
  2. adamops/cli.py +163 -0
  3. adamops/data/__init__.py +24 -0
  4. adamops/data/feature_engineering.py +284 -0
  5. adamops/data/loaders.py +922 -0
  6. adamops/data/preprocessors.py +227 -0
  7. adamops/data/splitters.py +218 -0
  8. adamops/data/validators.py +148 -0
  9. adamops/deployment/__init__.py +21 -0
  10. adamops/deployment/api.py +237 -0
  11. adamops/deployment/cloud.py +191 -0
  12. adamops/deployment/containerize.py +262 -0
  13. adamops/deployment/exporters.py +148 -0
  14. adamops/evaluation/__init__.py +24 -0
  15. adamops/evaluation/comparison.py +133 -0
  16. adamops/evaluation/explainability.py +143 -0
  17. adamops/evaluation/metrics.py +233 -0
  18. adamops/evaluation/reports.py +165 -0
  19. adamops/evaluation/visualization.py +238 -0
  20. adamops/models/__init__.py +21 -0
  21. adamops/models/automl.py +277 -0
  22. adamops/models/ensembles.py +228 -0
  23. adamops/models/modelops.py +308 -0
  24. adamops/models/registry.py +250 -0
  25. adamops/monitoring/__init__.py +21 -0
  26. adamops/monitoring/alerts.py +200 -0
  27. adamops/monitoring/dashboard.py +117 -0
  28. adamops/monitoring/drift.py +212 -0
  29. adamops/monitoring/performance.py +195 -0
  30. adamops/pipelines/__init__.py +15 -0
  31. adamops/pipelines/orchestrators.py +183 -0
  32. adamops/pipelines/workflows.py +212 -0
  33. adamops/utils/__init__.py +18 -0
  34. adamops/utils/config.py +457 -0
  35. adamops/utils/helpers.py +663 -0
  36. adamops/utils/logging.py +412 -0
  37. adamops-0.1.0.dist-info/METADATA +310 -0
  38. adamops-0.1.0.dist-info/RECORD +42 -0
  39. adamops-0.1.0.dist-info/WHEEL +5 -0
  40. adamops-0.1.0.dist-info/entry_points.txt +2 -0
  41. adamops-0.1.0.dist-info/licenses/LICENSE +21 -0
  42. adamops-0.1.0.dist-info/top_level.txt +1 -0
adamops/evaluation/reports.py
@@ -0,0 +1,165 @@
+ """
+ AdamOps Reports Module
+
+ Generates HTML/PDF reports for model evaluation.
+ """
+
+ from typing import Any, Dict, List, Optional
+ from pathlib import Path
+ from datetime import datetime
+ import json
+
+ import numpy as np
+ import pandas as pd
+
+ from adamops.utils.logging import get_logger
+ from adamops.evaluation.metrics import evaluate, classification_report
+
+ logger = get_logger(__name__)
+
+ HTML_TEMPLATE = """
+ <!DOCTYPE html>
+ <html>
+ <head>
+ <title>{title}</title>
+ <style>
+ body {{ font-family: Arial, sans-serif; margin: 40px; background: #f5f5f5; }}
+ .container {{ max-width: 1200px; margin: 0 auto; background: white; padding: 30px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }}
+ h1 {{ color: #333; border-bottom: 2px solid #4a90d9; padding-bottom: 10px; }}
+ h2 {{ color: #666; margin-top: 30px; }}
+ table {{ border-collapse: collapse; width: 100%; margin: 20px 0; }}
+ th, td {{ border: 1px solid #ddd; padding: 12px; text-align: left; }}
+ th {{ background: #4a90d9; color: white; }}
+ tr:nth-child(even) {{ background: #f9f9f9; }}
+ .metric {{ font-size: 24px; font-weight: bold; color: #4a90d9; }}
+ .metric-card {{ display: inline-block; padding: 20px; margin: 10px; background: #f0f7ff; border-radius: 8px; text-align: center; min-width: 150px; }}
+ .metric-label {{ color: #666; font-size: 14px; }}
+ .section {{ margin: 30px 0; }}
+ .footer {{ margin-top: 40px; padding-top: 20px; border-top: 1px solid #ddd; color: #999; font-size: 12px; }}
+ </style>
+ </head>
+ <body>
+ <div class="container">
+ <h1>{title}</h1>
+ <p>Generated: {timestamp}</p>
+ {content}
+ <div class="footer">Generated by AdamOps v0.1.0</div>
+ </div>
+ </body>
+ </html>
+ """
+
+
+ class EvaluationReport:
+     """Model evaluation report generator."""
+
+     def __init__(self, model_name: str = "Model"):
+         self.model_name = model_name
+         self.sections = []
+         self.metrics = {}
+         self.timestamp = datetime.now().isoformat()
+
+     def add_metrics(self, metrics: Dict[str, float], title: str = "Performance Metrics"):
+         """Add metrics section."""
+         self.metrics.update(metrics)
+
+         cards = ""
+         for name, value in metrics.items():
+             if isinstance(value, float):
+                 value_str = f"{value:.4f}"
+             else:
+                 value_str = str(value)
+             cards += f'<div class="metric-card"><div class="metric">{value_str}</div><div class="metric-label">{name}</div></div>'
+
+         self.sections.append(f'<div class="section"><h2>{title}</h2>{cards}</div>')
+
+     def add_table(self, df: pd.DataFrame, title: str = "Results"):
+         """Add table section."""
+         table_html = df.to_html(index=False, classes='results-table')
+         self.sections.append(f'<div class="section"><h2>{title}</h2>{table_html}</div>')
+
+     def add_text(self, text: str, title: str = "Notes"):
+         """Add text section."""
+         self.sections.append(f'<div class="section"><h2>{title}</h2><p>{text}</p></div>')
+
+     def add_confusion_matrix(self, y_true: np.ndarray, y_pred: np.ndarray,
+                              labels: Optional[List[str]] = None):
+         """Add confusion matrix section."""
+         from sklearn.metrics import confusion_matrix
+         cm = confusion_matrix(y_true, y_pred)
+         cm_df = pd.DataFrame(cm,
+                              index=[f'Actual: {l}' for l in (labels or range(len(cm)))],
+                              columns=[f'Pred: {l}' for l in (labels or range(len(cm)))])
+         self.add_table(cm_df, "Confusion Matrix")
+
+     def add_classification_report(self, y_true: np.ndarray, y_pred: np.ndarray):
+         """Add classification report section."""
+         report = classification_report(y_true, y_pred, output_dict=True)
+         df = pd.DataFrame(report).T.reset_index().rename(columns={'index': 'class'})
+         self.add_table(df.round(4), "Classification Report")
+
+     def generate_html(self) -> str:
+         """Generate HTML report."""
+         content = "\n".join(self.sections)
+         return HTML_TEMPLATE.format(
+             title=f"{self.model_name} Evaluation Report",
+             timestamp=self.timestamp,
+             content=content
+         )
+
+     def save_html(self, filepath: str):
+         """Save HTML report to file."""
+         filepath = Path(filepath)
+         filepath.parent.mkdir(parents=True, exist_ok=True)
+
+         with open(filepath, 'w', encoding='utf-8') as f:
+             f.write(self.generate_html())
+
+         logger.info(f"Report saved to {filepath}")
+
+     def save_json(self, filepath: str):
+         """Save report data as JSON."""
+         data = {
+             "model_name": self.model_name,
+             "timestamp": self.timestamp,
+             "metrics": self.metrics,
+         }
+
+         with open(filepath, 'w') as f:
+             json.dump(data, f, indent=2, default=str)
+
+
+ def generate_report(
+     model_name: str, y_true: np.ndarray, y_pred: np.ndarray,
+     y_prob: Optional[np.ndarray] = None, task: str = "classification",
+     save_path: Optional[str] = None
+ ) -> EvaluationReport:
+     """
+     Generate evaluation report.
+
+     Args:
+         model_name: Name of the model.
+         y_true: True labels.
+         y_pred: Predicted labels.
+         y_prob: Probability predictions.
+         task: 'classification' or 'regression'.
+         save_path: Optional path to save HTML report.
+
+     Returns:
+         EvaluationReport object.
+     """
+     report = EvaluationReport(model_name)
+
+     # Add metrics
+     metrics = evaluate(y_true, y_pred, task, y_prob)
+     report.add_metrics(metrics)
+
+     # Add task-specific sections
+     if task == "classification":
+         report.add_confusion_matrix(y_true, y_pred)
+         report.add_classification_report(y_true, y_pred)
+
+     if save_path:
+         report.save_html(save_path)
+
+     return report
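For orientation, here is a minimal usage sketch of the EvaluationReport API added above. It is not part of the wheel itself; the metric values, DataFrame, and output paths are illustrative, and it assumes the package is importable as adamops.

    from adamops.evaluation.reports import EvaluationReport
    import pandas as pd

    report = EvaluationReport(model_name="churn-classifier")
    report.add_metrics({"accuracy": 0.912, "f1": 0.874})       # rendered as metric cards
    report.add_table(
        pd.DataFrame({"fold": [1, 2, 3], "accuracy": [0.90, 0.92, 0.91]}),
        title="Cross-Validation Folds",
    )
    report.add_text("Trained on the latest data snapshot.", title="Notes")
    report.save_html("reports/churn_eval.html")                # full rendered HTML document
    report.save_json("reports/churn_eval.json")                # metrics and metadata only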
adamops/evaluation/visualization.py
@@ -0,0 +1,238 @@
+ """
+ AdamOps Visualization Module
+
+ Provides plotting for model evaluation: confusion matrices, ROC curves, etc.
+ """
+
+ from typing import Any, Dict, List, Optional, Tuple, Union
+ import numpy as np
+ import pandas as pd
+
+ try:
+     import matplotlib.pyplot as plt
+     import seaborn as sns
+     PLT_AVAILABLE = True
+ except ImportError:
+     PLT_AVAILABLE = False  # plotting deps are optional; the "plt.Figure" annotations below are quoted so the module still imports without them
+
+ from sklearn import metrics as sklearn_metrics
+ from adamops.utils.logging import get_logger
+
+ logger = get_logger(__name__)
+
+
+ def _check_plt():
+     if not PLT_AVAILABLE:
+         raise ImportError("matplotlib and seaborn required. Install with: pip install matplotlib seaborn")
+
+
+ def plot_confusion_matrix(
+     y_true: np.ndarray, y_pred: np.ndarray,
+     labels: Optional[List[str]] = None, normalize: bool = False,
+     figsize: Tuple[int, int] = (8, 6), cmap: str = "Blues",
+     title: str = "Confusion Matrix", save_path: Optional[str] = None
+ ) -> "plt.Figure":
+     """Plot confusion matrix."""
+     _check_plt()
+
+     cm = sklearn_metrics.confusion_matrix(y_true, y_pred)
+     if normalize:
+         cm = cm.astype('float') / cm.sum(axis=1, keepdims=True)
+
+     fig, ax = plt.subplots(figsize=figsize)
+     sns.heatmap(cm, annot=True, fmt='.2f' if normalize else 'd',
+                 cmap=cmap, xticklabels=labels, yticklabels=labels, ax=ax)
+     ax.set_xlabel('Predicted')
+     ax.set_ylabel('Actual')
+     ax.set_title(title)
+
+     if save_path:
+         fig.savefig(save_path, dpi=150, bbox_inches='tight')
+
+     return fig
+
+
+ def plot_roc_curve(
+     y_true: np.ndarray, y_prob: np.ndarray,
+     figsize: Tuple[int, int] = (8, 6), title: str = "ROC Curve",
+     save_path: Optional[str] = None
+ ) -> "plt.Figure":
+     """Plot ROC curve."""
+     _check_plt()
+
+     if y_prob.ndim == 2:
+         y_prob = y_prob[:, 1]
+
+     fpr, tpr, _ = sklearn_metrics.roc_curve(y_true, y_prob)
+     auc = sklearn_metrics.roc_auc_score(y_true, y_prob)
+
+     fig, ax = plt.subplots(figsize=figsize)
+     ax.plot(fpr, tpr, 'b-', label=f'ROC (AUC = {auc:.3f})')
+     ax.plot([0, 1], [0, 1], 'k--', label='Random')
+     ax.set_xlabel('False Positive Rate')
+     ax.set_ylabel('True Positive Rate')
+     ax.set_title(title)
+     ax.legend()
+     ax.grid(True, alpha=0.3)
+
+     if save_path:
+         fig.savefig(save_path, dpi=150, bbox_inches='tight')
+
+     return fig
+
+
+ def plot_precision_recall_curve(
+     y_true: np.ndarray, y_prob: np.ndarray,
+     figsize: Tuple[int, int] = (8, 6), save_path: Optional[str] = None
+ ) -> "plt.Figure":
+     """Plot precision-recall curve."""
+     _check_plt()
+
+     if y_prob.ndim == 2:
+         y_prob = y_prob[:, 1]
+
+     precision, recall, _ = sklearn_metrics.precision_recall_curve(y_true, y_prob)
+     ap = sklearn_metrics.average_precision_score(y_true, y_prob)
+
+     fig, ax = plt.subplots(figsize=figsize)
+     ax.plot(recall, precision, 'b-', label=f'PR (AP = {ap:.3f})')
+     ax.set_xlabel('Recall')
+     ax.set_ylabel('Precision')
+     ax.set_title('Precision-Recall Curve')
+     ax.legend()
+     ax.grid(True, alpha=0.3)
+
+     if save_path:
+         fig.savefig(save_path, dpi=150, bbox_inches='tight')
+
+     return fig
+
+
+ def plot_feature_importance(
+     importance: np.ndarray, feature_names: List[str],
+     top_n: int = 20, figsize: Tuple[int, int] = (10, 8),
+     title: str = "Feature Importance", save_path: Optional[str] = None
+ ) -> "plt.Figure":
+     """Plot feature importance."""
+     _check_plt()
+
+     indices = np.argsort(importance)[-top_n:]
+
+     fig, ax = plt.subplots(figsize=figsize)
+     ax.barh(range(len(indices)), importance[indices], color='steelblue')
+     ax.set_yticks(range(len(indices)))
+     ax.set_yticklabels([feature_names[i] for i in indices])
+     ax.set_xlabel('Importance')
+     ax.set_title(title)
+
+     if save_path:
+         fig.savefig(save_path, dpi=150, bbox_inches='tight')
+
+     return fig
+
+
+ def plot_residuals(
+     y_true: np.ndarray, y_pred: np.ndarray,
+     figsize: Tuple[int, int] = (12, 5), save_path: Optional[str] = None
+ ) -> "plt.Figure":
+     """Plot residuals for regression."""
+     _check_plt()
+
+     residuals = y_true - y_pred
+
+     fig, axes = plt.subplots(1, 2, figsize=figsize)
+
+     # Residuals vs Predicted
+     axes[0].scatter(y_pred, residuals, alpha=0.5)
+     axes[0].axhline(y=0, color='r', linestyle='--')
+     axes[0].set_xlabel('Predicted')
+     axes[0].set_ylabel('Residuals')
+     axes[0].set_title('Residuals vs Predicted')
+
+     # Residual distribution
+     axes[1].hist(residuals, bins=30, edgecolor='black', alpha=0.7)
+     axes[1].set_xlabel('Residuals')
+     axes[1].set_ylabel('Frequency')
+     axes[1].set_title('Residual Distribution')
+
+     plt.tight_layout()
+
+     if save_path:
+         fig.savefig(save_path, dpi=150, bbox_inches='tight')
+
+     return fig
+
+
+ def plot_actual_vs_predicted(
+     y_true: np.ndarray, y_pred: np.ndarray,
+     figsize: Tuple[int, int] = (8, 8), save_path: Optional[str] = None
+ ) -> "plt.Figure":
+     """Plot actual vs predicted for regression."""
+     _check_plt()
+
+     fig, ax = plt.subplots(figsize=figsize)
+
+     ax.scatter(y_true, y_pred, alpha=0.5)
+     min_val = min(y_true.min(), y_pred.min())
+     max_val = max(y_true.max(), y_pred.max())
+     ax.plot([min_val, max_val], [min_val, max_val], 'r--', label='Perfect')
+     ax.set_xlabel('Actual')
+     ax.set_ylabel('Predicted')
+     ax.set_title('Actual vs Predicted')
+     ax.legend()
+
+     if save_path:
+         fig.savefig(save_path, dpi=150, bbox_inches='tight')
+
+     return fig
+
+
+ def plot_learning_curve(
+     train_sizes: np.ndarray, train_scores: np.ndarray, val_scores: np.ndarray,
+     figsize: Tuple[int, int] = (8, 6), save_path: Optional[str] = None
+ ) -> "plt.Figure":
+     """Plot learning curve."""
+     _check_plt()
+
+     fig, ax = plt.subplots(figsize=figsize)
+
+     train_mean = train_scores.mean(axis=1)
+     train_std = train_scores.std(axis=1)
+     val_mean = val_scores.mean(axis=1)
+     val_std = val_scores.std(axis=1)
+
+     ax.fill_between(train_sizes, train_mean - train_std, train_mean + train_std, alpha=0.1)
+     ax.fill_between(train_sizes, val_mean - val_std, val_mean + val_std, alpha=0.1)
+     ax.plot(train_sizes, train_mean, 'o-', label='Training')
+     ax.plot(train_sizes, val_mean, 'o-', label='Validation')
+
+     ax.set_xlabel('Training Size')
+     ax.set_ylabel('Score')
+     ax.set_title('Learning Curve')
+     ax.legend()
+     ax.grid(True, alpha=0.3)
+
+     if save_path:
+         fig.savefig(save_path, dpi=150, bbox_inches='tight')
+
+     return fig
+
+
+ def plot_model_comparison(
+     results: pd.DataFrame, metric: str = "cv_mean",
+     figsize: Tuple[int, int] = (10, 6), save_path: Optional[str] = None
+ ) -> "plt.Figure":
+     """Plot model comparison bar chart."""
+     _check_plt()
+
+     fig, ax = plt.subplots(figsize=figsize)
+
+     results_sorted = results.sort_values(metric, ascending=True)
+     ax.barh(results_sorted['algorithm'], results_sorted[metric], color='steelblue')
+     ax.set_xlabel(metric)
+     ax.set_title('Model Comparison')
+
+     if save_path:
+         fig.savefig(save_path, dpi=150, bbox_inches='tight')
+
+     return fig
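As with the reports module, a short illustrative sketch of the plotting helpers above, assuming matplotlib and seaborn are installed (otherwise _check_plt raises ImportError). The labels and scores are synthetic and only for demonstration.

    import numpy as np
    from adamops.evaluation import visualization as viz

    rng = np.random.default_rng(0)
    y_true = rng.integers(0, 2, size=200)                          # synthetic binary labels
    y_prob = np.clip(0.6 * y_true + 0.4 * rng.random(200), 0, 1)   # crude correlated scores
    y_pred = (y_prob >= 0.5).astype(int)

    viz.plot_confusion_matrix(y_true, y_pred, labels=["neg", "pos"], save_path="cm.png")
    viz.plot_roc_curve(y_true, y_prob, save_path="roc.png")
    viz.plot_precision_recall_curve(y_true, y_prob, save_path="pr.png")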
adamops/models/__init__.py
@@ -0,0 +1,21 @@
+ """
+ AdamOps Models Module
+
+ Provides model training and management capabilities:
+ - modelops: Train various ML models (regression, classification, clustering)
+ - registry: Version and track models with metadata
+ - ensembles: Create ensemble models (voting, stacking, blending)
+ - automl: Automated model selection and hyperparameter tuning
+ """
+
+ from adamops.models import modelops
+ from adamops.models import registry
+ from adamops.models import ensembles
+ from adamops.models import automl
+
+ __all__ = [
+     "modelops",
+     "registry",
+     "ensembles",
+     "automl",
+ ]
adamops/models/automl.py
@@ -0,0 +1,277 @@
+ """
+ AdamOps AutoML Module
+
+ Provides automated model selection and hyperparameter tuning.
+ """
+
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+ import time
+ import numpy as np
+ import pandas as pd
+ from sklearn.model_selection import cross_val_score
+
+ from adamops.utils.logging import get_logger
+ from adamops.utils.helpers import infer_task_type
+ from adamops.models.modelops import (
+     CLASSIFICATION_MODELS, REGRESSION_MODELS, TrainedModel, train
+ )
+
+ logger = get_logger(__name__)
+
+ try:
+     import optuna
+     optuna.logging.set_verbosity(optuna.logging.WARNING)
+     OPTUNA_AVAILABLE = True
+ except ImportError:
+     OPTUNA_AVAILABLE = False
+
+
+ # Default hyperparameter search spaces
+ PARAM_SPACES = {
+     "random_forest": {
+         "n_estimators": ("int", 50, 300),
+         "max_depth": ("int", 3, 20),
+         "min_samples_split": ("int", 2, 20),
+         "min_samples_leaf": ("int", 1, 10),
+     },
+     "gradient_boosting": {
+         "n_estimators": ("int", 50, 300),
+         "max_depth": ("int", 3, 10),
+         "learning_rate": ("float", 0.01, 0.3),
+         "min_samples_split": ("int", 2, 20),
+     },
+     "xgboost": {
+         "n_estimators": ("int", 50, 300),
+         "max_depth": ("int", 3, 12),
+         "learning_rate": ("float", 0.01, 0.3),
+         "subsample": ("float", 0.6, 1.0),
+         "colsample_bytree": ("float", 0.6, 1.0),
+     },
+     "lightgbm": {
+         "n_estimators": ("int", 50, 300),
+         "max_depth": ("int", 3, 12),
+         "learning_rate": ("float", 0.01, 0.3),
+         "num_leaves": ("int", 20, 100),
+         "subsample": ("float", 0.6, 1.0),
+     },
+     "ridge": {
+         "alpha": ("float", 0.001, 100.0, "log"),
+     },
+     "lasso": {
+         "alpha": ("float", 0.001, 100.0, "log"),
+     },
+     "knn": {
+         "n_neighbors": ("int", 1, 30),
+         "weights": ("categorical", ["uniform", "distance"]),
+     },
+ }
+
+
+ class AutoMLResult:
+     """Results from AutoML run."""
+
+     def __init__(self):
+         self.best_model: Optional[TrainedModel] = None
+         self.best_score: float = float("-inf")  # lets negative metrics (e.g. R^2 can be < 0) still select a best model
+         self.best_algorithm: str = ""
+         self.best_params: Dict = {}
+         self.leaderboard: List[Dict] = []
+         self.time_elapsed: float = 0.0
+
+     def summary(self) -> str:
+         lines = [
+             "=" * 50, "AutoML Results", "=" * 50,
+             f"Best Algorithm: {self.best_algorithm}",
+             f"Best Score: {self.best_score:.4f}",
+             f"Time Elapsed: {self.time_elapsed:.1f}s",
+             "", "Leaderboard:",
+         ]
+         for i, entry in enumerate(self.leaderboard[:10], 1):
+             lines.append(f" {i}. {entry['algorithm']}: {entry['score']:.4f}")
+         return "\n".join(lines)
+
+
+ def grid_search(
+     X, y, algorithm: str, param_grid: Dict[str, List],
+     task: str = "classification", cv: int = 5, scoring: Optional[str] = None
+ ) -> Tuple[Dict, float]:
+     """Grid search hyperparameter tuning."""
+     from sklearn.model_selection import GridSearchCV
+
+     models = CLASSIFICATION_MODELS if task == "classification" else REGRESSION_MODELS
+     if algorithm not in models:
+         raise ValueError(f"Unknown algorithm: {algorithm}")
+
+     model = models[algorithm]()
+     scoring = scoring or ("accuracy" if task == "classification" else "r2")
+
+     grid = GridSearchCV(model, param_grid, cv=cv, scoring=scoring, n_jobs=-1)
+     grid.fit(X, y)
+
+     return grid.best_params_, grid.best_score_
+
+
+ def random_search(
+     X, y, algorithm: str, param_distributions: Dict,
+     task: str = "classification", cv: int = 5, n_iter: int = 50,
+     scoring: Optional[str] = None
+ ) -> Tuple[Dict, float]:
+     """Random search hyperparameter tuning."""
+     from sklearn.model_selection import RandomizedSearchCV
+
+     models = CLASSIFICATION_MODELS if task == "classification" else REGRESSION_MODELS
+     model = models[algorithm]()
+     scoring = scoring or ("accuracy" if task == "classification" else "r2")
+
+     search = RandomizedSearchCV(
+         model, param_distributions, n_iter=n_iter, cv=cv,
+         scoring=scoring, n_jobs=-1, random_state=42
+     )
+     search.fit(X, y)
+
+     return search.best_params_, search.best_score_
+
+
+ def bayesian_search(
+     X, y, algorithm: str, task: str = "classification",
+     cv: int = 5, n_trials: int = 50, scoring: Optional[str] = None,
+     param_space: Optional[Dict] = None
+ ) -> Tuple[Dict, float]:
+     """Bayesian optimization with Optuna."""
+     if not OPTUNA_AVAILABLE:
+         raise ImportError("Optuna required. Install with: pip install optuna")
+
+     models = CLASSIFICATION_MODELS if task == "classification" else REGRESSION_MODELS
+     scoring = scoring or ("accuracy" if task == "classification" else "r2")
+     space = param_space or PARAM_SPACES.get(algorithm, {})
+
+     def objective(trial):
+         params = {}
+         for name, spec in space.items():
+             if spec[0] == "int":
+                 params[name] = trial.suggest_int(name, spec[1], spec[2])
+             elif spec[0] == "float":
+                 log = len(spec) > 3 and spec[3] == "log"
+                 params[name] = trial.suggest_float(name, spec[1], spec[2], log=log)
+             elif spec[0] == "categorical":
+                 params[name] = trial.suggest_categorical(name, spec[1])
+
+         model = models[algorithm](**params)
+         scores = cross_val_score(model, X, y, cv=cv, scoring=scoring)
+         return scores.mean()
+
+     study = optuna.create_study(direction="maximize")
+     study.optimize(objective, n_trials=n_trials, show_progress_bar=False)
+
+     return study.best_params, study.best_value
+
+
+ def run(
+     X: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray],
+     task: str = "auto", algorithms: Optional[List[str]] = None,
+     tuning: str = "bayesian", cv: int = 5, time_limit: int = 3600,
+     n_trials: int = 50, scoring: Optional[str] = None
+ ) -> AutoMLResult:
+     """
+     Run AutoML.
+
+     Args:
+         X: Features.
+         y: Target.
+         task: 'classification', 'regression', or 'auto'.
+         algorithms: Algorithms to try (None for all).
+         tuning: 'grid', 'random', 'bayesian', or 'none'.
+         cv: Cross-validation folds.
+         time_limit: Max time in seconds.
+         n_trials: Trials per algorithm for tuning.
+         scoring: Scoring metric.
+
+     Returns:
+         AutoMLResult: Results with best model and leaderboard.
+     """
+     start_time = time.time()
+     result = AutoMLResult()
+
+     # Auto-detect task
+     if task == "auto":
+         task = infer_task_type(y)
+         logger.info(f"Auto-detected task: {task}")
+
+     # Get algorithms
+     models = CLASSIFICATION_MODELS if task == "classification" else REGRESSION_MODELS
+     if algorithms is None:
+         algorithms = list(models.keys())
+
+     scoring = scoring or ("accuracy" if task == "classification" else "r2")
+     logger.info(f"Running AutoML with {len(algorithms)} algorithms")
+
+     for algo in algorithms:
+         if time.time() - start_time > time_limit:
+             logger.warning("Time limit reached")
+             break
+
+         try:
+             logger.info(f"Tuning {algo}...")
+
+             if tuning == "bayesian" and algo in PARAM_SPACES and OPTUNA_AVAILABLE:
+                 best_params, score = bayesian_search(
+                     X, y, algo, task, cv, min(n_trials, 30), scoring
+                 )
+             elif tuning == "none":
+                 model = models[algo]()
+                 scores = cross_val_score(model, X, y, cv=cv, scoring=scoring)
+                 best_params, score = {}, scores.mean()
+             else:
+                 # Default to random search
+                 from scipy.stats import randint, uniform
+                 param_dist = {}
+                 if algo in PARAM_SPACES:
+                     for name, spec in PARAM_SPACES[algo].items():
+                         if spec[0] == "int":
+                             param_dist[name] = randint(spec[1], spec[2])
+                         elif spec[0] == "float":
+                             param_dist[name] = uniform(spec[1], spec[2] - spec[1])
+
+                 if param_dist:
+                     best_params, score = random_search(
+                         X, y, algo, param_dist, task, cv, min(n_trials, 20), scoring
+                     )
+                 else:
+                     model = models[algo]()
+                     scores = cross_val_score(model, X, y, cv=cv, scoring=scoring)
+                     best_params, score = {}, scores.mean()
+
+             result.leaderboard.append({
+                 "algorithm": algo, "score": score, "params": best_params
+             })
+
+             if score > result.best_score:
+                 result.best_score = score
+                 result.best_algorithm = algo
+                 result.best_params = best_params
+
+         except Exception as e:
+             logger.warning(f"Failed {algo}: {e}")
+
+     # Sort leaderboard
+     result.leaderboard.sort(key=lambda x: x["score"], reverse=True)
+
+     # Train best model
+     if result.best_algorithm:
+         result.best_model = train(
+             X, y, task, result.best_algorithm, result.best_params
+         )
+
+     result.time_elapsed = time.time() - start_time
+     logger.info(f"AutoML complete. Best: {result.best_algorithm} ({result.best_score:.4f})")
+
+     return result
+
+
+ def quick_run(
+     X: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray],
+     task: str = "auto"
+ ) -> TrainedModel:
+     """Quick AutoML run with defaults."""
+     result = run(X, y, task, tuning="none", n_trials=10)
+     return result.best_model
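Finally, a hedged end-to-end sketch of the AutoML entry points defined above. It assumes scikit-learn is installed and uses its breast-cancer dataset purely as an example; optuna is optional, and without it run() falls back to random search or plain cross-validated scoring.

    from sklearn.datasets import load_breast_cancer
    from adamops.models import automl

    X, y = load_breast_cancer(return_X_y=True, as_frame=True)

    result = automl.run(X, y, task="auto", tuning="bayesian", cv=5, n_trials=20)
    print(result.summary())              # leaderboard of algorithms and their CV scores
    best = result.best_model             # TrainedModel wrapper for the winning algorithm

    # Or accept the defaults with no hyperparameter tuning:
    quick_model = automl.quick_run(X, y)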