adamops 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. adamops/__init__.py +40 -0
  2. adamops/cli.py +163 -0
  3. adamops/data/__init__.py +24 -0
  4. adamops/data/feature_engineering.py +284 -0
  5. adamops/data/loaders.py +922 -0
  6. adamops/data/preprocessors.py +227 -0
  7. adamops/data/splitters.py +218 -0
  8. adamops/data/validators.py +148 -0
  9. adamops/deployment/__init__.py +21 -0
  10. adamops/deployment/api.py +237 -0
  11. adamops/deployment/cloud.py +191 -0
  12. adamops/deployment/containerize.py +262 -0
  13. adamops/deployment/exporters.py +148 -0
  14. adamops/evaluation/__init__.py +24 -0
  15. adamops/evaluation/comparison.py +133 -0
  16. adamops/evaluation/explainability.py +143 -0
  17. adamops/evaluation/metrics.py +233 -0
  18. adamops/evaluation/reports.py +165 -0
  19. adamops/evaluation/visualization.py +238 -0
  20. adamops/models/__init__.py +21 -0
  21. adamops/models/automl.py +277 -0
  22. adamops/models/ensembles.py +228 -0
  23. adamops/models/modelops.py +308 -0
  24. adamops/models/registry.py +250 -0
  25. adamops/monitoring/__init__.py +21 -0
  26. adamops/monitoring/alerts.py +200 -0
  27. adamops/monitoring/dashboard.py +117 -0
  28. adamops/monitoring/drift.py +212 -0
  29. adamops/monitoring/performance.py +195 -0
  30. adamops/pipelines/__init__.py +15 -0
  31. adamops/pipelines/orchestrators.py +183 -0
  32. adamops/pipelines/workflows.py +212 -0
  33. adamops/utils/__init__.py +18 -0
  34. adamops/utils/config.py +457 -0
  35. adamops/utils/helpers.py +663 -0
  36. adamops/utils/logging.py +412 -0
  37. adamops-0.1.0.dist-info/METADATA +310 -0
  38. adamops-0.1.0.dist-info/RECORD +42 -0
  39. adamops-0.1.0.dist-info/WHEEL +5 -0
  40. adamops-0.1.0.dist-info/entry_points.txt +2 -0
  41. adamops-0.1.0.dist-info/licenses/LICENSE +21 -0
  42. adamops-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,228 @@
1
+ """
2
+ AdamOps Ensemble Models Module
3
+
4
+ Provides voting, stacking, blending, and weighted averaging ensembles.
5
+ """
6
+
7
+ from typing import Any, Dict, List, Optional, Tuple, Union
8
+ import numpy as np
9
+ import pandas as pd
10
+ from sklearn.ensemble import VotingClassifier, VotingRegressor, StackingClassifier, StackingRegressor
11
+ from sklearn.linear_model import LogisticRegression, Ridge
12
+ from sklearn.model_selection import cross_val_predict
13
+
14
+ from adamops.utils.logging import get_logger
15
+ from adamops.models.modelops import CLASSIFICATION_MODELS, REGRESSION_MODELS
16
+
17
+ logger = get_logger(__name__)
18
+
19
+
20
+ class VotingEnsemble:
21
+ """Voting ensemble for classification or regression."""
22
+
23
+ def __init__(self, estimators: List[Tuple[str, Any]], voting: str = "soft",
24
+ weights: Optional[List[float]] = None, task: str = "classification"):
25
+ self.estimators = estimators
26
+ self.voting = voting
27
+ self.weights = weights
28
+ self.task = task
29
+
30
+ if task == "classification":
31
+ self.model = VotingClassifier(estimators, voting=voting, weights=weights)
32
+ else:
33
+ self.model = VotingRegressor(estimators, weights=weights)
34
+
35
+ def fit(self, X, y):
36
+ self.model.fit(X, y)
37
+ return self
38
+
39
+ def predict(self, X):
40
+ return self.model.predict(X)
41
+
42
+ def predict_proba(self, X):
43
+ if self.task == "classification" and hasattr(self.model, "predict_proba"):
44
+ return self.model.predict_proba(X)
45
+ raise ValueError("Not available for regression")
46
+
47
+
48
+ class StackingEnsemble:
49
+ """Stacking ensemble with meta-learner."""
50
+
51
+ def __init__(self, estimators: List[Tuple[str, Any]],
52
+ final_estimator: Optional[Any] = None,
53
+ task: str = "classification", cv: int = 5):
54
+ self.estimators = estimators
55
+ self.task = task
56
+ self.cv = cv
57
+
58
+ if final_estimator is None:
59
+ final_estimator = LogisticRegression() if task == "classification" else Ridge()
60
+
61
+ if task == "classification":
62
+ self.model = StackingClassifier(
63
+ estimators, final_estimator=final_estimator, cv=cv
64
+ )
65
+ else:
66
+ self.model = StackingRegressor(
67
+ estimators, final_estimator=final_estimator, cv=cv
68
+ )
69
+
70
+ def fit(self, X, y):
71
+ self.model.fit(X, y)
72
+ return self
73
+
74
+ def predict(self, X):
75
+ return self.model.predict(X)
76
+
77
+
78
+ class BlendingEnsemble:
79
+ """Blending ensemble (holdout-based stacking)."""
80
+
81
+ def __init__(self, estimators: List[Tuple[str, Any]],
82
+ final_estimator: Optional[Any] = None,
83
+ task: str = "classification", blend_ratio: float = 0.2):
84
+ self.estimators = estimators
85
+ self.task = task
86
+ self.blend_ratio = blend_ratio
87
+ self.final_estimator = final_estimator or (
88
+ LogisticRegression() if task == "classification" else Ridge()
89
+ )
90
+ self.fitted_estimators = []
91
+
92
+ def fit(self, X, y):
93
+ from sklearn.model_selection import train_test_split
94
+
95
+ # Split for blending
96
+ X_train, X_blend, y_train, y_blend = train_test_split(
97
+ X, y, test_size=self.blend_ratio, random_state=42
98
+ )
99
+
100
+ # Fit base models and get blend predictions
101
+ blend_features = []
102
+ self.fitted_estimators = []
103
+
104
+ for name, estimator in self.estimators:
105
+ estimator.fit(X_train, y_train)
106
+ self.fitted_estimators.append((name, estimator))
107
+
108
+ if self.task == "classification" and hasattr(estimator, "predict_proba"):
109
+ preds = estimator.predict_proba(X_blend)[:, 1]
110
+ else:
111
+ preds = estimator.predict(X_blend)
112
+ blend_features.append(preds)
113
+
114
+ # Stack blend predictions
115
+ blend_X = np.column_stack(blend_features)
116
+
117
+ # Fit meta-learner
118
+ self.final_estimator.fit(blend_X, y_blend)
119
+
120
+ return self
121
+
122
+ def predict(self, X):
123
+ # Get predictions from base models
124
+ features = []
125
+ for name, estimator in self.fitted_estimators:
126
+ if self.task == "classification" and hasattr(estimator, "predict_proba"):
127
+ preds = estimator.predict_proba(X)[:, 1]
128
+ else:
129
+ preds = estimator.predict(X)
130
+ features.append(preds)
131
+
132
+ meta_X = np.column_stack(features)
133
+ return self.final_estimator.predict(meta_X)
134
+
135
+
136
+ class WeightedAverageEnsemble:
137
+ """Weighted average ensemble."""
138
+
139
+ def __init__(self, estimators: List[Tuple[str, Any]],
140
+ weights: Optional[List[float]] = None,
141
+ task: str = "classification"):
142
+ self.estimators = estimators
143
+ self.weights = weights or [1.0 / len(estimators)] * len(estimators)
144
+ self.task = task
145
+ self.fitted_estimators = []
146
+
147
+ def fit(self, X, y):
148
+ self.fitted_estimators = []
149
+ for name, estimator in self.estimators:
150
+ estimator.fit(X, y)
151
+ self.fitted_estimators.append((name, estimator))
152
+ return self
153
+
154
+ def predict(self, X):
155
+ predictions = []
156
+ for (name, estimator), weight in zip(self.fitted_estimators, self.weights):
157
+ pred = estimator.predict(X)
158
+ predictions.append(pred * weight)
159
+
160
+ weighted_sum = sum(predictions)
161
+
162
+ if self.task == "classification":
163
+ return (weighted_sum > 0.5).astype(int)
164
+ return weighted_sum
165
+
166
+ def predict_proba(self, X):
167
+ if self.task != "classification":
168
+ raise ValueError("Not available for regression")
169
+
170
+ probas = []
171
+ for (name, estimator), weight in zip(self.fitted_estimators, self.weights):
172
+ if hasattr(estimator, "predict_proba"):
173
+ probas.append(estimator.predict_proba(X) * weight)
174
+
175
+ return sum(probas)
176
+
177
+
178
+ def create_voting_ensemble(
179
+ algorithms: List[str], task: str = "classification",
180
+ voting: str = "soft", weights: Optional[List[float]] = None
181
+ ) -> VotingEnsemble:
182
+ """Create voting ensemble from algorithm names."""
183
+ models = CLASSIFICATION_MODELS if task == "classification" else REGRESSION_MODELS
184
+ estimators = [(alg, models[alg]()) for alg in algorithms if alg in models]
185
+ return VotingEnsemble(estimators, voting=voting, weights=weights, task=task)
186
+
187
+
188
+ def create_stacking_ensemble(
189
+ algorithms: List[str], task: str = "classification",
190
+ final_estimator: Optional[Any] = None, cv: int = 5
191
+ ) -> StackingEnsemble:
192
+ """Create stacking ensemble from algorithm names."""
193
+ models = CLASSIFICATION_MODELS if task == "classification" else REGRESSION_MODELS
194
+ estimators = [(alg, models[alg]()) for alg in algorithms if alg in models]
195
+ return StackingEnsemble(estimators, final_estimator, task, cv)
196
+
197
+
198
+ def auto_ensemble(
199
+ X, y, task: str = "classification", top_n: int = 3, cv: int = 5
200
+ ) -> Tuple[Any, Dict]:
201
+ """
202
+ Automatically select and create best ensemble.
203
+
204
+ Returns:
205
+ (ensemble, results): Best ensemble and evaluation results.
206
+ """
207
+ from adamops.models.modelops import compare_models
208
+
209
+ # Compare base models
210
+ comparison = compare_models(X, y, task, cv=cv)
211
+ top_algorithms = comparison.head(top_n)["algorithm"].tolist()
212
+
213
+ logger.info(f"Selected top {top_n} algorithms: {top_algorithms}")
214
+
215
+ # Create and evaluate ensembles
216
+ results = {}
217
+
218
+ # Voting
219
+ voting = create_voting_ensemble(top_algorithms, task)
220
+ voting.fit(X, y)
221
+ results["voting"] = voting
222
+
223
+ # Stacking
224
+ stacking = create_stacking_ensemble(top_algorithms, task, cv=cv)
225
+ stacking.fit(X, y)
226
+ results["stacking"] = stacking
227
+
228
+ return stacking, {"algorithms": top_algorithms, "ensembles": list(results.keys())}
@@ -0,0 +1,308 @@
1
+ """
2
+ AdamOps ModelOps Module
3
+
4
+ Provides model training for regression, classification, and clustering.
5
+ """
6
+
7
+ from typing import Any, Dict, List, Optional, Tuple, Union
8
+ import numpy as np
9
+ import pandas as pd
10
+ import joblib
11
+ from pathlib import Path
12
+
13
+ from sklearn.linear_model import Ridge, Lasso, ElasticNet, LogisticRegression
14
+ from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
15
+ from sklearn.ensemble import (
16
+ GradientBoostingClassifier, GradientBoostingRegressor,
17
+ RandomForestClassifier, RandomForestRegressor
18
+ )
19
+ from sklearn.naive_bayes import GaussianNB
20
+ from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
21
+ from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
22
+ from sklearn.mixture import GaussianMixture
23
+
24
+ try:
25
+ import xgboost as xgb
26
+ XGB_AVAILABLE = True
27
+ except ImportError:
28
+ XGB_AVAILABLE = False
29
+
30
+ try:
31
+ import lightgbm as lgb
32
+ LGB_AVAILABLE = True
33
+ except ImportError:
34
+ LGB_AVAILABLE = False
35
+
36
+ from adamops.utils.logging import get_logger
37
+ from adamops.utils.helpers import infer_task_type
38
+
39
+ logger = get_logger(__name__)
40
+
41
+
42
# Model Registry: algorithm name -> estimator class, one dict per task.
# These dicts are the single lookup source used by train(), cross_validate(),
# compare_models() and the ensemble builders in ensembles.py.
REGRESSION_MODELS = {
    "ridge": Ridge,
    "lasso": Lasso,
    "elasticnet": ElasticNet,
    "decision_tree": DecisionTreeRegressor,
    "random_forest": RandomForestRegressor,
    "gradient_boosting": GradientBoostingRegressor,
    "knn": KNeighborsRegressor,
}

CLASSIFICATION_MODELS = {
    "logistic": LogisticRegression,
    "decision_tree": DecisionTreeClassifier,
    "random_forest": RandomForestClassifier,
    "gradient_boosting": GradientBoostingClassifier,
    "naive_bayes": GaussianNB,
    "knn": KNeighborsClassifier,
}

CLUSTERING_MODELS = {
    "kmeans": KMeans,
    "dbscan": DBSCAN,
    "hierarchical": AgglomerativeClustering,
    "gmm": GaussianMixture,  # n_components is mapped from n_clusters in train_clustering()
}

# Optional boosters: registered only when the import at module top succeeded,
# so the package works without these extras installed.
# Add XGBoost if available
if XGB_AVAILABLE:
    REGRESSION_MODELS["xgboost"] = xgb.XGBRegressor
    CLASSIFICATION_MODELS["xgboost"] = xgb.XGBClassifier

# Add LightGBM if available
if LGB_AVAILABLE:
    REGRESSION_MODELS["lightgbm"] = lgb.LGBMRegressor
    CLASSIFICATION_MODELS["lightgbm"] = lgb.LGBMClassifier
78
+
79
+
80
class TrainedModel:
    """Wrapper bundling a fitted model with its training metadata."""

    def __init__(self, model: Any, task: str, algorithm: str, params: Dict,
                 feature_names: Optional[List[str]] = None):
        # model: the fitted estimator; task/algorithm/params record how it
        # was trained; feature_names preserves column order when known.
        self.model = model
        self.task = task
        self.algorithm = algorithm
        self.params = params
        self.feature_names = feature_names
        self.is_fitted = True

    def predict(self, X: Union[pd.DataFrame, np.ndarray]) -> np.ndarray:
        """Delegate prediction to the wrapped model."""
        return self.model.predict(X)

    def predict_proba(self, X: Union[pd.DataFrame, np.ndarray]) -> np.ndarray:
        """Predict class probabilities (classification only).

        Raises:
            ValueError: If the wrapped model has no ``predict_proba``.
        """
        if not hasattr(self.model, 'predict_proba'):
            raise ValueError("Model does not support probability predictions")
        return self.model.predict_proba(X)

    def save(self, filepath: Union[str, Path]) -> None:
        """Persist this wrapper (model + metadata) with joblib."""
        target = Path(filepath)
        target.parent.mkdir(parents=True, exist_ok=True)
        joblib.dump(self, target)
        logger.info(f"Model saved to {target}")

    @classmethod
    def load(cls, filepath: Union[str, Path]) -> "TrainedModel":
        """Load a previously saved TrainedModel."""
        return joblib.load(filepath)
113
+
114
+
115
def get_available_models(task: str = "classification") -> List[str]:
    """Return the registered algorithm names for *task* ([] for unknown tasks)."""
    registries = {
        "classification": CLASSIFICATION_MODELS,
        "regression": REGRESSION_MODELS,
        "clustering": CLUSTERING_MODELS,
    }
    # Iterating a dict yields its keys; an unknown task maps to an empty dict.
    return list(registries.get(task, {}))
124
+
125
+
126
def train(
    X: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray],
    task: str = "auto", algorithm: str = "random_forest",
    params: Optional[Dict] = None, random_state: int = 42
) -> TrainedModel:
    """
    Train a model.

    Args:
        X: Features.
        y: Target.
        task: 'classification', 'regression', or 'auto' (inferred from y).
        algorithm: Model algorithm name (see get_available_models).
        params: Model hyperparameters.
        random_state: Random seed (dropped for models that don't accept one).

    Returns:
        TrainedModel: Trained model wrapper.

    Raises:
        ValueError: On unknown task or algorithm.
    """
    # Auto-detect task type from the target when requested
    if task == "auto":
        task = infer_task_type(y)
        logger.info(f"Auto-detected task: {task}")

    # Resolve the model class from the appropriate registry
    if task in ["classification", "multiclass"]:
        if algorithm not in CLASSIFICATION_MODELS:
            raise ValueError(f"Unknown classification algorithm: {algorithm}")
        model_class = CLASSIFICATION_MODELS[algorithm]
    elif task == "regression":
        if algorithm not in REGRESSION_MODELS:
            raise ValueError(f"Unknown regression algorithm: {algorithm}")
        model_class = REGRESSION_MODELS[algorithm]
    else:
        raise ValueError(f"Unknown task: {task}")

    # Merge defaults with user params (user params win)
    default_params = {"random_state": random_state}
    if params:
        default_params.update(params)

    # Keep only params the model's constructor accepts (e.g. GaussianNB
    # takes no random_state)
    import inspect
    sig = inspect.signature(model_class)
    valid_params = {k: v for k, v in default_params.items() if k in sig.parameters}

    # BUG FIX: user-supplied hyperparameters the model does not accept were
    # silently discarded, hiding typos; surface them with a warning.
    if params:
        dropped = sorted(set(params) - set(valid_params))
        if dropped:
            logger.warning(f"Ignoring params not accepted by {algorithm}: {dropped}")

    # Create and train model
    logger.info(f"Training {algorithm} for {task}")
    model = model_class(**valid_params)
    model.fit(X, y)

    feature_names = X.columns.tolist() if isinstance(X, pd.DataFrame) else None

    return TrainedModel(
        model=model, task=task, algorithm=algorithm,
        params=valid_params, feature_names=feature_names
    )
183
+
184
+
185
def train_regression(
    X: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray],
    algorithm: str = "ridge", params: Optional[Dict] = None
) -> TrainedModel:
    """Convenience wrapper around train() with the task fixed to regression."""
    return train(X, y, task="regression", algorithm=algorithm, params=params)
191
+
192
+
193
def train_classification(
    X: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray],
    algorithm: str = "random_forest", params: Optional[Dict] = None
) -> TrainedModel:
    """Convenience wrapper around train() with the task fixed to classification."""
    return train(X, y, task="classification", algorithm=algorithm, params=params)
199
+
200
+
201
def train_clustering(
    X: Union[pd.DataFrame, np.ndarray], algorithm: str = "kmeans",
    n_clusters: int = 3, params: Optional[Dict] = None
) -> Tuple[Any, np.ndarray]:
    """
    Fit a clustering model and return it with its cluster labels.

    Args:
        X: Features.
        algorithm: One of CLUSTERING_MODELS ('kmeans', 'dbscan',
            'hierarchical', 'gmm').
        n_clusters: Desired cluster count (ignored by dbscan, which infers it).
        params: Extra model parameters (override the defaults).

    Returns:
        (model, labels): Fitted model and cluster labels.

    Raises:
        ValueError: If the algorithm is not registered.
    """
    if algorithm not in CLUSTERING_MODELS:
        raise ValueError(f"Unknown clustering algorithm: {algorithm}")

    model_class = CLUSTERING_MODELS[algorithm]

    # Map the requested cluster count onto each algorithm's own parameter
    # name; DBSCAN takes no cluster-count parameter at all.
    cluster_kwargs = {
        "kmeans": {"n_clusters": n_clusters},
        "hierarchical": {"n_clusters": n_clusters},
        "gmm": {"n_components": n_clusters},
    }
    model_params = dict(cluster_kwargs.get(algorithm, {}))
    if params:
        model_params.update(params)

    logger.info(f"Training {algorithm} clustering")
    model = model_class(**model_params)

    # GMM path: fit first, then assign components via predict; the other
    # algorithms expose fit_predict directly.
    if algorithm == "gmm":
        model.fit(X)
        labels = model.predict(X)
    else:
        labels = model.fit_predict(X)

    return model, labels
236
+
237
+
238
def cross_validate(
    X: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray],
    task: str = "classification", algorithm: str = "random_forest",
    cv: int = 5, scoring: Optional[str] = None, params: Optional[Dict] = None
) -> Dict[str, Any]:
    """
    Cross-validate a model.

    Args:
        X: Features.
        y: Target.
        task: 'classification' or 'regression'.
        algorithm: Registry algorithm name.
        cv: Number of folds.
        scoring: sklearn scoring name; defaults to accuracy / r2 by task.
        params: Hyperparameters applied via set_params.

    Returns:
        Dict with train_scores, test_scores, and mean/std values.

    Raises:
        ValueError: If the algorithm is not registered for the task.
    """
    # Local alias: this function intentionally shadows sklearn's name.
    from sklearn.model_selection import cross_validate as sklearn_cv

    # BUG FIX: an unknown algorithm previously surfaced as a bare KeyError;
    # raise the same ValueError style as train() for consistency.
    registry = CLASSIFICATION_MODELS if task == "classification" else REGRESSION_MODELS
    if algorithm not in registry:
        raise ValueError(f"Unknown {task} algorithm: {algorithm}")
    model = registry[algorithm]()

    if params:
        model.set_params(**params)

    if scoring is None:
        scoring = "accuracy" if task == "classification" else "r2"

    logger.info(f"Cross-validating {algorithm} with {cv} folds")

    results = sklearn_cv(model, X, y, cv=cv, scoring=scoring, return_train_score=True)

    return {
        "train_scores": results["train_score"].tolist(),
        "test_scores": results["test_score"].tolist(),
        "train_mean": float(results["train_score"].mean()),
        "train_std": float(results["train_score"].std()),
        "test_mean": float(results["test_score"].mean()),
        "test_std": float(results["test_score"].std()),
        "fit_time": float(results["fit_time"].mean()),
    }
276
+
277
+
278
def compare_models(
    X: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray],
    task: str = "classification", algorithms: Optional[List[str]] = None,
    cv: int = 5, scoring: Optional[str] = None
) -> pd.DataFrame:
    """
    Compare multiple models via cross-validation.

    Args:
        X: Features.
        y: Target.
        task: 'classification' or 'regression'.
        algorithms: Algorithm names to compare; defaults to the full registry.
        cv: Number of folds.
        scoring: sklearn scoring name (task default when None).

    Returns:
        DataFrame sorted by cv_mean (best first); empty, but with the
        expected columns, when no candidate succeeded.
    """
    _COLUMNS = ["algorithm", "cv_mean", "cv_std", "train_mean", "fit_time"]

    if algorithms is None:
        algorithms = list(CLASSIFICATION_MODELS.keys()) if task == "classification" \
            else list(REGRESSION_MODELS.keys())

    results = []
    for algo in algorithms:
        try:
            cv_results = cross_validate(X, y, task, algo, cv, scoring)
            results.append({
                "algorithm": algo,
                "cv_mean": cv_results["test_mean"],
                "cv_std": cv_results["test_std"],
                "train_mean": cv_results["train_mean"],
                "fit_time": cv_results["fit_time"],
            })
        except Exception as e:
            # Best-effort comparison: a failing candidate is logged, not fatal.
            logger.warning(f"Failed to train {algo}: {e}")

    # BUG FIX: sort_values("cv_mean") raised KeyError on an empty result set
    # (all candidates failed, or an empty algorithm list); return an empty,
    # correctly-shaped DataFrame instead.
    if not results:
        return pd.DataFrame(columns=_COLUMNS)

    return pd.DataFrame(results).sort_values("cv_mean", ascending=False)