adamops-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. adamops/__init__.py +40 -0
  2. adamops/cli.py +163 -0
  3. adamops/data/__init__.py +24 -0
  4. adamops/data/feature_engineering.py +284 -0
  5. adamops/data/loaders.py +922 -0
  6. adamops/data/preprocessors.py +227 -0
  7. adamops/data/splitters.py +218 -0
  8. adamops/data/validators.py +148 -0
  9. adamops/deployment/__init__.py +21 -0
  10. adamops/deployment/api.py +237 -0
  11. adamops/deployment/cloud.py +191 -0
  12. adamops/deployment/containerize.py +262 -0
  13. adamops/deployment/exporters.py +148 -0
  14. adamops/evaluation/__init__.py +24 -0
  15. adamops/evaluation/comparison.py +133 -0
  16. adamops/evaluation/explainability.py +143 -0
  17. adamops/evaluation/metrics.py +233 -0
  18. adamops/evaluation/reports.py +165 -0
  19. adamops/evaluation/visualization.py +238 -0
  20. adamops/models/__init__.py +21 -0
  21. adamops/models/automl.py +277 -0
  22. adamops/models/ensembles.py +228 -0
  23. adamops/models/modelops.py +308 -0
  24. adamops/models/registry.py +250 -0
  25. adamops/monitoring/__init__.py +21 -0
  26. adamops/monitoring/alerts.py +200 -0
  27. adamops/monitoring/dashboard.py +117 -0
  28. adamops/monitoring/drift.py +212 -0
  29. adamops/monitoring/performance.py +195 -0
  30. adamops/pipelines/__init__.py +15 -0
  31. adamops/pipelines/orchestrators.py +183 -0
  32. adamops/pipelines/workflows.py +212 -0
  33. adamops/utils/__init__.py +18 -0
  34. adamops/utils/config.py +457 -0
  35. adamops/utils/helpers.py +663 -0
  36. adamops/utils/logging.py +412 -0
  37. adamops-0.1.0.dist-info/METADATA +310 -0
  38. adamops-0.1.0.dist-info/RECORD +42 -0
  39. adamops-0.1.0.dist-info/WHEEL +5 -0
  40. adamops-0.1.0.dist-info/entry_points.txt +2 -0
  41. adamops-0.1.0.dist-info/licenses/LICENSE +21 -0
  42. adamops-0.1.0.dist-info/top_level.txt +1 -0
adamops/__init__.py ADDED
@@ -0,0 +1,40 @@
+ """
+ AdamOps - A comprehensive MLOps library for end-to-end machine learning workflows.
+
+ AdamOps provides tools for:
+ - Data loading, validation, cleaning, and feature engineering
+ - Model training, registry, and ensemble methods
+ - AutoML with hyperparameter tuning
+ - Model evaluation and explainability
+ - Deployment to various platforms
+ - Monitoring and drift detection
+ - Pipeline orchestration
+
+ Author: AdamOps Team
+ Version: 0.1.0
+ """
+
+ __version__ = "0.1.0"
+ __author__ = "AdamOps Team"
+
+ # Import core modules for easy access
+ from adamops.data import loaders, validators, preprocessors, splitters
+ from adamops.models import modelops, registry, ensembles, automl
+ from adamops.evaluation import metrics
+ from adamops.utils import config, logging as adamops_logging, helpers
+
+ __all__ = [
+     "loaders",
+     "validators",
+     "preprocessors",
+     "splitters",
+     "modelops",
+     "registry",
+     "ensembles",
+     "automl",
+     "metrics",
+     "config",
+     "adamops_logging",
+     "helpers",
+     "__version__",
+ ]
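A minimal usage sketch of the top-level package, assuming the wheel is installed as published; every name imported below is re-exported in __all__ above, and the CSV path is a placeholder:

    import adamops
    from adamops import loaders

    print(adamops.__version__)            # "0.1.0"
    df = loaders.load_auto("train.csv")   # placeholder path; load_auto also backs the CLI's --data option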
adamops/cli.py ADDED
@@ -0,0 +1,163 @@
+ """
+ AdamOps CLI Module
+
+ Command-line interface for AdamOps.
+ """
+
+ import sys
+ from pathlib import Path
+ from typing import Optional
+
+ try:
+     import click
+     from rich.console import Console
+     from rich.table import Table
+     CLICK_AVAILABLE = True
+ except ImportError:
+     CLICK_AVAILABLE = False
+
+ if CLICK_AVAILABLE:
+     console = Console()
+
+     @click.group()
+     @click.version_option(version="0.1.0", prog_name="adamops")
+     def main():
+         """AdamOps - MLOps made simple."""
+         pass
+
+     @main.command()
+     @click.option("--data", "-d", required=True, help="Path to data file")
+     @click.option("--target", "-t", required=True, help="Target column name")
+     @click.option("--algorithm", "-a", default="auto", help="Algorithm to use")
+     @click.option("--task", default="auto", help="Task type: classification, regression, auto")
+     @click.option("--output", "-o", default="model.joblib", help="Output model path")
+     def train(data: str, target: str, algorithm: str, task: str, output: str):
+         """Train a model."""
+         console.print(f"[bold blue]Loading data from {data}...[/]")
+
+         from adamops.data.loaders import load_auto
+         from adamops.models.automl import quick_run
+         from adamops.models.modelops import train as train_model
+         from adamops.deployment.exporters import export_joblib
+
+         df = load_auto(data)
+         X = df.drop(columns=[target])
+         y = df[target]
+
+         console.print(f"[bold blue]Training {algorithm} model...[/]")
+
+         if algorithm == "auto":
+             model = quick_run(X, y, task)
+         else:
+             model = train_model(X, y, task, algorithm)
+
+         export_joblib(model, output)
+         console.print(f"[bold green]Model saved to {output}[/]")
+
+     @main.command()
+     @click.option("--model", "-m", required=True, help="Path to model file")
+     @click.option("--data", "-d", required=True, help="Path to test data")
+     @click.option("--target", "-t", required=True, help="Target column name")
+     def evaluate(model: str, data: str, target: str):
+         """Evaluate a model."""
+         from adamops.deployment.exporters import load_model
+         from adamops.data.loaders import load_auto
+         from adamops.evaluation.metrics import evaluate as eval_metrics
+
+         console.print("[bold blue]Loading model and data...[/]")
+
+         model_obj = load_model(model)
+         df = load_auto(data)
+         X = df.drop(columns=[target])
+         y = df[target]
+
+         y_pred = model_obj.predict(X)
+         metrics = eval_metrics(y, y_pred)
+
+         table = Table(title="Evaluation Results")
+         table.add_column("Metric", style="cyan")
+         table.add_column("Value", style="green")
+
+         for name, value in metrics.items():
+             if isinstance(value, float):
+                 table.add_row(name, f"{value:.4f}")
+             else:
+                 table.add_row(name, str(value))
+
+         console.print(table)
+
+     @main.command()
+     @click.option("--model", "-m", required=True, help="Path to model file")
+     @click.option("--type", "deploy_type", default="api", help="Deployment type: api, docker")
+     @click.option("--port", "-p", default=8000, help="API port")
+     @click.option("--output", "-o", default="./deploy", help="Output directory for docker")
+     def deploy(model: str, deploy_type: str, port: int, output: str):
+         """Deploy a model."""
+         from adamops.deployment.exporters import load_model
+
+         model_obj = load_model(model)
+
+         if deploy_type == "api":
+             from adamops.deployment.api import run_api
+             console.print(f"[bold blue]Starting API on port {port}...[/]")
+             run_api(model_obj, port=port)
+
+         elif deploy_type == "docker":
+             from adamops.deployment.containerize import containerize
+             console.print("[bold blue]Creating Docker deployment...[/]")
+             result = containerize(model, output)
+             console.print(f"[bold green]Files created in {output}[/]")
+             for name, path in result.items():
+                 console.print(f" - {name}: {path}")
+
+     @main.command()
+     @click.option("--data", "-d", required=True, help="Path to data file")
+     def validate(data: str):
+         """Validate a data file."""
+         from adamops.data.loaders import load_auto
+         from adamops.data.validators import validate as validate_data
+
+         console.print(f"[bold blue]Validating {data}...[/]")
+
+         df = load_auto(data)
+         report = validate_data(df)
+
+         console.print(report.summary())
+
+     @main.command()
+     @click.argument("workflow_name")
+     def run_workflow(workflow_name: str):
+         """Run a predefined workflow."""
+         from adamops.pipelines.workflows import create_ml_pipeline
+
+         console.print(f"[bold blue]Running workflow: {workflow_name}[/]")
+
+         workflow = create_ml_pipeline(workflow_name)
+         result = workflow.run()
+
+         console.print("[bold green]Workflow completed![/]")
+         console.print(workflow.get_status())
+
+     @main.command()
+     def info():
+         """Show AdamOps information."""
+         from adamops import __version__
+
+         console.print("[bold blue]AdamOps - MLOps Made Simple[/]")
+         console.print(f"Version: {__version__}")
+         console.print()
+         console.print("Available commands:")
+         console.print(" train - Train a model")
+         console.print(" evaluate - Evaluate a model")
+         console.print(" deploy - Deploy a model")
+         console.print(" validate - Validate data")
+         console.print(" run-workflow - Run a workflow")
+
+ else:
+     def main():
+         print("CLI requires click and rich. Install with: pip install click rich")
+         sys.exit(1)
+
+
+ if __name__ == "__main__":
+     main()
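Assuming entry_points.txt wires an `adamops` console script to `adamops.cli:main` (the file is listed in this diff but its contents are not shown), the commands above would be invoked roughly as follows; all file and workflow names are placeholders:

    adamops train --data train.csv --target label --output model.joblib
    adamops evaluate --model model.joblib --data test.csv --target label
    adamops deploy --model model.joblib --type docker --output ./deploy
    adamops validate --data train.csv
    adamops run-workflow default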
adamops/data/__init__.py ADDED
@@ -0,0 +1,24 @@
+ """
+ AdamOps Data Module
+
+ Provides comprehensive data handling capabilities:
+ - loaders: Load data from various sources (CSV, Excel, JSON, SQL, API, compressed files)
+ - validators: Validate data types, missing values, duplicates, shapes, and statistics
+ - preprocessors: Clean data (handle missing values, outliers, duplicates, type conversion)
+ - feature_engineering: Encode, scale, and generate features
+ - splitters: Split data for training and evaluation
+ """
+
+ from adamops.data import loaders
+ from adamops.data import validators
+ from adamops.data import preprocessors
+ from adamops.data import feature_engineering
+ from adamops.data import splitters
+
+ __all__ = [
+     "loaders",
+     "validators",
+     "preprocessors",
+     "feature_engineering",
+     "splitters",
+ ]
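Grounded in how cli.py uses this package, a minimal load-and-validate sketch (the path is a placeholder; per the validate command above, validators.validate returns a report object exposing summary()):

    from adamops.data import loaders, validators

    df = loaders.load_auto("train.csv")   # placeholder path; format inferred by load_auto
    report = validators.validate(df)      # same call the CLI's validate command makes
    print(report.summary())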
adamops/data/feature_engineering.py ADDED
@@ -0,0 +1,284 @@
+ """
+ AdamOps Feature Engineering Module
+
+ Provides encoding, scaling, feature selection, and auto feature generation.
+ """
+
+ from typing import Dict, List, Optional, Tuple, Union
+ import numpy as np
+ import pandas as pd
+ from sklearn.preprocessing import (
+     OneHotEncoder, LabelEncoder, OrdinalEncoder, StandardScaler,
+     MinMaxScaler, RobustScaler, MaxAbsScaler, PolynomialFeatures
+ )
+ from sklearn.feature_selection import (
+     VarianceThreshold, SelectKBest, mutual_info_classif, mutual_info_regression, RFE
+ )
+ from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+ from adamops.utils.logging import get_logger
+
+ logger = get_logger(__name__)
+
+
+ # =============================================================================
+ # Encoding
+ # =============================================================================
+
+ def encode_onehot(
+     df: pd.DataFrame, columns: List[str], drop_first: bool = False,
+     handle_unknown: str = "ignore"
+ ) -> pd.DataFrame:
+     """One-hot encode categorical columns."""
+     df = df.copy()
+     for col in columns:
+         dummies = pd.get_dummies(df[col], prefix=col, drop_first=drop_first)
+         df = pd.concat([df.drop(col, axis=1), dummies], axis=1)
+     logger.info(f"One-hot encoded {len(columns)} columns")
+     return df
+
+
+ def encode_label(df: pd.DataFrame, columns: List[str]) -> Tuple[pd.DataFrame, Dict]:
+     """Label encode categorical columns. Returns df and encoders dict."""
+     df = df.copy()
+     encoders = {}
+     for col in columns:
+         le = LabelEncoder()
+         df[col] = le.fit_transform(df[col].astype(str))
+         encoders[col] = le
+     logger.info(f"Label encoded {len(columns)} columns")
+     return df, encoders
+
+
+ def encode_ordinal(
+     df: pd.DataFrame, columns: List[str],
+     categories: Optional[Dict[str, List]] = None
+ ) -> pd.DataFrame:
+     """Ordinal encode columns with optional category order."""
+     df = df.copy()
+     for col in columns:
+         if categories and col in categories:
+             cat_map = {v: i for i, v in enumerate(categories[col])}
+             df[col] = df[col].map(cat_map)
+         else:
+             df[col] = pd.Categorical(df[col]).codes
+     return df
+
+
+ def encode_target(
+     df: pd.DataFrame, columns: List[str], target: str, smoothing: float = 1.0
+ ) -> pd.DataFrame:
+     """Target encode categorical columns."""
+     df = df.copy()
+     global_mean = df[target].mean()
+
+     for col in columns:
+         agg = df.groupby(col)[target].agg(['mean', 'count'])
+         smooth = (agg['count'] * agg['mean'] + smoothing * global_mean) / (agg['count'] + smoothing)
+         df[col + '_target'] = df[col].map(smooth)
+         df = df.drop(col, axis=1)
+
+     return df
+
+
+ def encode(
+     df: pd.DataFrame, columns: List[str], method: str = "onehot", **kwargs
+ ) -> pd.DataFrame:
+     """Encode categorical columns with specified method."""
+     if method == "onehot":
+         return encode_onehot(df, columns, **kwargs)
+     elif method == "label":
+         return encode_label(df, columns, **kwargs)[0]
+     elif method == "ordinal":
+         return encode_ordinal(df, columns, **kwargs)
+     elif method == "target" and "target" in kwargs:
+         return encode_target(df, columns, kwargs["target"])
+     else:
+         raise ValueError(f"Unknown encoding method: {method}")
+
+
+ # =============================================================================
+ # Scaling
+ # =============================================================================
+
+ def scale_standard(df: pd.DataFrame, columns: Optional[List[str]] = None) -> pd.DataFrame:
+     """Standardize features (zero mean, unit variance)."""
+     df = df.copy()
+     cols = columns or df.select_dtypes(include=[np.number]).columns.tolist()
+     scaler = StandardScaler()
+     df[cols] = scaler.fit_transform(df[cols])
+     return df
+
+
+ def scale_minmax(df: pd.DataFrame, columns: Optional[List[str]] = None) -> pd.DataFrame:
+     """Scale features to [0, 1] range."""
+     df = df.copy()
+     cols = columns or df.select_dtypes(include=[np.number]).columns.tolist()
+     scaler = MinMaxScaler()
+     df[cols] = scaler.fit_transform(df[cols])
+     return df
+
+
+ def scale_robust(df: pd.DataFrame, columns: Optional[List[str]] = None) -> pd.DataFrame:
+     """Scale with median and IQR (robust to outliers)."""
+     df = df.copy()
+     cols = columns or df.select_dtypes(include=[np.number]).columns.tolist()
+     scaler = RobustScaler()
+     df[cols] = scaler.fit_transform(df[cols])
+     return df
+
+
+ def scale(
+     df: pd.DataFrame, method: str = "standard", columns: Optional[List[str]] = None
+ ) -> pd.DataFrame:
+     """Scale numeric columns with specified method."""
+     if method == "standard":
+         return scale_standard(df, columns)
+     elif method == "minmax":
+         return scale_minmax(df, columns)
+     elif method == "robust":
+         return scale_robust(df, columns)
+     elif method == "maxabs":
+         df = df.copy()
+         cols = columns or df.select_dtypes(include=[np.number]).columns.tolist()
+         df[cols] = MaxAbsScaler().fit_transform(df[cols])
+         return df
+     else:
+         raise ValueError(f"Unknown scaling method: {method}")
+
+
+ # =============================================================================
+ # Feature Selection
+ # =============================================================================
+
+ def select_by_variance(
+     df: pd.DataFrame, threshold: float = 0.0, columns: Optional[List[str]] = None
+ ) -> pd.DataFrame:
+     """Remove low variance features."""
+     cols = columns or df.select_dtypes(include=[np.number]).columns.tolist()
+     selector = VarianceThreshold(threshold=threshold)
+     selected = selector.fit_transform(df[cols])
+     selected_cols = [cols[i] for i in selector.get_support(indices=True)]
+     df_result = df.drop(cols, axis=1)
+     df_result[selected_cols] = selected
+     logger.info(f"Selected {len(selected_cols)}/{len(cols)} features by variance")
+     return df_result
+
+
+ def select_by_correlation(
+     df: pd.DataFrame, threshold: float = 0.9, target: Optional[str] = None
+ ) -> pd.DataFrame:
+     """Remove highly correlated features."""
+     df = df.copy()
+     num_cols = df.select_dtypes(include=[np.number]).columns.tolist()
+     if target and target in num_cols:
+         num_cols.remove(target)
+
+     corr = df[num_cols].corr().abs()
+     upper = corr.where(np.triu(np.ones(corr.shape), k=1).astype(bool))
+     to_drop = [c for c in upper.columns if any(upper[c] > threshold)]
+
+     logger.info(f"Dropping {len(to_drop)} highly correlated features")
+     return df.drop(to_drop, axis=1)
+
+
+ def select_by_importance(
+     df: pd.DataFrame, target: str, n_features: int = 10, task: str = "classification"
+ ) -> pd.DataFrame:
+     """Select features by tree-based importance."""
+     X = df.drop(target, axis=1).select_dtypes(include=[np.number])
+     y = df[target]
+
+     model = RandomForestClassifier(n_estimators=50, random_state=42) if task == "classification" \
+         else RandomForestRegressor(n_estimators=50, random_state=42)
+     model.fit(X, y)
+
+     importance = pd.Series(model.feature_importances_, index=X.columns).sort_values(ascending=False)
+     top_features = importance.head(n_features).index.tolist()
+
+     logger.info(f"Selected top {n_features} features by importance")
+     return df[[target] + top_features]
+
+
+ def select_features(
+     df: pd.DataFrame, target: str, method: str = "importance", n_features: int = 10, **kwargs
+ ) -> pd.DataFrame:
+     """Select features using specified method."""
+     if method == "variance":
+         return select_by_variance(df, **kwargs)
+     elif method == "correlation":
+         return select_by_correlation(df, target=target, **kwargs)
+     elif method == "importance":
+         return select_by_importance(df, target, n_features, **kwargs)
+     else:
+         raise ValueError(f"Unknown selection method: {method}")
+
+
+ # =============================================================================
+ # Feature Generation
+ # =============================================================================
+
+ def generate_polynomial(
+     df: pd.DataFrame, columns: List[str], degree: int = 2, include_bias: bool = False
+ ) -> pd.DataFrame:
+     """Generate polynomial features."""
+     df = df.copy()
+     poly = PolynomialFeatures(degree=degree, include_bias=include_bias)
+     poly_features = poly.fit_transform(df[columns])
+     poly_names = poly.get_feature_names_out(columns)
+     df_poly = pd.DataFrame(poly_features, columns=poly_names, index=df.index)
+     return pd.concat([df.drop(columns, axis=1), df_poly], axis=1)
+
+
+ def generate_interactions(
+     df: pd.DataFrame, columns: List[str], operations: List[str] = ["multiply"]
+ ) -> pd.DataFrame:
+     """Generate interaction features between columns."""
+     df = df.copy()
+     for i, col1 in enumerate(columns):
+         for col2 in columns[i+1:]:
+             if "multiply" in operations:
+                 df[f"{col1}_x_{col2}"] = df[col1] * df[col2]
+             if "add" in operations:
+                 df[f"{col1}_+_{col2}"] = df[col1] + df[col2]
+             if "divide" in operations:
+                 df[f"{col1}_/_{col2}"] = df[col1] / (df[col2] + 1e-8)
+     return df
+
+
+ def generate_datetime_features(df: pd.DataFrame, column: str) -> pd.DataFrame:
+     """Extract datetime features from a column."""
+     df = df.copy()
+     dt = pd.to_datetime(df[column])
+     prefix = column
+     df[f"{prefix}_year"] = dt.dt.year
+     df[f"{prefix}_month"] = dt.dt.month
+     df[f"{prefix}_day"] = dt.dt.day
+     df[f"{prefix}_dayofweek"] = dt.dt.dayofweek
+     df[f"{prefix}_hour"] = dt.dt.hour
+     df[f"{prefix}_is_weekend"] = dt.dt.dayofweek.isin([5, 6]).astype(int)
+     return df
+
+
+ def auto_feature_engineering(
+     df: pd.DataFrame, target: Optional[str] = None,
+     polynomial: bool = False, interactions: bool = False, datetime_cols: Optional[List[str]] = None
+ ) -> pd.DataFrame:
+     """Automatic feature engineering pipeline."""
+     logger.info("Running auto feature engineering")
+
+     num_cols = df.select_dtypes(include=[np.number]).columns.tolist()
+     if target and target in num_cols:
+         num_cols.remove(target)
+
+     if datetime_cols:
+         for col in datetime_cols:
+             df = generate_datetime_features(df, col)
+
+     if polynomial and len(num_cols) <= 5:
+         df = generate_polynomial(df, num_cols[:5], degree=2)
+
+     if interactions and len(num_cols) >= 2:
+         df = generate_interactions(df, num_cols[:4])
+
+     logger.info(f"Feature engineering complete. New shape: {df.shape}")
+     return df
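A self-contained sketch exercising the helpers above; the toy frame and its column names are invented for illustration, but each call matches a signature defined in this file:

    import pandas as pd
    from adamops.data.feature_engineering import (
        encode, scale, generate_datetime_features, select_features
    )

    df = pd.DataFrame({
        "city": ["ny", "sf", "ny", "sf"],
        "x1": [1.0, 2.0, 3.0, 4.0],
        "x2": [10.0, 22.0, 29.0, 41.0],
        "ts": ["2024-01-05", "2024-01-06", "2024-01-07", "2024-01-08"],
        "y": [0, 1, 0, 1],
    })

    df = encode(df, ["city"], method="onehot")        # dispatches to encode_onehot
    df = generate_datetime_features(df, "ts")         # adds ts_year, ts_month, ..., ts_is_weekend
    df = scale(df, method="standard", columns=["x1", "x2"])
    df = select_features(df, target="y", method="correlation", threshold=0.95)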