adamops-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. adamops/__init__.py +40 -0
  2. adamops/cli.py +163 -0
  3. adamops/data/__init__.py +24 -0
  4. adamops/data/feature_engineering.py +284 -0
  5. adamops/data/loaders.py +922 -0
  6. adamops/data/preprocessors.py +227 -0
  7. adamops/data/splitters.py +218 -0
  8. adamops/data/validators.py +148 -0
  9. adamops/deployment/__init__.py +21 -0
  10. adamops/deployment/api.py +237 -0
  11. adamops/deployment/cloud.py +191 -0
  12. adamops/deployment/containerize.py +262 -0
  13. adamops/deployment/exporters.py +148 -0
  14. adamops/evaluation/__init__.py +24 -0
  15. adamops/evaluation/comparison.py +133 -0
  16. adamops/evaluation/explainability.py +143 -0
  17. adamops/evaluation/metrics.py +233 -0
  18. adamops/evaluation/reports.py +165 -0
  19. adamops/evaluation/visualization.py +238 -0
  20. adamops/models/__init__.py +21 -0
  21. adamops/models/automl.py +277 -0
  22. adamops/models/ensembles.py +228 -0
  23. adamops/models/modelops.py +308 -0
  24. adamops/models/registry.py +250 -0
  25. adamops/monitoring/__init__.py +21 -0
  26. adamops/monitoring/alerts.py +200 -0
  27. adamops/monitoring/dashboard.py +117 -0
  28. adamops/monitoring/drift.py +211 -0
  29. adamops/monitoring/performance.py +195 -0
  30. adamops/pipelines/__init__.py +15 -0
  31. adamops/pipelines/orchestrators.py +183 -0
  32. adamops/pipelines/workflows.py +212 -0
  33. adamops/utils/__init__.py +18 -0
  34. adamops/utils/config.py +457 -0
  35. adamops/utils/helpers.py +663 -0
  36. adamops/utils/logging.py +412 -0
  37. adamops-0.1.0.dist-info/METADATA +310 -0
  38. adamops-0.1.0.dist-info/RECORD +42 -0
  39. adamops-0.1.0.dist-info/WHEEL +5 -0
  40. adamops-0.1.0.dist-info/entry_points.txt +2 -0
  41. adamops-0.1.0.dist-info/licenses/LICENSE +21 -0
  42. adamops-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,211 @@
+ """
+ AdamOps Drift Detection Module
+
+ Detect data drift and concept drift in production.
+ """
+
+ from typing import Any, Dict
+ import numpy as np
+ import pandas as pd
+ from scipy import stats
+
+ from adamops.utils.logging import get_logger
+
+ logger = get_logger(__name__)
+
+
+ class DriftDetector:
+     """Detect data and concept drift."""
+
+     def __init__(self, reference_data: pd.DataFrame, threshold: float = 0.05):
+         """
+         Initialize drift detector.
+
+         Args:
+             reference_data: Reference/training data.
+             threshold: P-value threshold for drift detection.
+         """
+         self.reference = reference_data
+         self.threshold = threshold
+         self._compute_reference_stats()
+
+     def _compute_reference_stats(self):
+         """Compute statistics for reference data."""
+         self.ref_stats = {}
+
+         for col in self.reference.columns:
+             if pd.api.types.is_numeric_dtype(self.reference[col]):
+                 self.ref_stats[col] = {
+                     "type": "numeric",
+                     "mean": self.reference[col].mean(),
+                     "std": self.reference[col].std(),
+                     "min": self.reference[col].min(),
+                     "max": self.reference[col].max(),
+                     "values": self.reference[col].dropna().values,
+                 }
+             else:
+                 value_counts = self.reference[col].value_counts(normalize=True)
+                 self.ref_stats[col] = {
+                     "type": "categorical",
+                     "distribution": value_counts.to_dict(),
+                     "values": self.reference[col].dropna().values,
+                 }
+
+     def detect_drift(self, current_data: pd.DataFrame) -> Dict[str, Any]:
+         """
+         Detect drift between reference and current data.
+
+         Returns:
+             Dict with drift detection results.
+         """
+         results = {"drift_detected": False, "columns": {}, "summary": {}}
+         drift_count = 0
+
+         for col in self.reference.columns:
+             if col not in current_data.columns:
+                 continue
+
+             col_result = self._detect_column_drift(col, current_data[col])
+             results["columns"][col] = col_result
+
+             if col_result["drift_detected"]:
+                 drift_count += 1
+
+         results["drift_detected"] = drift_count > 0
+         results["summary"] = {
+             "total_columns": len(self.reference.columns),
+             "drifted_columns": drift_count,
+             "drift_ratio": drift_count / len(self.reference.columns),
+         }
+
+         if results["drift_detected"]:
+             logger.warning(f"Drift detected in {drift_count} columns")
+
+         return results
+
+     def _detect_column_drift(self, col: str, current: pd.Series) -> Dict:
+         """Detect drift for a single column."""
+         ref_stats = self.ref_stats.get(col)
+         if ref_stats is None:
+             return {"drift_detected": False, "reason": "unknown_column"}
+
+         current_values = current.dropna().values
+
+         if ref_stats["type"] == "numeric":
+             # Kolmogorov-Smirnov test
+             stat, pvalue = stats.ks_2samp(ref_stats["values"], current_values)
+             drift_detected = pvalue < self.threshold
+
+             return {
+                 "drift_detected": drift_detected,
+                 "test": "ks_test",
+                 "statistic": float(stat),
+                 "p_value": float(pvalue),
+                 "ref_mean": ref_stats["mean"],
+                 "current_mean": float(current.mean()),
+             }
+         else:
+             # Chi-square test for categorical
+             current_dist = current.value_counts(normalize=True)
+
+             # Align distributions
+             all_categories = set(ref_stats["distribution"].keys()) | set(current_dist.index)
+             ref_freq = [ref_stats["distribution"].get(c, 0.001) for c in all_categories]
+             cur_freq = [current_dist.get(c, 0.001) for c in all_categories]
+
+             # Normalize
+             ref_freq = np.array(ref_freq) / sum(ref_freq)
+             cur_freq = np.array(cur_freq) / sum(cur_freq)
+
+             # Chi-square test expects counts, so scale both proportions by the current sample size
+             stat, pvalue = stats.chisquare(cur_freq * len(current_values), ref_freq * len(current_values))
+             drift_detected = pvalue < self.threshold
+
+             return {
+                 "drift_detected": drift_detected,
+                 "test": "chi_square",
+                 "statistic": float(stat),
+                 "p_value": float(pvalue),
+             }
+
+     def get_drift_report(self, current_data: pd.DataFrame) -> str:
+         """Generate human-readable drift report."""
+         results = self.detect_drift(current_data)
+
+         lines = [
+             "=" * 50,
+             "DRIFT DETECTION REPORT",
+             "=" * 50,
+             f"Status: {'DRIFT DETECTED' if results['drift_detected'] else 'NO DRIFT'}",
+             f"Columns with drift: {results['summary']['drifted_columns']}/{results['summary']['total_columns']}",
+             "",
+         ]
+
+         if results["drift_detected"]:
+             lines.append("Drifted Columns:")
+             for col, info in results["columns"].items():
+                 if info["drift_detected"]:
+                     lines.append(f"  - {col}: p-value={info['p_value']:.4f} ({info['test']})")
+
+         return "\n".join(lines)
+
+
+ class PSI:
+     """Population Stability Index calculator."""
+
+     @staticmethod
+     def calculate(reference: np.ndarray, current: np.ndarray, bins: int = 10) -> float:
+         """
+         Calculate PSI between reference and current distributions.
+
+         PSI < 0.1: No significant change
+         0.1 <= PSI < 0.2: Moderate change
+         PSI >= 0.2: Significant change
+         """
+         # Create bins from reference
+         _, bin_edges = np.histogram(reference, bins=bins)
+
+         # Get distributions
+         ref_counts, _ = np.histogram(reference, bins=bin_edges)
+         cur_counts, _ = np.histogram(current, bins=bin_edges)
+
+         # Convert to percentages
+         ref_pct = ref_counts / len(reference)
+         cur_pct = cur_counts / len(current)
+
+         # Avoid division by zero
+         ref_pct = np.where(ref_pct == 0, 0.0001, ref_pct)
+         cur_pct = np.where(cur_pct == 0, 0.0001, cur_pct)
+
+         # Calculate PSI
+         psi = np.sum((cur_pct - ref_pct) * np.log(cur_pct / ref_pct))
+
+         return float(psi)
+
+     @staticmethod
+     def interpret(psi: float) -> str:
+         """Interpret PSI value."""
+         if psi < 0.1:
+             return "No significant change"
+         elif psi < 0.2:
+             return "Moderate change - monitor closely"
+         else:
+             return "Significant change - investigate"
+
+
+ def detect_drift(
+     reference: pd.DataFrame, current: pd.DataFrame, threshold: float = 0.05
+ ) -> Dict:
+     """Detect drift between reference and current data."""
+     detector = DriftDetector(reference, threshold)
+     return detector.detect_drift(current)
+
+
+ def calculate_psi(reference: np.ndarray, current: np.ndarray, bins: int = 10) -> Dict:
+     """Calculate PSI with interpretation."""
+     psi = PSI.calculate(reference, current, bins)
+     return {
+         "psi": psi,
+         "interpretation": PSI.interpret(psi),
+         "significant": psi >= 0.2,
+     }
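
A minimal usage sketch of the module above (the frames, column names, and seed here are illustrative, not part of the package):

    import numpy as np
    import pandas as pd
    from adamops.monitoring.drift import DriftDetector, calculate_psi

    # Hypothetical reference/current data; any DataFrames with shared columns work.
    rng = np.random.default_rng(0)
    reference = pd.DataFrame({"age": rng.normal(40, 10, 1000), "plan": rng.choice(["a", "b"], 1000)})
    current = pd.DataFrame({"age": rng.normal(45, 10, 1000), "plan": rng.choice(["a", "b"], 1000)})

    detector = DriftDetector(reference, threshold=0.05)
    print(detector.get_drift_report(current))

    # PSI on a single numeric feature
    print(calculate_psi(reference["age"].values, current["age"].values))

With the shifted "age" mean, the KS test should flag drift and the PSI should land above the 0.2 "significant change" line.
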
@@ -0,0 +1,195 @@
+ """
+ AdamOps Performance Monitoring Module
+
+ Track model performance over time.
+ """
+
+ from datetime import datetime
+ from typing import Dict, List, Optional
+ import json
+ from pathlib import Path
+
+ import numpy as np
+ import pandas as pd
+
+ from adamops.utils.logging import get_logger
+ from adamops.utils.helpers import ensure_dir
+ from adamops.evaluation.metrics import evaluate
+
+ logger = get_logger(__name__)
+
+
+ class PerformanceMonitor:
+     """Monitor model performance over time."""
+
+     def __init__(self, model_name: str, storage_path: Optional[str] = None):
+         """
+         Initialize performance monitor.
+
+         Args:
+             model_name: Name of the model being monitored.
+             storage_path: Path to store performance logs.
+         """
+         self.model_name = model_name
+         self.storage_path = Path(storage_path or f".adamops_monitor/{model_name}")
+         ensure_dir(self.storage_path)
+
+         self.metrics_file = self.storage_path / "metrics.json"
+         self.predictions_file = self.storage_path / "predictions.json"
+
+         self._load_history()
+
+     def _load_history(self):
+         """Load existing metrics history."""
+         if self.metrics_file.exists():
+             with open(self.metrics_file) as f:
+                 self.metrics_history = json.load(f)
+         else:
+             self.metrics_history = []
+
+     def _save_history(self):
+         """Save metrics history."""
+         with open(self.metrics_file, 'w') as f:
+             json.dump(self.metrics_history, f, indent=2, default=str)
+
+     def log_metrics(
+         self, metrics: Dict[str, float],
+         timestamp: Optional[str] = None,
+         metadata: Optional[Dict] = None
+     ):
+         """
+         Log performance metrics.
+
+         Args:
+             metrics: Dict of metric name to value.
+             timestamp: Timestamp (uses current if None).
+             metadata: Additional metadata.
+         """
+         entry = {
+             "timestamp": timestamp or datetime.now().isoformat(),
+             "metrics": metrics,
+             "metadata": metadata or {},
+         }
+
+         self.metrics_history.append(entry)
+         self._save_history()
+
+         logger.info(f"Logged metrics for {self.model_name}: {metrics}")
+
+     def log_prediction(
+         self, y_true: np.ndarray, y_pred: np.ndarray,
+         task: str = "classification", y_prob: Optional[np.ndarray] = None
+     ):
+         """Log predictions and compute metrics."""
+         metrics = evaluate(y_true, y_pred, task, y_prob)
+         self.log_metrics(metrics, metadata={"task": task, "n_samples": len(y_true)})
+         return metrics
+
+     def get_history(self, n_latest: Optional[int] = None) -> List[Dict]:
+         """Get metrics history."""
+         if n_latest is not None:
+             return self.metrics_history[-n_latest:]
+         return self.metrics_history
+
+     def get_metric_trend(self, metric: str) -> pd.DataFrame:
+         """Get trend for a specific metric."""
+         data = []
+         for entry in self.metrics_history:
+             if metric in entry["metrics"]:
+                 data.append({
+                     "timestamp": entry["timestamp"],
+                     "value": entry["metrics"][metric],
+                 })
+
+         return pd.DataFrame(data)
+
+     def detect_degradation(
+         self, metric: str, threshold: float = 0.1, window: int = 5
+     ) -> Dict:
+         """
+         Detect performance degradation (assumes higher metric values are better).
+
+         Args:
+             metric: Metric to monitor.
+             threshold: Relative change threshold.
+             window: Number of recent entries to compare.
+
+         Returns:
+             Dict with degradation info.
+         """
+         trend = self.get_metric_trend(metric)
+
+         if len(trend) < window + 1:
+             return {"degraded": False, "reason": "insufficient_data"}
+
+         baseline = trend["value"].iloc[:-window].mean()
+         recent = trend["value"].iloc[-window:].mean()
+
+         change = (baseline - recent) / baseline if baseline != 0 else 0
+         degraded = change > threshold
+
+         result = {
+             "degraded": degraded,
+             "metric": metric,
+             "baseline": baseline,
+             "recent": recent,
+             "change_pct": change * 100,
+             "threshold_pct": threshold * 100,
+         }
+
+         if degraded:
+             logger.warning(f"Performance degradation detected for {metric}: {change*100:.1f}% drop")
+
+         return result
+
+     def summary(self) -> Dict:
+         """Get monitoring summary."""
+         if not self.metrics_history:
+             return {"model": self.model_name, "entries": 0}
+
+         latest = self.metrics_history[-1]
+
+         return {
+             "model": self.model_name,
+             "entries": len(self.metrics_history),
+             "latest_timestamp": latest["timestamp"],
+             "latest_metrics": latest["metrics"],
+         }
+
+
+ class LatencyMonitor:
+     """Monitor prediction latency."""
+
+     def __init__(self, model_name: str):
+         self.model_name = model_name
+         self.latencies = []
+
+     def record(self, latency_ms: float):
+         """Record a latency measurement."""
+         self.latencies.append({
+             "timestamp": datetime.now().isoformat(),
+             "latency_ms": latency_ms,
+         })
+
+     def get_stats(self) -> Dict:
+         """Get latency statistics."""
+         if not self.latencies:
+             return {}
+
+         values = [entry["latency_ms"] for entry in self.latencies]
+
+         return {
+             "count": len(values),
+             "mean_ms": np.mean(values),
+             "std_ms": np.std(values),
+             "p50_ms": np.percentile(values, 50),
+             "p95_ms": np.percentile(values, 95),
+             "p99_ms": np.percentile(values, 99),
+             "min_ms": np.min(values),
+             "max_ms": np.max(values),
+         }
+
+
+ def create_monitor(model_name: str, storage_path: Optional[str] = None) -> PerformanceMonitor:
+     """Create a performance monitor."""
+     return PerformanceMonitor(model_name, storage_path)
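
A usage sketch for the monitors above, assuming `evaluate` (from adamops.evaluation.metrics, not shown in this hunk) returns a dict containing an "accuracy" key for classification tasks; the model name, path, and labels are illustrative:

    import numpy as np
    from adamops.monitoring.performance import PerformanceMonitor, LatencyMonitor

    monitor = PerformanceMonitor("churn-model", storage_path="/tmp/adamops_demo")
    y_true = np.array([0, 1, 1, 0, 1])
    y_pred = np.array([0, 1, 0, 0, 1])
    monitor.log_prediction(y_true, y_pred, task="classification")

    # After several logged batches, flag a >10% relative drop across the last 5 entries
    print(monitor.detect_degradation("accuracy", threshold=0.1, window=5))

    latency = LatencyMonitor("churn-model")
    latency.record(12.5)
    print(latency.get_stats())
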
@@ -0,0 +1,15 @@
+ """
+ AdamOps Pipelines Module
+
+ Provides pipeline orchestration capabilities:
+ - workflows: Define end-to-end ML workflows as DAGs
+ - orchestrators: Schedule and run pipelines
+ """
+
+ from adamops.pipelines import workflows
+ from adamops.pipelines import orchestrators
+
+ __all__ = [
+     "workflows",
+     "orchestrators",
+ ]
@@ -0,0 +1,183 @@
+ """
+ AdamOps Orchestrators Module
+
+ Schedule and run pipelines.
+ """
+
+ from typing import Dict, List, Optional
+ from datetime import datetime, timedelta
+ import threading
+ import time
+ import json
+ from pathlib import Path
+
+ from adamops.utils.logging import get_logger
+ from adamops.utils.helpers import ensure_dir
+ from adamops.pipelines.workflows import Workflow, TaskStatus
+
+ logger = get_logger(__name__)
+
+
+ class PipelineRun:
+     """Represents a single pipeline run."""
+
+     def __init__(self, workflow: Workflow, run_id: str):
+         self.workflow = workflow
+         self.run_id = run_id
+         self.start_time: Optional[datetime] = None
+         self.end_time: Optional[datetime] = None
+         self.status = TaskStatus.PENDING
+         self.result: Optional[Dict] = None
+         self.error: Optional[str] = None
+
+     @property
+     def duration(self) -> Optional[float]:
+         if self.start_time and self.end_time:
+             return (self.end_time - self.start_time).total_seconds()
+         return None
+
+     def to_dict(self) -> Dict:
+         return {
+             "run_id": self.run_id,
+             "workflow": self.workflow.name,
+             "status": self.status.value,
+             "start_time": self.start_time.isoformat() if self.start_time else None,
+             "end_time": self.end_time.isoformat() if self.end_time else None,
+             "duration": self.duration,
+             "error": self.error,
+         }
+
+
+ class Orchestrator:
+     """Orchestrate and schedule pipeline runs."""
+
+     def __init__(self, storage_path: Optional[str] = None):
+         self.storage_path = Path(storage_path or ".adamops_runs")
+         ensure_dir(self.storage_path)
+
+         self.workflows: Dict[str, Workflow] = {}
+         self.runs: Dict[str, PipelineRun] = {}
+         self.schedules: Dict[str, Dict] = {}
+         self._scheduler_thread: Optional[threading.Thread] = None
+         self._running = False
+
+     def register(self, workflow: Workflow):
+         """Register a workflow."""
+         self.workflows[workflow.name] = workflow
+         logger.info(f"Registered workflow: {workflow.name}")
+
+     def run(self, workflow_name: str, context: Optional[Dict] = None, run_id: Optional[str] = None) -> PipelineRun:
+         """Run a workflow immediately."""
+         if workflow_name not in self.workflows:
+             raise ValueError(f"Unknown workflow: {workflow_name}")
+
+         workflow = self.workflows[workflow_name]
+         run_id = run_id or f"{workflow_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
+
+         run = PipelineRun(workflow, run_id)
+         self.runs[run_id] = run
+
+         run.start_time = datetime.now()
+         run.status = TaskStatus.RUNNING
+
+         try:
+             run.result = workflow.run(context or {})
+             run.status = TaskStatus.COMPLETED
+         except Exception as e:
+             run.status = TaskStatus.FAILED
+             run.error = str(e)
+         finally:
+             run.end_time = datetime.now()
+             self._save_run(run)
+
+         return run
+
+     def run_async(self, workflow_name: str, context: Optional[Dict] = None) -> str:
+         """Run a workflow asynchronously."""
+         run_id = f"{workflow_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
+
+         def _run():
+             # Reuse the precomputed run_id so the returned id matches the stored run
+             self.run(workflow_name, context, run_id=run_id)
+
+         thread = threading.Thread(target=_run, name=run_id)
+         thread.start()
+
+         return run_id
+
+     def schedule(self, workflow_name: str, interval_seconds: int,
+                  context: Optional[Dict] = None):
+         """Schedule a workflow to run at intervals."""
+         self.schedules[workflow_name] = {
+             "interval": interval_seconds,
+             "context": context or {},
+             "last_run": None,
+             "next_run": datetime.now(),
+         }
+         logger.info(f"Scheduled {workflow_name} every {interval_seconds}s")
+
+     def start_scheduler(self):
+         """Start the background scheduler."""
+         if self._running:
+             return
+
+         self._running = True
+
+         def _scheduler_loop():
+             while self._running:
+                 now = datetime.now()
+
+                 for name, schedule in self.schedules.items():
+                     if now >= schedule["next_run"]:
+                         try:
+                             self.run(name, schedule["context"])
+                         except Exception as e:
+                             logger.error(f"Scheduled run failed: {e}")
+
+                         schedule["last_run"] = now
+                         schedule["next_run"] = now + timedelta(seconds=schedule["interval"])
+
+                 time.sleep(1)
+
+         self._scheduler_thread = threading.Thread(target=_scheduler_loop, daemon=True)
+         self._scheduler_thread.start()
+         logger.info("Started scheduler")
+
+     def stop_scheduler(self):
+         """Stop the background scheduler."""
+         self._running = False
+         if self._scheduler_thread:
+             self._scheduler_thread.join(timeout=5)
+         logger.info("Stopped scheduler")
+
+     def _save_run(self, run: PipelineRun):
+         """Save run to storage."""
+         run_file = self.storage_path / f"{run.run_id}.json"
+         with open(run_file, 'w') as f:
+             json.dump(run.to_dict(), f, indent=2)
+
+     def get_runs(self, workflow_name: Optional[str] = None, limit: int = 10) -> List[Dict]:
+         """Get recent runs."""
+         runs = list(self.runs.values())
+
+         if workflow_name:
+             runs = [r for r in runs if r.workflow.name == workflow_name]
+
+         runs.sort(key=lambda r: r.start_time or datetime.min, reverse=True)
+         return [r.to_dict() for r in runs[:limit]]
+
+     def get_run(self, run_id: str) -> Optional[Dict]:
+         """Get a specific run."""
+         if run_id in self.runs:
+             return self.runs[run_id].to_dict()
+         return None
+
+
+ # Global orchestrator
+ _orchestrator: Optional[Orchestrator] = None
+
+ def get_orchestrator() -> Orchestrator:
+     """Get global orchestrator."""
+     global _orchestrator
+     if _orchestrator is None:
+         _orchestrator = Orchestrator()
+     return _orchestrator
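
A usage sketch for the orchestrator; `build_workflow()` is a hypothetical stand-in for constructing a Workflow via adamops.pipelines.workflows, whose API lives in workflows.py rather than in this file:

    from adamops.pipelines.orchestrators import get_orchestrator

    workflow = build_workflow()  # hypothetical: any object satisfying the Workflow interface
    orch = get_orchestrator()
    orch.register(workflow)

    run = orch.run(workflow.name)            # synchronous run, returns a PipelineRun
    print(run.to_dict())

    run_id = orch.run_async(workflow.name)   # background thread, returns the run id
    orch.schedule(workflow.name, interval_seconds=3600)
    orch.start_scheduler()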