adamops 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. adamops/__init__.py +40 -0
  2. adamops/cli.py +163 -0
  3. adamops/data/__init__.py +24 -0
  4. adamops/data/feature_engineering.py +284 -0
  5. adamops/data/loaders.py +922 -0
  6. adamops/data/preprocessors.py +227 -0
  7. adamops/data/splitters.py +218 -0
  8. adamops/data/validators.py +148 -0
  9. adamops/deployment/__init__.py +21 -0
  10. adamops/deployment/api.py +237 -0
  11. adamops/deployment/cloud.py +191 -0
  12. adamops/deployment/containerize.py +262 -0
  13. adamops/deployment/exporters.py +148 -0
  14. adamops/evaluation/__init__.py +24 -0
  15. adamops/evaluation/comparison.py +133 -0
  16. adamops/evaluation/explainability.py +143 -0
  17. adamops/evaluation/metrics.py +233 -0
  18. adamops/evaluation/reports.py +165 -0
  19. adamops/evaluation/visualization.py +238 -0
  20. adamops/models/__init__.py +21 -0
  21. adamops/models/automl.py +277 -0
  22. adamops/models/ensembles.py +228 -0
  23. adamops/models/modelops.py +308 -0
  24. adamops/models/registry.py +250 -0
  25. adamops/monitoring/__init__.py +21 -0
  26. adamops/monitoring/alerts.py +200 -0
  27. adamops/monitoring/dashboard.py +117 -0
  28. adamops/monitoring/drift.py +212 -0
  29. adamops/monitoring/performance.py +195 -0
  30. adamops/pipelines/__init__.py +15 -0
  31. adamops/pipelines/orchestrators.py +183 -0
  32. adamops/pipelines/workflows.py +212 -0
  33. adamops/utils/__init__.py +18 -0
  34. adamops/utils/config.py +457 -0
  35. adamops/utils/helpers.py +663 -0
  36. adamops/utils/logging.py +412 -0
  37. adamops-0.1.0.dist-info/METADATA +310 -0
  38. adamops-0.1.0.dist-info/RECORD +42 -0
  39. adamops-0.1.0.dist-info/WHEEL +5 -0
  40. adamops-0.1.0.dist-info/entry_points.txt +2 -0
  41. adamops-0.1.0.dist-info/licenses/LICENSE +21 -0
  42. adamops-0.1.0.dist-info/top_level.txt +1 -0
adamops/data/preprocessors.py
@@ -0,0 +1,227 @@
+ """
+ AdamOps Data Preprocessors Module
+
+ Provides data cleaning capabilities: missing values, outliers, duplicates, type conversion.
+ """
+
+ from typing import Any, Dict, List, Optional, Union
+ import numpy as np
+ import pandas as pd
+ from sklearn.impute import SimpleImputer, KNNImputer
+ from sklearn.experimental import enable_iterative_imputer  # noqa: F401 (required to enable IterativeImputer)
+ from sklearn.impute import IterativeImputer
+ from sklearn.ensemble import IsolationForest
+ from adamops.utils.logging import get_logger
+
+ logger = get_logger(__name__)
+
+
+ # Missing Value Handling
+ def handle_missing(
+     df: pd.DataFrame, strategy: str = "mean", columns: Optional[List[str]] = None,
+     fill_value: Optional[Any] = None, n_neighbors: int = 5
+ ) -> pd.DataFrame:
+     """
+     Handle missing values.
+
+     Args:
+         df: DataFrame to process.
+         strategy: 'drop', 'mean', 'median', 'most_frequent', 'mode', 'constant', 'ffill', 'bfill', 'knn', 'iterative'
+         columns: Columns to process (None for all).
+         fill_value: Value for 'constant' strategy.
+         n_neighbors: Neighbors for KNN.
+
+     Returns:
+         Processed DataFrame.
+     """
+     df = df.copy()
+     cols = columns or df.columns.tolist()
+     logger.info(f"Handling missing values with strategy: {strategy}")
+
+     if strategy == "drop":
+         return df.dropna(subset=cols)
+     elif strategy == "ffill":
+         df[cols] = df[cols].ffill()
+     elif strategy == "bfill":
+         df[cols] = df[cols].bfill()
+     elif strategy == "constant":
+         df[cols] = df[cols].fillna(fill_value)
+     elif strategy in ["mean", "median", "most_frequent"]:
+         # 'mode' is handled in its own branch below so non-numeric columns are supported.
+         num_cols = [c for c in cols if pd.api.types.is_numeric_dtype(df[c])]
+         if num_cols:
+             imputer = SimpleImputer(strategy=strategy)
+             df[num_cols] = imputer.fit_transform(df[num_cols])
+     elif strategy == "mode":
+         for col in cols:
+             if df[col].isna().any():
+                 mode_val = df[col].mode().iloc[0] if not df[col].mode().empty else None
+                 df[col] = df[col].fillna(mode_val)
+     elif strategy == "knn":
+         num_cols = [c for c in cols if pd.api.types.is_numeric_dtype(df[c])]
+         if num_cols:
+             imputer = KNNImputer(n_neighbors=n_neighbors)
+             df[num_cols] = imputer.fit_transform(df[num_cols])
+     elif strategy == "iterative":
+         num_cols = [c for c in cols if pd.api.types.is_numeric_dtype(df[c])]
+         if num_cols:
+             imputer = IterativeImputer(random_state=42)
+             df[num_cols] = imputer.fit_transform(df[num_cols])
+
+     return df
+
+
+ # Outlier Handling
+ def handle_outliers(
+     df: pd.DataFrame, method: str = "iqr", columns: Optional[List[str]] = None,
+     threshold: float = 1.5, action: str = "clip", contamination: float = 0.1
+ ) -> pd.DataFrame:
+     """
+     Handle outliers.
+
+     Args:
+         df: DataFrame to process.
+         method: 'iqr', 'zscore', 'isolation_forest'
+         columns: Columns to process (None for numeric).
+         threshold: IQR multiplier or Z-score threshold.
+         action: 'clip', 'drop', 'nan'
+         contamination: For isolation forest.
+
+     Returns:
+         Processed DataFrame.
+     """
+     df = df.copy()
+     num_cols = columns or df.select_dtypes(include=[np.number]).columns.tolist()
+     logger.info(f"Handling outliers with method: {method}, action: {action}")
+
+     if method == "iqr":
+         for col in num_cols:
+             Q1, Q3 = df[col].quantile(0.25), df[col].quantile(0.75)
+             IQR = Q3 - Q1
+             lower, upper = Q1 - threshold * IQR, Q3 + threshold * IQR
+             mask = (df[col] < lower) | (df[col] > upper)
+             if action == "clip":
+                 df[col] = df[col].clip(lower, upper)
+             elif action == "drop":
+                 df = df[~mask]
+             elif action == "nan":
+                 df.loc[mask, col] = np.nan
+
+     elif method == "zscore":
+         for col in num_cols:
+             z = np.abs((df[col] - df[col].mean()) / df[col].std())
+             mask = z > threshold
+             if action == "clip":
+                 mean, std = df[col].mean(), df[col].std()
+                 lower, upper = mean - threshold * std, mean + threshold * std
+                 df[col] = df[col].clip(lower, upper)
+             elif action == "drop":
+                 df = df[~mask]
+             elif action == "nan":
+                 df.loc[mask, col] = np.nan
+
+     elif method == "isolation_forest":
+         iso = IsolationForest(contamination=contamination, random_state=42)
+         preds = iso.fit_predict(df[num_cols])
+         mask = preds == -1
+         if action == "drop":
+             df = df[~mask]
+         elif action == "nan":
+             df.loc[mask, num_cols] = np.nan
+
+     return df
+
+
+ # Duplicate Handling
+ def handle_duplicates(
+     df: pd.DataFrame, subset: Optional[List[str]] = None, keep: str = "first"
+ ) -> pd.DataFrame:
+     """Remove duplicate rows."""
+     before = len(df)
+     df = df.drop_duplicates(subset=subset, keep=keep)
+     logger.info(f"Removed {before - len(df)} duplicates")
+     return df
+
+
+ # Type Conversion
+ def convert_types(
+     df: pd.DataFrame, type_mapping: Optional[Dict[str, str]] = None,
+     auto_convert: bool = True, datetime_columns: Optional[List[str]] = None
+ ) -> pd.DataFrame:
+     """
+     Convert column types.
+
+     Args:
+         df: DataFrame to process.
+         type_mapping: {column: target_type}
+         auto_convert: Auto-detect and convert types.
+         datetime_columns: Columns to parse as datetime.
+     """
+     df = df.copy()
+
+     if type_mapping:
+         for col, dtype in type_mapping.items():
+             if col in df.columns:
+                 try:
+                     df[col] = df[col].astype(dtype)
+                 except (ValueError, TypeError) as e:
+                     logger.warning(f"Could not convert {col} to {dtype}: {e}")
+
+     if datetime_columns:
+         for col in datetime_columns:
+             if col in df.columns:
+                 df[col] = pd.to_datetime(df[col], errors='coerce')
+
+     if auto_convert:
+         for col in df.columns:
+             if df[col].dtype == 'object':
+                 try:
+                     df[col] = pd.to_numeric(df[col])
+                 except (ValueError, TypeError):
+                     pass
+
+     return df
+
+
+ # Text Cleaning
+ def clean_text(
+     df: pd.DataFrame, columns: Optional[List[str]] = None,
+     lowercase: bool = True, strip: bool = True, remove_special: bool = False
+ ) -> pd.DataFrame:
+     """Clean text columns."""
+     df = df.copy()
+     str_cols = columns or df.select_dtypes(include=['object']).columns.tolist()
+
+     for col in str_cols:
+         if strip:
+             df[col] = df[col].str.strip()
+         if lowercase:
+             df[col] = df[col].str.lower()
+         if remove_special:
+             df[col] = df[col].str.replace(r'[^\w\s]', '', regex=True)
+
+     return df
+
+
+ # Full Pipeline
+ def preprocess(
+     df: pd.DataFrame, missing_strategy: str = "mean", outlier_method: Optional[str] = None,
+     remove_duplicates: bool = True, convert_types_auto: bool = True
+ ) -> pd.DataFrame:
+     """Full preprocessing pipeline."""
+     logger.info("Starting preprocessing pipeline")
+
+     if remove_duplicates:
+         df = handle_duplicates(df)
+
+     if missing_strategy:
+         df = handle_missing(df, strategy=missing_strategy)
+
+     if outlier_method:
+         df = handle_outliers(df, method=outlier_method)
+
+     if convert_types_auto:
+         df = convert_types(df, auto_convert=True)
+
+     logger.info(f"Preprocessing complete. Shape: {df.shape}")
+     return df
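
For orientation, a minimal usage sketch of the preprocessing helpers above; the DataFrame, column names, and values are invented for illustration.

    import numpy as np
    import pandas as pd
    from adamops.data import preprocessors

    # Toy data with a missing value, a duplicate row, messy text, and an extreme value
    df = pd.DataFrame({
        "age": [22.0, 25.0, np.nan, 31.0, 31.0, 200.0],
        "city": [" NYC", "nyc ", "Boston", "Austin", "Austin", "boston"],
    })

    clean = preprocessors.handle_missing(df, strategy="median", columns=["age"])          # impute age
    clean = preprocessors.handle_outliers(clean, method="iqr", columns=["age"], action="clip")
    clean = preprocessors.clean_text(clean, columns=["city"])                              # strip + lowercase

    # Or chain the common steps through the combined pipeline in one call
    clean = preprocessors.preprocess(df, missing_strategy="median", outlier_method="iqr")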
adamops/data/splitters.py
@@ -0,0 +1,218 @@
+ """
+ AdamOps Data Splitters Module
+
+ Provides data splitting: train/test, train/val/test, time-series, K-Fold, stratified.
+ """
+
+ from typing import Iterator, List, Optional, Tuple, Union
+ import numpy as np
+ import pandas as pd
+ from sklearn.model_selection import (
+     train_test_split, KFold, StratifiedKFold, TimeSeriesSplit, GroupKFold
+ )
+ from adamops.utils.logging import get_logger
+
+ logger = get_logger(__name__)
+
+
+ def split_train_test(
+     X: Union[pd.DataFrame, np.ndarray], y: Optional[Union[pd.Series, np.ndarray]] = None,
+     test_size: float = 0.2, random_state: int = 42, stratify: bool = False, shuffle: bool = True
+ ) -> Tuple:
+     """
+     Split data into train and test sets.
+
+     Args:
+         X: Features.
+         y: Target (optional).
+         test_size: Test set proportion.
+         random_state: Random seed.
+         stratify: Stratify by target.
+         shuffle: Shuffle before splitting.
+
+     Returns:
+         (X_train, X_test) or (X_train, X_test, y_train, y_test)
+     """
+     stratify_col = y if stratify and y is not None else None
+
+     if y is not None:
+         X_train, X_test, y_train, y_test = train_test_split(
+             X, y, test_size=test_size, random_state=random_state,
+             stratify=stratify_col, shuffle=shuffle
+         )
+         logger.info(f"Split: train={len(X_train)}, test={len(X_test)}")
+         return X_train, X_test, y_train, y_test
+     else:
+         X_train, X_test = train_test_split(
+             X, test_size=test_size, random_state=random_state, shuffle=shuffle
+         )
+         logger.info(f"Split: train={len(X_train)}, test={len(X_test)}")
+         return X_train, X_test
+
+
+ def split_train_val_test(
+     X: Union[pd.DataFrame, np.ndarray], y: Optional[Union[pd.Series, np.ndarray]] = None,
+     train_size: float = 0.7, val_size: float = 0.15, test_size: float = 0.15,
+     random_state: int = 42, stratify: bool = False
+ ) -> Tuple:
+     """
+     Split data into train, validation, and test sets.
+
+     Returns:
+         (X_train, X_val, X_test) or (X_train, X_val, X_test, y_train, y_val, y_test)
+     """
+     # Normalize sizes
+     total = train_size + val_size + test_size
+     train_size, val_size, test_size = train_size/total, val_size/total, test_size/total
+
+     stratify_col = y if stratify and y is not None else None
+
+     if y is not None:
+         # First split: train+val vs test
+         X_temp, X_test, y_temp, y_test = train_test_split(
+             X, y, test_size=test_size, random_state=random_state,
+             stratify=stratify_col
+         )
+         # Second split: train vs val
+         val_ratio = val_size / (train_size + val_size)
+         stratify_temp = y_temp if stratify else None
+         X_train, X_val, y_train, y_val = train_test_split(
+             X_temp, y_temp, test_size=val_ratio, random_state=random_state,
+             stratify=stratify_temp
+         )
+         logger.info(f"Split: train={len(X_train)}, val={len(X_val)}, test={len(X_test)}")
+         return X_train, X_val, X_test, y_train, y_val, y_test
+     else:
+         X_temp, X_test = train_test_split(X, test_size=test_size, random_state=random_state)
+         val_ratio = val_size / (train_size + val_size)
+         X_train, X_val = train_test_split(X_temp, test_size=val_ratio, random_state=random_state)
+         return X_train, X_val, X_test
+
+
+ def split_timeseries(
+     X: Union[pd.DataFrame, np.ndarray], y: Optional[Union[pd.Series, np.ndarray]] = None,
+     n_splits: int = 5, test_size: Optional[int] = None, gap: int = 0
+ ) -> Iterator[Tuple]:
+     """
+     Time series split for temporal data.
+
+     Args:
+         X: Features.
+         y: Target.
+         n_splits: Number of splits.
+         test_size: Test set size per split.
+         gap: Gap between train and test.
+
+     Yields:
+         (train_idx, test_idx) tuples.
+     """
+     tscv = TimeSeriesSplit(n_splits=n_splits, test_size=test_size, gap=gap)
+     logger.info(f"Time series split with {n_splits} folds")
+
+     for train_idx, test_idx in tscv.split(X):
+         yield train_idx, test_idx
+
+
+ def split_kfold(
+     X: Union[pd.DataFrame, np.ndarray], y: Optional[Union[pd.Series, np.ndarray]] = None,
+     n_splits: int = 5, shuffle: bool = True, random_state: int = 42
+ ) -> Iterator[Tuple]:
+     """
+     K-Fold cross-validation split.
+
+     Yields:
+         (train_idx, test_idx) tuples.
+     """
+     kf = KFold(n_splits=n_splits, shuffle=shuffle, random_state=random_state)
+     logger.info(f"K-Fold split with {n_splits} folds")
+
+     for train_idx, test_idx in kf.split(X):
+         yield train_idx, test_idx
+
+
+ def split_stratified_kfold(
+     X: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray],
+     n_splits: int = 5, shuffle: bool = True, random_state: int = 42
+ ) -> Iterator[Tuple]:
+     """
+     Stratified K-Fold cross-validation split.
+
+     Preserves class distribution in each fold.
+
+     Yields:
+         (train_idx, test_idx) tuples.
+     """
+     skf = StratifiedKFold(n_splits=n_splits, shuffle=shuffle, random_state=random_state)
+     logger.info(f"Stratified K-Fold split with {n_splits} folds")
+
+     for train_idx, test_idx in skf.split(X, y):
+         yield train_idx, test_idx
+
+
+ def split_group_kfold(
+     X: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray],
+     groups: Union[pd.Series, np.ndarray], n_splits: int = 5
+ ) -> Iterator[Tuple]:
+     """
+     Group K-Fold split. Ensures groups are not split across train/test.
+
+     Yields:
+         (train_idx, test_idx) tuples.
+     """
+     gkf = GroupKFold(n_splits=n_splits)
+     logger.info(f"Group K-Fold split with {n_splits} folds")
+
+     for train_idx, test_idx in gkf.split(X, y, groups):
+         yield train_idx, test_idx
+
+
+ def get_fold_data(
+     X: Union[pd.DataFrame, np.ndarray], y: Optional[Union[pd.Series, np.ndarray]],
+     train_idx: np.ndarray, test_idx: np.ndarray
+ ) -> Tuple:
+     """Get train/test data for a fold."""
+     if isinstance(X, pd.DataFrame):
+         X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
+     else:
+         X_train, X_test = X[train_idx], X[test_idx]
+
+     if y is not None:
+         if isinstance(y, pd.Series):
+             y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]
+         else:
+             y_train, y_test = y[train_idx], y[test_idx]
+         return X_train, X_test, y_train, y_test
+
+     return X_train, X_test
+
+
+ def create_cv_splits(
+     X: Union[pd.DataFrame, np.ndarray], y: Optional[Union[pd.Series, np.ndarray]] = None,
+     method: str = "kfold", n_splits: int = 5, **kwargs
+ ) -> List[Tuple]:
+     """
+     Create cross-validation splits.
+
+     Args:
+         X: Features.
+         y: Target.
+         method: 'kfold', 'stratified', 'timeseries', 'group'
+         n_splits: Number of folds.
+
+     Returns:
+         List of (train_idx, test_idx) tuples.
+     """
+     if method == "kfold":
+         return list(split_kfold(X, y, n_splits, **kwargs))
+     elif method == "stratified":
+         if y is None:
+             raise ValueError("y is required for stratified split")
+         return list(split_stratified_kfold(X, y, n_splits, **kwargs))
+     elif method == "timeseries":
+         return list(split_timeseries(X, y, n_splits, **kwargs))
+     elif method == "group":
+         if "groups" not in kwargs:
+             raise ValueError("groups is required for group split")
+         return list(split_group_kfold(X, y, kwargs["groups"], n_splits))
+     else:
+         raise ValueError(f"Unknown split method: {method}")
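
As a quick sketch of how these splitters compose, assuming small synthetic arrays invented for illustration:

    import numpy as np
    import pandas as pd
    from adamops.data import splitters

    X = pd.DataFrame({"f1": np.arange(100), "f2": np.arange(100) % 7})
    y = pd.Series(np.random.randint(0, 2, size=100))

    # Simple stratified hold-out split
    X_train, X_test, y_train, y_test = splitters.split_train_test(X, y, test_size=0.2, stratify=True)

    # Stratified 5-fold CV: get index pairs, then materialize one fold
    folds = splitters.create_cv_splits(X, y, method="stratified", n_splits=5)
    train_idx, test_idx = folds[0]
    X_tr, X_te, y_tr, y_te = splitters.get_fold_data(X, y, train_idx, test_idx)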
adamops/data/validators.py
@@ -0,0 +1,148 @@
+ """
+ AdamOps Data Validators Module
+
+ Provides data validation: type validation, missing value checks,
+ duplicate detection, shape validation, and statistical checks.
+ """
+
+ from dataclasses import dataclass, field
+ from typing import Any, Dict, List, Optional, Tuple
+ from datetime import datetime
+ import numpy as np
+ import pandas as pd
+ from adamops.utils.logging import get_logger
+
+ logger = get_logger(__name__)
+
+
+ @dataclass
+ class ValidationIssue:
+     """Represents a validation issue."""
+     severity: str  # 'error', 'warning', 'info'
+     category: str
+     column: Optional[str]
+     message: str
+     details: Optional[Dict] = None
+
+
+ @dataclass
+ class ColumnStats:
+     """Statistics for a column."""
+     name: str
+     dtype: str
+     count: int
+     missing_count: int
+     missing_pct: float
+     unique_count: int
+     unique_pct: float
+     mean: Optional[float] = None
+     std: Optional[float] = None
+     min: Optional[float] = None
+     max: Optional[float] = None
+
+
+ @dataclass
+ class ValidationReport:
+     """Complete validation report."""
+     timestamp: str
+     shape: Tuple[int, int]
+     memory_usage: float
+     issues: List[ValidationIssue] = field(default_factory=list)
+     column_stats: Dict[str, ColumnStats] = field(default_factory=dict)
+     duplicate_rows: int = 0
+     passed: bool = True
+
+     def summary(self) -> str:
+         """Generate text summary."""
+         lines = [
+             "=" * 50, "VALIDATION REPORT", "=" * 50,
+             f"Shape: {self.shape[0]} rows x {self.shape[1]} columns",
+             f"Memory: {self.memory_usage:.2f} MB",
+             f"Duplicates: {self.duplicate_rows}",
+             f"Status: {'PASSED' if self.passed else 'FAILED'}",
+             f"Issues: {len(self.issues)}", "=" * 50
+         ]
+         for issue in self.issues:
+             col = f"[{issue.column}] " if issue.column else ""
+             lines.append(f"[{issue.severity.upper()}] {col}{issue.message}")
+         return "\n".join(lines)
+
+
+ class DataValidator:
+     """Data validator for DataFrames."""
+
+     def __init__(self, missing_threshold: float = 0.5, unique_threshold: float = 0.95):
+         self.missing_threshold = missing_threshold
+         self.unique_threshold = unique_threshold
+
+     def validate(self, df: pd.DataFrame, schema: Optional[Dict] = None,
+                  required_columns: Optional[List[str]] = None) -> ValidationReport:
+         """Validate a DataFrame."""
+         report = ValidationReport(
+             timestamp=datetime.now().isoformat(),
+             shape=df.shape,
+             memory_usage=df.memory_usage(deep=True).sum() / 1024**2,
+         )
+
+         # Check required columns
+         if required_columns:
+             missing = set(required_columns) - set(df.columns)
+             for col in missing:
+                 report.issues.append(ValidationIssue("error", "schema", col, f"Missing required column: {col}"))
+
+         # Check duplicates
+         dups = int(df.duplicated().sum())
+         report.duplicate_rows = dups
+         if dups > 0:
+             report.issues.append(ValidationIssue("warning", "duplicate", None, f"{dups} duplicate rows"))
+
+         # Column stats
+         for col in df.columns:
+             series = df[col]
+             missing = int(series.isna().sum())
+             stats = ColumnStats(
+                 name=col, dtype=str(series.dtype), count=len(series),
+                 missing_count=missing, missing_pct=100*missing/len(series),
+                 unique_count=series.nunique(), unique_pct=100*series.nunique()/len(series),
+             )
+             if pd.api.types.is_numeric_dtype(series):
+                 stats.mean, stats.std = series.mean(), series.std()
+                 stats.min, stats.max = series.min(), series.max()
+             report.column_stats[col] = stats
+
+             if stats.missing_pct > self.missing_threshold * 100:
+                 report.issues.append(ValidationIssue("warning", "missing", col,
+                                                      f"High missing: {stats.missing_pct:.1f}%"))
+
+         report.passed = not any(i.severity == "error" for i in report.issues)
+         return report
+
+
+ def validate(df: pd.DataFrame, **kwargs) -> ValidationReport:
+     """Validate a DataFrame."""
+     return DataValidator().validate(df, **kwargs)
+
+ def check_missing(df: pd.DataFrame) -> Dict[str, Dict]:
+     """Check missing values."""
+     return {col: {"count": int(df[col].isna().sum()), "pct": 100*df[col].isna().mean()}
+             for col in df.columns if df[col].isna().any()}
+
+ def check_duplicates(df: pd.DataFrame, subset: Optional[List[str]] = None) -> pd.DataFrame:
+     """Get duplicate rows."""
+     return df[df.duplicated(subset=subset, keep=False)]
+
+ def check_types(df: pd.DataFrame) -> Dict[str, str]:
+     """Get column types."""
+     return {col: str(dtype) for col, dtype in df.dtypes.items()}
+
+ def describe_data(df: pd.DataFrame) -> pd.DataFrame:
+     """Generate data description."""
+     stats = []
+     for col in df.columns:
+         s = df[col]
+         row = {"column": col, "dtype": str(s.dtype), "missing": s.isna().sum(),
+                "unique": s.nunique()}
+         if pd.api.types.is_numeric_dtype(s):
+             row.update({"mean": s.mean(), "std": s.std(), "min": s.min(), "max": s.max()})
+         stats.append(row)
+     return pd.DataFrame(stats)
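
A short sketch of the validation entry points above; the data and the indicated outputs are illustrative only:

    import numpy as np
    import pandas as pd
    from adamops.data import validators

    df = pd.DataFrame({
        "id": [1, 2, 2, 4],
        "amount": [10.0, np.nan, np.nan, 99.0],
    })

    report = validators.validate(df, required_columns=["id", "amount", "label"])
    print(report.passed)                 # False here: the required 'label' column is absent
    print(report.summary())              # text block with shape, memory, duplicates, and issues
    print(validators.check_missing(df))  # e.g. {'amount': {'count': 2, 'pct': 50.0}}
    print(validators.describe_data(df))  # per-column dtype/missing/unique (+ numeric stats)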
adamops/deployment/__init__.py
@@ -0,0 +1,21 @@
+ """
+ AdamOps Deployment Module
+
+ Provides model deployment capabilities:
+ - exporters: Export models to ONNX, PMML, TFLite, CoreML
+ - api: Create FastAPI/Flask/Streamlit APIs
+ - containerize: Docker and Kubernetes deployment
+ - cloud: AWS, GCP, Azure deployment
+ """
+
+ from adamops.deployment import exporters
+ from adamops.deployment import api
+ from adamops.deployment import containerize
+ from adamops.deployment import cloud
+
+ __all__ = [
+     "exporters",
+     "api",
+     "containerize",
+     "cloud",
+ ]