adamops-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. adamops/__init__.py +40 -0
  2. adamops/cli.py +163 -0
  3. adamops/data/__init__.py +24 -0
  4. adamops/data/feature_engineering.py +284 -0
  5. adamops/data/loaders.py +922 -0
  6. adamops/data/preprocessors.py +227 -0
  7. adamops/data/splitters.py +218 -0
  8. adamops/data/validators.py +148 -0
  9. adamops/deployment/__init__.py +21 -0
  10. adamops/deployment/api.py +237 -0
  11. adamops/deployment/cloud.py +191 -0
  12. adamops/deployment/containerize.py +262 -0
  13. adamops/deployment/exporters.py +148 -0
  14. adamops/evaluation/__init__.py +24 -0
  15. adamops/evaluation/comparison.py +133 -0
  16. adamops/evaluation/explainability.py +143 -0
  17. adamops/evaluation/metrics.py +233 -0
  18. adamops/evaluation/reports.py +165 -0
  19. adamops/evaluation/visualization.py +238 -0
  20. adamops/models/__init__.py +21 -0
  21. adamops/models/automl.py +277 -0
  22. adamops/models/ensembles.py +228 -0
  23. adamops/models/modelops.py +308 -0
  24. adamops/models/registry.py +250 -0
  25. adamops/monitoring/__init__.py +21 -0
  26. adamops/monitoring/alerts.py +200 -0
  27. adamops/monitoring/dashboard.py +117 -0
  28. adamops/monitoring/drift.py +212 -0
  29. adamops/monitoring/performance.py +195 -0
  30. adamops/pipelines/__init__.py +15 -0
  31. adamops/pipelines/orchestrators.py +183 -0
  32. adamops/pipelines/workflows.py +212 -0
  33. adamops/utils/__init__.py +18 -0
  34. adamops/utils/config.py +457 -0
  35. adamops/utils/helpers.py +663 -0
  36. adamops/utils/logging.py +412 -0
  37. adamops-0.1.0.dist-info/METADATA +310 -0
  38. adamops-0.1.0.dist-info/RECORD +42 -0
  39. adamops-0.1.0.dist-info/WHEEL +5 -0
  40. adamops-0.1.0.dist-info/entry_points.txt +2 -0
  41. adamops-0.1.0.dist-info/licenses/LICENSE +21 -0
  42. adamops-0.1.0.dist-info/top_level.txt +1 -0
adamops/utils/helpers.py
@@ -0,0 +1,663 @@
+ """
+ AdamOps Helpers Module
+
+ Provides common utility functions used across the library.
+ """
+
+ import os
+ import json
+ import hashlib
+ import pickle
+ import warnings
+ from pathlib import Path
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Type, TypeVar, Union
+ from datetime import datetime
+ from functools import wraps
+ import time
+
+ import numpy as np
+ import pandas as pd
+
+ T = TypeVar("T")
+
+
+ # =============================================================================
+ # Type Checking and Validation
+ # =============================================================================
+
+ def is_numeric(value: Any) -> bool:
+     """
+     Check if a value is numeric.
+
+     Args:
+         value: Value to check.
+
+     Returns:
+         bool: True if value is numeric.
+
+     Example:
+         >>> is_numeric(42)
+         True
+         >>> is_numeric("hello")
+         False
+     """
+     return isinstance(value, (int, float, np.integer, np.floating)) and not isinstance(value, bool)
+
+
+ def is_categorical(series: pd.Series, threshold: float = 0.05) -> bool:
+     """
+     Check if a pandas Series is likely categorical.
+
+     Args:
+         series: Pandas Series to check.
+         threshold: Maximum ratio of unique values to total values for a
+             numeric series to be considered categorical.
+
+     Returns:
+         bool: True if series is likely categorical.
+
+     Example:
+         >>> df = pd.DataFrame({"cat": ["a", "b", "a", "b"]})
+         >>> is_categorical(df["cat"])
+         True
+     """
+     if series.dtype in ["object", "category", "bool"]:
+         return True
+
+     if pd.api.types.is_numeric_dtype(series):
+         unique_ratio = series.nunique() / len(series)
+         return unique_ratio < threshold
+
+     return False
+
+
+ def infer_task_type(y: Union[np.ndarray, pd.Series]) -> str:
+     """
+     Infer the task type from the target variable.
+
+     Args:
+         y: Target variable.
+
+     Returns:
+         str: Task type ("classification", "regression", or "multiclass").
+
+     Example:
+         >>> y = np.array([0, 1, 0, 1])
+         >>> infer_task_type(y)
+         'classification'
+     """
+     if isinstance(y, pd.Series):
+         y = y.values
+
+     unique_values = np.unique(y)
+     n_unique = len(unique_values)
+
+     # Check if it's a classification problem
+     if y.dtype in [np.object_, np.bool_] or n_unique <= 10:
+         if n_unique == 2:
+             return "classification"
+         else:
+             return "multiclass"
+
+     # Check if values are continuous
+     if np.issubdtype(y.dtype, np.floating):
+         return "regression"
+
+     # Integer with many unique values -> regression
+     if np.issubdtype(y.dtype, np.integer) and n_unique > 10:
+         return "regression"
+
+     return "classification"
+
+
+ def validate_dataframe(df: pd.DataFrame, required_columns: Optional[List[str]] = None) -> bool:
+     """
+     Validate a pandas DataFrame.
+
+     Args:
+         df: DataFrame to validate.
+         required_columns: List of required column names.
+
+     Returns:
+         bool: True if valid.
+
+     Raises:
+         ValueError: If validation fails.
+     """
+     if not isinstance(df, pd.DataFrame):
+         raise ValueError(f"Expected DataFrame, got {type(df).__name__}")
+
+     if df.empty:
+         raise ValueError("DataFrame is empty")
+
+     if required_columns:
+         missing = set(required_columns) - set(df.columns)
+         if missing:
+             raise ValueError(f"Missing required columns: {missing}")
+
+     return True
+
+
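A minimal usage sketch for validate_dataframe; the sample frame and column names below are illustrative, assuming the module is importable as adamops.utils.helpers. Note the design choice: failures raise ValueError rather than returning False, so callers can rely on the boolean return.

    import pandas as pd
    from adamops.utils.helpers import validate_dataframe

    df = pd.DataFrame({"id": [1, 2], "price": [9.99, 4.50]})
    validate_dataframe(df, required_columns=["id", "price"])  # True
    # With a column the frame lacks, it raises instead of returning False:
    # validate_dataframe(df, required_columns=["id", "ts"])
    # ValueError: Missing required columns: {'ts'}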
+ # =============================================================================
+ # Data Conversion
+ # =============================================================================
+
+ def to_numpy(data: Union[np.ndarray, pd.DataFrame, pd.Series, list]) -> np.ndarray:
+     """
+     Convert data to numpy array.
+
+     Args:
+         data: Data to convert.
+
+     Returns:
+         np.ndarray: Numpy array.
+     """
+     if isinstance(data, np.ndarray):
+         return data
+     elif isinstance(data, (pd.DataFrame, pd.Series)):
+         return data.values
+     elif isinstance(data, list):
+         return np.array(data)
+     else:
+         raise ValueError(f"Cannot convert {type(data).__name__} to numpy array")
+
+
+ def to_dataframe(data: Union[np.ndarray, pd.DataFrame, pd.Series, dict, list], columns: Optional[List[str]] = None) -> pd.DataFrame:
+     """
+     Convert data to pandas DataFrame.
+
+     Args:
+         data: Data to convert.
+         columns: Optional column names.
+
+     Returns:
+         pd.DataFrame: Pandas DataFrame.
+     """
+     if isinstance(data, pd.DataFrame):
+         return data
+     elif isinstance(data, pd.Series):
+         return data.to_frame()
+     elif isinstance(data, np.ndarray):
+         return pd.DataFrame(data, columns=columns)
+     elif isinstance(data, dict):
+         return pd.DataFrame(data)
+     elif isinstance(data, list):
+         return pd.DataFrame(data, columns=columns)
+     else:
+         raise ValueError(f"Cannot convert {type(data).__name__} to DataFrame")
+
+
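The two converters above normalize heterogeneous inputs before they reach model code. A short sketch with made-up inputs:

    import numpy as np
    import pandas as pd
    from adamops.utils.helpers import to_numpy, to_dataframe

    to_numpy(pd.Series([1, 2, 3]))                      # array([1, 2, 3])
    to_numpy([[1, 2], [3, 4]])                          # 2x2 ndarray
    to_dataframe(np.zeros((2, 2)), columns=["a", "b"])  # 2x2 DataFrame
    to_dataframe({"a": [1, 2], "b": [3, 4]})            # dict of columns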
+ def safe_cast(value: Any, target_type: Type[T], default: Optional[T] = None) -> Optional[T]:
+     """
+     Safely cast a value to a target type.
+
+     Args:
+         value: Value to cast.
+         target_type: Target type.
+         default: Default value if casting fails.
+
+     Returns:
+         Cast value or default.
+
+     Example:
+         >>> safe_cast("42", int)
+         42
+         >>> safe_cast("hello", int, default=0)
+         0
+     """
+     try:
+         return target_type(value)
+     except (ValueError, TypeError):
+         return default
+
+
+ # =============================================================================
+ # File Operations
+ # =============================================================================
+
+ def ensure_dir(path: Union[str, Path]) -> Path:
+     """
+     Ensure a directory exists, creating it if necessary.
+
+     Args:
+         path: Path to directory.
+
+     Returns:
+         Path: Path object.
+     """
+     path = Path(path)
+     path.mkdir(parents=True, exist_ok=True)
+     return path
+
+
+ def get_file_hash(filepath: Union[str, Path], algorithm: str = "md5") -> str:
+     """
+     Calculate hash of a file.
+
+     Args:
+         filepath: Path to file.
+         algorithm: Hash algorithm (md5, sha1, sha256).
+
+     Returns:
+         str: Hex digest of file hash.
+     """
+     hash_func = getattr(hashlib, algorithm)()
+
+     with open(filepath, "rb") as f:
+         for chunk in iter(lambda: f.read(8192), b""):
+             hash_func.update(chunk)
+
+     return hash_func.hexdigest()
+
+
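A quick sketch of get_file_hash for artifact fingerprinting; the path is a placeholder:

    from adamops.utils.helpers import get_file_hash

    digest = get_file_hash("artifacts/model.pkl", algorithm="sha256")
    # 64-character hex string for sha256. The file is read in 8 KB chunks,
    # so large artifacts are never loaded into memory at once.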
+ def save_object(obj: Any, filepath: Union[str, Path], format: str = "pickle") -> None:
+     """
+     Save an object to file.
+
+     Args:
+         obj: Object to save.
+         filepath: Path to save to.
+         format: Save format (pickle, json, joblib).
+     """
+     filepath = Path(filepath)
+     ensure_dir(filepath.parent)
+
+     if format == "pickle":
+         with open(filepath, "wb") as f:
+             pickle.dump(obj, f)
+     elif format == "json":
+         with open(filepath, "w", encoding="utf-8") as f:
+             json.dump(obj, f, indent=2, default=str)
+     elif format == "joblib":
+         import joblib
+         joblib.dump(obj, filepath)
+     else:
+         raise ValueError(f"Unknown format: {format}")
+
+
+ def load_object(filepath: Union[str, Path], format: str = "pickle") -> Any:
+     """
+     Load an object from file.
+
+     Args:
+         filepath: Path to load from.
+         format: Load format (pickle, json, joblib).
+
+     Returns:
+         Loaded object.
+     """
+     filepath = Path(filepath)
+
+     if format == "pickle":
+         with open(filepath, "rb") as f:
+             return pickle.load(f)
+     elif format == "json":
+         with open(filepath, "r", encoding="utf-8") as f:
+             return json.load(f)
+     elif format == "joblib":
+         import joblib
+         return joblib.load(filepath)
+     else:
+         raise ValueError(f"Unknown format: {format}")
+
+
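A round-trip sketch for save_object and load_object; the paths and the object are illustrative. Note that save_object creates missing parent directories via ensure_dir, so no mkdir is needed first:

    from adamops.utils.helpers import save_object, load_object

    obj = {"weights": [0.1, 0.2], "epoch": 3}
    save_object(obj, "artifacts/state.pkl")                  # pickle (default)
    save_object(obj, "artifacts/state.json", format="json")
    restored = load_object("artifacts/state.pkl")
    assert restored == obj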
+ # =============================================================================
+ # String Operations
+ # =============================================================================
+
+ def slugify(text: str) -> str:
+     """
+     Convert text to URL-friendly slug.
+
+     Args:
+         text: Text to convert.
+
+     Returns:
+         str: Slugified text.
+
+     Example:
+         >>> slugify("Hello World!")
+         'hello-world'
+     """
+     import re
+     text = text.lower()
+     text = re.sub(r"[^\w\s-]", "", text)
+     text = re.sub(r"[\s_-]+", "-", text)
+     text = text.strip("-")
+     return text
+
+
+ def truncate(text: str, max_length: int = 100, suffix: str = "...") -> str:
+     """
+     Truncate text to maximum length.
+
+     Args:
+         text: Text to truncate.
+         max_length: Maximum length of the returned string, including the suffix.
+         suffix: Suffix to add if truncated.
+
+     Returns:
+         str: Truncated text.
+     """
+     if len(text) <= max_length:
+         return text
+     return text[:max_length - len(suffix)] + suffix
+
+
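A sketch of the string helpers; inputs are arbitrary. The suffix counts toward max_length, so the result never exceeds the limit:

    from adamops.utils.helpers import slugify, truncate

    slugify("AdamOps: Feature Engineering!")  # 'adamops-feature-engineering'
    truncate("a" * 10, max_length=8)          # 'aaaaa...' (8 chars total)
    truncate("short", max_length=8)           # 'short' (unchanged)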
+ # =============================================================================
+ # Timing and Performance
+ # =============================================================================
+
+ def timeit(func: Callable) -> Callable:
+     """
+     Decorator to time function execution.
+
+     Args:
+         func: Function to time.
+
+     Returns:
+         Wrapped function.
+
+     Example:
+         >>> @timeit
+         ... def slow_function():
+         ...     time.sleep(1)
+     """
+     @wraps(func)
+     def wrapper(*args, **kwargs):
+         start = time.perf_counter()
+         result = func(*args, **kwargs)
+         end = time.perf_counter()
+         print(f"{func.__name__} executed in {end - start:.4f}s")
+         return result
+     return wrapper
+
+
+ def retry(
+     max_attempts: int = 3,
+     delay: float = 1.0,
+     backoff: float = 2.0,
+     exceptions: Tuple[Type[Exception], ...] = (Exception,),
+ ) -> Callable:
+     """
+     Decorator to retry a function on failure.
+
+     Args:
+         max_attempts: Maximum retry attempts.
+         delay: Initial delay between retries.
+         backoff: Multiplier for delay after each retry.
+         exceptions: Exceptions to catch and retry.
+
+     Returns:
+         Decorator function.
+
+     Example:
+         >>> @retry(max_attempts=3, delay=1.0)
+         ... def unstable_function():
+         ...     # May fail sometimes
+         ...     pass
+     """
+     def decorator(func: Callable) -> Callable:
+         @wraps(func)
+         def wrapper(*args, **kwargs):
+             current_delay = delay
+             last_exception = None
+
+             for attempt in range(max_attempts):
+                 try:
+                     return func(*args, **kwargs)
+                 except exceptions as e:
+                     last_exception = e
+                     if attempt < max_attempts - 1:
+                         time.sleep(current_delay)
+                         current_delay *= backoff
+
+             raise last_exception
+         return wrapper
+     return decorator
+
+
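A sketch of retry against a transient failure; the flaky function is contrived for illustration and shows the exponential backoff schedule:

    from adamops.utils.helpers import retry

    calls = {"n": 0}

    @retry(max_attempts=3, delay=0.1, backoff=2.0, exceptions=(ConnectionError,))
    def flaky_fetch():
        calls["n"] += 1
        if calls["n"] < 3:
            raise ConnectionError("transient failure")
        return "ok"

    flaky_fetch()  # sleeps 0.1 s, then 0.2 s, returns 'ok' on the third attempt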
+ # =============================================================================
+ # Memory and Performance
+ # =============================================================================
+
+ def get_memory_usage(obj: Any) -> int:
+     """
+     Get the shallow memory usage of an object in bytes.
+
+     Uses sys.getsizeof, so the contents of containers are not included.
+
+     Args:
+         obj: Object to measure.
+
+     Returns:
+         int: Memory usage in bytes.
+     """
+     import sys
+     return sys.getsizeof(obj)
+
+
+ def reduce_memory_usage(df: pd.DataFrame, verbose: bool = False) -> pd.DataFrame:
+     """
+     Reduce memory usage of a DataFrame by downcasting numeric types.
+
+     Args:
+         df: DataFrame to optimize.
+         verbose: Whether to print memory savings.
+
+     Returns:
+         pd.DataFrame: Optimized DataFrame (modified in place).
+     """
+     start_mem = df.memory_usage(deep=True).sum() / 1024**2
+
+     for col in df.columns:
+         col_type = df[col].dtype
+
+         # Only downcast plain numeric columns; object, category, datetime,
+         # and bool dtypes would be mishandled by min/max comparisons or
+         # float casting.
+         if pd.api.types.is_numeric_dtype(col_type) and not pd.api.types.is_bool_dtype(col_type):
+             c_min = df[col].min()
+             c_max = df[col].max()
+
+             if pd.api.types.is_integer_dtype(col_type):
+                 if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
+                     df[col] = df[col].astype(np.int8)
+                 elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
+                     df[col] = df[col].astype(np.int16)
+                 elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
+                     df[col] = df[col].astype(np.int32)
+                 else:
+                     df[col] = df[col].astype(np.int64)
+             else:
+                 if c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
+                     df[col] = df[col].astype(np.float32)
+                 else:
+                     df[col] = df[col].astype(np.float64)
+
+     end_mem = df.memory_usage(deep=True).sum() / 1024**2
+
+     if verbose:
+         print(f"Memory usage reduced from {start_mem:.2f} MB to {end_mem:.2f} MB ({100 * (start_mem - end_mem) / start_mem:.1f}% reduction)")
+
+     return df
+
+
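A sketch of reduce_memory_usage on a small frame, with values chosen so the downcasts are predictable; note the frame is modified in place as well as returned:

    import numpy as np
    import pandas as pd
    from adamops.utils.helpers import reduce_memory_usage

    df = pd.DataFrame({
        "small_int": np.arange(100, dtype=np.int64),  # 0..99 fits in int8
        "score": np.random.rand(100),                 # float64 -> float32
    })
    df = reduce_memory_usage(df, verbose=True)
    df.dtypes  # small_int: int8, score: float32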
+ # =============================================================================
+ # Timestamp Utilities
+ # =============================================================================
+
+ def now_str(format: str = "%Y%m%d_%H%M%S") -> str:
+     """
+     Get current timestamp as formatted string.
+
+     Args:
+         format: Datetime format string.
+
+     Returns:
+         str: Formatted timestamp.
+     """
+     return datetime.now().strftime(format)
+
+
+ def parse_timestamp(timestamp: Union[str, int, float, datetime]) -> datetime:
+     """
+     Parse various timestamp formats to datetime.
+
+     Args:
+         timestamp: Timestamp to parse.
+
+     Returns:
+         datetime: Parsed datetime object.
+     """
+     if isinstance(timestamp, datetime):
+         return timestamp
+     elif isinstance(timestamp, (int, float)):
+         return datetime.fromtimestamp(timestamp)
+     elif isinstance(timestamp, str):
+         # Try common formats
+         formats = [
+             "%Y-%m-%d %H:%M:%S",
+             "%Y-%m-%d",
+             "%Y/%m/%d",
+             "%d-%m-%Y",
+             "%d/%m/%Y",
+             "%Y%m%d",
+             "%Y%m%d_%H%M%S",
+         ]
+         for fmt in formats:
+             try:
+                 return datetime.strptime(timestamp, fmt)
+             except ValueError:
+                 continue
+         raise ValueError(f"Cannot parse timestamp: {timestamp}")
+     else:
+         raise ValueError(f"Unsupported timestamp type: {type(timestamp)}")
+
+
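A sketch of parse_timestamp across the input types it accepts; values are arbitrary. Numeric inputs go through datetime.fromtimestamp, so they are interpreted as Unix epochs in local time:

    from adamops.utils.helpers import parse_timestamp

    parse_timestamp("2024-01-15 08:30:00")  # datetime(2024, 1, 15, 8, 30)
    parse_timestamp("20240115")             # datetime(2024, 1, 15, 0, 0)
    parse_timestamp(1700000000)             # local-time datetime for that epoch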
+ # =============================================================================
+ # Validation Decorators
+ # =============================================================================
+
+ def validate_args(**validators: Callable[[Any], bool]) -> Callable:
+     """
+     Decorator to validate function arguments.
+
+     Args:
+         **validators: Mapping of argument names to validation functions.
+
+     Returns:
+         Decorator function.
+
+     Example:
+         >>> @validate_args(x=lambda x: x > 0)
+         ... def process(x):
+         ...     return x * 2
+     """
+     def decorator(func: Callable) -> Callable:
+         @wraps(func)
+         def wrapper(*args, **kwargs):
+             # Get function signature
+             import inspect
+             sig = inspect.signature(func)
+             bound = sig.bind(*args, **kwargs)
+             bound.apply_defaults()
+
+             # Validate arguments
+             for arg_name, validator in validators.items():
+                 if arg_name in bound.arguments:
+                     value = bound.arguments[arg_name]
+                     if not validator(value):
+                         raise ValueError(f"Invalid value for argument '{arg_name}': {value}")
+
+             return func(*args, **kwargs)
+         return wrapper
+     return decorator
+
+
+ def deprecated(message: str = "", version: str = "") -> Callable:
+     """
+     Decorator to mark a function as deprecated.
+
+     Args:
+         message: Deprecation message.
+         version: Version when the function will be removed.
+
+     Returns:
+         Decorator function.
+     """
+     def decorator(func: Callable) -> Callable:
+         @wraps(func)
+         def wrapper(*args, **kwargs):
+             warn_msg = f"{func.__name__} is deprecated"
+             if version:
+                 warn_msg += f" and will be removed in version {version}"
+             if message:
+                 warn_msg += f". {message}"
+             warnings.warn(warn_msg, DeprecationWarning, stacklevel=2)
+             return func(*args, **kwargs)
+         return wrapper
+     return decorator
+
+
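A sketch of deprecated; old_train and new_train are hypothetical names used only to show how the warning message is assembled:

    from adamops.utils.helpers import deprecated

    @deprecated(message="Use new_train() instead.", version="0.2.0")
    def old_train():
        return "training"

    old_train()
    # DeprecationWarning: old_train is deprecated and will be removed
    # in version 0.2.0. Use new_train() instead.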
+ # =============================================================================
+ # Random Seeds
+ # =============================================================================
+
+ def set_random_seed(seed: int = 42) -> None:
+     """
+     Set random seed for reproducibility.
+
+     Args:
+         seed: Random seed value.
+     """
+     import random
+     random.seed(seed)
+     np.random.seed(seed)
+
+     # Try to set other seeds if available
+     try:
+         import torch
+         torch.manual_seed(seed)
+         if torch.cuda.is_available():
+             torch.cuda.manual_seed_all(seed)
+     except ImportError:
+         pass
+
+     try:
+         import tensorflow as tf
+         tf.random.set_seed(seed)
+     except ImportError:
+         pass
+
+
+ # =============================================================================
+ # Progress Tracking
+ # =============================================================================
+
+ class ProgressTracker:
+     """
+     Simple progress tracker for long-running operations.
+
+     Example:
+         >>> tracker = ProgressTracker(total=100)
+         >>> for i in range(100):
+         ...     tracker.update()
+         >>> tracker.finish()
+     """
+
+     def __init__(self, total: int, description: str = "Progress"):
+         self.total = total
+         self.description = description
+         self.current = 0
+         self.start_time = time.time()
+
+     def update(self, n: int = 1) -> None:
+         """Update progress by n steps."""
+         self.current += n
+         elapsed = time.time() - self.start_time
+         rate = self.current / elapsed if elapsed > 0 else 0
+         eta = (self.total - self.current) / rate if rate > 0 else 0
+
+         pct = 100 * self.current / self.total
+         print(f"\r{self.description}: {self.current}/{self.total} ({pct:.1f}%) | "
+               f"Elapsed: {elapsed:.1f}s | ETA: {eta:.1f}s", end="")
+
+     def finish(self) -> None:
+         """Mark progress as complete."""
+         elapsed = time.time() - self.start_time
+         print(f"\n{self.description} completed in {elapsed:.2f}s")