adamops 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adamops/__init__.py +40 -0
- adamops/cli.py +163 -0
- adamops/data/__init__.py +24 -0
- adamops/data/feature_engineering.py +284 -0
- adamops/data/loaders.py +922 -0
- adamops/data/preprocessors.py +227 -0
- adamops/data/splitters.py +218 -0
- adamops/data/validators.py +148 -0
- adamops/deployment/__init__.py +21 -0
- adamops/deployment/api.py +237 -0
- adamops/deployment/cloud.py +191 -0
- adamops/deployment/containerize.py +262 -0
- adamops/deployment/exporters.py +148 -0
- adamops/evaluation/__init__.py +24 -0
- adamops/evaluation/comparison.py +133 -0
- adamops/evaluation/explainability.py +143 -0
- adamops/evaluation/metrics.py +233 -0
- adamops/evaluation/reports.py +165 -0
- adamops/evaluation/visualization.py +238 -0
- adamops/models/__init__.py +21 -0
- adamops/models/automl.py +277 -0
- adamops/models/ensembles.py +228 -0
- adamops/models/modelops.py +308 -0
- adamops/models/registry.py +250 -0
- adamops/monitoring/__init__.py +21 -0
- adamops/monitoring/alerts.py +200 -0
- adamops/monitoring/dashboard.py +117 -0
- adamops/monitoring/drift.py +212 -0
- adamops/monitoring/performance.py +195 -0
- adamops/pipelines/__init__.py +15 -0
- adamops/pipelines/orchestrators.py +183 -0
- adamops/pipelines/workflows.py +212 -0
- adamops/utils/__init__.py +18 -0
- adamops/utils/config.py +457 -0
- adamops/utils/helpers.py +663 -0
- adamops/utils/logging.py +412 -0
- adamops-0.1.0.dist-info/METADATA +310 -0
- adamops-0.1.0.dist-info/RECORD +42 -0
- adamops-0.1.0.dist-info/WHEEL +5 -0
- adamops-0.1.0.dist-info/entry_points.txt +2 -0
- adamops-0.1.0.dist-info/licenses/LICENSE +21 -0
- adamops-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AdamOps Workflows Module
|
|
3
|
+
|
|
4
|
+
Define ML workflows as DAGs.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Any, Callable, Dict, List, Optional, Tuple
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from enum import Enum
|
|
10
|
+
import traceback
|
|
11
|
+
|
|
12
|
+
from adamops.utils.logging import get_logger
|
|
13
|
+
|
|
14
|
+
logger = get_logger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class TaskStatus(Enum):
    """Lifecycle states shared by Task and Workflow."""
    PENDING = "pending"      # created, not yet executed
    RUNNING = "running"      # currently executing
    COMPLETED = "completed"  # finished successfully
    FAILED = "failed"        # raised an exception after exhausting retries
    SKIPPED = "skipped"      # not run because a dependency did not complete
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class Task:
    """A single unit of work in a workflow.

    Wraps a callable together with its dependency list, retry policy, and
    execution bookkeeping (status, timing, result/error).
    """

    def __init__(self, name: str, func: Callable, dependencies: Optional[List[str]] = None,
                 retry: int = 0, timeout: Optional[int] = None):
        self.name = name
        self.func = func
        self.dependencies = dependencies or []
        self.retry = retry
        self.timeout = timeout
        # Execution bookkeeping, populated by run().
        self.status = TaskStatus.PENDING
        self.result: Any = None
        self.error: Optional[str] = None
        self.start_time: Optional[datetime] = None
        self.end_time: Optional[datetime] = None

    @property
    def duration(self) -> Optional[float]:
        """Wall-clock runtime in seconds, or None until the task has finished."""
        if self.start_time is None or self.end_time is None:
            return None
        return (self.end_time - self.start_time).total_seconds()

    def run(self, context: Dict) -> Any:
        """Execute the task, retrying up to ``retry`` times on failure.

        Args:
            context: Shared workflow context passed to the task callable.

        Returns:
            Whatever the task callable returns.

        Raises:
            Exception: Re-raises the last error once all retries are exhausted.
        """
        self.status = TaskStatus.RUNNING
        self.start_time = datetime.now()

        # One initial attempt plus `retry` additional ones.
        for attempt in range(1, self.retry + 2):
            try:
                self.result = self.func(context)
            except Exception as e:
                if attempt > self.retry:
                    # Out of retries: record the failure and propagate.
                    self.status = TaskStatus.FAILED
                    self.error = str(e)
                    self.end_time = datetime.now()
                    logger.error(f"Task '{self.name}' failed: {e}")
                    raise
                logger.warning(f"Task '{self.name}' failed, retrying ({attempt}/{self.retry})")
            else:
                self.status = TaskStatus.COMPLETED
                self.end_time = datetime.now()
                logger.info(f"Task '{self.name}' completed in {self.duration:.2f}s")
                return self.result

    def to_dict(self) -> Dict:
        """Serializable summary of the task's current state."""
        summary = {
            "name": self.name,
            "status": self.status.value,
            "dependencies": self.dependencies,
            "duration": self.duration,
            "error": self.error,
        }
        return summary
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class Workflow:
    """DAG-based workflow for ML pipelines.

    Tasks are registered with explicit dependencies and executed in
    topological order. Each task's return value is stored in a shared
    context dict under the task's name, so downstream tasks can read
    upstream results.
    """

    def __init__(self, name: str, description: str = ""):
        self.name = name
        self.description = description
        # Task name -> Task; insertion order breaks ties in the topo sort.
        self.tasks: Dict[str, Task] = {}
        # Shared mutable context: seed values plus per-task results.
        self.context: Dict = {}
        self.status = TaskStatus.PENDING

    def add_task(self, name: str, func: Callable, dependencies: Optional[List[str]] = None,
                 **kwargs) -> "Workflow":
        """Add a task to the workflow.

        Args:
            name: Unique task name (also the context key for its result).
            func: Callable taking the context dict; its return value becomes
                the task result.
            dependencies: Names of tasks that must complete first.
            **kwargs: Extra Task options (e.g. retry, timeout).

        Returns:
            Workflow: self, enabling chained calls.
        """
        task = Task(name, func, dependencies, **kwargs)
        self.tasks[name] = task
        return self

    def task(self, name: str = None, dependencies: Optional[List[str]] = None, **kwargs):
        """Decorator to add a task; defaults to the function's name."""
        def decorator(func):
            task_name = name or func.__name__
            self.add_task(task_name, func, dependencies, **kwargs)
            return func
        return decorator

    def _get_execution_order(self) -> List[str]:
        """Topological sort for task execution order.

        Returns:
            List[str]: Task names ordered so every task follows its deps.

        Raises:
            ValueError: If a task references an unknown dependency, or if
                the dependency graph contains a cycle. (Previously a cycle
                was not detected and silently produced an invalid order.)
        """
        visited = set()
        in_progress = set()  # nodes on the current DFS path (cycle detection)
        order = []

        def visit(name: str):
            if name in visited:
                return
            if name in in_progress:
                raise ValueError(f"Cycle detected in workflow at task: {name}")
            in_progress.add(name)

            task = self.tasks[name]
            for dep in task.dependencies:
                if dep not in self.tasks:
                    raise ValueError(f"Unknown dependency: {dep}")
                visit(dep)

            in_progress.discard(name)
            visited.add(name)
            order.append(name)

        for name in self.tasks:
            visit(name)

        return order

    def run(self, initial_context: Optional[Dict] = None) -> Dict:
        """Execute the workflow.

        Args:
            initial_context: Seed values for the shared context dict.

        Returns:
            Dict: The final context, containing each task's result under
            its task name.

        Raises:
            Exception: Re-raised from the first task whose retries are
                exhausted; the workflow is marked FAILED.
        """
        self.context = initial_context or {}
        self.status = TaskStatus.RUNNING

        logger.info(f"Starting workflow: {self.name}")
        start_time = datetime.now()

        try:
            execution_order = self._get_execution_order()

            for task_name in execution_order:
                task = self.tasks[task_name]

                # Only run a task when every dependency finished successfully.
                # NOTE(review): a failed task currently re-raises and aborts
                # the whole run, so this skip path looks defensive/dead —
                # confirm whether continue-on-failure is intended.
                deps_ok = all(
                    self.tasks[dep].status == TaskStatus.COMPLETED
                    for dep in task.dependencies
                )

                if not deps_ok:
                    task.status = TaskStatus.SKIPPED
                    logger.warning(f"Skipping '{task_name}' due to failed dependencies")
                    continue

                # Run task and publish its result to the shared context.
                result = task.run(self.context)
                self.context[task_name] = result

            self.status = TaskStatus.COMPLETED
            logger.info(f"Workflow '{self.name}' completed in {(datetime.now() - start_time).total_seconds():.2f}s")

        except Exception as e:
            self.status = TaskStatus.FAILED
            logger.error(f"Workflow '{self.name}' failed: {e}")
            raise

        return self.context

    def get_status(self) -> Dict:
        """Get workflow status, including a per-task state summary."""
        return {
            "name": self.name,
            "status": self.status.value,
            "tasks": {name: task.to_dict() for name, task in self.tasks.items()},
        }

    def visualize(self) -> str:
        """Generate ASCII visualization of workflow (one line per task)."""
        lines = [f"Workflow: {self.name}", "=" * 40]

        for name in self._get_execution_order():
            task = self.tasks[name]
            deps = ", ".join(task.dependencies) if task.dependencies else "None"
            status = task.status.value.upper()
            lines.append(f"  [{status}] {name} <- {deps}")

        return "\n".join(lines)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def create_ml_pipeline(name: str = "ml_pipeline") -> Workflow:
    """Create a standard ML pipeline workflow.

    Assembles a four-stage skeleton (load -> preprocess -> train ->
    evaluate) with placeholder stage implementations.

    Args:
        name: Name for the created workflow.

    Returns:
        Workflow: The assembled pipeline, ready for run().
    """
    pipeline = Workflow(name, "Standard ML Training Pipeline")

    def _load_data(ctx):
        logger.info("Loading data...")
        return ctx.get("data_path")

    def _preprocess(ctx):
        logger.info("Preprocessing data...")
        return {"preprocessed": True}

    def _train(ctx):
        logger.info("Training model...")
        return {"model": "trained"}

    def _evaluate(ctx):
        logger.info("Evaluating model...")
        return {"metrics": {"accuracy": 0.95}}

    # Register the stages under fixed names, chained by dependency.
    pipeline.add_task("load_data", _load_data)
    pipeline.add_task("preprocess", _preprocess, dependencies=["load_data"])
    pipeline.add_task("train", _train, dependencies=["preprocess"])
    pipeline.add_task("evaluate", _evaluate, dependencies=["train"])

    return pipeline
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AdamOps Utils Module
|
|
3
|
+
|
|
4
|
+
Provides utility functions:
|
|
5
|
+
- config: Configuration management
|
|
6
|
+
- logging: Centralized logging
|
|
7
|
+
- helpers: Common helper functions
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from adamops.utils import config
|
|
11
|
+
from adamops.utils import logging
|
|
12
|
+
from adamops.utils import helpers
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
"config",
|
|
16
|
+
"logging",
|
|
17
|
+
"helpers",
|
|
18
|
+
]
|
adamops/utils/config.py
ADDED
|
@@ -0,0 +1,457 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AdamOps Configuration Module
|
|
3
|
+
|
|
4
|
+
Provides centralized configuration management for the entire library.
|
|
5
|
+
Supports YAML, JSON, and environment variable configurations.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import json
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any, Dict, Optional, Union
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
|
|
14
|
+
try:
|
|
15
|
+
import yaml
|
|
16
|
+
YAML_AVAILABLE = True
|
|
17
|
+
except ImportError:
|
|
18
|
+
YAML_AVAILABLE = False
|
|
19
|
+
|
|
20
|
+
try:
|
|
21
|
+
from dotenv import load_dotenv
|
|
22
|
+
DOTENV_AVAILABLE = True
|
|
23
|
+
except ImportError:
|
|
24
|
+
DOTENV_AVAILABLE = False
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class DataConfig:
    """Configuration for data module."""
    # Field semantics are consumed by adamops.data (not visible here);
    # comments below are best-effort readings of the names/defaults.
    default_encoding: str = "utf-8"  # text encoding used for data file I/O
    missing_threshold: float = 0.5  # tolerated fraction of missing values (presumably per column)
    outlier_method: str = "iqr"  # outlier detection strategy identifier
    outlier_threshold: float = 1.5  # method multiplier (1.5 = classic IQR fence)
    validation_sample_size: int = 10000  # rows sampled during validation
    auto_detect_types: bool = True  # infer column types automatically
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class ModelConfig:
    """Configuration for model module."""
    default_random_state: int = 42  # default RNG seed for reproducibility
    cv_folds: int = 5  # number of cross-validation folds
    early_stopping_rounds: int = 50  # rounds without improvement before stopping (presumably for boosting libs)
    n_jobs: int = -1  # parallel workers; -1 conventionally means "all cores"
    verbose: int = 0  # verbosity passed through to estimators (0 = silent)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass
class AutoMLConfig:
    """Configuration for AutoML module."""
    time_limit: int = 3600  # overall search budget, presumably in seconds
    max_trials: int = 100  # maximum number of tuning trials
    tuning_method: str = "bayesian"  # hyperparameter search strategy identifier
    optimization_metric: str = "auto"  # "auto" presumably selects a metric from the task type
    early_stopping: bool = True  # stop the search early when it stops improving
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dataclass
class DeploymentConfig:
    """Configuration for deployment module."""
    default_port: int = 8000  # TCP port for the serving API
    default_host: str = "0.0.0.0"  # bind address; 0.0.0.0 listens on all interfaces
    api_framework: str = "fastapi"  # web framework identifier used by deployment.api
    enable_cors: bool = True  # allow cross-origin requests
    log_requests: bool = True  # log incoming requests
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@dataclass
class MonitoringConfig:
    """Configuration for monitoring module."""
    drift_threshold: float = 0.05  # drift alert threshold — confirm exact semantics in monitoring.drift
    alert_email: Optional[str] = None  # destination for alerts; None disables email alerts
    check_interval: int = 3600  # interval between checks, presumably seconds
    log_predictions: bool = True  # record predictions for later analysis
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@dataclass
class LoggingConfig:
    """Configuration for logging."""
    level: str = "INFO"  # log level name (stdlib logging levels)
    format: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"  # stdlib logging format string
    file: Optional[str] = None  # optional log file path; None = no file handler
    console: bool = True  # also emit log records to the console
    max_bytes: int = 10485760  # 10MB — rotation size, presumably for a RotatingFileHandler
    backup_count: int = 5  # number of rotated log files to keep
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@dataclass
class AdamOpsConfig:
    """Main configuration class for AdamOps.

    Aggregates the per-module sub-configurations plus a few top-level
    settings (model registry and cache locations).
    """
    # Per-module sub-configurations; default_factory gives each instance
    # its own mutable sub-config objects.
    data: DataConfig = field(default_factory=DataConfig)
    model: ModelConfig = field(default_factory=ModelConfig)
    automl: AutoMLConfig = field(default_factory=AutoMLConfig)
    deployment: DeploymentConfig = field(default_factory=DeploymentConfig)
    monitoring: MonitoringConfig = field(default_factory=MonitoringConfig)
    logging: LoggingConfig = field(default_factory=LoggingConfig)

    # Registry settings
    registry_backend: str = "json"  # json or sqlite
    registry_path: str = ".adamops_registry"  # directory/file used by the model registry

    # General settings
    cache_enabled: bool = True  # toggle on-disk caching
    cache_path: str = ".adamops_cache"  # cache location
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
# Global configuration instance
|
|
108
|
+
_config: Optional[AdamOpsConfig] = None
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def get_config() -> AdamOpsConfig:
    """
    Get the global configuration instance, creating a default one lazily.

    Returns:
        AdamOpsConfig: The global configuration object.

    Example:
        >>> config = get_config()
        >>> print(config.model.cv_folds)
        5
    """
    global _config
    if _config is None:  # first access: install the defaults
        _config = AdamOpsConfig()
    return _config
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def set_config(config: AdamOpsConfig) -> None:
|
|
130
|
+
"""
|
|
131
|
+
Set the global configuration instance.
|
|
132
|
+
|
|
133
|
+
Args:
|
|
134
|
+
config: The configuration object to set as global.
|
|
135
|
+
|
|
136
|
+
Example:
|
|
137
|
+
>>> custom_config = AdamOpsConfig()
|
|
138
|
+
>>> custom_config.model.cv_folds = 10
|
|
139
|
+
>>> set_config(custom_config)
|
|
140
|
+
"""
|
|
141
|
+
global _config
|
|
142
|
+
_config = config
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def reset_config() -> None:
    """
    Reset the global configuration back to a fresh default instance.

    Example:
        >>> reset_config()
        >>> config = get_config()
        >>> print(config.model.cv_folds)
        5
    """
    global _config
    _config = AdamOpsConfig()
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def load_config_from_file(filepath: Union[str, Path]) -> AdamOpsConfig:
    """
    Load configuration from a YAML or JSON file.

    The format is chosen from the file extension, case-insensitively
    (so ``CONFIG.YAML`` and ``settings.JSON`` are accepted too —
    previously only lowercase extensions were recognized).

    Args:
        filepath: Path to the configuration file.

    Returns:
        AdamOpsConfig: Loaded configuration object.

    Raises:
        FileNotFoundError: If the file doesn't exist.
        ValueError: If the file format is not supported.
        ImportError: If a YAML file is given but PyYAML is not installed.

    Example:
        >>> config = load_config_from_file("config.yaml")
    """
    filepath = Path(filepath)

    if not filepath.exists():
        raise FileNotFoundError(f"Configuration file not found: {filepath}")

    # Normalize extension so uppercase variants are recognized.
    suffix = filepath.suffix.lower()

    if suffix in (".yaml", ".yml"):
        if not YAML_AVAILABLE:
            raise ImportError("PyYAML is required to load YAML config files. Install with: pip install pyyaml")
        with open(filepath, "r", encoding="utf-8") as f:
            config_dict = yaml.safe_load(f)
    elif suffix == ".json":
        with open(filepath, "r", encoding="utf-8") as f:
            config_dict = json.load(f)
    else:
        raise ValueError(f"Unsupported config file format: {filepath.suffix}")

    return _dict_to_config(config_dict)
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def save_config_to_file(config: AdamOpsConfig, filepath: Union[str, Path]) -> None:
    """
    Save configuration to a YAML or JSON file.

    The format is chosen from the file extension, case-insensitively.
    The format (and PyYAML availability) is validated *before* the file
    is opened, so an unsupported extension no longer leaves behind an
    empty or truncated file.

    Args:
        config: Configuration object to save.
        filepath: Path to save the configuration to.

    Raises:
        ValueError: If the file format is not supported.
        ImportError: If a YAML file is requested but PyYAML is not installed.

    Example:
        >>> config = get_config()
        >>> save_config_to_file(config, "config.yaml")
    """
    filepath = Path(filepath)
    suffix = filepath.suffix.lower()

    # Validate the target format before creating/truncating the file.
    if suffix in (".yaml", ".yml"):
        if not YAML_AVAILABLE:
            raise ImportError("PyYAML is required to save YAML config files. Install with: pip install pyyaml")
    elif suffix != ".json":
        raise ValueError(f"Unsupported config file format: {filepath.suffix}")

    filepath.parent.mkdir(parents=True, exist_ok=True)
    config_dict = _config_to_dict(config)

    with open(filepath, "w", encoding="utf-8") as f:
        if suffix == ".json":
            json.dump(config_dict, f, indent=2)
        else:
            yaml.dump(config_dict, f, default_flow_style=False, indent=2)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def load_config_from_env(prefix: str = "ADAMOPS") -> AdamOpsConfig:
    """
    Load configuration from environment variables.

    Environment variables should be named as {prefix}_{SECTION}_{KEY}.
    For example: ADAMOPS_MODEL_CV_FOLDS=10

    Args:
        prefix: Prefix for environment variables.

    Returns:
        AdamOpsConfig: Configuration with values from environment.

    Example:
        >>> # Set env: ADAMOPS_MODEL_CV_FOLDS=10
        >>> config = load_config_from_env()
        >>> print(config.model.cv_folds)
        10
    """
    # Best-effort load of a local .env file (no-op without python-dotenv).
    if DOTENV_AVAILABLE:
        load_dotenv()

    config = AdamOpsConfig()

    # Map of environment variable suffixes to config attributes.
    # Each entry: env var name -> (sub-config attribute on AdamOpsConfig,
    # or None for a top-level attribute; attribute name; converter callable).
    env_mappings = {
        # Data config
        f"{prefix}_DATA_DEFAULT_ENCODING": ("data", "default_encoding", str),
        f"{prefix}_DATA_MISSING_THRESHOLD": ("data", "missing_threshold", float),
        f"{prefix}_DATA_OUTLIER_METHOD": ("data", "outlier_method", str),
        f"{prefix}_DATA_OUTLIER_THRESHOLD": ("data", "outlier_threshold", float),

        # Model config
        f"{prefix}_MODEL_RANDOM_STATE": ("model", "default_random_state", int),
        f"{prefix}_MODEL_CV_FOLDS": ("model", "cv_folds", int),
        f"{prefix}_MODEL_N_JOBS": ("model", "n_jobs", int),

        # AutoML config
        f"{prefix}_AUTOML_TIME_LIMIT": ("automl", "time_limit", int),
        f"{prefix}_AUTOML_MAX_TRIALS": ("automl", "max_trials", int),
        f"{prefix}_AUTOML_TUNING_METHOD": ("automl", "tuning_method", str),

        # Deployment config
        f"{prefix}_DEPLOY_PORT": ("deployment", "default_port", int),
        f"{prefix}_DEPLOY_HOST": ("deployment", "default_host", str),
        f"{prefix}_DEPLOY_FRAMEWORK": ("deployment", "api_framework", str),

        # Monitoring config
        f"{prefix}_MONITOR_DRIFT_THRESHOLD": ("monitoring", "drift_threshold", float),
        f"{prefix}_MONITOR_CHECK_INTERVAL": ("monitoring", "check_interval", int),

        # Logging config
        f"{prefix}_LOG_LEVEL": ("logging", "level", str),
        f"{prefix}_LOG_FILE": ("logging", "file", str),

        # General settings
        f"{prefix}_REGISTRY_BACKEND": (None, "registry_backend", str),
        f"{prefix}_REGISTRY_PATH": (None, "registry_path", str),
    }

    for env_var, (section, attr, type_conv) in env_mappings.items():
        value = os.environ.get(env_var)
        if value is not None:
            try:
                converted_value = type_conv(value)
                if section is not None:
                    setattr(getattr(config, section), attr, converted_value)
                else:
                    setattr(config, attr, converted_value)
            # NOTE(review): values that fail conversion are dropped
            # silently and defaults are kept — confirm this best-effort
            # behavior is intended (there is no logger in this module).
            except (ValueError, TypeError):
                pass  # Skip invalid values

    return config
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def _config_to_dict(config: AdamOpsConfig) -> Dict[str, Any]:
    """Convert configuration object to a plain nested dictionary.

    Uses dataclasses.asdict, which recurses into the nested section
    dataclasses and yields the same structure as the previous
    hand-written field-by-field mapping — but stays in sync
    automatically when fields are added to any config dataclass
    (the manual mapping silently dropped new fields).

    Args:
        config: The configuration object to serialize.

    Returns:
        Dict[str, Any]: Nested dict of all configuration values.
    """
    # Local import keeps the module's top-level imports unchanged.
    from dataclasses import asdict

    return asdict(config)
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def _dict_to_config(config_dict: Dict[str, Any]) -> AdamOpsConfig:
    """Convert dictionary to configuration object.

    Missing keys keep their dataclass defaults and unknown keys are
    ignored, matching the previous hand-written per-field mapping but
    without the duplication — new fields on any config dataclass are
    picked up automatically. A section that is present but None (e.g.
    an empty YAML mapping) is now treated as absent instead of raising.

    Args:
        config_dict: Nested dict as produced by _config_to_dict (or any
            partial subset of it).

    Returns:
        AdamOpsConfig: Configuration populated from the dictionary.
    """
    # Local import keeps the module's top-level imports unchanged.
    from dataclasses import fields

    config = AdamOpsConfig()

    # Rebuild each section dataclass from its sub-dict, falling back to
    # the current (default) value for every missing field.
    for section_name in ("data", "model", "automl", "deployment", "monitoring", "logging"):
        section_dict = config_dict.get(section_name)
        if section_dict is None:
            continue
        current = getattr(config, section_name)
        merged = {
            f.name: section_dict.get(f.name, getattr(current, f.name))
            for f in fields(current)
        }
        setattr(config, section_name, type(current)(**merged))

    # General (top-level) settings
    for attr in ("registry_backend", "registry_path", "cache_enabled", "cache_path"):
        setattr(config, attr, config_dict.get(attr, getattr(config, attr)))

    return config
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
def update_config(**kwargs) -> AdamOpsConfig:
    """
    Update specific values on the global configuration.

    Args:
        **kwargs: Configuration values in format section__key=value.

    Returns:
        AdamOpsConfig: Updated configuration object.

    Example:
        >>> config = update_config(model__cv_folds=10, automl__time_limit=7200)
        >>> print(config.model.cv_folds)
        10
    """
    config = get_config()

    for key, value in kwargs.items():
        if "__" not in key:
            # Top-level attribute (e.g. cache_enabled); unknown keys are ignored.
            if hasattr(config, key):
                setattr(config, key, value)
            continue
        # "section__attr" addresses a field on a sub-config; both parts
        # must exist, otherwise the entry is silently ignored.
        section_name, attr_name = key.split("__", 1)
        if hasattr(config, section_name):
            section_config = getattr(config, section_name)
            if hasattr(section_config, attr_name):
                setattr(section_config, attr_name, value)

    return config
|