collie-mlops 0.1.1b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- collie/__init__.py +69 -0
- collie/_common/__init__.py +0 -0
- collie/_common/decorator.py +53 -0
- collie/_common/exceptions.py +104 -0
- collie/_common/mlflow_model_io/__init__.py +0 -0
- collie/_common/mlflow_model_io/base_flavor_handler.py +26 -0
- collie/_common/mlflow_model_io/flavor_registry.py +72 -0
- collie/_common/mlflow_model_io/model_flavors.py +259 -0
- collie/_common/mlflow_model_io/model_io.py +65 -0
- collie/_common/utils.py +13 -0
- collie/contracts/__init__.py +0 -0
- collie/contracts/event.py +79 -0
- collie/contracts/mlflow.py +444 -0
- collie/contracts/orchestrator.py +79 -0
- collie/core/__init__.py +41 -0
- collie/core/enums/__init__.py +0 -0
- collie/core/enums/components.py +26 -0
- collie/core/enums/ml_models.py +20 -0
- collie/core/evaluator/__init__.py +0 -0
- collie/core/evaluator/evaluator.py +147 -0
- collie/core/models.py +125 -0
- collie/core/orchestrator/__init__.py +0 -0
- collie/core/orchestrator/orchestrator.py +47 -0
- collie/core/pusher/__init__.py +0 -0
- collie/core/pusher/pusher.py +98 -0
- collie/core/trainer/__init__.py +0 -0
- collie/core/trainer/trainer.py +78 -0
- collie/core/transform/__init__.py +0 -0
- collie/core/transform/transform.py +87 -0
- collie/core/tuner/__init__.py +0 -0
- collie/core/tuner/tuner.py +84 -0
- collie/helper/__init__.py +0 -0
- collie/helper/pytorch/__init__.py +0 -0
- collie/helper/pytorch/callback/__init__.py +0 -0
- collie/helper/pytorch/callback/callback.py +155 -0
- collie/helper/pytorch/callback/earlystop.py +54 -0
- collie/helper/pytorch/callback/model_checkpoint.py +100 -0
- collie/helper/pytorch/model/__init__.py +0 -0
- collie/helper/pytorch/model/loader.py +55 -0
- collie/helper/pytorch/trainer.py +304 -0
- collie_mlops-0.1.1b0.dist-info/LICENSE +21 -0
- collie_mlops-0.1.1b0.dist-info/METADATA +259 -0
- collie_mlops-0.1.1b0.dist-info/RECORD +45 -0
- collie_mlops-0.1.1b0.dist-info/WHEEL +5 -0
- collie_mlops-0.1.1b0.dist-info/top_level.txt +1 -0
collie/__init__.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
"""
Collie - A Lightweight MLOps Framework for Machine Learning Workflows

Collie provides a modular, event-driven architecture for building ML pipelines
with deep MLflow integration.

Quick Start:
>>> from collie import Transformer, Trainer, Orchestrator
>>> # Define your components
>>> orchestrator = Orchestrator(
...     components=[MyTransformer(), MyTrainer()],
...     tracking_uri="http://localhost:5000",
...     registered_model_name="my_model"
... )
>>> orchestrator.run()

For more examples, see: https://github.com/ChingHuanChiu/collie
"""

__author__ = "ChingHuanChiu"
__email__ = "stevenchiou8@gmail.com"
# Keep in sync with the distribution version (collie-mlops 0.1.1b0); the
# previous value "0.1.0b0" lagged behind the released package metadata.
__version__ = "0.1.1b0"

# Import all main components for easy access
from .contracts.event import Event, EventType, PipelineContext
from .core.transform.transform import Transformer
from .core.trainer.trainer import Trainer
from .core.tuner.tuner import Tuner
from .core.evaluator.evaluator import Evaluator
from .core.pusher.pusher import Pusher
from .core.orchestrator.orchestrator import Orchestrator

# Import data models
from .core.models import (
    TransformerPayload,
    TrainerPayload,
    TunerPayload,
    EvaluatorPayload,
    PusherPayload,
)

# Import enums for configuration
from .core.enums.ml_models import ModelFlavor, MLflowModelStage

__all__ = [
    # Core components - the main classes users interact with
    "Transformer",
    "Trainer",
    "Tuner",
    "Evaluator",
    "Pusher",
    "Orchestrator",

    # Event system - for building custom workflows
    "Event",
    "EventType",
    "PipelineContext",

    # Payload models - for type-safe data passing
    "TransformerPayload",
    "TrainerPayload",
    "TunerPayload",
    "EvaluatorPayload",
    "PusherPayload",

    # Configuration enums
    "ModelFlavor",
    "MLflowModelStage",
]
|
|
File without changes
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from typing import Tuple, List
|
|
2
|
+
from functools import wraps
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def type_checker(
    typing: Tuple[type, ...],
    error_msg: str
):
    """
    A decorator factory that validates the return type of the wrapped function.

    Args:
        typing (Tuple[type, ...]): Accepted return types. (The previous
            annotation ``Tuple[type]`` incorrectly declared a 1-tuple; any
            number of types is accepted by ``isinstance``.)
        error_msg (str): The error message raised if the type does not match.

    Raises:
        TypeError: If the return value of the wrapped function is not an
            instance of any of the given types.
    """

    def closure(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            result = func(*args, **kwargs)
            if not isinstance(result, typing):
                raise TypeError(error_msg)
            return result
        return wrapper
    return closure
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def dict_key_checker(keys: List[str]):
    """
    A decorator factory asserting that the wrapped function returns a dict
    containing every key in *keys*.

    Args:
        keys (List[str]): Keys that must all be present in the returned dict.

    Raises:
        TypeError: If the output of the function is not a dictionary.
        KeyError: If the output of the function does not contain all the keys in the list.
    """
    def closure(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            output = func(*args, **kwargs)
            # Validate the container type before inspecting keys.
            if not isinstance(output, dict):
                raise TypeError("The output must be a dictionary.")
            if any(key not in output for key in keys):
                raise KeyError(f"The following keys must all exist in the output: {keys}. Output: {output}")
            return output
        return wrapper
    return closure
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
|
|
2
|
+
class CollieBaseException(Exception):
    """Base exception for all Collie framework errors.

    Stores the raw message, a component label, and an optional details dict;
    the rendered exception text embeds all three so log lines identify the
    failing component.
    """

    def __init__(self, message: str, component: str = None, details: dict = None):
        self.message = message
        # Default the component to the class name with the "Error" suffix stripped.
        self.component = component if component else self.__class__.__name__.replace('Error', '')
        self.details = details if details else {}

        rendered = f"[{self.component}] {message}"
        if self.details:
            rendered = f"{rendered} Details: {self.details}"

        super().__init__(rendered)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class MLflowConfigurationError(CollieBaseException):
    """Raised when MLflow configuration is invalid.

    Args:
        message: Human-readable description of the problem.
        config_param: Name of the offending configuration parameter, if known.
        **kwargs: May carry a ``details`` dict merged into the error details.
    """

    def __init__(self, message: str, config_param: str = None, **kwargs):
        # Copy so we never mutate a caller-supplied details dict in place.
        details = dict(kwargs.get('details', {}))
        if config_param:
            details['config_parameter'] = config_param
        super().__init__(message, component="MLflow Config", details=details)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class MLflowOperationError(CollieBaseException):
    """Raised when MLflow operations fail.

    Args:
        message: Human-readable description of the failure.
        operation: Name of the MLflow operation that failed, if known.
        **kwargs: May carry a ``details`` dict merged into the error details.
    """

    def __init__(self, message: str, operation: str = None, **kwargs):
        # Copy so we never mutate a caller-supplied details dict in place.
        details = dict(kwargs.get('details', {}))
        if operation:
            details['operation'] = operation
        super().__init__(message, component="MLflow Operation", details=details)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class OrchestratorError(CollieBaseException):
    """Raised for errors in the orchestrator process.

    Args:
        message: Human-readable description of the failure.
        pipeline_stage: Pipeline stage where the failure occurred, if known.
        **kwargs: May carry a ``details`` dict merged into the error details.
    """

    def __init__(self, message: str, pipeline_stage: str = None, **kwargs):
        # Copy so we never mutate a caller-supplied details dict in place.
        details = dict(kwargs.get('details', {}))
        if pipeline_stage:
            details['pipeline_stage'] = pipeline_stage
        super().__init__(message, component="Orchestrator", details=details)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class TransformerError(CollieBaseException):
    """Raised when data transformation fails.

    Args:
        message: Human-readable description of the failure.
        data_type: Type of data being transformed, if known.
        **kwargs: May carry a ``details`` dict merged into the error details.
    """

    def __init__(self, message: str, data_type: str = None, **kwargs):
        # Copy so we never mutate a caller-supplied details dict in place.
        details = dict(kwargs.get('details', {}))
        if data_type:
            details['data_type'] = data_type
        super().__init__(message, component="Transformer", details=details)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class TrainerError(CollieBaseException):
    """Raised when model training fails.

    Args:
        message: Human-readable description of the failure.
        model_type: Type of model being trained, if known.
        **kwargs: May carry a ``details`` dict merged into the error details.
    """

    def __init__(self, message: str, model_type: str = None, **kwargs):
        # Copy so we never mutate a caller-supplied details dict in place.
        details = dict(kwargs.get('details', {}))
        if model_type:
            details['model_type'] = model_type
        super().__init__(message, component="Trainer", details=details)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class TunerError(CollieBaseException):
    """Raised when hyperparameter tuning fails.

    Args:
        message: Human-readable description of the failure.
        tuning_method: Tuning method in use when the failure occurred, if known.
        **kwargs: May carry a ``details`` dict merged into the error details.
    """

    def __init__(self, message: str, tuning_method: str = None, **kwargs):
        # Copy so we never mutate a caller-supplied details dict in place.
        details = dict(kwargs.get('details', {}))
        if tuning_method:
            details['tuning_method'] = tuning_method
        super().__init__(message, component="Tuner", details=details)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class EvaluatorError(CollieBaseException):
    """Raised when model evaluation fails.

    Args:
        message: Human-readable description of the failure.
        metric: Metric being computed when the failure occurred, if known.
        **kwargs: May carry a ``details`` dict merged into the error details.
    """

    def __init__(self, message: str, metric: str = None, **kwargs):
        # Copy so we never mutate a caller-supplied details dict in place.
        details = dict(kwargs.get('details', {}))
        if metric:
            details['metric'] = metric
        super().__init__(message, component="Evaluator", details=details)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class PusherError(CollieBaseException):
    """Raised when model pushing/deployment fails.

    Args:
        message: Human-readable description of the failure.
        deployment_target: Deployment target involved, if known.
        **kwargs: May carry a ``details`` dict merged into the error details.
    """

    def __init__(self, message: str, deployment_target: str = None, **kwargs):
        # Copy so we never mutate a caller-supplied details dict in place.
        details = dict(kwargs.get('details', {}))
        if deployment_target:
            details['deployment_target'] = deployment_target
        super().__init__(message, component="Pusher", details=details)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class ModelFlavorError(CollieBaseException):
    """Raised when model flavor operations fail.

    Args:
        message: Human-readable description of the failure.
        flavor: The model flavor involved (e.g. "sklearn"), if known.
        **kwargs: May carry a ``details`` dict merged into the error details.
    """

    def __init__(self, message: str, flavor: str = None, **kwargs):
        # Copy so we never mutate a caller-supplied details dict in place.
        details = dict(kwargs.get('details', {}))
        if flavor:
            details['flavor'] = flavor
        super().__init__(message, component="Model Flavor", details=details)
|
|
File without changes
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class FlavorHandler(ABC):
    """Abstract interface for model-flavor handlers.

    A concrete handler recognizes models of one framework (``can_handle``),
    identifies its flavor (``flavor``), and logs/loads models of that
    framework (``log_model`` / ``load_model``).
    """

    @abstractmethod
    def can_handle(self, model: Any) -> bool:
        """Return True when this handler supports *model*'s type."""
        raise NotImplementedError

    @abstractmethod
    def flavor(self):
        """Return this handler's flavor identifier."""
        raise NotImplementedError

    @abstractmethod
    def log_model(
        self,
        model: Any,
        name: str,
        **kwargs: Any
    ) -> None:
        """Log *model* under artifact name *name*; extra kwargs pass through."""
        raise NotImplementedError

    @abstractmethod
    def load_model(self, model_uri: str) -> Any:
        """Load and return the model stored at *model_uri*."""
        raise NotImplementedError
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
from typing import List, Optional
|
|
2
|
+
|
|
3
|
+
from collie._common.mlflow_model_io.base_flavor_handler import FlavorHandler
|
|
4
|
+
from collie._common.mlflow_model_io.model_flavors import (
|
|
5
|
+
SklearnFlavorHandler,
|
|
6
|
+
XGBoostFlavorHandler,
|
|
7
|
+
PyTorchFlavorHandler,
|
|
8
|
+
LightGBMFlavorHandler,
|
|
9
|
+
TransformersFlavorHandler,
|
|
10
|
+
SKLEARN_AVAILABLE,
|
|
11
|
+
XGBOOST_AVAILABLE,
|
|
12
|
+
PYTORCH_AVAILABLE,
|
|
13
|
+
LIGHTGBM_AVAILABLE,
|
|
14
|
+
TRANSFORMERS_AVAILABLE
|
|
15
|
+
)
|
|
16
|
+
from collie._common.exceptions import ModelFlavorError
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class FlavorRegistry:
    """Registry for model flavor handlers with conditional loading.

    Only handlers whose underlying framework imported successfully are
    registered; the registration order is fixed and matches the original
    sklearn → xgboost → pytorch → lightgbm → transformers precedence.
    """

    def __init__(self):
        # (availability flag, handler factory) pairs, in lookup-precedence order.
        candidates = [
            (SKLEARN_AVAILABLE, SklearnFlavorHandler),
            (XGBOOST_AVAILABLE, XGBoostFlavorHandler),
            (PYTORCH_AVAILABLE, PyTorchFlavorHandler),
            (LIGHTGBM_AVAILABLE, LightGBMFlavorHandler),
            (TRANSFORMERS_AVAILABLE, TransformersFlavorHandler),
        ]
        self._handlers: List[FlavorHandler] = [
            factory() for available, factory in candidates if available
        ]

        if not self._handlers:
            raise ModelFlavorError(
                "No model flavor handlers available. Please install at least one supported ML framework."
            )

    def find_handler_by_model(self, model) -> Optional[FlavorHandler]:
        """Return the first registered handler that can handle *model*, or None."""
        return next(
            (handler for handler in self._handlers if handler.can_handle(model)),
            None,
        )

    def find_handler_by_flavor(self, flavor: str) -> Optional[FlavorHandler]:
        """Return the handler whose flavor matches *flavor*, or None.

        NOTE(review): handler.flavor() returns a ModelFlavor enum member while
        callers pass a string — this relies on the enum comparing equal to its
        string value; confirm against collie.core.enums.ml_models.
        """
        return next(
            (handler for handler in self._handlers if handler.flavor() == flavor),
            None,
        )

    def get_available_flavors(self) -> List[str]:
        """Return the ``.value`` of each registered handler's flavor."""
        return [handler.flavor().value for handler in self._handlers]

    def get_handler_info(self) -> dict:
        """Summarize registry state: handler count, flavors, framework availability."""
        framework_status = {
            "sklearn": SKLEARN_AVAILABLE,
            "xgboost": XGBOOST_AVAILABLE,
            "pytorch": PYTORCH_AVAILABLE,
            "lightgbm": LIGHTGBM_AVAILABLE,
            "transformers": TRANSFORMERS_AVAILABLE,
        }
        return {
            "total_handlers": len(self._handlers),
            "available_flavors": self.get_available_flavors(),
            "framework_status": framework_status,
        }
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
from typing import Any
import warnings


# Optional framework imports: each flavor is enabled only when both the
# framework and its mlflow integration module import cleanly. The *_AVAILABLE
# flags gate the corresponding handler classes below.
try:
    import mlflow.sklearn
    import sklearn.base
    SKLEARN_AVAILABLE = True
except ImportError:
    SKLEARN_AVAILABLE = False
    warnings.warn("scikit-learn not available. SklearnFlavorHandler will be disabled.")

try:
    import mlflow.xgboost
    import xgboost as xgb
    XGBOOST_AVAILABLE = True
except ImportError:
    XGBOOST_AVAILABLE = False
    warnings.warn("XGBoost not available. XGBoostFlavorHandler will be disabled.")

try:
    import mlflow.pytorch
    import torch.nn as nn
    PYTORCH_AVAILABLE = True
except ImportError:
    PYTORCH_AVAILABLE = False
    warnings.warn("PyTorch not available. PyTorchFlavorHandler will be disabled.")

try:
    import mlflow.lightgbm
    import lightgbm as lgb
except Exception:
    # Broader than ImportError on purpose: importing lightgbm can raise e.g.
    # OSError when the native libomp dependency is missing. The previous
    # `except (ImportError, Exception)` was redundant — Exception already
    # covers ImportError.
    LIGHTGBM_AVAILABLE = False
    lgb = None
    warnings.warn("LightGBM not available. LightGBMFlavorHandler will be disabled.")
else:
    LIGHTGBM_AVAILABLE = True

try:
    import mlflow.transformers
    from transformers import PreTrainedModel
    TRANSFORMERS_AVAILABLE = True
except ImportError:
    TRANSFORMERS_AVAILABLE = False
    warnings.warn("Transformers not available. TransformersFlavorHandler will be disabled.")

from collie._common.mlflow_model_io.base_flavor_handler import FlavorHandler
from collie.core.enums.ml_models import ModelFlavor
from collie._common.exceptions import ModelFlavorError
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class SklearnFlavorHandler(FlavorHandler):
    """Handler for scikit-learn models (mlflow.sklearn flavor)."""

    def _require_sklearn(self, message: str) -> None:
        # Guard clause: raise a flavor-tagged error when sklearn is missing.
        if not SKLEARN_AVAILABLE:
            raise ModelFlavorError(message, flavor="sklearn")

    def can_handle(self, model: Any) -> bool:
        """True when sklearn is installed and *model* is a BaseEstimator."""
        return SKLEARN_AVAILABLE and isinstance(model, sklearn.base.BaseEstimator)

    def flavor(self) -> ModelFlavor:
        return ModelFlavor.SKLEARN

    def log_model(self, model: Any, name: str, **kwargs: Any) -> None:
        """Log *model* as artifact *name*; wraps mlflow failures in ModelFlavorError."""
        self._require_sklearn(
            "scikit-learn is not available. Please install it to log sklearn models."
        )
        try:
            mlflow.sklearn.log_model(sk_model=model, artifact_path=name, **kwargs)
        except Exception as err:
            raise ModelFlavorError(
                f"Failed to log sklearn model: {err}",
                flavor="sklearn",
                details={"model_type": type(model).__name__, "artifact_name": name},
            ) from err

    def load_model(self, model_uri: str) -> Any:
        """Load a sklearn model from *model_uri*; wraps failures in ModelFlavorError."""
        self._require_sklearn(
            "scikit-learn is not available. Please install it to load sklearn models."
        )
        try:
            return mlflow.sklearn.load_model(model_uri)
        except Exception as err:
            raise ModelFlavorError(
                f"Failed to load sklearn model: {err}",
                flavor="sklearn",
                details={"model_uri": model_uri},
            ) from err
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class XGBoostFlavorHandler(FlavorHandler):
    """Handler for XGBoost models (mlflow.xgboost flavor)."""

    def _require_xgboost(self, message: str) -> None:
        # Guard clause: raise a flavor-tagged error when XGBoost is missing.
        if not XGBOOST_AVAILABLE:
            raise ModelFlavorError(message, flavor="xgboost")

    def can_handle(self, model: Any) -> bool:
        """True when XGBoost is installed and *model* is a Booster or XGBModel."""
        return XGBOOST_AVAILABLE and isinstance(model, (xgb.Booster, xgb.XGBModel))

    def flavor(self) -> ModelFlavor:
        return ModelFlavor.XGBOOST

    def log_model(self, model: Any, name: str, **kwargs: Any) -> None:
        """Log *model* as artifact *name*; wraps mlflow failures in ModelFlavorError."""
        self._require_xgboost(
            "XGBoost is not available. Please install it to log XGBoost models."
        )
        try:
            mlflow.xgboost.log_model(xgb_model=model, artifact_path=name, **kwargs)
        except Exception as err:
            raise ModelFlavorError(
                f"Failed to log XGBoost model: {err}",
                flavor="xgboost",
                details={"model_type": type(model).__name__, "artifact_name": name},
            ) from err

    def load_model(self, model_uri: str) -> Any:
        """Load an XGBoost model from *model_uri*; wraps failures in ModelFlavorError."""
        self._require_xgboost(
            "XGBoost is not available. Please install it to load XGBoost models."
        )
        try:
            return mlflow.xgboost.load_model(model_uri)
        except Exception as err:
            raise ModelFlavorError(
                f"Failed to load XGBoost model: {err}",
                flavor="xgboost",
                details={"model_uri": model_uri},
            ) from err
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class PyTorchFlavorHandler(FlavorHandler):
    """Handler for PyTorch models (mlflow.pytorch flavor)."""

    def _require_pytorch(self, message: str) -> None:
        # Guard clause: raise a flavor-tagged error when PyTorch is missing.
        if not PYTORCH_AVAILABLE:
            raise ModelFlavorError(message, flavor="pytorch")

    def can_handle(self, model: Any) -> bool:
        """True when PyTorch is installed and *model* is an nn.Module."""
        return PYTORCH_AVAILABLE and isinstance(model, nn.Module)

    def flavor(self) -> ModelFlavor:
        return ModelFlavor.PYTORCH

    def log_model(self, model: Any, name: str, **kwargs: Any) -> None:
        """Log *model* as artifact *name*; wraps mlflow failures in ModelFlavorError."""
        self._require_pytorch(
            "PyTorch is not available. Please install it to log PyTorch models."
        )
        try:
            mlflow.pytorch.log_model(pytorch_model=model, artifact_path=name, **kwargs)
        except Exception as err:
            raise ModelFlavorError(
                f"Failed to log PyTorch model: {err}",
                flavor="pytorch",
                details={"model_type": type(model).__name__, "artifact_name": name},
            ) from err

    def load_model(self, model_uri: str) -> Any:
        """Load a PyTorch model from *model_uri*; wraps failures in ModelFlavorError."""
        self._require_pytorch(
            "PyTorch is not available. Please install it to load PyTorch models."
        )
        try:
            return mlflow.pytorch.load_model(model_uri)
        except Exception as err:
            raise ModelFlavorError(
                f"Failed to load PyTorch model: {err}",
                flavor="pytorch",
                details={"model_uri": model_uri},
            ) from err
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
class LightGBMFlavorHandler(FlavorHandler):
    """Handler for LightGBM models (mlflow.lightgbm flavor)."""

    def _require_lightgbm(self, message: str) -> None:
        # Guard clause: raise a flavor-tagged error when LightGBM is missing.
        if not LIGHTGBM_AVAILABLE:
            raise ModelFlavorError(message, flavor="lightgbm")

    def can_handle(self, model: Any) -> bool:
        """True when LightGBM imported cleanly and *model* is a Booster or LGBMModel."""
        # lgb may be None when the import failed with a non-ImportError (e.g.
        # missing native libomp), so check both the flag and the module.
        if not LIGHTGBM_AVAILABLE or lgb is None:
            return False
        return isinstance(model, (lgb.Booster, lgb.LGBMModel))

    def flavor(self) -> ModelFlavor:
        return ModelFlavor.LIGHTGBM

    def log_model(self, model: Any, name: str, **kwargs: Any) -> None:
        """Log *model* as artifact *name*; wraps mlflow failures in ModelFlavorError."""
        self._require_lightgbm(
            "LightGBM is not available. Please install it to log LightGBM models."
        )
        try:
            mlflow.lightgbm.log_model(lgb_model=model, artifact_path=name, **kwargs)
        except Exception as err:
            raise ModelFlavorError(
                f"Failed to log LightGBM model: {err}",
                flavor="lightgbm",
                details={"model_type": type(model).__name__, "artifact_name": name},
            ) from err

    def load_model(self, model_uri: str) -> Any:
        """Load a LightGBM model from *model_uri*; wraps failures in ModelFlavorError."""
        self._require_lightgbm(
            "LightGBM is not available. Please install it to load LightGBM models."
        )
        try:
            return mlflow.lightgbm.load_model(model_uri)
        except Exception as err:
            raise ModelFlavorError(
                f"Failed to load LightGBM model: {err}",
                flavor="lightgbm",
                details={"model_uri": model_uri},
            ) from err
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
class TransformersFlavorHandler(FlavorHandler):
    """Handler for Hugging Face Transformers models (mlflow.transformers flavor)."""

    def _require_transformers(self, message: str) -> None:
        # Guard clause: raise a flavor-tagged error when Transformers is missing.
        if not TRANSFORMERS_AVAILABLE:
            raise ModelFlavorError(message, flavor="transformers")

    def can_handle(self, model: Any) -> bool:
        """True when Transformers is installed and *model* is a PreTrainedModel."""
        return TRANSFORMERS_AVAILABLE and isinstance(model, PreTrainedModel)

    def flavor(self) -> ModelFlavor:
        return ModelFlavor.TRANSFORMERS

    def log_model(self, model: Any, name: str, **kwargs: Any) -> None:
        """Log *model* as artifact *name*; wraps mlflow failures in ModelFlavorError."""
        self._require_transformers(
            "Transformers is not available. Please install it to log Transformers models."
        )
        try:
            mlflow.transformers.log_model(transformers_model=model, artifact_path=name, **kwargs)
        except Exception as err:
            raise ModelFlavorError(
                f"Failed to log Transformers model: {err}",
                flavor="transformers",
                details={"model_type": type(model).__name__, "artifact_name": name},
            ) from err

    def load_model(self, model_uri: str) -> Any:
        """Load a Transformers model from *model_uri*; wraps failures in ModelFlavorError."""
        self._require_transformers(
            "Transformers is not available. Please install it to load Transformers models."
        )
        try:
            return mlflow.transformers.load_model(model_uri)
        except Exception as err:
            raise ModelFlavorError(
                f"Failed to load Transformers model: {err}",
                flavor="transformers",
                details={"model_uri": model_uri},
            ) from err
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
from typing import Any, Optional
|
|
2
|
+
|
|
3
|
+
import mlflow
|
|
4
|
+
from mlflow.tracking import MlflowClient
|
|
5
|
+
from collie._common.mlflow_model_io.flavor_registry import FlavorRegistry
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class MLflowModelIO:
    """Facade over FlavorRegistry for logging and loading models with MLflow."""

    def __init__(
        self,
        mlflow_client: MlflowClient
    ) -> None:
        """
        Initializes an MLflowModelIO instance.

        Args:
            mlflow_client (MlflowClient): The MLflowClient instance to use for logging models.
        """
        self.registry = FlavorRegistry()
        self.client = mlflow_client

    def log_model(
        self,
        model: Any,
        name: str,
        registered_model_name: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """
        Logs a model with MLflow via the flavor handler matching its type.

        Args:
            model (Any): The model to log.
            name (str): The name to give the logged model.
            registered_model_name (Optional[str], optional): The name to give the registered model. Defaults to None.
            **kwargs (Any): Additional keyword arguments to pass to the flavor handler's log_model method.

        Raises:
            ValueError: If the model type is not supported by any flavor handler.
        """
        handler = self.registry.find_handler_by_model(model)
        if handler is None:
            raise ValueError(f"Unsupported model type: {type(model)}")

        handler.log_model(
            model=model,
            name=name,
            registered_model_name=registered_model_name,
            **kwargs,
        )
        # NOTE(review): this logs the ModelFlavor enum member itself (mlflow
        # stringifies params), while load_model matches handlers by a flavor
        # string — confirm the two representations agree.
        mlflow.log_param("model_flavor", handler.flavor())

    def load_model(
        self,
        flavor: str,
        model_uri: str,
    ) -> Any:
        """
        Load the model at *model_uri* using the handler registered for *flavor*.

        Raises:
            ValueError: If no handler is registered for the given flavor.
        """
        handler = self.registry.find_handler_by_flavor(flavor)
        if handler is None:
            raise ValueError(f"Unsupported model flavor: {flavor}")

        return handler.load_model(model_uri)
|
collie/_common/utils.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def get_logger(name: str = None) -> logging.Logger:
    """
    Return a logger that logs messages with severity level info or higher.

    Args:
        name: Optional logger name. Defaults to this module's ``__name__``,
            preserving the behavior existing callers rely on; passing a name
            lets each component get its own logger instead of sharing one.

    Returns:
        A logger that logs messages with severity level info or higher.
    """
    # basicConfig is a no-op once the root logger has handlers, so repeated
    # calls are harmless — but note it is a process-wide side effect.
    logging.basicConfig(level=logging.INFO)
    return logging.getLogger(name if name is not None else __name__)
|
|
File without changes
|