FlowerPower 0.30.0__py3-none-any.whl → 0.31.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowerpower/cfg/__init__.py +143 -25
- flowerpower/cfg/base.py +132 -11
- flowerpower/cfg/exceptions.py +53 -0
- flowerpower/cfg/pipeline/__init__.py +151 -35
- flowerpower/cfg/pipeline/adapter.py +1 -0
- flowerpower/cfg/pipeline/builder.py +24 -25
- flowerpower/cfg/pipeline/builder_adapter.py +142 -0
- flowerpower/cfg/pipeline/builder_executor.py +101 -0
- flowerpower/cfg/pipeline/run.py +99 -40
- flowerpower/cfg/project/__init__.py +59 -14
- flowerpower/cfg/project/adapter.py +6 -0
- flowerpower/cli/__init__.py +8 -2
- flowerpower/cli/cfg.py +0 -38
- flowerpower/cli/pipeline.py +121 -83
- flowerpower/cli/utils.py +120 -71
- flowerpower/flowerpower.py +94 -120
- flowerpower/pipeline/config_manager.py +180 -0
- flowerpower/pipeline/executor.py +126 -0
- flowerpower/pipeline/lifecycle_manager.py +231 -0
- flowerpower/pipeline/manager.py +121 -274
- flowerpower/pipeline/pipeline.py +66 -278
- flowerpower/pipeline/registry.py +45 -4
- flowerpower/utils/__init__.py +19 -0
- flowerpower/utils/adapter.py +286 -0
- flowerpower/utils/callback.py +73 -67
- flowerpower/utils/config.py +306 -0
- flowerpower/utils/executor.py +178 -0
- flowerpower/utils/filesystem.py +194 -0
- flowerpower/utils/misc.py +249 -76
- flowerpower/utils/security.py +221 -0
- {flowerpower-0.30.0.dist-info → flowerpower-0.31.0.dist-info}/METADATA +2 -2
- flowerpower-0.31.0.dist-info/RECORD +53 -0
- flowerpower/cfg/pipeline/_schedule.py +0 -32
- flowerpower-0.30.0.dist-info/RECORD +0 -42
- {flowerpower-0.30.0.dist-info → flowerpower-0.31.0.dist-info}/WHEEL +0 -0
- {flowerpower-0.30.0.dist-info → flowerpower-0.31.0.dist-info}/entry_points.txt +0 -0
- {flowerpower-0.30.0.dist-info → flowerpower-0.31.0.dist-info}/licenses/LICENSE +0 -0
- {flowerpower-0.30.0.dist-info → flowerpower-0.31.0.dist-info}/top_level.txt +0 -0
flowerpower/cfg/pipeline/__init__.py

@@ -3,13 +3,14 @@ import yaml
 from fsspec_utils import AbstractFileSystem, BaseStorageOptions, filesystem
 from hamilton.function_modifiers import source, value
 from munch import Munch, munchify
+from typing import Optional
 
-from ..base import BaseConfig
+from ..base import BaseConfig, validate_file_path
+from ..exceptions import ConfigLoadError, ConfigSaveError, ConfigPathError
 from .adapter import AdapterConfig
 from .run import ExecutorConfig as ExecutorConfig
 from .run import RunConfig
 from .run import WithAdapterConfig as WithAdapterConfig
-#from .schedule import ScheduleConfig
 
 
 class PipelineConfig(BaseConfig):
@@ -22,7 +23,6 @@ class PipelineConfig(BaseConfig):
     Attributes:
         name (str | None): The name of the pipeline.
         run (RunConfig): Configuration for pipeline execution.
-        schedule (ScheduleConfig): Configuration for pipeline scheduling. DEPRECATED.
         params (dict): Pipeline parameters.
         adapter (AdapterConfig): Configuration for the pipeline adapter.
         h_params (dict): Hamilton-formatted parameters.
@@ -45,7 +45,6 @@ class PipelineConfig(BaseConfig):
 
     name: str | None = msgspec.field(default=None)
     run: RunConfig = msgspec.field(default_factory=RunConfig)
-    #: ScheduleConfig = msgspec.field(default_factory=ScheduleConfig)
     params: dict = msgspec.field(default_factory=dict)
     adapter: AdapterConfig = msgspec.field(default_factory=AdapterConfig)
     h_params: dict = msgspec.field(default_factory=dict)
@@ -54,36 +53,100 @@ class PipelineConfig(BaseConfig):
         if isinstance(self.params, dict):
             self.h_params = munchify(self.to_h_params(self.params))
             self.params = munchify(self.params)
+
+        # Validate pipeline name if provided
+        if self.name is not None:
+            self._validate_pipeline_name()
 
     def to_yaml(self, path: str, fs: AbstractFileSystem):
+        """Save pipeline configuration to YAML file.
+
+        Args:
+            path: Path to the YAML file.
+            fs: Filesystem instance.
+
+        Raises:
+            ConfigSaveError: If saving the configuration fails.
+            ConfigPathError: If the path contains directory traversal attempts.
+        """
+        try:
+            # Validate the path to prevent directory traversal
+            validated_path = validate_file_path(path)
+        except ConfigPathError as e:
+            raise ConfigSaveError(f"Path validation failed: {e}", path=path, original_error=e)
+
         try:
-            fs.makedirs(fs._parent(
-            with fs.open(
+            fs.makedirs(fs._parent(validated_path), exist_ok=True)
+            with fs.open(validated_path, "w") as f:
                 d = self.to_dict()
                 d.pop("name")
                 d.pop("h_params")
                 yaml.dump(d, f, default_flow_style=False)
-        except NotImplementedError:
-            raise
-                "The filesystem "
-
-
+        except NotImplementedError as e:
+            raise ConfigSaveError(
+                f"The filesystem does not support writing files.",
+                path=validated_path,
+                original_error=e
+            )
+        except Exception as e:
+            raise ConfigSaveError(
+                f"Failed to write configuration to {validated_path}",
+                path=validated_path,
+                original_error=e
             )
 
     @classmethod
     def from_dict(cls, name: str, data: dict | Munch):
         data.update({"name": name})
-
+
+        # Handle null params field by converting to empty dict
+        # This fixes the issue where YAML parses empty sections with comments as null
+        if data.get('params') is None:
+            data['params'] = {}
+
+        instance = msgspec.convert(data, cls)
+        # Manually call __post_init__ since msgspec.convert doesn't call it
+        instance.__post_init__()
+        return instance
 
     @classmethod
     def from_yaml(cls, name: str, path: str, fs: AbstractFileSystem):
-
-
-
+        """Load pipeline configuration from YAML file.
+
+        Args:
+            name: Pipeline name.
+            path: Path to the YAML file.
+            fs: Filesystem instance.
+
+        Returns:
+            Loaded pipeline configuration.
+
+        Raises:
+            ConfigLoadError: If loading the configuration fails.
+            ConfigPathError: If the path contains directory traversal attempts.
+        """
+        try:
+            # Validate the path to prevent directory traversal
+            validated_path = validate_file_path(path)
+        except ConfigPathError as e:
+            raise ConfigLoadError(f"Path validation failed: {e}", path=path, original_error=e)
+
+        try:
+            with fs.open(validated_path) as f:
+                data = yaml.safe_load(f)
+            return cls.from_dict(name=name, data=data)
+        except Exception as e:
+            raise ConfigLoadError(
+                f"Failed to load configuration from {validated_path}",
+                path=validated_path,
+                original_error=e
+            )
 
     def update(self, d: dict | Munch):
         for k, v in d.items():
-            eval(
+            # Safe attribute access instead of eval()
+            if hasattr(self, k) and hasattr(getattr(self, k), 'update'):
+                getattr(self, k).update(v)
             if k == "params":
                 self.params.update(munchify(v))
                 self.h_params = munchify(self.to_h_params(self.params))
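The `ConfigSaveError`/`ConfigLoadError`/`ConfigPathError` classes come from the new `flowerpower/cfg/exceptions.py` (+53 lines, not shown in this hunk), and `validate_file_path` from the reworked `flowerpower/cfg/base.py`. Judging only from the call sites above, a minimal sketch of what they might look like; the `path` and `original_error` keywords are taken from the calls, everything else is an assumption:

```python
# Hypothetical sketch inferred from the call sites above -- not the shipped code.
import posixpath


class ConfigError(Exception):
    """Assumed common base carrying the failing path and the causing exception."""

    def __init__(self, message: str, path: str | None = None,
                 original_error: Exception | None = None):
        super().__init__(message)
        self.path = path
        self.original_error = original_error


class ConfigLoadError(ConfigError):
    """Loading a configuration file failed."""


class ConfigSaveError(ConfigError):
    """Saving a configuration file failed."""


class ConfigPathError(ConfigError):
    """A configuration path failed validation."""


def validate_file_path(path: str) -> str:
    """Assumed behavior: normalize the path and reject traversal or absolute paths."""
    normalized = posixpath.normpath(path)
    if normalized.startswith("..") or posixpath.isabs(normalized):
        raise ConfigPathError(f"Unsafe config path: {path}", path=path)
    return normalized
```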
@@ -133,11 +196,10 @@ class PipelineConfig(BaseConfig):
                 return value(val)
             # For all other values
             return val
-
-        # Step 1:
-
-
-        # Step 2: Transform all values recursively
+
+        result = {k: {k: d[k]} for k in d}  # Step 1: Wrap each parameter in its own dict
+
+        # Step 2: Transform each parameter value recursively
         return {k: transform_recursive(v, d) for k, v in result.items()}
 
     @classmethod
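The restored Step 1 line makes the `to_h_params` flow easier to follow: each top-level parameter `k: v` is first wrapped as `k: {k: v}`, so the Hamilton `value()`/`source()` objects produced by `transform_recursive` end up keyed by the parameter name. The wrapping step in isolation:

```python
# Step 1 wrapping on its own (before transform_recursive converts leaves
# into hamilton.function_modifiers value()/source() objects).
d = {"alpha": 0.5, "input_path": "data.csv"}

result = {k: {k: d[k]} for k in d}
assert result == {"alpha": {"alpha": 0.5}, "input_path": {"input_path": "data.csv"}}
```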
@@ -168,22 +230,72 @@ class PipelineConfig(BaseConfig):
         ```
         """
         if fs is None:
-
-
+            # Use cached filesystem for better performance
+            storage_options_hash = cls._hash_storage_options(storage_options)
+            fs = cls._get_cached_filesystem(base_dir, storage_options_hash)
+        if fs.exists("conf/pipelines") and name is not None:
+
+            pipeline = PipelineConfig.from_yaml(
+                name=name,
+                path=f"conf/pipelines/{name}.yml",
+                fs=fs,
             )
-        if fs.exists("conf/pipelines"):
-            if name is not None:
-                pipeline = PipelineConfig.from_yaml(
-                    name=name,
-                    path=f"conf/pipelines/{name}.yml",
-                    fs=fs,
-                )
-            else:
-                pipeline = PipelineConfig(name=name)
         else:
             pipeline = PipelineConfig(name=name)
 
         return pipeline
+
+
+    # Helper methods for centralized load/save logic
+    @classmethod
+    def _load_pipeline_config(cls, base_dir: str, name: str | None, fs: AbstractFileSystem) -> "PipelineConfig":
+        """Centralized pipeline configuration loading logic.
+
+        Args:
+            base_dir: Base directory for the pipeline.
+            name: Pipeline name.
+            fs: Filesystem instance.
+
+        Returns:
+            Loaded pipeline configuration.
+        """
+        if fs.exists("conf/pipelines") and name is not None:
+            pipeline = cls.from_yaml(
+                name=name,
+                path=f"conf/pipelines/{name}.yml",
+                fs=fs,
+            )
+        else:
+            pipeline = cls(name=name)
+        return pipeline
+
+
+    def _save_pipeline_config(self, fs: AbstractFileSystem) -> None:
+        """Centralized pipeline configuration saving logic.
+
+        Args:
+            fs: Filesystem instance.
+        """
+        h_params = getattr(self, "h_params")
+        self.to_yaml(path=f"conf/pipelines/{self.name}.yml", fs=fs)
+        setattr(self, "h_params", h_params)
+
+    def _validate_pipeline_name(self) -> None:
+        """Validate pipeline name parameter.
+
+        Raises:
+            ValueError: If pipeline name contains invalid characters.
+        """
+        if not isinstance(self.name, str):
+            raise ValueError(f"Pipeline name must be a string, got {type(self.name)}")
+
+        # Check for directory traversal attempts
+        if '..' in self.name or '/' in self.name or '\\' in self.name:
+            raise ValueError(f"Invalid pipeline name: {self.name}. Contains path traversal characters.")
+
+        # Check for empty string
+        if not self.name.strip():
+            raise ValueError("Pipeline name cannot be empty or whitespace only.")
 
     def save(
         self,
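`_hash_storage_options` and `_get_cached_filesystem` are defined in `flowerpower/cfg/base.py` (also changed in this release) and are not part of this hunk. A plausible sketch of the caching scheme their call shape implies, offered purely as an assumption:

```python
# Hypothetical sketch of the filesystem cache implied above; the real helpers
# live in flowerpower/cfg/base.py and may differ.
import hashlib
import json

_FS_CACHE: dict[tuple[str, str], object] = {}


def _hash_storage_options(storage_options) -> str:
    # Stable digest so identical options map to the same cache entry.
    payload = json.dumps(storage_options or {}, sort_keys=True, default=str)
    return hashlib.sha256(payload.encode()).hexdigest()


def _get_cached_filesystem(base_dir: str, storage_options_hash: str):
    # One filesystem instance per (base_dir, options) pair.
    key = (base_dir, storage_options_hash)
    if key not in _FS_CACHE:
        from fsspec_utils import filesystem  # imported lazily in this sketch
        _FS_CACHE[key] = filesystem(base_dir)  # assumed call shape
    return _FS_CACHE[key]
```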
@@ -209,9 +321,9 @@ class PipelineConfig(BaseConfig):
         ```
         """
         if fs is None:
-
-
-            )
+            # Use cached filesystem for better performance
+            storage_options_hash = self._hash_storage_options(storage_options)
+            fs = self._get_cached_filesystem(base_dir, storage_options_hash)
 
         fs.makedirs("conf/pipelines", exist_ok=True)
         if name is not None:
@@ -219,6 +331,10 @@ class PipelineConfig(BaseConfig):
         if self.name is None:
             raise ValueError("Pipeline name is not set. Please provide a name.")
 
+        # Validate pipeline name to prevent directory traversal
+        if self.name and ('..' in self.name or '/' in self.name or '\\' in self.name):
+            raise ValueError(f"Invalid pipeline name: {self.name}. Contains path traversal characters.")
+
         h_params = getattr(self, "h_params")
 
         self.to_yaml(path=f"conf/pipelines/{self.name}.yml", fs=fs)
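Taken together, the hunks above guard both entry points into a config file: `save()` rejects suspicious names before building the path, and `to_yaml()`/`from_yaml()` validate the final path. The name checks run in `__post_init__`, so invalid names fail at construction time, per `_validate_pipeline_name` above:

```python
# Name validation behavior, following _validate_pipeline_name above.
from flowerpower.cfg.pipeline import PipelineConfig

PipelineConfig(name="etl_daily")  # passes validation

for bad in ("../../etc/passwd", "a/b", "   "):
    try:
        PipelineConfig(name=bad)
    except ValueError as e:
        print(e)  # traversal characters / empty or whitespace only
```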
flowerpower/cfg/pipeline/builder.py

@@ -8,6 +8,8 @@ from ..base import BaseConfig
 from .adapter import AdapterConfig as PipelineAdapterConfig
 from .run import ExecutorConfig, RunConfig, WithAdapterConfig
 from ..project.adapter import AdapterConfig as ProjectAdapterConfig
+from .builder_executor import ExecutorBuilder
+from .builder_adapter import AdapterBuilder
 
 
 class RunConfigBuilder:
@@ -40,6 +42,10 @@ class RunConfigBuilder:
         # Initialize with empty config
         self._config = RunConfig()
 
+        # Initialize sub-builders
+        self._executor_builder = ExecutorBuilder()
+        self._adapter_builder = AdapterBuilder()
+
         # Load defaults from pipeline and project configs
         self._load_defaults()
 
@@ -148,16 +154,9 @@ class RunConfigBuilder:
         Returns:
             Self for method chaining
         """
-
-
-
-        self._config.executor.type = executor_type
-
-        # Apply additional executor options
-        for key, value in kwargs.items():
-            if hasattr(self._config.executor, key):
-                setattr(self._config.executor, key, value)
-
+        self._executor_builder.with_type(executor_type)
+        if kwargs:
+            self._executor_builder.with_config(kwargs)
         return self
 
     def with_adapter(self, adapter_name: str, **kwargs) -> "RunConfigBuilder":
@@ -170,17 +169,10 @@ class RunConfigBuilder:
         Returns:
             Self for method chaining
         """
-
-
-
-
-        if hasattr(self._config.with_adapter, adapter_name):
-            setattr(self._config.with_adapter, adapter_name, True)
-
-        # Store adapter configuration for merging
-        if not hasattr(self, '_adapter_configs'):
-            self._adapter_configs = {}
-        self._adapter_configs[adapter_name] = kwargs
+        # Enable the adapter using the adapter builder
+        enable_method = getattr(self._adapter_builder, f"enable_{adapter_name}", None)
+        if enable_method:
+            enable_method(True, **kwargs)
 
         return self
 
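`with_adapter` now resolves the adapter name to an `enable_<name>` method on the `AdapterBuilder` via `getattr`; note that unknown adapter names fall through silently instead of recording their kwargs as before. The dispatch in isolation (the `tracking_uri` kwarg is illustrative, not a documented option):

```python
# The enable_* dispatch used by with_adapter, shown directly on AdapterBuilder.
from flowerpower.cfg.pipeline.builder_adapter import AdapterBuilder

ab = AdapterBuilder()
enable = getattr(ab, "enable_mlflow", None)              # "mlflow" -> enable_mlflow
if enable:
    enable(True, tracking_uri="http://localhost:5000")   # kwargs stored for merging
assert ab.build().mlflow is True
assert ab.get_adapter_configs() == {"mlflow": {"tracking_uri": "http://localhost:5000"}}

getattr(ab, "enable_not_an_adapter", None)  # -> None, so the call is skipped
```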
@@ -315,16 +307,23 @@ class RunConfigBuilder:
         # Create a deep copy to avoid modifying the internal state
         final_config = copy.deepcopy(self._config)
 
+        # Build executor configuration
+        final_config.executor = self._executor_builder.build()
+
+        # Build adapter configuration
+        final_config.with_adapter = self._adapter_builder.build()
+
         # Merge adapter configurations
-
-
+        adapter_configs = self._adapter_builder.get_adapter_configs()
+        if adapter_configs:
+            self._merge_adapter_configs(final_config, adapter_configs)
 
         # Validate configuration
         self._validate_config(final_config)
 
         return final_config
 
-    def _merge_adapter_configs(self, config: RunConfig):
+    def _merge_adapter_configs(self, config: RunConfig, adapter_configs: dict[str, dict[str, Any]]):
         """Merge adapter configurations from builder with project/pipeline configs."""
         if not config.pipeline_adapter_cfg:
             config.pipeline_adapter_cfg = {}
@@ -333,7 +332,7 @@ class RunConfigBuilder:
             config.project_adapter_cfg = {}
 
         # Merge project adapter defaults
-        for adapter_name, adapter_config in
+        for adapter_name, adapter_config in adapter_configs.items():
             if adapter_name in ['hamilton_tracker', 'mlflow', 'opentelemetry']:
                 # Merge with project config
                 if hasattr(self._project_adapter_cfg, adapter_name):
flowerpower/cfg/pipeline/builder_adapter.py (new file)

@@ -0,0 +1,142 @@
+"""
+Adapter builder for RunConfig.
+"""
+
+from typing import Any, Optional
+from fsspec_utils import AbstractFileSystem, BaseStorageOptions, filesystem
+
+from ..base import BaseConfig
+from .run import WithAdapterConfig
+
+
+class AdapterBuilder:
+    """Builder for creating WithAdapterConfig objects."""
+
+    def __init__(self, adapter_config: Optional[WithAdapterConfig] = None):
+        """Initialize the AdapterBuilder.
+
+        Args:
+            adapter_config: Initial adapter configuration to build upon.
+        """
+        self._config = adapter_config or WithAdapterConfig()
+        self._adapter_configs = {}
+
+    def enable_hamilton_tracker(self, enabled: bool = True, **kwargs) -> "AdapterBuilder":
+        """Enable or disable Hamilton tracker adapter.
+
+        Args:
+            enabled: Whether to enable the adapter
+            **kwargs: Additional configuration options
+
+        Returns:
+            Self for method chaining
+        """
+        self._config.hamilton_tracker = enabled
+        if enabled and kwargs:
+            self._adapter_configs['hamilton_tracker'] = kwargs
+        return self
+
+    def enable_mlflow(self, enabled: bool = True, **kwargs) -> "AdapterBuilder":
+        """Enable or disable MLflow adapter.
+
+        Args:
+            enabled: Whether to enable the adapter
+            **kwargs: Additional configuration options
+
+        Returns:
+            Self for method chaining
+        """
+        self._config.mlflow = enabled
+        if enabled and kwargs:
+            self._adapter_configs['mlflow'] = kwargs
+        return self
+
+    def enable_ray(self, enabled: bool = True, **kwargs) -> "AdapterBuilder":
+        """Enable or disable Ray adapter.
+
+        Args:
+            enabled: Whether to enable the adapter
+            **kwargs: Additional configuration options
+
+        Returns:
+            Self for method chaining
+        """
+        self._config.ray = enabled
+        if enabled and kwargs:
+            self._adapter_configs['ray'] = kwargs
+        return self
+
+    def enable_opentelemetry(self, enabled: bool = True, **kwargs) -> "AdapterBuilder":
+        """Enable or disable OpenTelemetry adapter.
+
+        Args:
+            enabled: Whether to enable the adapter
+            **kwargs: Additional configuration options
+
+        Returns:
+            Self for method chaining
+        """
+        self._config.opentelemetry = enabled
+        if enabled and kwargs:
+            self._adapter_configs['opentelemetry'] = kwargs
+        return self
+
+    def enable_progressbar(self, enabled: bool = True, **kwargs) -> "AdapterBuilder":
+        """Enable or disable progress bar adapter.
+
+        Args:
+            enabled: Whether to enable the adapter
+            **kwargs: Additional configuration options
+
+        Returns:
+            Self for method chaining
+        """
+        self._config.progressbar = enabled
+        if enabled and kwargs:
+            self._adapter_configs['progressbar'] = kwargs
+        return self
+
+    def enable_future(self, enabled: bool = True, **kwargs) -> "AdapterBuilder":
+        """Enable or disable future adapter.
+
+        Args:
+            enabled: Whether to enable the adapter
+            **kwargs: Additional configuration options
+
+        Returns:
+            Self for method chaining
+        """
+        self._config.future = enabled
+        if enabled and kwargs:
+            self._adapter_configs['future'] = kwargs
+        return self
+
+    def with_adapter_config(self, adapter_name: str, config: dict[str, Any]) -> "AdapterBuilder":
+        """Set configuration for a specific adapter.
+
+        Args:
+            adapter_name: Name of the adapter
+            config: Configuration dictionary
+
+        Returns:
+            Self for method chaining
+        """
+        if hasattr(self._config, adapter_name):
+            self._adapter_configs[adapter_name] = config
+        return self
+
+    def build(self) -> WithAdapterConfig:
+        """Build the final WithAdapterConfig object.
+
+        Returns:
+            Fully configured WithAdapterConfig object
+        """
+        return self._config
+
+    def get_adapter_configs(self) -> dict[str, dict[str, Any]]:
+        """Get the collected adapter configurations.
+
+        Returns:
+            Dictionary of adapter configurations
+        """
+        return self._adapter_configs
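A chaining sketch for the new `AdapterBuilder`; the `project_id` kwarg is illustrative, while the flag behavior and the kwarg side-channel follow the code above:

```python
from flowerpower.cfg.pipeline.builder_adapter import AdapterBuilder

builder = (
    AdapterBuilder()
    .enable_hamilton_tracker(True, project_id=1)  # project_id is illustrative
    .enable_progressbar(True)
    .enable_mlflow(False)  # disabled: kwargs would not be recorded
)
cfg = builder.build()
assert cfg.hamilton_tracker is True and cfg.progressbar is True and cfg.mlflow is False
assert "mlflow" not in builder.get_adapter_configs()
```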
flowerpower/cfg/pipeline/builder_executor.py (new file)

@@ -0,0 +1,101 @@
+"""
+Executor builder for RunConfig.
+"""
+
+from typing import Any, Optional, Union
+from fsspec_utils import AbstractFileSystem, BaseStorageOptions, filesystem
+
+from ... import settings
+from ..base import BaseConfig
+from .run import ExecutorConfig
+
+
+class ExecutorBuilder:
+    """Builder for creating ExecutorConfig objects."""
+
+    def __init__(self, executor_config: Optional[ExecutorConfig] = None):
+        """Initialize the ExecutorBuilder.
+
+        Args:
+            executor_config: Initial executor configuration to build upon.
+        """
+        self._config = executor_config or ExecutorConfig()
+
+    def with_type(self, executor_type: str) -> "ExecutorBuilder":
+        """Set the executor type.
+
+        Args:
+            executor_type: Type of executor ('synchronous', 'threadpool', 'processpool', 'ray', 'dask')
+
+        Returns:
+            Self for method chaining
+        """
+        self._config.type = executor_type
+        return self
+
+    def with_max_workers(self, max_workers: int) -> "ExecutorBuilder":
+        """Set the maximum number of workers.
+
+        Args:
+            max_workers: Maximum number of worker threads/processes
+
+        Returns:
+            Self for method chaining
+        """
+        self._config.max_workers = max_workers
+        return self
+
+    def with_num_cpus(self, num_cpus: int) -> "ExecutorBuilder":
+        """Set the number of CPUs to use.
+
+        Args:
+            num_cpus: Number of CPUs to allocate
+
+        Returns:
+            Self for method chaining
+        """
+        self._config.num_cpus = num_cpus
+        return self
+
+    def with_config(self, config: dict[str, Any]) -> "ExecutorBuilder":
+        """Apply additional configuration options.
+
+        Args:
+            config: Dictionary of additional configuration options
+
+        Returns:
+            Self for method chaining
+        """
+        for key, value in config.items():
+            if hasattr(self._config, key):
+                setattr(self._config, key, value)
+        return self
+
+    def build(self) -> ExecutorConfig:
+        """Build the final ExecutorConfig object.
+
+        Returns:
+            Fully configured ExecutorConfig object
+
+        Raises:
+            ValueError: If configuration is invalid
+        """
+        self._validate_config()
+        return self._config
+
+    def _validate_config(self) -> None:
+        """Validate the executor configuration.
+
+        Raises:
+            ValueError: If configuration is invalid
+        """
+        if self._config.type:
+            valid_executors = ['synchronous', 'threadpool', 'processpool', 'ray', 'dask']
+            if self._config.type not in valid_executors:
+                raise ValueError(f"Invalid executor type: {self._config.type}")
+
+        if self._config.max_workers is not None and self._config.max_workers < 1:
+            raise ValueError("max_workers must be at least 1")
+
+        if self._config.num_cpus is not None and self._config.num_cpus < 1:
+            raise ValueError("num_cpus must be at least 1")
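And a matching sketch for `ExecutorBuilder`, whose `build()` runs `_validate_config()` before returning:

```python
from flowerpower.cfg.pipeline.builder_executor import ExecutorBuilder

executor_cfg = (
    ExecutorBuilder()
    .with_type("threadpool")
    .with_max_workers(8)
    .build()
)

try:
    ExecutorBuilder().with_type("spark").build()
except ValueError as e:
    print(e)  # Invalid executor type: spark
```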