PyPI - FlowerPower - Versions diffs - 0.21.0__py3-none-any.whl → 0.31.0__py3-none-any.whl - Mend

FlowerPower 0.21.0__py3-none-any.whl → 0.31.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

flowerpower/cfg/__init__.py +143 -25
flowerpower/cfg/base.py +132 -11
flowerpower/cfg/exceptions.py +53 -0
flowerpower/cfg/pipeline/__init__.py +151 -35
flowerpower/cfg/pipeline/adapter.py +1 -0
flowerpower/cfg/pipeline/builder.py +24 -25
flowerpower/cfg/pipeline/builder_adapter.py +142 -0
flowerpower/cfg/pipeline/builder_executor.py +101 -0
flowerpower/cfg/pipeline/run.py +134 -22
flowerpower/cfg/project/__init__.py +59 -14
flowerpower/cfg/project/adapter.py +6 -0
flowerpower/cli/__init__.py +8 -9
flowerpower/cli/cfg.py +0 -38
flowerpower/cli/pipeline.py +121 -83
flowerpower/cli/utils.py +120 -71
flowerpower/flowerpower.py +94 -120
flowerpower/pipeline/config_manager.py +180 -0
flowerpower/pipeline/executor.py +126 -0
flowerpower/pipeline/lifecycle_manager.py +231 -0
flowerpower/pipeline/manager.py +121 -276
flowerpower/pipeline/pipeline.py +66 -278
flowerpower/pipeline/registry.py +45 -4
flowerpower/utils/__init__.py +19 -0
flowerpower/utils/adapter.py +286 -0
flowerpower/utils/callback.py +73 -67
flowerpower/utils/config.py +306 -0
flowerpower/utils/executor.py +178 -0
flowerpower/utils/filesystem.py +194 -0
flowerpower/utils/misc.py +249 -76
flowerpower/utils/security.py +221 -0
{flowerpower-0.21.0.dist-info → flowerpower-0.31.0.dist-info}/METADATA +1 -13
flowerpower-0.31.0.dist-info/RECORD +53 -0
flowerpower/cfg/pipeline/_schedule.py +0 -32
flowerpower/cli/mqtt.py +0 -168
flowerpower/plugins/mqtt/__init__.py +0 -8
flowerpower-0.21.0.dist-info/RECORD +0 -44
{flowerpower-0.21.0.dist-info → flowerpower-0.31.0.dist-info}/WHEEL +0 -0
{flowerpower-0.21.0.dist-info → flowerpower-0.31.0.dist-info}/entry_points.txt +0 -0
{flowerpower-0.21.0.dist-info → flowerpower-0.31.0.dist-info}/licenses/LICENSE +0 -0
{flowerpower-0.21.0.dist-info → flowerpower-0.31.0.dist-info}/top_level.txt +0 -0

flowerpower/utils/misc.py CHANGED Viewed

@@ -8,53 +8,35 @@ from typing import Any
 import msgspec
 from fsspec_utils import AbstractFileSystem, filesystem
+from .security import validate_file_path
 if importlib.util.find_spec("joblib"):
     from joblib import Parallel, delayed
     from rich.progress import (BarColumn, Progress, TextColumn,
                                TimeElapsedColumn)
-    def run_parallel(
-        func: callable,
-        *args,
-        n_jobs: int = -1,
-        backend: str = "threading",
-        verbose: bool = True,
-        **kwargs,
-    ) -> list[any]:
-        """Runs a function for a list of parameters in parallel.
+    def _prepare_parallel_args(
+        args: tuple, kwargs: dict
+    ) -> tuple[list, list, dict, dict, int]:
+        """Prepare and validate arguments for parallel execution.
         Args:
-            func (Callable): function to run in parallel
-            *args: Positional arguments. Can be single values or iterables
-            n_jobs (int, optional): Number of joblib workers. Defaults to -1
-            backend (str, optional): joblib backend. Valid options are
-                `loky`,`threading`, `mutliprocessing` or `sequential`. Defaults to "threading"
-            verbose (bool, optional): Show progress bar. Defaults to True
-            **kwargs: Keyword arguments. Can be single values or iterables
+            args: Positional arguments
+            kwargs: Keyword arguments
         Returns:
-            list[any]: Function output
-        Examples:
-            >>> # Single iterable argument
-            >>> run_parallel(func, [1,2,3], fixed_arg=42)
-            >>> # Multiple iterables in args and kwargs
-            >>> run_parallel(func, [1,2,3], val=[7,8,9], fixed=42)
-            >>> # Only kwargs iterables
-            >>> run_parallel(func, x=[1,2,3], y=[4,5,6], fixed=42)
+            tuple: (iterables, fixed_args, iterable_kwargs, fixed_kwargs, first_iterable_len)
+        Raises:
+            ValueError: If no iterable arguments or length mismatch
         """
-        parallel_kwargs = {"n_jobs": n_jobs, "backend": backend, "verbose": 0}
         iterables = []
         fixed_args = []
         iterable_kwargs = {}
         fixed_kwargs = {}
         first_iterable_len = None
+        # Process positional arguments
         for arg in args:
             if isinstance(arg, (list, tuple)) and not isinstance(arg[0], (list, tuple)):
                 iterables.append(arg)
@@ -67,6 +49,7 @@ if importlib.util.find_spec("joblib"):
             else:
                 fixed_args.append(arg)
+        # Process keyword arguments
         for key, value in kwargs.items():
             if isinstance(value, (list, tuple)) and not isinstance(
                 value[0], (list, tuple)
@@ -84,12 +67,45 @@ if importlib.util.find_spec("joblib"):
         if first_iterable_len is None:
             raise ValueError("At least one iterable argument is required")
-        all_iterables = iterables + list(iterable_kwargs.values())
-        param_combinations = list(zip(*all_iterables))
+        return iterables, fixed_args, iterable_kwargs, fixed_kwargs, first_iterable_len
-        if not verbose:
-            return Parallel(**parallel_kwargs)(
-                delayed(func)(
+    def _execute_parallel_with_progress(
+        func: callable,
+        iterables: list,
+        fixed_args: list,
+        iterable_kwargs: dict,
+        fixed_kwargs: dict,
+        param_combinations: list,
+        parallel_kwargs: dict,
+    ) -> list:
+        """Execute parallel tasks with progress tracking.
+        Args:
+            func: Function to execute
+            iterables: List of iterable arguments
+            fixed_args: List of fixed arguments
+            iterable_kwargs: Dictionary of iterable keyword arguments
+            fixed_kwargs: Dictionary of fixed keyword arguments
+            param_combinations: List of parameter combinations
+            parallel_kwargs: Parallel execution configuration
+        Returns:
+            list: Results from parallel execution
+        """
+        results = [None] * len(param_combinations)
+        with Progress(
+            TextColumn("[progress.description]{task.description}"),
+            BarColumn(),
+            "[progress.percentage]{task.percentage:>3.0f}%",
+            TimeElapsedColumn(),
+            transient=True,
+        ) as progress:
+            task = progress.add_task(
+                "Running in parallel...", total=len(param_combinations)
+            )
+            def wrapper(idx, param_tuple):
+                res = func(
                     *(list(param_tuple[: len(iterables)]) + fixed_args),
                     **{
                         k: v
@@ -99,41 +115,107 @@ if importlib.util.find_spec("joblib"):
                     },
                     **fixed_kwargs,
                 )
-                for param_tuple in param_combinations
-            )
-        else:
-            results = [None] * len(param_combinations)
-            with Progress(
-                TextColumn("[progress.description]{task.description}"),
-                BarColumn(),
-                "[progress.percentage]{task.percentage:>3.0f}%",
-                TimeElapsedColumn(),
-                transient=True,
-            ) as progress:
-                task = progress.add_task(
-                    "Running in parallel...", total=len(param_combinations)
-                )
+                progress.update(task, advance=1)
+                return idx, res
-                def wrapper(idx, param_tuple):
-                    res = func(
-                        *(list(param_tuple[: len(iterables)]) + fixed_args),
-                        **{
-                            k: v
-                            for k, v in zip(
-                                iterable_kwargs.keys(), param_tuple[len(iterables) :]
-                            )
-                        },
-                        **fixed_kwargs,
+            for idx, result in Parallel(**parallel_kwargs)(
+                delayed(wrapper)(i, param_tuple)
+                for i, param_tuple in enumerate(param_combinations)
+            ):
+                results[idx] = result
+        return results
+    def _execute_parallel_without_progress(
+        func: callable,
+        iterables: list,
+        fixed_args: list,
+        iterable_kwargs: dict,
+        fixed_kwargs: dict,
+        param_combinations: list,
+        parallel_kwargs: dict,
+    ) -> list:
+        """Execute parallel tasks without progress tracking.
+        Args:
+            func: Function to execute
+            iterables: List of iterable arguments
+            fixed_args: List of fixed arguments
+            iterable_kwargs: Dictionary of iterable keyword arguments
+            fixed_kwargs: Dictionary of fixed keyword arguments
+            param_combinations: List of parameter combinations
+            parallel_kwargs: Parallel execution configuration
+        Returns:
+            list: Results from parallel execution
+        """
+        return Parallel(**parallel_kwargs)(
+            delayed(func)(
+                *(list(param_tuple[: len(iterables)]) + fixed_args),
+                **{
+                    k: v
+                    for k, v in zip(
+                        iterable_kwargs.keys(), param_tuple[len(iterables) :]
                     )
-                    progress.update(task, advance=1)
-                    return idx, res
+                },
+                **fixed_kwargs,
+            )
+            for param_tuple in param_combinations
+        )
-                for idx, result in Parallel(**parallel_kwargs)(
-                    delayed(wrapper)(i, param_tuple)
-                    for i, param_tuple in enumerate(param_combinations)
-                ):
-                    results[idx] = result
-            return results
+    def run_parallel(
+        func: callable,
+        *args,
+        n_jobs: int = -1,
+        backend: str = "threading",
+        verbose: bool = True,
+        **kwargs,
+    ) -> list[any]:
+        """Runs a function for a list of parameters in parallel.
+        Args:
+            func (Callable): function to run in parallel
+            *args: Positional arguments. Can be single values or iterables
+            n_jobs (int, optional): Number of joblib workers. Defaults to -1
+            backend (str, optional): joblib backend. Valid options are
+                `loky`,`threading`, `mutliprocessing` or `sequential`. Defaults to "threading"
+            verbose (bool, optional): Show progress bar. Defaults to True
+            **kwargs: Keyword arguments. Can be single values or iterables
+        Returns:
+            list[any]: Function output
+        Examples:
+            >>> # Single iterable argument
+            >>> run_parallel(func, [1,2,3], fixed_arg=42)
+            >>> # Multiple iterables in args and kwargs
+            >>> run_parallel(func, [1,2,3], val=[7,8,9], fixed=42)
+            >>> # Only kwargs iterables
+            >>> run_parallel(func, x=[1,2,3], y=[4,5,6], fixed=42)
+        """
+        parallel_kwargs = {"n_jobs": n_jobs, "backend": backend, "verbose": 0}
+        # Prepare and validate arguments
+        iterables, fixed_args, iterable_kwargs, fixed_kwargs, first_iterable_len = _prepare_parallel_args(
+            args, kwargs
+        )
+        # Create parameter combinations
+        all_iterables = iterables + list(iterable_kwargs.values())
+        param_combinations = list(zip(*all_iterables))
+        # Execute with or without progress tracking
+        if not verbose:
+            return _execute_parallel_without_progress(
+                func, iterables, fixed_args, iterable_kwargs, fixed_kwargs,
+                param_combinations, parallel_kwargs
+            )
+        else:
+            return _execute_parallel_with_progress(
+                func, iterables, fixed_args, iterable_kwargs, fixed_kwargs,
+                param_combinations, parallel_kwargs
+            )
 else:
@@ -170,19 +252,110 @@ def get_partitions_from_path(
         return list(zip(partitioning, parts[-len(partitioning) :]))
-def view_img(data: str | bytes, format: str = "svg"):
-    # Create a temporary file with .svg extension
+def _validate_image_format(format: str) -> str:
+    """Validate image format to prevent injection attacks.
+    Args:
+        format: Image format to validate
+    Returns:
+        str: Validated format
+    Raises:
+        ValueError: If format is not supported
+    """
+    allowed_formats = {"svg", "png", "jpg", "jpeg", "gif", "pdf", "html"}
+    if format not in allowed_formats:
+        raise ValueError(f"Unsupported format: {format}. Allowed: {allowed_formats}")
+    return format
+def _create_temp_image_file(data: str | bytes, format: str) -> str:
+    """Create a temporary file with image data.
+    Args:
+        data: Image data as string or bytes
+        format: Validated image format
+    Returns:
+        str: Path to temporary file
+    Raises:
+        OSError: If file creation fails
+    """
     with tempfile.NamedTemporaryFile(suffix=f".{format}", delete=False) as tmp:
-        tmp.write(data)
+        if isinstance(data, str):
+            tmp.write(data.encode('utf-8'))
+        else:
+            tmp.write(data)
         tmp_path = tmp.name
+    # Validate the temporary file path for security
+    validate_file_path(tmp_path, allow_relative=False)
+    return tmp_path
+def _open_image_viewer(tmp_path: str) -> None:
+    """Open image viewer with the given file path.
+    Args:
+        tmp_path: Path to temporary image file
+    Raises:
+        OSError: If platform is not supported
+        subprocess.CalledProcessError: If subprocess fails
+        subprocess.TimeoutExpired: If subprocess times out
+    """
+    import platform
+    platform_system = platform.system()
+    if platform_system == "Darwin":  # macOS
+        subprocess.run(["open", tmp_path], check=True, timeout=10)
+    elif platform_system == "Linux":
+        subprocess.run(["xdg-open", tmp_path], check=True, timeout=10)
+    elif platform_system == "Windows":
+        subprocess.run(["start", "", tmp_path], shell=True, check=True, timeout=10)
+    else:
+        raise OSError(f"Unsupported platform: {platform_system}")
-    # Open with default application on macOS
-    subprocess.run(["open", tmp_path])
+def _cleanup_temp_file(tmp_path: str) -> None:
+    """Clean up temporary file.
+    Args:
+        tmp_path: Path to temporary file to remove
+    """
+    try:
+        os.unlink(tmp_path)
+    except OSError:
+        pass  # File might already be deleted or in use
-    # Optional: Remove the temp file after a delay
+def view_img(data: str | bytes, format: str = "svg"):
+    """View image data using the system's default image viewer.
+    Args:
+        data: Image data as string or bytes
+        format: Image format (svg, png, jpg, jpeg, gif, pdf, html)
+    Raises:
+        ValueError: If format is not supported
+        RuntimeError: If file opening fails
+        OSError: If platform is not supported
+    """
+    # Validate format to prevent injection attacks
+    validated_format = _validate_image_format(format)
+    # Create a temporary file with validated extension
+    tmp_path = _create_temp_image_file(data, validated_format)
+    try:
+        # Open image viewer with secure subprocess call
+        _open_image_viewer(tmp_path)
+    except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError) as e:
+        # Clean up temp file on error
+        _cleanup_temp_file(tmp_path)
+        raise RuntimeError(f"Failed to open file: {e}")
+    # Optional: Remove the temp file after a delay
     time.sleep(2)  # Wait for viewer to open
-    os.unlink(tmp_path)
+    _cleanup_temp_file(tmp_path)
 def update_config_from_dict(

flowerpower/utils/security.py ADDED Viewed

@@ -0,0 +1,221 @@
+"""Security utilities for input validation and sanitization."""
+import os
+import re
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Union, TYPE_CHECKING
+if TYPE_CHECKING:
+    from collections.abc import Callable
+class SecurityError(Exception):
+    """Raised when security validation fails."""
+    pass
+def validate_file_path(path: Union[str, Path],
+                      allowed_extensions: Optional[List[str]] = None,
+                      allow_absolute: bool = True,
+                      allow_relative: bool = True) -> Path:
+    """Validate and sanitize file paths to prevent directory traversal attacks.
+    Args:
+        path: File path to validate
+        allowed_extensions: List of allowed file extensions (e.g., ['.yaml', '.yml'])
+        allow_absolute: Whether to allow absolute paths
+        allow_relative: Whether to allow relative paths
+    Returns:
+        Validated Path object
+    Raises:
+        SecurityError: If path is invalid or potentially dangerous
+        ValueError: If path is empty or None
+    """
+    if not path:
+        raise ValueError("Path cannot be empty or None")
+    # Convert to Path object
+    path_obj = Path(path)
+    # Check for directory traversal attempts
+    path_str = str(path_obj)
+    if '..' in path_obj.parts or path_str.startswith('..'):
+        raise SecurityError(f"Directory traversal detected in path: {path}")
+    # Check absolute vs relative path restrictions
+    if path_obj.is_absolute() and not allow_absolute:
+        raise SecurityError(f"Absolute paths not allowed: {path}")
+    if not path_obj.is_absolute() and not allow_relative:
+        raise SecurityError(f"Relative paths not allowed: {path}")
+    # Validate file extension if specified
+    if allowed_extensions:
+        if not path_obj.suffix.lower() in [ext.lower() for ext in allowed_extensions]:
+            raise SecurityError(
+                f"File extension '{path_obj.suffix}' not allowed. "
+                f"Allowed: {allowed_extensions}"
+            )
+    # Check for potentially dangerous characters
+    dangerous_chars = ['|', '&', ';', '`', '$', '<', '>', '"', "'"]
+    if any(char in path_str for char in dangerous_chars):
+        raise SecurityError(f"Dangerous characters detected in path: {path}")
+    return path_obj
+def validate_pipeline_name(name: str) -> str:
+    """Validate pipeline name to prevent injection attacks.
+    Args:
+        name: Pipeline name to validate
+    Returns:
+        Validated name
+    Raises:
+        ValueError: If name is invalid
+        SecurityError: If name contains dangerous characters
+    """
+    if not name or not isinstance(name, str):
+        raise ValueError("Pipeline name must be a non-empty string")
+    name = name.strip()
+    if not name:
+        raise ValueError("Pipeline name cannot be empty or only whitespace")
+    # Check for dangerous characters
+    if not re.match(r'^[a-zA-Z0-9_-]+$', name):
+        raise SecurityError(
+            f"Pipeline name '{name}' contains invalid characters. "
+            "Only alphanumeric, underscore, and hyphen are allowed."
+        )
+    # Check length constraints
+    if len(name) > 100:
+        raise SecurityError(f"Pipeline name too long: {len(name)} > 100 characters")
+    return name
+def validate_config_dict(config: Dict[str, Any],
+                        allowed_keys: Optional[List[str]] = None,
+                        max_depth: int = 10) -> Dict[str, Any]:
+    """Validate configuration dictionary to prevent malicious content.
+    Args:
+        config: Configuration dictionary to validate
+        allowed_keys: List of allowed top-level keys
+        max_depth: Maximum nesting depth to prevent DoS attacks
+    Returns:
+        Validated configuration dictionary
+    Raises:
+        SecurityError: If configuration contains dangerous content
+        ValueError: If configuration is invalid
+    """
+    if not isinstance(config, dict):
+        raise ValueError("Configuration must be a dictionary")
+    # Check for allowed keys
+    if allowed_keys:
+        invalid_keys = set(config.keys()) - set(allowed_keys)
+        if invalid_keys:
+            raise SecurityError(f"Invalid configuration keys: {invalid_keys}")
+    # Check nesting depth
+    def check_depth(obj, depth=0):
+        if depth > max_depth:
+            raise SecurityError(f"Configuration nesting too deep: {depth} > {max_depth}")
+        if isinstance(obj, dict):
+            for value in obj.values():
+                check_depth(value, depth + 1)
+        elif isinstance(obj, (list, tuple)):
+            for item in obj:
+                check_depth(item, depth + 1)
+    check_depth(config)
+    return config
+def sanitize_log_data(data: Any) -> Any:
+    """Sanitize data for safe logging to prevent log injection.
+    Args:
+        data: Data to sanitize for logging
+    Returns:
+        Sanitized data safe for logging
+    """
+    if isinstance(data, str):
+        # Remove potential log injection characters
+        sanitized = re.sub(r'[\r\n\t]', ' ', data)
+        # Limit length to prevent log flooding
+        if len(sanitized) > 1000:
+            sanitized = sanitized[:997] + "..."
+        return sanitized
+    elif isinstance(data, (dict, list)):
+        # For complex objects, convert to string and sanitize
+        return sanitize_log_data(str(data))
+    else:
+        return data
+def validate_executor_type(executor_type: str) -> str:
+    """Validate executor type to prevent arbitrary code execution.
+    Args:
+        executor_type: Executor type string to validate
+    Returns:
+        Validated executor type
+    Raises:
+        SecurityError: If executor type is invalid or dangerous
+    """
+    if not executor_type or not isinstance(executor_type, str):
+        raise ValueError("Executor type must be a non-empty string")
+    allowed_executors = {
+        'synchronous', 'threadpool', 'processpool', 'ray', 'dask'
+    }
+    if executor_type not in allowed_executors:
+        raise SecurityError(
+            f"Invalid executor type: {executor_type}. "
+            f"Allowed types: {allowed_executors}"
+        )
+    return executor_type
+def validate_callback_function(callback: Any) -> bool:
+    """Validate callback function to ensure it's safe to execute.
+    Args:
+        callback: Callback function or callable to validate
+    Returns:
+        True if callback is valid
+    Raises:
+        SecurityError: If callback is dangerous or invalid
+    """
+    if callback is None:
+        return True
+    if not callable(callback):
+        raise SecurityError("Callback must be callable")
+    # Check if it's a built-in function that could be dangerous
+    dangerous_functions = {'eval', 'exec', 'compile', '__import__'}
+    if hasattr(callback, '__name__') and callback.__name__ in dangerous_functions:
+        raise SecurityError(f"Dangerous callback function: {callback.__name__}")
+    return True

{flowerpower-0.21.0.dist-info → flowerpower-0.31.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: FlowerPower
-Version: 0.21.0
+Version: 0.31.0
 Summary: A simple workflow framework for building and managing data processing pipelines
 Author-email: "Volker L." <ligno.blades@gmail.com>
 Project-URL: Homepage, https://github.com/legout/flowerpower
@@ -25,26 +25,14 @@ Provides-Extra: io
 Requires-Dist: flowerpower-io>=0.1.1; extra == "io"
 Provides-Extra: io-legacy
 Requires-Dist: flowerpower-io[legacy]>=0.1.1; extra == "io-legacy"
-Provides-Extra: mongodb
-Requires-Dist: pymongo>=4.7.2; extra == "mongodb"
-Provides-Extra: mqtt
-Requires-Dist: paho-mqtt>=2.1.0; extra == "mqtt"
-Requires-Dist: orjson>=3.10.11; extra == "mqtt"
-Requires-Dist: mmh3>=5.1.0; extra == "mqtt"
 Provides-Extra: opentelemetry
 Requires-Dist: opentelemetry-api>=1.5.0; extra == "opentelemetry"
 Requires-Dist: opentelemetry-sdk>=1.5.0; extra == "opentelemetry"
 Requires-Dist: opentelemetry-exporter-jaeger>=1.21.0; extra == "opentelemetry"
 Provides-Extra: ray
 Requires-Dist: ray>=2.34.0; extra == "ray"
-Provides-Extra: tui
-Requires-Dist: textual>=0.85.2; extra == "tui"
 Provides-Extra: ui
 Requires-Dist: sf-hamilton-ui>=0.0.11; extra == "ui"
-Provides-Extra: webserver
-Requires-Dist: sanic>=24.6.0; extra == "webserver"
-Requires-Dist: sanic-ext>=23.12.0; extra == "webserver"
-Requires-Dist: orjson>=3.10.11; extra == "webserver"
 Provides-Extra: openlineage
 Requires-Dist: openlineage-python>=1.32.0; extra == "openlineage"
 Dynamic: license-file

FlowerPower 0.21.0__py3-none-any.whl → 0.31.0__py3-none-any.whl

FlowerPower 0.21.0__py3-none-any.whl → 0.31.0__py3-none-any.whl