flowerpower-0.11.6.19-py3-none-any.whl → flowerpower-0.20.0-py3-none-any.whl

This diff compares the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as published.
Files changed (80)
  1. flowerpower/cfg/__init__.py +3 -3
  2. flowerpower/cfg/pipeline/__init__.py +5 -3
  3. flowerpower/cfg/project/__init__.py +3 -3
  4. flowerpower/cfg/project/job_queue.py +1 -128
  5. flowerpower/cli/__init__.py +5 -5
  6. flowerpower/cli/cfg.py +0 -3
  7. flowerpower/cli/job_queue.py +401 -133
  8. flowerpower/cli/pipeline.py +14 -413
  9. flowerpower/cli/utils.py +0 -1
  10. flowerpower/flowerpower.py +537 -28
  11. flowerpower/job_queue/__init__.py +5 -94
  12. flowerpower/job_queue/base.py +201 -3
  13. flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -3
  14. flowerpower/job_queue/rq/manager.py +388 -77
  15. flowerpower/pipeline/__init__.py +2 -0
  16. flowerpower/pipeline/base.py +2 -2
  17. flowerpower/pipeline/io.py +14 -16
  18. flowerpower/pipeline/manager.py +21 -642
  19. flowerpower/pipeline/pipeline.py +571 -0
  20. flowerpower/pipeline/registry.py +242 -10
  21. flowerpower/pipeline/visualizer.py +1 -2
  22. flowerpower/plugins/_io/__init__.py +8 -0
  23. flowerpower/plugins/mqtt/manager.py +6 -6
  24. flowerpower/settings/backend.py +0 -2
  25. flowerpower/settings/job_queue.py +1 -57
  26. flowerpower/utils/misc.py +0 -256
  27. flowerpower/utils/monkey.py +1 -83
  28. {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/METADATA +308 -152
  29. flowerpower-0.20.0.dist-info/RECORD +58 -0
  30. flowerpower/fs/__init__.py +0 -29
  31. flowerpower/fs/base.py +0 -662
  32. flowerpower/fs/ext.py +0 -2143
  33. flowerpower/fs/storage_options.py +0 -1420
  34. flowerpower/job_queue/apscheduler/__init__.py +0 -11
  35. flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -110
  36. flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -93
  37. flowerpower/job_queue/apscheduler/manager.py +0 -1051
  38. flowerpower/job_queue/apscheduler/setup.py +0 -554
  39. flowerpower/job_queue/apscheduler/trigger.py +0 -169
  40. flowerpower/job_queue/apscheduler/utils.py +0 -311
  41. flowerpower/pipeline/job_queue.py +0 -583
  42. flowerpower/pipeline/runner.py +0 -603
  43. flowerpower/plugins/io/base.py +0 -2520
  44. flowerpower/plugins/io/helpers/datetime.py +0 -298
  45. flowerpower/plugins/io/helpers/polars.py +0 -875
  46. flowerpower/plugins/io/helpers/pyarrow.py +0 -570
  47. flowerpower/plugins/io/helpers/sql.py +0 -202
  48. flowerpower/plugins/io/loader/__init__.py +0 -28
  49. flowerpower/plugins/io/loader/csv.py +0 -37
  50. flowerpower/plugins/io/loader/deltatable.py +0 -190
  51. flowerpower/plugins/io/loader/duckdb.py +0 -19
  52. flowerpower/plugins/io/loader/json.py +0 -37
  53. flowerpower/plugins/io/loader/mqtt.py +0 -159
  54. flowerpower/plugins/io/loader/mssql.py +0 -26
  55. flowerpower/plugins/io/loader/mysql.py +0 -26
  56. flowerpower/plugins/io/loader/oracle.py +0 -26
  57. flowerpower/plugins/io/loader/parquet.py +0 -35
  58. flowerpower/plugins/io/loader/postgres.py +0 -26
  59. flowerpower/plugins/io/loader/pydala.py +0 -19
  60. flowerpower/plugins/io/loader/sqlite.py +0 -23
  61. flowerpower/plugins/io/metadata.py +0 -244
  62. flowerpower/plugins/io/saver/__init__.py +0 -28
  63. flowerpower/plugins/io/saver/csv.py +0 -36
  64. flowerpower/plugins/io/saver/deltatable.py +0 -186
  65. flowerpower/plugins/io/saver/duckdb.py +0 -19
  66. flowerpower/plugins/io/saver/json.py +0 -36
  67. flowerpower/plugins/io/saver/mqtt.py +0 -28
  68. flowerpower/plugins/io/saver/mssql.py +0 -26
  69. flowerpower/plugins/io/saver/mysql.py +0 -26
  70. flowerpower/plugins/io/saver/oracle.py +0 -26
  71. flowerpower/plugins/io/saver/parquet.py +0 -36
  72. flowerpower/plugins/io/saver/postgres.py +0 -26
  73. flowerpower/plugins/io/saver/pydala.py +0 -20
  74. flowerpower/plugins/io/saver/sqlite.py +0 -24
  75. flowerpower/utils/scheduler.py +0 -311
  76. flowerpower-0.11.6.19.dist-info/RECORD +0 -102
  77. {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/WHEEL +0 -0
  78. {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/entry_points.txt +0 -0
  79. {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/licenses/LICENSE +0 -0
  80. {flowerpower-0.11.6.19.dist-info → flowerpower-0.20.0.dist-info}/top_level.txt +0 -0
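The headline changes: the bundled `flowerpower.fs` module and the APScheduler job-queue backend are removed, the `flowerpower.plugins.io` package moves to the separate `flowerpower-io` distribution, and filesystem handling is delegated to the external `fsspec_utils` package. A minimal sketch of the import migration implied by the hunks below (illustrative only; the bucket and credentials are placeholders):

```python
# 0.11.x: filesystem helpers shipped inside flowerpower
# from flowerpower.fs import AbstractFileSystem, get_filesystem

# 0.20.0: the equivalent names come from the external fsspec_utils package
from fsspec_utils import AbstractFileSystem, filesystem

# filesystem() replaces get_filesystem(); the hunks below show it accepting
# a path or protocol, storage_options, and a `cached` flag
fs: AbstractFileSystem = filesystem(
    "s3://my-bucket/project",           # placeholder bucket
    storage_options={"key": "secret"},  # placeholder credentials
    cached=True,
)
```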
flowerpower/pipeline/registry.py CHANGED
@@ -4,9 +4,11 @@
  import datetime as dt
  import os
  import posixpath
- from typing import TYPE_CHECKING
+ import sys
+ from typing import TYPE_CHECKING, Any, Dict

  import rich
+ from fsspec_utils import AbstractFileSystem, filesystem
  from loguru import logger
  from rich.console import Console
  from rich.panel import Panel
@@ -17,15 +19,16 @@ from rich.tree import Tree
  from .. import settings
  # Import necessary config types and utility functions
  from ..cfg import PipelineConfig, ProjectConfig
- from ..fs import AbstractFileSystem
  from ..utils.logging import setup_logging
  # Assuming view_img might be used indirectly or needed later
  from ..utils.templates import (HOOK_TEMPLATE__MQTT_BUILD_CONFIG,
                                 PIPELINE_PY_TEMPLATE)
+ # Import base utilities
+ from .base import load_module

  if TYPE_CHECKING:
-     # Keep this for type hinting if needed elsewhere, though Config is imported directly now
-     pass
+     from .pipeline import Pipeline
+     from ..flowerpower import FlowerPowerProject

  from enum import Enum

@@ -54,8 +57,8 @@ class PipelineRegistry:
          self,
          project_cfg: ProjectConfig,
          fs: AbstractFileSystem,
-         cfg_dir: str,
-         pipelines_dir: str,
+         base_dir: str | None = None,
+         storage_options: dict | None = None,
      ):
          """
          Initializes the PipelineRegistry.
@@ -63,15 +66,244 @@
          Args:
              project_cfg: The project configuration object.
              fs: The filesystem instance.
-             cfg_dir: The configuration directory path.
-             pipelines_dir: The pipelines directory path.
+             base_dir: The base directory path.
+             storage_options: Storage options for filesystem operations.
          """
          self.project_cfg = project_cfg
          self._fs = fs
-         self._cfg_dir = cfg_dir
-         self._pipelines_dir = pipelines_dir
+         self._cfg_dir = settings.CONFIG_DIR
+         self._pipelines_dir = settings.PIPELINES_DIR
+         self._base_dir = base_dir
+         self._storage_options = storage_options or {}
          self._console = Console()

+         # Cache for loaded pipelines
+         self._pipeline_cache: Dict[str, "Pipeline"] = {}
+         self._config_cache: Dict[str, PipelineConfig] = {}
+         self._module_cache: Dict[str, Any] = {}
+
+         # Ensure module paths are added
+         self._add_modules_path()
+
+     @classmethod
+     def from_filesystem(
+         cls,
+         base_dir: str,
+         fs: AbstractFileSystem | None = None,
+         storage_options: dict | None = None,
+     ) -> "PipelineRegistry":
+         """
+         Create a PipelineRegistry from filesystem parameters.
+
+         This factory method creates a complete PipelineRegistry instance by:
+         1. Creating the filesystem if not provided
+         2. Loading the ProjectConfig from the base directory
+         3. Initializing the registry with the loaded configuration
+
+         Args:
+             base_dir: The base directory path for the FlowerPower project
+             fs: Optional filesystem instance. If None, will be created from base_dir
+             storage_options: Optional storage options for filesystem access
+
+         Returns:
+             PipelineRegistry: A fully configured registry instance
+
+         Raises:
+             ValueError: If base_dir is invalid or ProjectConfig cannot be loaded
+             RuntimeError: If filesystem creation fails
+
+         Example:
+             ```python
+             # Create registry from local directory
+             registry = PipelineRegistry.from_filesystem("/path/to/project")
+
+             # Create registry with S3 storage
+             registry = PipelineRegistry.from_filesystem(
+                 "s3://my-bucket/project",
+                 storage_options={"key": "secret"}
+             )
+             ```
+         """
+         # Create filesystem if not provided
+         if fs is None:
+             fs = filesystem(
+                 base_dir,
+                 storage_options=storage_options,
+                 cached=storage_options is not None,
+             )
+
+         # Load project configuration
+         project_cfg = ProjectConfig.load(base_dir=base_dir, fs=fs)
+
+         # Ensure we have a ProjectConfig instance
+         if not isinstance(project_cfg, ProjectConfig):
+             raise TypeError(f"Expected ProjectConfig, got {type(project_cfg)}")
+
+         # Create and return registry instance
+         return cls(
+             project_cfg=project_cfg,
+             fs=fs,
+             base_dir=base_dir,
+             storage_options=storage_options,
+         )
+
+     def _add_modules_path(self) -> None:
+         """Add pipeline module paths to Python path."""
+         try:
+             if hasattr(self._fs, "is_cache_fs") and self._fs.is_cache_fs:
+                 self._fs.sync_cache()
+                 project_path = self._fs._mapper.directory
+                 modules_path = posixpath.join(project_path, self._pipelines_dir)
+             else:
+                 # Use the base directory directly if not using cache
+                 if hasattr(self._fs, "path"):
+                     project_path = self._fs.path
+                 elif self._base_dir:
+                     project_path = self._base_dir
+                 else:
+                     # Fallback for mocked filesystems
+                     project_path = "."
+                 modules_path = posixpath.join(project_path, self._pipelines_dir)
+
+             if project_path not in sys.path:
+                 sys.path.insert(0, project_path)
+
+             if modules_path not in sys.path:
+                 sys.path.insert(0, modules_path)
+         except (AttributeError, TypeError):
+             # Handle case where filesystem is mocked or doesn't have required properties
+             logger.debug("Could not add modules path - using default Python path")
+
+     # --- Pipeline Factory Methods ---
+
+     def get_pipeline(
+         self, name: str, project_context: "FlowerPowerProject", reload: bool = False
+     ) -> "Pipeline":
+         """Get a Pipeline instance for the given name.
+
+         This method creates a fully-formed Pipeline object by loading its configuration
+         and Python module, then injecting the project context.
+
+         Args:
+             name: Name of the pipeline to get
+             project_context: Reference to the FlowerPowerProject
+             reload: Whether to reload configuration and module from disk
+
+         Returns:
+             Pipeline instance ready for execution
+
+         Raises:
+             FileNotFoundError: If pipeline configuration or module doesn't exist
+             ImportError: If pipeline module cannot be imported
+             ValueError: If pipeline configuration is invalid
+         """
+         # Use cache if available and not reloading
+         if not reload and name in self._pipeline_cache:
+             logger.debug(f"Returning cached pipeline '{name}'")
+             return self._pipeline_cache[name]
+
+         logger.debug(f"Creating pipeline instance for '{name}'")
+
+         # Load pipeline configuration
+         config = self.load_config(name, reload=reload)
+
+         # Load pipeline module
+         module = self.load_module(name, reload=reload)
+
+         # Import Pipeline class here to avoid circular import
+         from .pipeline import Pipeline
+
+         # Create Pipeline instance
+         pipeline = Pipeline(
+             name=name,
+             config=config,
+             module=module,
+             project_context=project_context,
+         )
+
+         # Cache the pipeline instance
+         self._pipeline_cache[name] = pipeline
+
+         logger.debug(f"Successfully created pipeline instance for '{name}'")
+         return pipeline
+
+     def load_config(self, name: str, reload: bool = False) -> PipelineConfig:
+         """Load pipeline configuration from disk.
+
+         Args:
+             name: Name of the pipeline
+             reload: Whether to reload from disk even if cached
+
+         Returns:
+             PipelineConfig instance
+         """
+         # Use cache if available and not reloading
+         if not reload and name in self._config_cache:
+             logger.debug(f"Returning cached config for pipeline '{name}'")
+             return self._config_cache[name]
+
+         logger.debug(f"Loading configuration for pipeline '{name}'")
+
+         # Load configuration from disk
+         config = PipelineConfig.load(
+             base_dir=self._base_dir,
+             name=name,
+             fs=self._fs,
+             storage_options=self._storage_options,
+         )
+
+         # Cache the configuration
+         self._config_cache[name] = config
+
+         return config
+
+     def load_module(self, name: str, reload: bool = False) -> Any:
+         """Load pipeline module from disk.
+
+         Args:
+             name: Name of the pipeline
+             reload: Whether to reload from disk even if cached
+
+         Returns:
+             Loaded Python module
+         """
+         # Use cache if available and not reloading
+         if not reload and name in self._module_cache:
+             logger.debug(f"Returning cached module for pipeline '{name}'")
+             return self._module_cache[name]
+
+         logger.debug(f"Loading module for pipeline '{name}'")
+
+         # Convert pipeline name to module name
+         formatted_name = name.replace(".", "/").replace("-", "_")
+         module_name = f"pipelines.{formatted_name}"
+
+         # Load the module
+         module = load_module(module_name, reload=reload)
+
+         # Cache the module
+         self._module_cache[name] = module
+
+         return module
+
+     def clear_cache(self, name: str | None = None):
+         """Clear cached pipelines, configurations, and modules.
+
+         Args:
+             name: If provided, clear cache only for this pipeline.
+                   If None, clear entire cache.
+         """
+         if name:
+             logger.debug(f"Clearing cache for pipeline '{name}'")
+             self._pipeline_cache.pop(name, None)
+             self._config_cache.pop(name, None)
+             self._module_cache.pop(name, None)
+         else:
+             logger.debug("Clearing entire pipeline cache")
+             self._pipeline_cache.clear()
+             self._config_cache.clear()
+             self._module_cache.clear()
+
      # --- Methods moved from PipelineManager ---
      def new(self, name: str, overwrite: bool = False):
          """
flowerpower/pipeline/visualizer.py CHANGED
@@ -1,12 +1,11 @@
  import posixpath
- from typing import Any

+ from fsspec_utils import AbstractFileSystem
  from hamilton import driver
  from rich import print

  # Import necessary config types and utility functions
  from ..cfg import PipelineConfig, ProjectConfig
- from ..fs import AbstractFileSystem
  from ..utils.misc import view_img
  from .base import load_module  # Import module loading utility

flowerpower/plugins/_io/__init__.py CHANGED
@@ -0,0 +1,8 @@
+ import warnings
+
+ warnings.warn(
+     "The flowerpower.plugins._io module is deprecated. "
+     "Please use 'flowerpower-io' instead. Install it with 'pip install flowerpower-io'.",
+     DeprecationWarning,
+     stacklevel=2,
+ )
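The old `flowerpower.plugins.io` package is deleted outright (files 43–74 in the list above); this new `_io` stub exists only to warn at import time. A quick way to observe the shim, assuming 0.20.0 is installed:

```python
import warnings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    import flowerpower.plugins._io  # noqa: F401  # importing triggers the warning

# The shim emits a DeprecationWarning pointing at `pip install flowerpower-io`
assert any(issubclass(w.category, DeprecationWarning) for w in caught)
```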
flowerpower/plugins/mqtt/manager.py CHANGED
@@ -7,6 +7,7 @@ from types import TracebackType
  from typing import Any, Callable

  import mmh3
+ from fsspec_utils import AbstractFileSystem, BaseStorageOptions, filesystem
  from loguru import logger
  from munch import Munch
  from paho.mqtt.client import (MQTT_ERR_SUCCESS, CallbackAPIVersion, Client,
@@ -16,9 +17,7 @@ from paho.mqtt.reasoncodes import ReasonCode
  from ...cfg import ProjectConfig
  from ...cfg.pipeline.run import ExecutorConfig, WithAdapterConfig
  from ...cfg.project.adapter import AdapterConfig
- from ...fs import AbstractFileSystem, BaseStorageOptions, get_filesystem
  from ...pipeline.manager import PipelineManager
- from ...utils.callback import run_with_callback
  from ...utils.logging import setup_logging
  from .cfg import MqttConfig

@@ -132,8 +131,9 @@ class MqttManager:
          import os

          if fs is None:
-             fs = get_filesystem(
-                 path=os.path.dirname(path), storage_options=storage_options
+             fs = filesystem(
+                 protocol_or_path=os.path.dirname(path),
+                 storage_options=storage_options,
              )

          cfg = MqttConfig.from_yaml(path=os.path.basename(path), fs=fs)
@@ -637,7 +637,7 @@
              storage_options=storage_options, fs=fs, base_dir=base_dir
          ) as pipeline:
              if as_job:
-                 res = pipeline.add_job(
+                 pipeline.add_job(
                      name=name,
                      inputs=inputs,
                      final_vars=final_vars,
@@ -664,7 +664,7 @@
                  )

              else:
-                 res = pipeline.run(
+                 pipeline.run(
                      name=name,
                      inputs=inputs,
                      final_vars=final_vars,
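Two small things change in `MqttManager`: the filesystem is now built with `filesystem(protocol_or_path=...)` from `fsspec_utils` instead of the removed `get_filesystem(path=...)`, and the unused `res =` bindings around `pipeline.add_job(...)`/`pipeline.run(...)` are dropped. A hedged before/after sketch for callers that constructed the filesystem themselves (the config path is illustrative):

```python
import os
from fsspec_utils import filesystem

path = "conf/mqtt.yml"  # hypothetical config location

# 0.11.x:
# fs = get_filesystem(path=os.path.dirname(path), storage_options=None)

# 0.20.0:
fs = filesystem(
    protocol_or_path=os.path.dirname(path),
    storage_options=None,
)
```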
flowerpower/settings/backend.py CHANGED
@@ -1,5 +1,3 @@
- import os
-
  # Define backend properties in a dictionary for easier maintenance

  BACKEND_PROPERTIES = {
flowerpower/settings/job_queue.py CHANGED
@@ -1,7 +1,7 @@
  import os

  from .backend import BACKEND_PROPERTIES
- from .executor import EXECUTOR, EXECUTOR_MAX_WORKERS, EXECUTOR_NUM_CPUS
+ from .executor import EXECUTOR_NUM_CPUS

  # WORKER
  JOB_QUEUE_TYPE = os.getenv("FP_JOB_QUEUE_TYPE", "rq")
@@ -29,59 +29,3 @@ RQ_QUEUES = (
      .split(",")
  )
  RQ_NUM_WORKERS = int(os.getenv("FP_RQ_NUM_WORKERS", EXECUTOR_NUM_CPUS))
-
- # APS WORKER
- APS_BACKEND_DS = os.getenv("FP_APS_BACKEND_DS", "memory")
-
- APS_BACKEND_DS_HOST = os.getenv(
-     "FP_APS_BACKEND_DS_HOST",
-     BACKEND_PROPERTIES.get(APS_BACKEND_DS, {}).get("default_host", None),
- )
- APS_BACKEND_DS_PORT = int(
-     os.getenv(
-         "FP_APS_BACKEND_DS_PORT",
-         BACKEND_PROPERTIES.get(APS_BACKEND_DS, {}).get("default_port", 0),
-     )
- )
- APS_BACKEND_DS_DB = os.getenv(
-     "FP_APS_BACKEND_DS_DB",
-     BACKEND_PROPERTIES.get(APS_BACKEND_DS, {}).get("default_database", None),
- )
- APS_BACKEND_DS_USERNAME = os.getenv(
-     "FP_APS_BACKEND_DS_USERNAME",
-     BACKEND_PROPERTIES.get(APS_BACKEND_DS, {}).get("default_username", None),
- )
- APS_BACKEND_DS_PASSWORD = os.getenv(
-     "FP_APS_BACKEND_DS_PASSWORD",
-     BACKEND_PROPERTIES.get(APS_BACKEND_DS, {}).get("default_password", None),
- )
- APS_BACKEND_DS_SCHEMA = os.getenv("FP_APS_BACKEND_DS_SCHEMA", "flowerpower")
-
- APS_BACKEND_EB = os.getenv("FP_APS_BACKEND_EB", "memory")
- APS_BACKEND_EB_HOST = os.getenv(
-     "FP_APS_BACKEND_EB_HOST",
-     BACKEND_PROPERTIES.get(APS_BACKEND_EB, {}).get("default_host", None),
- )
- APS_BACKEND_EB_PORT = int(
-     os.getenv(
-         "FP_APS_BACKEND_EB_PORT",
-         BACKEND_PROPERTIES.get(APS_BACKEND_EB, {}).get("default_port", 0),
-     )
- )
- APS_BACKEND_EB_DB = os.getenv(
-     "FP_APS_BACKEND_EB_DB",
-     BACKEND_PROPERTIES.get(APS_BACKEND_EB, {}).get("default_database", None),
- )
- APS_BACKEND_EB_USERNAME = os.getenv(
-     "FP_APS_BACKEND_EB_USERNAME",
-     BACKEND_PROPERTIES.get(APS_BACKEND_EB, {}).get("default_username", None),
- )
- APS_BACKEND_EB_PASSWORD = os.getenv(
-     "FP_APS_BACKEND_EB_PASSWORD",
-     BACKEND_PROPERTIES.get(APS_BACKEND_EB, {}).get("default_password", None),
- )
-
- APS_CLEANUP_INTERVAL = int(os.getenv("FP_APS_CLEANUP_INTERVAL", 300))
- APS_MAX_CONCURRENT_JOBS = int(os.getenv("FP_APS_MAX_CONCURRENT_JOBS", 10))
- APS_DEFAULT_EXECUTOR = os.getenv("FP_APS_DEFAULT_EXECUTOR", EXECUTOR)
- APS_NUM_WORKERS = int(os.getenv("FP_APS_NUM_WORKERS", EXECUTOR_MAX_WORKERS))
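With the APScheduler backend gone, every `FP_APS_*` environment variable is now silently ignored; only the RQ settings remain. The survivors keep the same resolve-from-environment pattern, shown here in isolation (using `os.cpu_count()` as a stand-in for the real `EXECUTOR_NUM_CPUS` default, which is defined in `.executor`):

```python
import os

# Environment variable first, computed default second, int coercion last --
# the same shape as RQ_NUM_WORKERS in flowerpower/settings/job_queue.py.
EXECUTOR_NUM_CPUS = os.cpu_count() or 1  # stand-in default
RQ_NUM_WORKERS = int(os.getenv("FP_RQ_NUM_WORKERS", EXECUTOR_NUM_CPUS))
```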
flowerpower/utils/misc.py CHANGED
@@ -8,262 +8,6 @@ from typing import Any

  import msgspec

- if importlib.util.find_spec("pyarrow"):
-     import pyarrow as pa
-
-     def convert_large_types_to_standard(schema: pa.Schema) -> pa.Schema:
-         # Define mapping of large types to standard types
-         type_mapping = {
-             pa.large_string(): pa.string(),
-             pa.large_binary(): pa.binary(),
-             pa.large_list(pa.null()): pa.list_(pa.null()),
-         }
-
-         # Convert fields
-         new_fields = []
-         for field in schema:
-             field_type = field.type
-             # Check if type exists in mapping
-             if field_type in type_mapping:
-                 new_field = pa.field(
-                     name=field.name,
-                     type=type_mapping[field_type],
-                     nullable=field.nullable,
-                     metadata=field.metadata,
-                 )
-                 new_fields.append(new_field)
-             # Handle large lists with nested types
-             elif isinstance(field_type, pa.LargeListType):
-                 new_field = pa.field(
-                     name=field.name,
-                     type=pa.list_(field_type.value_type),
-                     nullable=field.nullable,
-                     metadata=field.metadata,
-                 )
-                 new_fields.append(new_field)
-             else:
-                 new_fields.append(field)
-
-         return pa.schema(new_fields)
-
-
- else:
-
-     def convert_large_types_to_standard(*args, **kwargs):
-         raise ImportError("pyarrow not installed")
-
-
- if importlib.util.find_spec("polars"):
-     import polars as pl
-
-     def _dict_to_dataframe(
-         data: dict | list[dict], unique: bool | list[str] | str = False
-     ) -> pl.DataFrame:
-         """
-         Convert a dictionary or list of dictionaries to a polars DataFrame.
-
-         Args:
-             data: (dict | list[dict]) Data to convert.
-
-         Returns:
-             pl.DataFrame: Converted data.
-
-         Examples:
-             >>> # Single dict with list values
-             >>> data = {'a': [1, 2, 3], 'b': [4, 5, 6]}
-             >>> _dict_to_dataframe(data)
-             shape: (3, 2)
-             ┌─────┬─────┐
-             │ a   ┆ b   │
-             │ --- ┆ --- │
-             │ i64 ┆ i64 │
-             ╞═════╪═════╡
-             │ 1   ┆ 4   │
-             │ 2   ┆ 5   │
-             │ 3   ┆ 6   │
-             └─────┴─────┘
-
-             >>> # Single dict with scalar values
-             >>> data = {'a': 1, 'b': 2}
-             >>> _dict_to_dataframe(data)
-             shape: (1, 2)
-             ┌─────┬─────┐
-             │ a   ┆ b   │
-             │ --- ┆ --- │
-             │ i64 ┆ i64 │
-             ╞═════╪═════╡
-             │ 1   ┆ 2   │
-             └─────┴─────┘
-
-             >>> # List of dicts with scalar values
-             >>> data = [{'a': 1, 'b': 2}, {'a': 3, 'b': 4}]
-             >>> _dict_to_dataframe(data)
-             shape: (2, 2)
-             ┌─────┬─────┐
-             │ a   ┆ b   │
-             │ --- ┆ --- │
-             │ i64 ┆ i64 │
-             ╞═════╪═════╡
-             │ 1   ┆ 2   │
-             │ 3   ┆ 4   │
-             └─────┴─────┘
-
-             >>> # List of dicts with list values
-             >>> data = [{'a': [1, 2], 'b': [3, 4]}, {'a': [5, 6], 'b': [7, 8]}]
-             >>> _dict_to_dataframe(data)
-             shape: (2, 2)
-             ┌───────┬───────┐
-             │ a     ┆ b     │
-             │ ---   ┆ ---   │
-             │ list  ┆ list  │
-             ╞═══════╪═══════╡
-             │ [1,2] ┆ [3,4] │
-             │ [5,6] ┆ [7,8] │
-             └───────┴───────┘
-         """
-         if isinstance(data, list):
-             # If it's a single-element list, just use the first element
-             if len(data) == 1:
-                 data = data[0]
-             # If it's a list of dicts
-             else:
-                 first_item = data[0]
-                 # Check if the dict values are lists/tuples
-                 if any(isinstance(v, (list, tuple)) for v in first_item.values()):
-                     # Each dict becomes a row with list/tuple values
-                     data = pl.DataFrame(data)
-                 else:
-                     # If values are scalars, convert list of dicts to DataFrame
-                     data = pl.DataFrame(data)
-
-                 if unique:
-                     data = data.unique(
-                         subset=None if not isinstance(unique, str | list) else unique,
-                         maintain_order=True,
-                     )
-                 return data
-
-         # If it's a single dict
-         if isinstance(data, dict):
-             # Check if values are lists/tuples
-             if any(isinstance(v, (list, tuple)) for v in data.values()):
-                 # Get the length of any list value (assuming all lists have same length)
-                 length = len(
-                     next(v for v in data.values() if isinstance(v, (list, tuple)))
-                 )
-                 # Convert to DataFrame where each list element becomes a row
-                 data = pl.DataFrame({
-                     k: v if isinstance(v, (list, tuple)) else [v] * length
-                     for k, v in data.items()
-                 })
-             else:
-                 # If values are scalars, wrap them in a list to create a single row
-                 data = pl.DataFrame({k: [v] for k, v in data.items()})
-
-             if unique:
-                 data = data.unique(
-                     subset=None if not isinstance(unique, str | list) else unique,
-                     maintain_order=True,
-                 )
-             return data
-
-         raise ValueError("Input must be a dictionary or list of dictionaries")
-
- else:
-
-     def _dict_to_dataframe(*args, **kwargs):
-         raise ImportError("polars not installed")
-
-
- if (
-     importlib.util.find_spec("pandas")
-     and importlib.util.find_spec("polars")
-     and importlib.util.find_spec("pyarrow")
- ):
-     from typing import Generator
-
-     import pandas as pd
-
-     def to_pyarrow_table(
-         data: pl.DataFrame
-         | pl.LazyFrame
-         | pd.DataFrame
-         | dict
-         | list[pl.DataFrame | pl.LazyFrame | pd.DataFrame | dict],
-         concat: bool = False,
-         unique: bool | list[str] | str = False,
-     ) -> pa.Table:
-         if isinstance(data, dict):
-             data = _dict_to_dataframe(data)
-         if isinstance(data, list):
-             if isinstance(data[0], dict):
-                 data = _dict_to_dataframe(data, unique=unique)
-
-         if not isinstance(data, list):
-             data = [data]
-
-         if isinstance(data[0], pl.LazyFrame):
-             data = [dd.collect() for dd in data]
-
-         if isinstance(data[0], pl.DataFrame):
-             if concat:
-                 data = pl.concat(data, how="diagonal_relaxed")
-                 if unique:
-                     data = data.unique(
-                         subset=None if not isinstance(unique, str | list) else unique,
-                         maintain_order=True,
-                     )
-                 data = data.to_arrow()
-                 data = data.cast(convert_large_types_to_standard(data.schema))
-             else:
-                 data = [dd.to_arrow() for dd in data]
-                 data = [
-                     dd.cast(convert_large_types_to_standard(dd.schema)) for dd in data
-                 ]
-
-         elif isinstance(data[0], pd.DataFrame):
-             data = [pa.Table.from_pandas(dd, preserve_index=False) for dd in data]
-             if concat:
-                 data = pa.concat_tables(data, promote_options="permissive")
-                 if unique:
-                     data = (
-                         pl.from_arrow(data)
-                         .unique(
-                             subset=None
-                             if not isinstance(unique, str | list)
-                             else unique,
-                             maintain_order=True,
-                         )
-                         .to_arrow()
-                     )
-                 data = data.cast(convert_large_types_to_standard(data.schema))
-
-         elif isinstance(data[0], pa.RecordBatch | pa.RecordBatchReader | Generator):
-             if concat:
-                 data = pa.Table.from_batches(data)
-                 if unique:
-                     data = (
-                         pl.from_arrow(data)
-                         .unique(
-                             subset=None
-                             if not isinstance(unique, str | list)
-                             else unique,
-                             maintain_order=True,
-                         )
-                         .to_arrow()
-                     )
-                 data = data.cast(convert_large_types_to_standard(data.schema))
-             else:
-                 data = [pa.Table.from_batches([dd]) for dd in data]
-
-         return data
-
- else:
-
-     def to_pyarrow_table(*args, **kwargs):
-         raise ImportError("pandas, polars, or pyarrow not installed")
-
-
  if importlib.util.find_spec("joblib"):
      from joblib import Parallel, delayed
      from rich.progress import (BarColumn, Progress, TextColumn,
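The optional-dependency helpers `convert_large_types_to_standard`, `_dict_to_dataframe`, and `to_pyarrow_table` are removed from `flowerpower.utils.misc`; this diff does not show a replacement. Callers that still need the large-type downcast can replicate the core of the removed helper directly with pyarrow; a minimal sketch (the function name is ours, not the library's):

```python
import pyarrow as pa

def downcast_large_types(schema: pa.Schema) -> pa.Schema:
    """Map large_string/large_binary/large_list fields to their standard
    counterparts, mirroring the removed helper."""
    fields = []
    for field in schema:
        t = field.type
        if pa.types.is_large_string(t):
            t = pa.string()
        elif pa.types.is_large_binary(t):
            t = pa.binary()
        elif pa.types.is_large_list(t):
            t = pa.list_(t.value_type)
        fields.append(field.with_type(t))
    return pa.schema(fields)

table = pa.table({"s": pa.array(["a", "b"], type=pa.large_string())})
print(table.cast(downcast_large_types(table.schema)).schema)  # s: string
```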