FlowerPower: flowerpower-0.11.6.20-py3-none-any.whl → flowerpower-0.21.0-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- flowerpower/__init__.py +2 -6
- flowerpower/cfg/__init__.py +7 -14
- flowerpower/cfg/base.py +29 -25
- flowerpower/cfg/pipeline/__init__.py +8 -6
- flowerpower/cfg/pipeline/_schedule.py +32 -0
- flowerpower/cfg/pipeline/adapter.py +0 -5
- flowerpower/cfg/pipeline/builder.py +377 -0
- flowerpower/cfg/pipeline/run.py +36 -0
- flowerpower/cfg/project/__init__.py +11 -24
- flowerpower/cfg/project/adapter.py +0 -12
- flowerpower/cli/__init__.py +2 -21
- flowerpower/cli/cfg.py +0 -3
- flowerpower/cli/mqtt.py +0 -6
- flowerpower/cli/pipeline.py +22 -415
- flowerpower/cli/utils.py +0 -1
- flowerpower/flowerpower.py +345 -146
- flowerpower/pipeline/__init__.py +2 -0
- flowerpower/pipeline/base.py +21 -12
- flowerpower/pipeline/io.py +58 -54
- flowerpower/pipeline/manager.py +165 -726
- flowerpower/pipeline/pipeline.py +643 -0
- flowerpower/pipeline/registry.py +285 -18
- flowerpower/pipeline/visualizer.py +5 -6
- flowerpower/plugins/io/__init__.py +8 -0
- flowerpower/plugins/mqtt/__init__.py +7 -11
- flowerpower/settings/__init__.py +0 -2
- flowerpower/settings/{backend.py → _backend.py} +0 -21
- flowerpower/settings/logging.py +1 -1
- flowerpower/utils/logging.py +24 -12
- flowerpower/utils/misc.py +17 -256
- flowerpower/utils/monkey.py +1 -83
- flowerpower-0.21.0.dist-info/METADATA +463 -0
- flowerpower-0.21.0.dist-info/RECORD +44 -0
- flowerpower/cfg/pipeline/schedule.py +0 -74
- flowerpower/cfg/project/job_queue.py +0 -238
- flowerpower/cli/job_queue.py +0 -1061
- flowerpower/fs/__init__.py +0 -29
- flowerpower/fs/base.py +0 -662
- flowerpower/fs/ext.py +0 -2143
- flowerpower/fs/storage_options.py +0 -1420
- flowerpower/job_queue/__init__.py +0 -294
- flowerpower/job_queue/apscheduler/__init__.py +0 -11
- flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -110
- flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -93
- flowerpower/job_queue/apscheduler/manager.py +0 -1051
- flowerpower/job_queue/apscheduler/setup.py +0 -554
- flowerpower/job_queue/apscheduler/trigger.py +0 -169
- flowerpower/job_queue/apscheduler/utils.py +0 -311
- flowerpower/job_queue/base.py +0 -413
- flowerpower/job_queue/rq/__init__.py +0 -10
- flowerpower/job_queue/rq/_trigger.py +0 -37
- flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +0 -226
- flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -231
- flowerpower/job_queue/rq/manager.py +0 -1582
- flowerpower/job_queue/rq/setup.py +0 -154
- flowerpower/job_queue/rq/utils.py +0 -69
- flowerpower/mqtt.py +0 -12
- flowerpower/pipeline/job_queue.py +0 -583
- flowerpower/pipeline/runner.py +0 -603
- flowerpower/plugins/io/base.py +0 -2520
- flowerpower/plugins/io/helpers/datetime.py +0 -298
- flowerpower/plugins/io/helpers/polars.py +0 -875
- flowerpower/plugins/io/helpers/pyarrow.py +0 -570
- flowerpower/plugins/io/helpers/sql.py +0 -202
- flowerpower/plugins/io/loader/__init__.py +0 -28
- flowerpower/plugins/io/loader/csv.py +0 -37
- flowerpower/plugins/io/loader/deltatable.py +0 -190
- flowerpower/plugins/io/loader/duckdb.py +0 -19
- flowerpower/plugins/io/loader/json.py +0 -37
- flowerpower/plugins/io/loader/mqtt.py +0 -159
- flowerpower/plugins/io/loader/mssql.py +0 -26
- flowerpower/plugins/io/loader/mysql.py +0 -26
- flowerpower/plugins/io/loader/oracle.py +0 -26
- flowerpower/plugins/io/loader/parquet.py +0 -35
- flowerpower/plugins/io/loader/postgres.py +0 -26
- flowerpower/plugins/io/loader/pydala.py +0 -19
- flowerpower/plugins/io/loader/sqlite.py +0 -23
- flowerpower/plugins/io/metadata.py +0 -244
- flowerpower/plugins/io/saver/__init__.py +0 -28
- flowerpower/plugins/io/saver/csv.py +0 -36
- flowerpower/plugins/io/saver/deltatable.py +0 -186
- flowerpower/plugins/io/saver/duckdb.py +0 -19
- flowerpower/plugins/io/saver/json.py +0 -36
- flowerpower/plugins/io/saver/mqtt.py +0 -28
- flowerpower/plugins/io/saver/mssql.py +0 -26
- flowerpower/plugins/io/saver/mysql.py +0 -26
- flowerpower/plugins/io/saver/oracle.py +0 -26
- flowerpower/plugins/io/saver/parquet.py +0 -36
- flowerpower/plugins/io/saver/postgres.py +0 -26
- flowerpower/plugins/io/saver/pydala.py +0 -20
- flowerpower/plugins/io/saver/sqlite.py +0 -24
- flowerpower/plugins/mqtt/cfg.py +0 -17
- flowerpower/plugins/mqtt/manager.py +0 -962
- flowerpower/settings/job_queue.py +0 -87
- flowerpower/utils/scheduler.py +0 -311
- flowerpower-0.11.6.20.dist-info/METADATA +0 -537
- flowerpower-0.11.6.20.dist-info/RECORD +0 -102
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/WHEEL +0 -0
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/entry_points.txt +0 -0
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/licenses/LICENSE +0 -0
- {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/top_level.txt +0 -0
flowerpower/cli/pipeline.py
CHANGED
```diff
@@ -1,14 +1,13 @@
 # Import necessary libraries
-import datetime as dt
-
-import duration_parser
 import typer
 from loguru import logger
 from typing_extensions import Annotated
 
+from ..flowerpower import FlowerPowerProject
 from ..pipeline.manager import HookType, PipelineManager
+from ..cfg.pipeline.run import RunConfig
 from ..utils.logging import setup_logging
-from .utils import parse_dict_or_list_param
+from .utils import parse_dict_or_list_param
 
 setup_logging()
 
@@ -104,431 +103,39 @@ def run(
     parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
     parsed_with_adapter = parse_dict_or_list_param(with_adapter, "dict")
 
-    with PipelineManager(
-        base_dir=base_dir,
-        storage_options=parsed_storage_options or {},
-        log_level=log_level,
-    ) as manager:
-        _ = manager.run(
-            name=name,
-            inputs=parsed_inputs,
-            final_vars=parsed_final_vars,
-            config=parsed_config,
-            cache=parsed_cache,
-            executor_cfg=executor,
-            with_adapter_cfg=parsed_with_adapter,
-            max_retries=max_retries,
-            retry_delay=retry_delay,
-            jitter_factor=jitter_factor,
-        )
-        logger.info(f"Pipeline '{name}' finished running.")
-
-
-@app.command()
-def run_job(
-    name: str = typer.Argument(..., help="Name or ID of the pipeline job to run"),
-    executor: str | None = typer.Option(
-        None, help="Executor to use for running the job"
-    ),
-    base_dir: str | None = typer.Option(None, help="Base directory for the pipeline"),
-    inputs: str | None = typer.Option(
-        None, help="Input parameters as JSON, dict string, or key=value pairs"
-    ),
-    final_vars: str | None = typer.Option(None, help="Final variables as JSON or list"),
-    config: str | None = typer.Option(
-        None, help="Config for the hamilton pipeline executor"
-    ),
-    cache: str | None = typer.Option(
-        None, help="Cache configuration as JSON or dict string"
-    ),
-    storage_options: str | None = typer.Option(
-        None, help="Storage options as JSON, dict string, or key=value pairs"
-    ),
-    log_level: str | None = typer.Option(
-        None, help="Logging level (debug, info, warning, error, critical)"
-    ),
-    with_adapter: str | None = typer.Option(
-        None, help="Adapter configuration as JSON or dict string"
-    ),
-    max_retries: int = typer.Option(
-        0, help="Maximum number of retry attempts on failure"
-    ),
-    retry_delay: float = typer.Option(
-        1.0, help="Base delay between retries in seconds"
-    ),
-    jitter_factor: float = typer.Option(
-        0.1, help="Random factor applied to delay for jitter (0-1)"
-    ),
-):
-    """
-    Run a specific pipeline job.
-
-    This command runs an existing job by its ID. The job should have been previously
-    added to the system via the add-job command or through scheduling.
-
-    Args:
-        name: Job ID to run
-        executor: Type of executor to use (maps to executor_cfg in manager)
-        base_dir: Base directory containing pipelines and configurations
-        inputs: Input parameters for the pipeline
-        final_vars: Final variables to request from the pipeline
-        config: Configuration for the Hamilton executor
-        cache: Cache configuration
-        storage_options: Options for storage backends
-        log_level: Set the logging level
-        with_adapter: Configuration for adapters like trackers or monitors
-        max_retries: Maximum number of retry attempts on failure
-        retry_delay: Base delay between retries in seconds
-        jitter_factor: Random factor applied to delay for jitter (0-1)
-
-    Examples:
-        # Run a job with a specific ID
-        $ pipeline run-job job-123456
-
-        # Run a job with custom inputs
-        $ pipeline run-job job-123456 --inputs '{"data_path": "data/myfile.csv"}'
-
-        # Specify a different executor
-        $ pipeline run-job job-123456 --executor local
-
-        # Use caching for better performance
-        $ pipeline run-job job-123456 --cache '{"type": "memory"}'
-
-        # Configure adapters for monitoring
-        $ pipeline run-job job-123456 --with-adapter '{"tracker": true, "opentelemetry": false}'
-
-        # Set up automatic retries for resilience
-        $ pipeline run-job job-123456 --max-retries 3 --retry-delay 2.0
-    """
-    parsed_inputs = parse_dict_or_list_param(inputs, "dict")
-    parsed_config = parse_dict_or_list_param(config, "dict")
-    parsed_cache = parse_dict_or_list_param(cache, "dict")
-    parsed_final_vars = parse_dict_or_list_param(final_vars, "list")
-    parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
-    parsed_with_adapter = parse_dict_or_list_param(with_adapter, "dict")
-
-    with PipelineManager(
-        base_dir=base_dir,
-        storage_options=parsed_storage_options or {},
-        log_level=log_level,
-    ) as manager:
-        _ = manager.run_job(
-            name=name,
-            inputs=parsed_inputs,
-            final_vars=parsed_final_vars,
-            config=parsed_config,
-            cache=parsed_cache,
-            executor_cfg=executor,
-            with_adapter_cfg=parsed_with_adapter,
-            max_retries=max_retries,
-            retry_delay=retry_delay,
-            jitter_factor=jitter_factor,
-        )
-        logger.info(f"Job '{name}' finished running.")
-
-
-@app.command()
-def add_job(
-    name: str = typer.Argument(..., help="Name of the pipeline to add as a job"),
-    executor: str | None = typer.Option(
-        None, help="Executor to use for running the job"
-    ),
-    base_dir: str | None = typer.Option(None, help="Base directory for the pipeline"),
-    inputs: str | None = typer.Option(
-        None, help="Input parameters as JSON, dict string, or key=value pairs"
-    ),
-    final_vars: str | None = typer.Option(None, help="Final variables as JSON or list"),
-    config: str | None = typer.Option(
-        None, help="Config for the hamilton pipeline executor"
-    ),
-    cache: str | None = typer.Option(
-        None, help="Cache configuration as JSON or dict string"
-    ),
-    storage_options: str | None = typer.Option(
-        None, help="Storage options as JSON, dict string, or key=value pairs"
-    ),
-    log_level: str | None = typer.Option(
-        None, help="Logging level (debug, info, warning, error, critical)"
-    ),
-    with_adapter: str | None = typer.Option(
-        None, help="Adapter configuration as JSON or dict string"
-    ),
-    run_at: str | None = typer.Option(None, help="Run at a specific time (ISO format)"),
-    run_in: str | None = typer.Option(
-        None, help="Run in a specific interval (e.g., '5m', '1h', '12m34s')"
-    ),
-    max_retries: int = typer.Option(
-        3, help="Maximum number of retry attempts on failure"
-    ),
-    retry_delay: float = typer.Option(
-        1.0, help="Base delay between retries in seconds"
-    ),
-    jitter_factor: float = typer.Option(
-        0.1, help="Random factor applied to delay for jitter (0-1)"
-    ),
-):
-    """
-    Add a pipeline job to the queue.
-
-    This command adds a job to the queue for later execution. The job is based on
-    an existing pipeline with customized inputs and configuration.
-
-    Args:
-        name: Pipeline name to add as a job
-        executor: Type of executor to use
-        base_dir: Base directory containing pipelines and configurations
-        inputs: Input parameters for the pipeline
-        final_vars: Final variables to request from the pipeline
-        config: Configuration for the Hamilton executor
-        cache: Cache configuration
-        storage_options: Options for storage backends
-        log_level: Set the logging level
-        with_adapter: Configuration for adapters like trackers or monitors
-        run_at: Run the job at a specific time (ISO format)
-        run_in: Run the job in a specific interval (e.g., '5m', '1h')
-        max_retries: Maximum number of retry attempts on failure
-        retry_delay: Base delay between retries in seconds
-        jitter_factor: Random factor applied to delay for jitter (0-1)
-
-    Examples:
-        # Add a basic job
-        $ pipeline add-job my_pipeline
-
-        # Add a job with custom inputs
-        $ pipeline add-job my_pipeline --inputs '{"data_path": "data/myfile.csv"}'
-
-        # Specify final variables to calculate
-        $ pipeline add-job my_pipeline --final-vars '["output_table", "metrics"]'
-
-        # Configure caching
-        $ pipeline add-job my_pipeline --cache '{"type": "memory", "ttl": 3600}'
-
-        # Use a specific log level
-        $ pipeline add-job my_pipeline --log-level debug
-
-        # Configure automatic retries for resilience
-        $ pipeline add-job my_pipeline --max-retries 5 --retry-delay 2.0 --jitter-factor 0.2
-    """
-    parsed_inputs = parse_dict_or_list_param(inputs, "dict")
-    parsed_config = parse_dict_or_list_param(config, "dict")
-    parsed_cache = parse_dict_or_list_param(cache, "dict")
-    parsed_final_vars = parse_dict_or_list_param(final_vars, "list")
-    parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
-    parsed_with_adapter = parse_dict_or_list_param(with_adapter, "dict")
-    run_at = dt.datetime.fromisoformat(run_at) if run_at else None
-    run_in = duration_parser.parse(run_in) if run_in else None
-
-    with PipelineManager(
+    # Use FlowerPowerProject for better consistency with the new architecture
+    project = FlowerPowerProject.load(
         base_dir=base_dir,
         storage_options=parsed_storage_options or {},
         log_level=log_level,
-    )
-    job_id = manager.add_job(
-        name=name,
-        inputs=parsed_inputs,
-        final_vars=parsed_final_vars,
-        config=parsed_config,
-        cache=parsed_cache,
-        executor_cfg=executor,
-        with_adapter_cfg=parsed_with_adapter,
-        run_at=run_at,
-        run_in=run_in,
-        max_retries=max_retries,
-        retry_delay=retry_delay,
-        jitter_factor=jitter_factor,
-    )
-    logger.info(f"Job {job_id} added for pipeline '{name}'.")
-
-
-@app.command()
-def schedule(
-    name: str = typer.Argument(..., help="Name of the pipeline to schedule"),
-    executor: str | None = typer.Option(
-        None, help="Executor to use for running the job"
-    ),
-    base_dir: str | None = typer.Option(None, help="Base directory for the pipeline"),
-    inputs: str | None = typer.Option(
-        None, help="Input parameters as JSON, dict string, or key=value pairs"
-    ),
-    final_vars: str | None = typer.Option(None, help="Final variables as JSON or list"),
-    config: str | None = typer.Option(
-        None, help="Config for the hamilton pipeline executor"
-    ),
-    cache: str | None = typer.Option(
-        None, help="Cache configuration as JSON or dict string"
-    ),
-    cron: str | None = typer.Option(None, help="Cron expression for scheduling"),
-    interval: str | None = typer.Option(
-        None, help="Interval for scheduling (e.g., '5m', '1h')"
-    ),
-    date: str | None = typer.Option(
-        None, help="Specific date and time for scheduling (ISO format)"
-    ),
-    storage_options: str | None = typer.Option(
-        None, help="Storage options as JSON, dict string, or key=value pairs"
-    ),
-    log_level: str | None = typer.Option(
-        None, help="Logging level (debug, info, warning, error, critical)"
-    ),
-    with_adapter: str | None = typer.Option(
-        None, help="Adapter configuration as JSON or dict string"
-    ),
-    overwrite: bool = typer.Option(
-        False, help="Overwrite existing schedule if it exists"
-    ),
-    schedule_id: str | None = typer.Option(
-        None, help="Custom ID for the schedule (autogenerated if not provided)"
-    ),
-    max_retries: int = typer.Option(
-        3, help="Maximum number of retry attempts on failure"
-    ),
-    retry_delay: float = typer.Option(
-        1.0, help="Base delay between retries in seconds"
-    ),
-    jitter_factor: float = typer.Option(
-        0.1, help="Random factor applied to delay for jitter (0-1)"
-    ),
-):
-    """
-    Schedule a pipeline to run at specified times.
-
-    This command schedules a pipeline to run automatically based on various
-    scheduling triggers like cron expressions, time intervals, or specific dates.
-
-    Args:
-        name: Pipeline name to schedule
-        executor: Type of executor to use
-        base_dir: Base directory containing pipelines and configurations
-        inputs: Input parameters for the pipeline
-        final_vars: Final variables to request from the pipeline
-        config: Configuration for the Hamilton executor
-        cache: Cache configuration
-        cron: Cron expression for scheduling (e.g., "0 * * * *")
-        interval: Interval for scheduling (e.g., "5m", "1h")
-        date: Specific date and time for scheduling (ISO format)
-        storage_options: Options for storage backends
-        log_level: Set the logging level
-        with_adapter: Configuration for adapters like trackers or monitors
-        overwrite: Overwrite existing schedule with same ID
-        schedule_id: Custom identifier for the schedule
-        max_retries: Maximum number of retry attempts on failure
-        retry_delay: Base delay between retries in seconds
-        jitter_factor: Random factor applied to delay for jitter (0-1)
-
-    Examples:
-        # Schedule with cron expression (every hour)
-        $ pipeline schedule my_pipeline --trigger-type cron --crontab "0 * * * *"
-
-        # Schedule to run every 15 minutes
-        $ pipeline schedule my_pipeline --trigger-type interval --interval_params minutes=15
-
-        # Schedule to run at a specific date and time
-        $ pipeline schedule my_pipeline --trigger-type date --date_params run_date="2025-12-31 23:59:59"
-
-        # Schedule with custom inputs and cache settings
-        $ pipeline schedule my_pipeline --inputs '{"source": "database"}' --cache '{"type": "redis"}'
-
-        # Create a schedule in paused state
-        $ pipeline schedule my_pipeline --crontab "0 9 * * 1-5" --paused
-
-        # Set a custom schedule ID
-        $ pipeline schedule my_pipeline --crontab "0 12 * * *" --schedule_id "daily-noon-run"
-
-        # Configure automatic retries for resilience
-        $ pipeline schedule my_pipeline --max-retries 5 --retry-delay 2.0 --jitter-factor 0.2
-    """
-    parsed_inputs = parse_dict_or_list_param(inputs, "dict")
-    parsed_config = parse_dict_or_list_param(config, "dict")
-    parsed_cache = parse_dict_or_list_param(cache, "dict")
-    parsed_final_vars = parse_dict_or_list_param(final_vars, "list")
-    parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
-    parsed_with_adapter = parse_dict_or_list_param(with_adapter, "dict")
-    interval = duration_parser.parse(interval) if interval else None
-    cron = cron if cron else None
-    date = dt.datetime.fromisoformat(date) if date else None
+    )
 
-    with PipelineManager(
-        base_dir=base_dir,
-        storage_options=parsed_storage_options or {},
-        log_level=log_level,
-    ) as manager:
-        # Combine common schedule kwargs
+    if project is None:
+        logger.error(f"Failed to load FlowerPower project from {base_dir or '.'}")
+        raise typer.Exit(1)
 
-
-
+    try:
+        # Construct RunConfig object from parsed CLI arguments
+        run_config = RunConfig(
             inputs=parsed_inputs,
             final_vars=parsed_final_vars,
             config=parsed_config,
             cache=parsed_cache,
-            executor_cfg=executor,
-            with_adapter_cfg=parsed_with_adapter,
-            cron=cron,
-            interval=interval,
-            date=date,
-            overwrite=overwrite,
-            schedule_id=schedule_id,
+            with_adapter=parsed_with_adapter,
             max_retries=max_retries,
             retry_delay=retry_delay,
             jitter_factor=jitter_factor,
         )
+
+        # Handle executor configuration
+        if executor is not None:
+            run_config.executor.type = executor
 
-
-
-
-
-
-    executor: str | None = typer.Option(
-        None, help="Override executor specified in pipeline configs"
-    ),
-    base_dir: str | None = typer.Option(
-        None, help="Base directory containing pipelines and configurations"
-    ),
-    storage_options: str | None = typer.Option(
-        None, help="Storage options as JSON, dict string, or key=value pairs"
-    ),
-    log_level: str | None = typer.Option(
-        None, help="Logging level (debug, info, warning, error, critical)"
-    ),
-    overwrite: bool = typer.Option(
-        False, help="Overwrite existing schedules if they exist"
-    ),
-):
-    """
-    Schedule all pipelines based on their individual configurations.
-
-    This command reads the configuration files for all pipelines in the project
-    and schedules them based on their individual scheduling settings. This is useful
-    for setting up all scheduled pipelines at once after deployment or system restart.
-
-    Args:
-        executor: Override executor specified in pipeline configs
-        base_dir: Base directory containing pipelines and configurations
-        storage_options: Options for storage backends
-        log_level: Set the logging level
-        overwrite: Whether to overwrite existing schedules
-
-    Examples:
-        # Schedule all pipelines using their configurations
-        $ pipeline schedule-all
-
-        # Force overwrite of existing schedules
-        $ pipeline schedule-all --overwrite
-
-        # Override executor for all pipelines
-        $ pipeline schedule-all --executor distributed
-
-        # Set custom base directory
-        $ pipeline schedule-all --base-dir /path/to/project
-    """
-    parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
-
-    with PipelineManager(
-        base_dir=base_dir,
-        storage_options=parsed_storage_options or {},
-        log_level=log_level,
-    ) as manager:
-        manager.schedule_all(overwrite=overwrite, executor_cfg=executor)
-        logger.info("Scheduled all pipelines based on their configurations.")
+        _ = project.run(name=name, run_config=run_config)
+        logger.info(f"Pipeline '{name}' finished running.")
+    except Exception as e:
+        logger.error(f"Pipeline execution failed: {e}")
+        raise typer.Exit(1)
 
 
 @app.command()
```