FlowerPower 0.20.0__py3-none-any.whl → 0.30.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. flowerpower/__init__.py +2 -6
  2. flowerpower/cfg/__init__.py +4 -11
  3. flowerpower/cfg/base.py +29 -25
  4. flowerpower/cfg/pipeline/__init__.py +3 -3
  5. flowerpower/cfg/pipeline/_schedule.py +32 -0
  6. flowerpower/cfg/pipeline/adapter.py +0 -5
  7. flowerpower/cfg/pipeline/builder.py +377 -0
  8. flowerpower/cfg/pipeline/run.py +89 -0
  9. flowerpower/cfg/project/__init__.py +8 -21
  10. flowerpower/cfg/project/adapter.py +0 -12
  11. flowerpower/cli/__init__.py +2 -28
  12. flowerpower/cli/pipeline.py +10 -4
  13. flowerpower/flowerpower.py +275 -585
  14. flowerpower/pipeline/base.py +19 -10
  15. flowerpower/pipeline/io.py +52 -46
  16. flowerpower/pipeline/manager.py +149 -91
  17. flowerpower/pipeline/pipeline.py +159 -87
  18. flowerpower/pipeline/registry.py +68 -33
  19. flowerpower/pipeline/visualizer.py +4 -4
  20. flowerpower/plugins/{_io → io}/__init__.py +1 -1
  21. flowerpower/settings/__init__.py +0 -2
  22. flowerpower/settings/{backend.py → _backend.py} +0 -19
  23. flowerpower/settings/logging.py +1 -1
  24. flowerpower/utils/logging.py +24 -12
  25. flowerpower/utils/misc.py +17 -0
  26. flowerpower-0.30.0.dist-info/METADATA +451 -0
  27. flowerpower-0.30.0.dist-info/RECORD +42 -0
  28. flowerpower/cfg/pipeline/schedule.py +0 -74
  29. flowerpower/cfg/project/job_queue.py +0 -111
  30. flowerpower/cli/job_queue.py +0 -1329
  31. flowerpower/cli/mqtt.py +0 -174
  32. flowerpower/job_queue/__init__.py +0 -205
  33. flowerpower/job_queue/base.py +0 -611
  34. flowerpower/job_queue/rq/__init__.py +0 -10
  35. flowerpower/job_queue/rq/_trigger.py +0 -37
  36. flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +0 -226
  37. flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -228
  38. flowerpower/job_queue/rq/manager.py +0 -1893
  39. flowerpower/job_queue/rq/setup.py +0 -154
  40. flowerpower/job_queue/rq/utils.py +0 -69
  41. flowerpower/mqtt.py +0 -12
  42. flowerpower/plugins/mqtt/__init__.py +0 -12
  43. flowerpower/plugins/mqtt/cfg.py +0 -17
  44. flowerpower/plugins/mqtt/manager.py +0 -962
  45. flowerpower/settings/job_queue.py +0 -31
  46. flowerpower-0.20.0.dist-info/METADATA +0 -693
  47. flowerpower-0.20.0.dist-info/RECORD +0 -58
  48. {flowerpower-0.20.0.dist-info → flowerpower-0.30.0.dist-info}/WHEEL +0 -0
  49. {flowerpower-0.20.0.dist-info → flowerpower-0.30.0.dist-info}/entry_points.txt +0 -0
  50. {flowerpower-0.20.0.dist-info → flowerpower-0.30.0.dist-info}/licenses/LICENSE +0 -0
  51. {flowerpower-0.20.0.dist-info → flowerpower-0.30.0.dist-info}/top_level.txt +0 -0
flowerpower/flowerpower.py

@@ -2,7 +2,8 @@ import datetime as dt
  import os
  import posixpath
  from pathlib import Path
- from typing import Any, Callable
+ from typing import Any, Callable, Optional, TYPE_CHECKING
+ from functools import wraps

  import rich
  from fsspec_utils import (AbstractFileSystem, BaseStorageOptions,
@@ -11,81 +12,144 @@ from loguru import logger

  from . import settings
  from .cfg import ProjectConfig
- from .cfg.pipeline import ExecutorConfig, WithAdapterConfig
+ from .cfg.pipeline import ExecutorConfig, WithAdapterConfig, RunConfig
  from .cfg.pipeline.adapter import AdapterConfig as PipelineAdapterConfig
  from .cfg.project.adapter import AdapterConfig as ProjectAdapterConfig
- from .job_queue import JobQueueManager
  from .pipeline import PipelineManager
  from .utils.logging import setup_logging

- setup_logging(level=settings.LOG_LEVEL)
+ setup_logging()
+
+ def handle_errors(func):
+     """Decorator to handle exceptions, log them, and re-raise as RuntimeError."""
+     @wraps(func)
+     def wrapper(self, *args, **kwargs):
+         try:
+             return func(self, *args, **kwargs)
+         except Exception as e:
+             # Extract operation name from function name for better logging
+             operation_name = func.__name__.replace('_', ' ').title()
+             # For methods like 'run', we want to log the pipeline name if available
+             if 'name' in kwargs and func.__name__ in ['run']:
+                 logger.error(f"Failed to {operation_name.lower()} pipeline '{kwargs.get('name')}': {e}")
+                 raise RuntimeError(f"Pipeline {operation_name.lower()} failed for '{kwargs.get('name')}': {e}") from e
+             else:
+                 logger.error(f"Failed to {operation_name.lower()}: {e}")
+                 raise RuntimeError(f"{operation_name} failed: {e}") from e
+     return wrapper
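Note that the decorator above derives its messages from the wrapped function's `__name__` and only names the pipeline when `name` arrives as a keyword argument, since it inspects `kwargs` only. A minimal sketch of the resulting behavior (the `Demo` class and its failing body are hypothetical; the import assumes the decorator remains a module-level attribute of `flowerpower/flowerpower.py`):

```python
from flowerpower.flowerpower import handle_errors

class Demo:
    @handle_errors
    def run(self, name: str) -> dict:
        raise ConnectionError("backend unreachable")

try:
    Demo().run(name="etl")  # keyword call -> pipeline-aware branch
except RuntimeError as e:
    # Logged: "Failed to run pipeline 'etl': backend unreachable";
    # the original error is preserved on the exception chain. A positional
    # call, Demo().run("etl"), would hit the generic branch ("Run failed: ...").
    assert isinstance(e.__cause__, ConnectionError)
```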


  class FlowerPowerProject:
      def __init__(
          self,
          pipeline_manager: PipelineManager,
-         job_queue_manager: JobQueueManager | None = None,
      ):
          """
          Initialize a FlowerPower project.
          Args:
              pipeline_manager (PipelineManager | None): Instance of PipelineManager to manage pipelines.
-             job_queue_manager (JobQueueManager | None): Instance of JobQueueManager to manage job queues.
          """
          self.pipeline_manager = pipeline_manager
-         self.job_queue_manager = job_queue_manager
          self.name = self.pipeline_manager.project_cfg.name
-         self._base_dir = self.pipeline_manager._base_dir
-         self._fs = self.pipeline_manager._fs
-         self._storage_options = self.pipeline_manager._storage_options
-         self.job_queue_type = (
-             self.job_queue_manager.cfg.type
-             if self.job_queue_manager is not None
-             else None
-         )
-         self.job_queue_backend = (
-             self.job_queue_manager.cfg.backend
-             if self.job_queue_manager is not None
-             else None
-         )
+
+     def _validate_pipeline_name(self, name: str) -> None:
+         """Validate the pipeline name argument."""
+         if not name or not isinstance(name, str):
+             raise ValueError("Pipeline 'name' must be a non-empty string")
+         if name.strip() != name:
+             raise ValueError(
+                 "Pipeline 'name' cannot have leading or trailing whitespace"
+             )

      def _inject_dependencies(self):
          """Inject dependencies between managers for proper architecture.

          This method establishes the correct dependency flow:
          - Project context is properly established for pipeline execution
-         - JobQueueManager automatically creates its own PipelineRegistry via property
          """
          # Store project reference for pipeline context
          # This will be used when creating Pipeline instances
          self.pipeline_manager._project_context = self

-         # Note: JobQueueManager now creates its own PipelineRegistry automatically
-         # via the pipeline_registry property, so no manual injection needed
+     def _merge_run_config_with_kwargs(self, run_config: RunConfig, kwargs: dict) -> RunConfig:
+         """Merge kwargs into a RunConfig object.
+
+         This helper method updates the RunConfig object with values from kwargs,
+         handling different types of attributes appropriately.
+
+         Args:
+             run_config: The RunConfig object to update
+             kwargs: Dictionary of additional parameters to merge
+
+         Returns:
+             RunConfig: Updated RunConfig object
+         """
+         # Handle dictionary-like attributes with update or deep merge
+         if 'inputs' in kwargs and kwargs['inputs'] is not None:
+             if run_config.inputs is None:
+                 run_config.inputs = kwargs['inputs']
+             else:
+                 run_config.inputs.update(kwargs['inputs'])
+
+         if 'config' in kwargs and kwargs['config'] is not None:
+             if run_config.config is None:
+                 run_config.config = kwargs['config']
+             else:
+                 run_config.config.update(kwargs['config'])
+
+         if 'cache' in kwargs and kwargs['cache'] is not None:
+             run_config.cache = kwargs['cache']
+
+         if 'adapter' in kwargs and kwargs['adapter'] is not None:
+             if run_config.adapter is None:
+                 run_config.adapter = kwargs['adapter']
+             else:
+                 run_config.adapter.update(kwargs['adapter'])
+
+         # Handle executor_cfg - convert string/dict to ExecutorConfig if needed
+         if 'executor_cfg' in kwargs and kwargs['executor_cfg'] is not None:
+             executor_cfg = kwargs['executor_cfg']
+             if isinstance(executor_cfg, str):
+                 run_config.executor = ExecutorConfig(type=executor_cfg)
+             elif isinstance(executor_cfg, dict):
+                 run_config.executor = ExecutorConfig.from_dict(executor_cfg)
+             elif isinstance(executor_cfg, ExecutorConfig):
+                 run_config.executor = executor_cfg
+
+         # Handle adapter configurations
+         if 'with_adapter_cfg' in kwargs and kwargs['with_adapter_cfg'] is not None:
+             with_adapter_cfg = kwargs['with_adapter_cfg']
+             if isinstance(with_adapter_cfg, dict):
+                 run_config.with_adapter = WithAdapterConfig.from_dict(with_adapter_cfg)
+             elif isinstance(with_adapter_cfg, WithAdapterConfig):
+                 run_config.with_adapter = with_adapter_cfg
+
+         if 'pipeline_adapter_cfg' in kwargs and kwargs['pipeline_adapter_cfg'] is not None:
+             run_config.pipeline_adapter_cfg = kwargs['pipeline_adapter_cfg']
+
+         if 'project_adapter_cfg' in kwargs and kwargs['project_adapter_cfg'] is not None:
+             run_config.project_adapter_cfg = kwargs['project_adapter_cfg']
+
+         # Handle simple attributes
+         simple_attrs = [
+             'final_vars', 'reload', 'log_level', 'max_retries', 'retry_delay',
+             'jitter_factor', 'retry_exceptions', 'on_success', 'on_failure'
+         ]
+
+         for attr in simple_attrs:
+             if attr in kwargs and kwargs[attr] is not None:
+                 setattr(run_config, attr, kwargs[attr])
+
+         return run_config
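The merge above is additive for the dict-like fields (`inputs`, `config`, `adapter`: existing keys survive unless overridden) and last-writer-wins for scalar fields, while `executor_cfg` strings and dicts are promoted to `ExecutorConfig`. A sketch of the observable behavior, assuming `RunConfig` accepts these fields as constructor arguments; the helper is private, so calling it directly is for illustration only:

```python
from flowerpower.cfg.pipeline import RunConfig
from flowerpower.flowerpower import FlowerPowerProject

project = FlowerPowerProject.load(".")
cfg = RunConfig(inputs={"date": "2025-04-28", "region": "eu"})

merged = project._merge_run_config_with_kwargs(
    cfg,
    {"inputs": {"date": "2025-05-01"}, "executor_cfg": "threadpool"},
)

# dict-like fields merge key by key: "region" survives, "date" is overridden
assert merged.inputs == {"date": "2025-05-01", "region": "eu"}
# the executor string was promoted to a structured config
assert merged.executor.type == "threadpool"
```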

      # --- Convenience Methods for Pipeline Operations ---

+     @handle_errors
      def run(
          self,
          name: str,
-         inputs: dict | None = None,
-         final_vars: list[str] | None = None,
-         config: dict | None = None,
-         cache: dict | None = None,
-         executor_cfg: str | dict | ExecutorConfig | None = None,
-         with_adapter_cfg: dict | WithAdapterConfig | None = None,
-         pipeline_adapter_cfg: dict | PipelineAdapterConfig | None = None,
-         project_adapter_cfg: dict | ProjectAdapterConfig | None = None,
-         adapter: dict[str, Any] | None = None,
-         reload: bool = False,
-         log_level: str | None = None,
-         max_retries: int | None = None,
-         retry_delay: float | None = None,
-         jitter_factor: float | None = None,
-         retry_exceptions: tuple | list | None = None,
-         on_success: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
-         on_failure: Callable | tuple[Callable, tuple | None, dict | None] | None = None,
+         run_config: RunConfig | None = None,
+         **kwargs
      ) -> dict[str, Any]:
          """Execute a pipeline synchronously and return its results.

@@ -94,30 +158,36 @@ class FlowerPowerProject:

          Args:
              name: Name of the pipeline to run. Must be a valid identifier.
-             inputs: Override pipeline input values. Example: {"data_date": "2025-04-28"}
-             final_vars: Specify which output variables to return. Example: ["model", "metrics"]
-             config: Configuration for Hamilton pipeline executor. Example: {"model": "LogisticRegression"}
-             cache: Cache configuration for results. Example: {"recompute": ["node1", "final_node"]}
-             executor_cfg: Execution configuration, can be:
-                 - str: Executor name, e.g. "threadpool", "local"
-                 - dict: Raw config, e.g. {"type": "threadpool", "max_workers": 4}
-                 - ExecutorConfig: Structured config object
-             with_adapter_cfg: Adapter settings for pipeline execution.
-                 Example: {"opentelemetry": True, "tracker": False}
-             pipeline_adapter_cfg: Pipeline-specific adapter settings.
-                 Example: {"tracker": {"project_id": "123", "tags": {"env": "prod"}}}
-             project_adapter_cfg: Project-level adapter settings.
-                 Example: {"opentelemetry": {"host": "http://localhost:4317"}}
-             adapter: Custom adapter instance for pipeline
-                 Example: {"ray_graph_adapter": RayGraphAdapter()}
-             reload: Force reload of pipeline configuration.
-             log_level: Logging level for the execution. Valid values: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
-             max_retries: Maximum number of retries for execution.
-             retry_delay: Delay between retries in seconds.
-             jitter_factor: Random jitter factor to add to retry delay
-             retry_exceptions: Exceptions that trigger a retry.
-             on_success: Callback to run on successful pipeline execution.
-             on_failure: Callback to run on pipeline execution failure.
+             run_config: Run configuration object containing all execution parameters.
+                 If None, the default configuration from the pipeline will be used.
+             **kwargs: Additional parameters to override the run_config. Supported parameters include:
+                 inputs (dict | None): Override pipeline input values. Example: {"data_date": "2025-04-28"}
+                 final_vars (list[str] | None): Specify which output variables to return.
+                     Example: ["model", "metrics"]
+                 config (dict | None): Configuration for Hamilton pipeline executor.
+                     Example: {"model": "LogisticRegression"}
+                 cache (dict | None): Cache configuration for results. Example: {"recompute": ["node1", "final_node"]}
+                 executor_cfg (str | dict | ExecutorConfig | None): Execution configuration, can be:
+                     - str: Executor name, e.g. "threadpool", "local"
+                     - dict: Raw config, e.g. {"type": "threadpool", "max_workers": 4}
+                     - ExecutorConfig: Structured config object
+                 with_adapter_cfg (dict | WithAdapterConfig | None): Adapter settings for pipeline execution.
+                     Example: {"opentelemetry": True, "tracker": False}
+                 pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline-specific adapter settings.
+                     Example: {"tracker": {"project_id": "123", "tags": {"env": "prod"}}}
+                 project_adapter_cfg (dict | ProjectAdapterConfig | None): Project-level adapter settings.
+                     Example: {"opentelemetry": {"host": "http://localhost:4317"}}
+                 adapter (dict[str, Any] | None): Custom adapter instance for pipeline
+                     Example: {"ray_graph_adapter": RayGraphAdapter()}
+                 reload (bool): Force reload of pipeline configuration.
+                 log_level (str | None): Logging level for the execution. Default None uses project config.
+                     Valid values: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
+                 max_retries (int): Maximum number of retries for execution.
+                 retry_delay (float): Delay between retries in seconds.
+                 jitter_factor (float): Random jitter factor to add to retry delay
+                 retry_exceptions (tuple): Exceptions that trigger a retry.
+                 on_success (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful pipeline execution.
+                 on_failure (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on pipeline execution failure.

          Returns:
              dict[str, Any]: Pipeline execution results, mapping output variable names to their computed values.
@@ -134,11 +204,19 @@
              # Simple execution
              result = project.run("my_pipeline")

-             # With custom inputs
+             # Run with custom RunConfig
+             from flowerpower.cfg.pipeline.run import RunConfig
+             config = RunConfig(inputs={"date": "2025-04-28"}, final_vars=["result"])
+             result = project.run("ml_pipeline", run_config=config)
+
+             # Complex run with kwargs overrides
              result = project.run(
                  "ml_pipeline",
-                 inputs={"data_date": "2025-01-01"},
-                 final_vars=["model", "metrics"]
+                 inputs={"training_date": "2025-04-28"},
+                 final_vars=["model", "metrics"],
+                 executor_cfg={"type": "threadpool", "max_workers": 4},
+                 with_adapter_cfg={"tracker": True},
+                 reload=True
              )
              ```
          """
@@ -150,452 +228,42 @@
              )

          # Validate required arguments
-         if not name or not isinstance(name, str):
-             raise ValueError("Pipeline 'name' must be a non-empty string")
-
-         if name.strip() != name:
-             raise ValueError(
-                 "Pipeline 'name' cannot have leading or trailing whitespace"
-             )
-
-         # Validate optional arguments
-         if inputs is not None and not isinstance(inputs, dict):
-             raise TypeError("'inputs' must be a dictionary")
-
-         if final_vars is not None and not isinstance(final_vars, list):
-             raise TypeError("'final_vars' must be a list of strings")
-
-         if final_vars is not None:
-             for var in final_vars:
-                 if not isinstance(var, str):
-                     raise TypeError("All items in 'final_vars' must be strings")
-
-         try:
-             return self.pipeline_manager.run(
-                 name=name,
-                 inputs=inputs,
-                 final_vars=final_vars,
-                 config=config,
-                 cache=cache,
-                 executor_cfg=executor_cfg,
-                 with_adapter_cfg=with_adapter_cfg,
-                 pipeline_adapter_cfg=pipeline_adapter_cfg,
-                 project_adapter_cfg=project_adapter_cfg,
-                 adapter=adapter,
-                 reload=reload,
-                 log_level=log_level,
-                 max_retries=max_retries,
-                 retry_delay=retry_delay,
-                 jitter_factor=jitter_factor,
-                 retry_exceptions=retry_exceptions,
-                 on_success=on_success,
-                 on_failure=on_failure,
-             )
-         except Exception as e:
-             # Log error and re-raise with context
-             logger.error(f"Failed to execute pipeline '{name}': {e}")
-             raise RuntimeError(f"Pipeline execution failed for '{name}': {e}") from e
-
-     def enqueue(
-         self,
-         name: str,
-         *args,
-         **kwargs,
-     ):
-         """Enqueue a pipeline for execution via the job queue.
-
-         This is a convenience method that delegates to the job queue manager's
-         enqueue_pipeline method. It provides asynchronous pipeline execution.
-
-         Args:
-             name: Name of the pipeline to enqueue
-             *args: Additional positional arguments for job execution
-             **kwargs: Keyword arguments for pipeline execution and job queue options.
-                 Supports all parameters from pipeline_manager.run() plus job queue specific options:
-                 - run_in: Schedule the job to run after a delay
-                 - run_at: Schedule the job to run at a specific datetime
-                 - queue_name: Queue to use (for RQ)
-                 - timeout: Job execution timeout
-                 - retry: Number of retries
-                 - result_ttl: Result time to live
-                 - ttl: Job time to live
-
-         Returns:
-             Job ID or result depending on implementation, or None if job queue not configured
-
-         Raises:
-             RuntimeError: If job queue manager is not configured
-
-         Example:
-             ```python
-             project = FlowerPowerProject.load(".")
-
-             # Immediate execution via job queue
-             job_id = project.enqueue("my_pipeline", inputs={"date": "today"})
-
-             # Delayed execution
-             job_id = project.enqueue("my_pipeline", inputs={"date": "today"}, run_in=300)
-
-             # Scheduled execution
-             from datetime import datetime
-             job_id = project.enqueue(
-                 "my_pipeline",
-                 inputs={"date": "today"},
-                 run_at=datetime(2025, 1, 1, 9, 0)
-             )
-             ```
-         """
-         # Validate job queue manager is available
-         if self.job_queue_manager is None:
-             raise RuntimeError(
-                 "Job queue manager is not configured. Cannot enqueue pipeline jobs. "
-                 "Ensure the project was loaded with a job queue configuration."
-             )
-
-         # Validate required arguments
-         if not name or not isinstance(name, str):
-             raise ValueError("Pipeline 'name' must be a non-empty string")
-
-         if name.strip() != name:
-             raise ValueError(
-                 "Pipeline 'name' cannot have leading or trailing whitespace"
-             )
-
-         try:
-             return self.job_queue_manager.enqueue_pipeline(
-                 name=name, project_context=self, *args, **kwargs
-             )
-         except Exception as e:
-             # Log error and re-raise with context
-             logger.error(f"Failed to enqueue pipeline '{name}': {e}")
-             raise RuntimeError(f"Pipeline enqueue failed for '{name}': {e}") from e
-
-     def schedule(
-         self,
-         name: str,
-         *args,
-         **kwargs,
-     ):
-         """Schedule a pipeline for recurring or future execution.
-
-         This is a convenience method that delegates to the job queue manager's
-         schedule_pipeline method. It provides scheduled pipeline execution.
-
-         Args:
-             name: Name of the pipeline to schedule
-             *args: Additional positional arguments for scheduling
-             **kwargs: Keyword arguments for pipeline execution and scheduling options.
-                 Supports all parameters from pipeline_manager.run() plus scheduling options:
-                 - cron: Cron expression for recurring execution (e.g., "0 9 * * *")
-                 - interval: Time interval for recurring execution (int seconds or dict)
-                 - date: Future date for one-time execution (datetime or ISO string)
-                 - schedule_id: Unique identifier for the schedule
-                 - overwrite: Whether to overwrite existing schedule with same ID
-
-         Returns:
-             Schedule ID or job ID depending on implementation, or None if job queue not configured
-
-         Raises:
-             RuntimeError: If job queue manager is not configured
-
-         Example:
-             ```python
-             project = FlowerPowerProject.load(".")
-
-             # Daily schedule with cron
-             schedule_id = project.schedule(
-                 "daily_metrics",
-                 cron="0 9 * * *",  # 9 AM daily
-                 inputs={"date": "{{ execution_date }}"}
-             )
-
-             # Interval-based schedule
-             schedule_id = project.schedule(
-                 "monitoring",
-                 interval={"minutes": 15},
-                 inputs={"check_type": "health"}
-             )
-
-             # Future one-time execution
-             from datetime import datetime, timedelta
-             future_date = datetime.now() + timedelta(days=1)
-             schedule_id = project.schedule(
-                 "batch_process",
-                 date=future_date,
-                 inputs={"process_date": "tomorrow"}
-             )
-             ```
-         """
-         # Validate job queue manager is available
-         if self.job_queue_manager is None:
-             raise RuntimeError(
-                 "Job queue manager is not configured. Cannot schedule pipeline jobs. "
-                 "Ensure the project was loaded with a job queue configuration."
-             )
-
-         # Validate required arguments
-         if not name or not isinstance(name, str):
-             raise ValueError("Pipeline 'name' must be a non-empty string")
-
-         if name.strip() != name:
-             raise ValueError(
-                 "Pipeline 'name' cannot have leading or trailing whitespace"
-             )
-
-         try:
-             return self.job_queue_manager.schedule_pipeline(
-                 name=name, project_context=self, *args, **kwargs
-             )
-         except Exception as e:
-             # Log error and re-raise with context
-             logger.error(f"Failed to schedule pipeline '{name}': {e}")
-             raise RuntimeError(f"Pipeline schedule failed for '{name}': {e}") from e
-
-     def start_worker(
-         self,
-         background: bool = False,
-         queue_names: list[str] | None = None,
-         with_scheduler: bool = True,
-         **kwargs: Any,
-     ) -> None:
-         """Start a worker process for processing jobs from the queues.
-
-         This is a convenience method that delegates to the job queue manager's
-         start_worker method.
-
-         Args:
-             background: If True, runs the worker in a non-blocking background mode.
-                 If False, runs in the current process and blocks until stopped.
-             queue_names: List of queue names to process. If None, processes all
-                 queues defined in the backend configuration.
-             with_scheduler: Whether to include the scheduler queue for processing
-                 scheduled jobs (if supported by the backend).
-             **kwargs: Additional worker configuration options specific to the job queue backend.
-
-         Raises:
-             RuntimeError: If job queue manager is not configured
-
-         Example:
-             ```python
-             project = FlowerPowerProject.load(".")
-
-             # Start worker in foreground (blocks)
-             project.start_worker()
-
-             # Start worker in background
-             project.start_worker(background=True)
-
-             # Start worker for specific queues
-             project.start_worker(queue_names=["high_priority", "default"])
-             ```
-         """
-         # Validate job queue manager is available
-         if self.job_queue_manager is None:
-             raise RuntimeError(
-                 "Job queue manager is not configured. Cannot start worker. "
-                 "Ensure the project was loaded with a job queue configuration."
-             )
-
-         # Validate optional arguments
-         if queue_names is not None and not isinstance(queue_names, list):
-             raise TypeError("'queue_names' must be a list of strings")
-
-         if queue_names is not None:
-             for queue_name in queue_names:
-                 if not isinstance(queue_name, str):
-                     raise TypeError("All items in 'queue_names' must be strings")
-
-         if not isinstance(background, bool):
-             raise TypeError("'background' must be a boolean")
-
-         if not isinstance(with_scheduler, bool):
-             raise TypeError("'with_scheduler' must be a boolean")
-
-         try:
-             return self.job_queue_manager.start_worker(
-                 background=background,
-                 queue_names=queue_names,
-                 with_scheduler=with_scheduler,
-                 **kwargs,
-             )
-         except Exception as e:
-             # Log error and re-raise with context
-             logger.error(f"Failed to start worker: {e}")
-             raise RuntimeError(f"Worker start failed: {e}") from e
-
-     def stop_worker(self) -> None:
-         """Stop the worker process.
-
-         This is a convenience method that delegates to the job queue manager's
-         stop_worker method.
-
-         Raises:
-             RuntimeError: If job queue manager is not configured
-
-         Example:
-             ```python
-             project = FlowerPowerProject.load(".")
-             project.stop_worker()
-             ```
-         """
-         # Validate job queue manager is available
-         if self.job_queue_manager is None:
-             raise RuntimeError(
-                 "Job queue manager is not configured. Cannot stop worker. "
-                 "Ensure the project was loaded with a job queue configuration."
-             )
-
-         try:
-             return self.job_queue_manager.stop_worker()
-         except Exception as e:
-             # Log error and re-raise with context
-             logger.error(f"Failed to stop worker: {e}")
-             raise RuntimeError(f"Worker stop failed: {e}") from e
-
-     def start_worker_pool(
-         self,
-         num_workers: int | None = None,
-         background: bool = False,
-         queue_names: list[str] | None = None,
-         with_scheduler: bool = True,
-         **kwargs: Any,
-     ) -> None:
-         """Start a pool of worker processes to handle jobs in parallel.
-
-         This is a convenience method that delegates to the job queue manager's
-         start_worker_pool method.
-
-         Args:
-             num_workers: Number of worker processes to start. If None, uses CPU
-                 count or backend-specific default.
-             background: If True, runs the worker pool in a non-blocking background mode.
-                 If False, runs in the current process and blocks until stopped.
-             queue_names: List of queue names to process. If None, processes all
-                 queues defined in the backend configuration.
-             with_scheduler: Whether to include the scheduler queue for processing
-                 scheduled jobs (if supported by the backend).
-             **kwargs: Additional worker pool configuration options specific to the job queue backend.
-
-         Raises:
-             RuntimeError: If job queue manager is not configured
-
-         Example:
-             ```python
-             project = FlowerPowerProject.load(".")
-
-             # Start worker pool with default number of workers
-             project.start_worker_pool()
-
-             # Start 4 workers in background
-             project.start_worker_pool(num_workers=4, background=True)
-
-             # Start worker pool for specific queues
-             project.start_worker_pool(
-                 num_workers=2,
-                 queue_names=["high_priority", "default"]
-             )
-             ```
-         """
-         # Validate job queue manager is available
-         if self.job_queue_manager is None:
-             raise RuntimeError(
-                 "Job queue manager is not configured. Cannot start worker pool. "
-                 "Ensure the project was loaded with a job queue configuration."
-             )
-
-         # Validate optional arguments
-         if num_workers is not None and (
-             not isinstance(num_workers, int) or num_workers <= 0
-         ):
-             raise ValueError("'num_workers' must be a positive integer")
-
-         if queue_names is not None and not isinstance(queue_names, list):
-             raise TypeError("'queue_names' must be a list of strings")
-
-         if queue_names is not None:
-             for queue_name in queue_names:
-                 if not isinstance(queue_name, str):
-                     raise TypeError("All items in 'queue_names' must be strings")
-
-         if not isinstance(background, bool):
-             raise TypeError("'background' must be a boolean")
-
-         if not isinstance(with_scheduler, bool):
-             raise TypeError("'with_scheduler' must be a boolean")
-
-         try:
-             return self.job_queue_manager.start_worker_pool(
-                 num_workers=num_workers,
-                 background=background,
-                 queue_names=queue_names,
-                 with_scheduler=with_scheduler,
-                 **kwargs,
-             )
-         except Exception as e:
-             # Log error and re-raise with context
-             logger.error(f"Failed to start worker pool: {e}")
-             raise RuntimeError(f"Worker pool start failed: {e}") from e
-
-     def stop_worker_pool(self) -> None:
-         """Stop all worker processes in the worker pool.
-
-         This is a convenience method that delegates to the job queue manager's
-         stop_worker_pool method.
-
-         Raises:
-             RuntimeError: If job queue manager is not configured
-
-         Example:
-             ```python
-             project = FlowerPowerProject.load(".")
-             project.stop_worker_pool()
-             ```
-         """
-         # Validate job queue manager is available
-         if self.job_queue_manager is None:
-             raise RuntimeError(
-                 "Job queue manager is not configured. Cannot stop worker pool. "
-                 "Ensure the project was loaded with a job queue configuration."
-             )
-
-         try:
-             return self.job_queue_manager.stop_worker_pool()
-         except Exception as e:
-             # Log error and re-raise with context
-             logger.error(f"Failed to stop worker pool: {e}")
-             raise RuntimeError(f"Worker pool stop failed: {e}") from e
+         self._validate_pipeline_name(name)
+
+         # Initialize run_config - use provided config or create empty one
+         run_config = run_config or RunConfig()
+
+         # Merge kwargs into run_config
+         if kwargs:
+             run_config = self._merge_run_config_with_kwargs(run_config, kwargs)
+
+         return self.pipeline_manager.run(
+             name=name,
+             run_config=run_config,
+         )
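After this rewrite, validation and config merging happen inline while all error wrapping comes from the `@handle_errors` decorator, so callers observe a single exception type. A hedged sketch of the calling pattern (names and values are illustrative):

```python
from flowerpower.flowerpower import FlowerPowerProject

project = FlowerPowerProject.load(".")
try:
    results = project.run("daily_metrics", inputs={"date": "2025-05-01"})
except RuntimeError as e:
    # Any failure inside run (including a ValueError from name validation)
    # is logged and re-raised as RuntimeError, with the original exception
    # preserved on e.__cause__.
    print(e.__cause__)
```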

      @staticmethod
-     def _check_project_exists(base_dir: str, fs: AbstractFileSystem | None = None):
+     def _check_project_exists(base_dir: str, fs: AbstractFileSystem | None = None) -> tuple[bool, str]:
          if fs is None:
              fs = filesystem(base_dir, dirfs=True)
-         if isinstance(fs, DirFileSystem):
-             if not fs.exists("."):
-                 rich.print(
-                     "[red]Project directory does not exist. Please initialize it first.[/red]"
-                 )
-                 return False
-             if not fs.exists("conf") or not fs.exists("pipelines"):
-                 rich.print(
-                     "[red]Project configuration or pipelines directory is missing[/red]"
-                 )
-                 return False
-         else:
-             if not fs.exists(base_dir):
-                 rich.print(
-                     "[red]Project directory does not exist. Please initialize it first.[/red]"
-                 )
-                 return False
-             if not fs.exists(posixpath.join(base_dir, "conf")) or not fs.exists(
-                 posixpath.join(base_dir, "pipelines")
-             ):
-                 rich.print(
-                     "[red]Project configuration or pipelines directory is missing[/red]"
-                 )
-                 return False
+
+         # Determine the root path for existence checks
+         # For DirFileSystem, paths are relative to its root, so we check "." for the project root.
+         # For other filesystems, we use the base_dir directly.
+         root_path = "." if isinstance(fs, DirFileSystem) else base_dir
+
+         if not fs.exists(root_path):
+             return False, "Project directory does not exist. Please initialize it first."
+
+         # Check for required subdirectories
+         config_path = posixpath.join(root_path, settings.CONFIG_DIR)
+         pipelines_path = posixpath.join(root_path, settings.PIPELINES_DIR)
+
+         if not fs.exists(config_path) or not fs.exists(pipelines_path):
+             return False, "Project configuration or pipelines directory is missing"

          logger.debug(f"Project exists at {base_dir}")
-         return True
+         return True, ""

      @classmethod
      def load(
@@ -620,7 +288,7 @@ class FlowerPowerProject:
          Raises:
              FileNotFoundError: If the project does not exist at the specified base directory.
          """
-         if log_level:
+         if log_level is not None:
              setup_logging(level=log_level)

          base_dir = base_dir or str(Path.cwd())
@@ -642,7 +310,8 @@
                  cache_storage=cache_storage,
              )

-         if cls._check_project_exists(base_dir, fs):
+         project_exists, message = cls._check_project_exists(base_dir, fs)
+         if project_exists:
              logger.info(f"Loading FlowerPower project from {base_dir}")
              pipeline_manager = PipelineManager(
                  base_dir=base_dir,
@@ -651,17 +320,9 @@
                  log_level=log_level,
              )

-             job_queue_manager = JobQueueManager(
-                 name=f"{pipeline_manager.project_cfg.name}_job_queue",
-                 base_dir=base_dir,
-                 storage_options=storage_options,
-                 fs=fs,
-             )
-
              # Create the project instance
              project = cls(
                  pipeline_manager=pipeline_manager,
-                 job_queue_manager=job_queue_manager,
              )

              # Inject dependencies after creation to avoid circular imports
@@ -669,21 +330,20 @@

              return project
          else:
-             logger.error(
-                 f"Project does not exist at {base_dir}. Please initialize it first. Use `FlowerPowerProject.init()` to create a new project."
-             )
+             rich.print(f"[red]{message}[/red]")
+             logger.error(message)
              return None

      @classmethod
-     def init(
+     def new(
          cls,
          name: str | None = None,
          base_dir: str | None = None,
          storage_options: dict | BaseStorageOptions | None = {},
          fs: AbstractFileSystem | None = None,
-         job_queue_type: str = settings.JOB_QUEUE_TYPE,
          hooks_dir: str = settings.HOOKS_DIR,
          log_level: str | None = None,
+         overwrite: bool = False,
      ) -> "FlowerPowerProject":
          """
          Initialize a new FlowerPower project.
@@ -693,12 +353,12 @@
              base_dir (str | None): The base directory where the project will be created. If None, it defaults to the current working directory.
              storage_options (dict | BaseStorageOptions | None): Storage options for the filesystem.
              fs (AbstractFileSystem | None): An instance of AbstractFileSystem to use for file operations.
-             job_queue_type (str): The type of job queue to use for the project.
              hooks_dir (str): The directory where the project hooks will be stored.
+             overwrite (bool): Whether to overwrite an existing project at the specified base directory.
          Returns:
              FlowerPowerProject: An instance of FlowerPowerProject initialized with the new project.
          Raises:
-             FileExistsError: If the project already exists at the specified base directory.
+             FileExistsError: If the project already exists at the specified base directory and overwrite is False.
          """
          if log_level:
              setup_logging(level=log_level)
@@ -717,11 +377,39 @@
                  storage_options=storage_options,
              )

+         # Check if project already exists
+         project_exists, message = cls._check_project_exists(base_dir, fs)
+         if project_exists:
+             if overwrite:
+                 # Delete existing project files and directories
+                 logger.info(f"Overwriting existing project at {base_dir}")
+
+                 # Remove directories recursively
+                 config_path = f"{settings.CONFIG_DIR}"
+                 pipelines_path = settings.PIPELINES_DIR
+
+                 if fs.exists(config_path):
+                     fs.rm(config_path, recursive=True)
+                 if fs.exists(pipelines_path):
+                     fs.rm(pipelines_path, recursive=True)
+                 if fs.exists(hooks_dir):
+                     fs.rm(hooks_dir, recursive=True)
+
+                 # Remove README.md file
+                 if fs.exists("README.md"):
+                     fs.rm("README.md")
+             else:
+                 error_msg = f"Project already exists at {base_dir}. Use overwrite=True to overwrite the existing project."
+                 rich.print(f"[red]{error_msg}[/red]")
+                 logger.error(error_msg)
+                 raise FileExistsError(error_msg)
+
          fs.makedirs(f"{settings.CONFIG_DIR}/pipelines", exist_ok=True)
          fs.makedirs(settings.PIPELINES_DIR, exist_ok=True)
          fs.makedirs(hooks_dir, exist_ok=True)

-         cfg = ProjectConfig.load(name=name, job_queue_type=job_queue_type, fs=fs)
+         # Load project configuration
+         cfg = ProjectConfig.load(name=name, fs=fs)

          with fs.open("README.md", "w") as f:
              f.write(
@@ -729,7 +417,6 @@
                  f"**created on**\n\n*{dt.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*\n\n"
              )
          cfg.save(fs=fs)
-         os.chdir(posixpath.join(base_dir, name))

          rich.print(
              f"\n✨ Initialized FlowerPower project [bold blue]{name}[/bold blue] "
@@ -776,87 +463,90 @@
              base_dir=base_dir,
              storage_options=storage_options,
              fs=fs,
-             log_level=settings.LOG_LEVEL,
+             log_level=log_level,
          )


- class FlowerPower:
-     def __new__(
-         self,
-         name: str | None = None,
-         base_dir: str | None = None,
-         storage_options: dict | BaseStorageOptions | None = {},
-         fs: AbstractFileSystem | None = None,
-         job_queue_type: str = settings.JOB_QUEUE_TYPE,
-         hooks_dir: str = settings.HOOKS_DIR,
-     ) -> FlowerPowerProject:
-         """
-         Initialize a FlowerPower project.
-
-         Args:
-             name (str | None): The name of the project. If None, it defaults to the current directory name.
-             base_dir (str | None): The base directory where the project will be created. If None, it defaults to the current working directory.
-             storage_options (dict | BaseStorageOptions | None): Storage options for the filesystem.
-             fs (AbstractFileSystem | None): An instance of AbstractFileSystem to use for file operations.
-             job_queue_type (str): The type of job queue to use for the project.
-             hooks_dir (str): The directory where the project hooks will be stored.
-
-         Returns:
-             FlowerPowerProject: An instance of FlowerPowerProject initialized with the new project.
-         """
-         if FlowerPowerProject._check_project_exists(base_dir, fs=fs):
-             return FlowerPowerProject.load(
-                 base_dir=base_dir,
-                 storage_options=storage_options,
-                 fs=fs,
-             )
-         else:
-             return FlowerPowerProject.init(
-                 name=name,
-                 base_dir=base_dir,
-                 storage_options=storage_options,
-                 fs=fs,
-                 job_queue_type=job_queue_type,
-                 hooks_dir=hooks_dir,
-             )
-
-     def __call__(self) -> FlowerPowerProject:
-         """
-         Call the FlowerPower instance to return the current project.
-
-         Returns:
-             FlowerPowerProject: The current FlowerPower project.
-         """
-         return self
-
-
- def init(
+ def initialize_project(
      name: str | None = None,
      base_dir: str | None = None,
      storage_options: dict | BaseStorageOptions | None = {},
      fs: AbstractFileSystem | None = None,
-     job_queue_type: str = settings.JOB_QUEUE_TYPE,
      hooks_dir: str = settings.HOOKS_DIR,
+     log_level: str | None = None,
  ) -> FlowerPowerProject:
      """
-     Initialize a FlowerPower project.
-
+     Initialize a new FlowerPower project.
+
+
+     This is a standalone function that directly calls FlowerPowerProject.new
+     with the same arguments, providing easier, separately importable access.
+
      Args:
          name (str | None): The name of the project. If None, it defaults to the current directory name.
          base_dir (str | None): The base directory where the project will be created. If None, it defaults to the current working directory.
          storage_options (dict | BaseStorageOptions | None): Storage options for the filesystem.
          fs (AbstractFileSystem | None): An instance of AbstractFileSystem to use for file operations.
-         job_queue_type (str): The type of job queue to use for the project.
          hooks_dir (str): The directory where the project hooks will be stored.
-
+         log_level (str | None): The logging level to set for the project.
+
      Returns:
          FlowerPowerProject: An instance of FlowerPowerProject initialized with the new project.
      """
-     return FlowerPowerProject.init(
+     return FlowerPowerProject.new(
          name=name,
          base_dir=base_dir,
          storage_options=storage_options,
          fs=fs,
-         job_queue_type=job_queue_type,
          hooks_dir=hooks_dir,
+         log_level=log_level,
      )
+
+ def create_project(
+     name: str | None = None,
+     base_dir: str | None = None,
+     storage_options: dict | BaseStorageOptions | None = {},
+     fs: AbstractFileSystem | None = None,
+     hooks_dir: str = settings.HOOKS_DIR,
+ ) -> FlowerPowerProject:
+     """
+     Create or load a FlowerPower project.
+
+     If a project exists at the specified base_dir, it will be loaded.
+     Otherwise, a new project will be initialized.
+
+     Args:
+         name (str | None): The name of the project. If None, it defaults to the current directory name.
+         base_dir (str | None): The base directory where the project will be created or loaded from.
+             If None, it defaults to the current working directory.
+         storage_options (dict | BaseStorageOptions | None): Storage options for the filesystem.
+         fs (AbstractFileSystem | None): An instance of AbstractFileSystem to use for file operations.
+         hooks_dir (str): The directory where the project hooks will be stored.
+
+     Returns:
+         FlowerPowerProject: An instance of FlowerPowerProject.
+     """
+     # Note: _check_project_exists expects base_dir to be a string.
+     # If base_dir is None, it will be handled by _check_project_exists or the load/init methods.
+     # We pass fs directly, as _check_project_exists can handle fs being None.
+     project_exists, _ = FlowerPowerProject._check_project_exists(base_dir or str(Path.cwd()), fs=fs)
+
+     if project_exists:
+         return FlowerPowerProject.load(
+             base_dir=base_dir,
+             storage_options=storage_options,
+             fs=fs,
+         )
+     else:
+         error_message = "Project does not exist. Use `initialize_project()` or `FlowerPowerProject.new()` to create it."
+         rich.print(f"[red]{error_message}[/red]")
+         logger.error(error_message)
+         raise FileNotFoundError(error_message)
+
+ # Alias for backward compatibility or alternative naming
+ FlowerPower = create_project
+
+
+ # The standalone init function is removed as it was a direct pass-through
+ # to FlowerPowerProject.new(). Users can now use FlowerPowerProject.new() directly
+ # or the new create_project() function which handles both loading and initialization.
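Taken together, the module now exposes one explicit initializer and one explicit loader, with `FlowerPower` kept as an alias of `create_project`. A sketch of the intended usage, assuming these names remain importable from `flowerpower.flowerpower` (paths and names are illustrative):

```python
from flowerpower.flowerpower import (FlowerPower, create_project,
                                     initialize_project)

# Create a brand-new project (FileExistsError if one already exists there)
project = initialize_project(name="analytics", base_dir="./analytics")

# Load an existing project; per the code above, create_project raises
# FileNotFoundError when no project exists rather than initializing one
same_project = create_project(base_dir="./analytics")

# FlowerPower is an alias of create_project, so this call is equivalent
aliased = FlowerPower(base_dir="./analytics")
```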