FlowerPower 0.11.6.20__py3-none-any.whl → 0.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. flowerpower/__init__.py +2 -6
  2. flowerpower/cfg/__init__.py +7 -14
  3. flowerpower/cfg/base.py +29 -25
  4. flowerpower/cfg/pipeline/__init__.py +8 -6
  5. flowerpower/cfg/pipeline/_schedule.py +32 -0
  6. flowerpower/cfg/pipeline/adapter.py +0 -5
  7. flowerpower/cfg/pipeline/builder.py +377 -0
  8. flowerpower/cfg/pipeline/run.py +36 -0
  9. flowerpower/cfg/project/__init__.py +11 -24
  10. flowerpower/cfg/project/adapter.py +0 -12
  11. flowerpower/cli/__init__.py +2 -21
  12. flowerpower/cli/cfg.py +0 -3
  13. flowerpower/cli/mqtt.py +0 -6
  14. flowerpower/cli/pipeline.py +22 -415
  15. flowerpower/cli/utils.py +0 -1
  16. flowerpower/flowerpower.py +345 -146
  17. flowerpower/pipeline/__init__.py +2 -0
  18. flowerpower/pipeline/base.py +21 -12
  19. flowerpower/pipeline/io.py +58 -54
  20. flowerpower/pipeline/manager.py +165 -726
  21. flowerpower/pipeline/pipeline.py +643 -0
  22. flowerpower/pipeline/registry.py +285 -18
  23. flowerpower/pipeline/visualizer.py +5 -6
  24. flowerpower/plugins/io/__init__.py +8 -0
  25. flowerpower/plugins/mqtt/__init__.py +7 -11
  26. flowerpower/settings/__init__.py +0 -2
  27. flowerpower/settings/{backend.py → _backend.py} +0 -21
  28. flowerpower/settings/logging.py +1 -1
  29. flowerpower/utils/logging.py +24 -12
  30. flowerpower/utils/misc.py +17 -256
  31. flowerpower/utils/monkey.py +1 -83
  32. flowerpower-0.21.0.dist-info/METADATA +463 -0
  33. flowerpower-0.21.0.dist-info/RECORD +44 -0
  34. flowerpower/cfg/pipeline/schedule.py +0 -74
  35. flowerpower/cfg/project/job_queue.py +0 -238
  36. flowerpower/cli/job_queue.py +0 -1061
  37. flowerpower/fs/__init__.py +0 -29
  38. flowerpower/fs/base.py +0 -662
  39. flowerpower/fs/ext.py +0 -2143
  40. flowerpower/fs/storage_options.py +0 -1420
  41. flowerpower/job_queue/__init__.py +0 -294
  42. flowerpower/job_queue/apscheduler/__init__.py +0 -11
  43. flowerpower/job_queue/apscheduler/_setup/datastore.py +0 -110
  44. flowerpower/job_queue/apscheduler/_setup/eventbroker.py +0 -93
  45. flowerpower/job_queue/apscheduler/manager.py +0 -1051
  46. flowerpower/job_queue/apscheduler/setup.py +0 -554
  47. flowerpower/job_queue/apscheduler/trigger.py +0 -169
  48. flowerpower/job_queue/apscheduler/utils.py +0 -311
  49. flowerpower/job_queue/base.py +0 -413
  50. flowerpower/job_queue/rq/__init__.py +0 -10
  51. flowerpower/job_queue/rq/_trigger.py +0 -37
  52. flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +0 -226
  53. flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -231
  54. flowerpower/job_queue/rq/manager.py +0 -1582
  55. flowerpower/job_queue/rq/setup.py +0 -154
  56. flowerpower/job_queue/rq/utils.py +0 -69
  57. flowerpower/mqtt.py +0 -12
  58. flowerpower/pipeline/job_queue.py +0 -583
  59. flowerpower/pipeline/runner.py +0 -603
  60. flowerpower/plugins/io/base.py +0 -2520
  61. flowerpower/plugins/io/helpers/datetime.py +0 -298
  62. flowerpower/plugins/io/helpers/polars.py +0 -875
  63. flowerpower/plugins/io/helpers/pyarrow.py +0 -570
  64. flowerpower/plugins/io/helpers/sql.py +0 -202
  65. flowerpower/plugins/io/loader/__init__.py +0 -28
  66. flowerpower/plugins/io/loader/csv.py +0 -37
  67. flowerpower/plugins/io/loader/deltatable.py +0 -190
  68. flowerpower/plugins/io/loader/duckdb.py +0 -19
  69. flowerpower/plugins/io/loader/json.py +0 -37
  70. flowerpower/plugins/io/loader/mqtt.py +0 -159
  71. flowerpower/plugins/io/loader/mssql.py +0 -26
  72. flowerpower/plugins/io/loader/mysql.py +0 -26
  73. flowerpower/plugins/io/loader/oracle.py +0 -26
  74. flowerpower/plugins/io/loader/parquet.py +0 -35
  75. flowerpower/plugins/io/loader/postgres.py +0 -26
  76. flowerpower/plugins/io/loader/pydala.py +0 -19
  77. flowerpower/plugins/io/loader/sqlite.py +0 -23
  78. flowerpower/plugins/io/metadata.py +0 -244
  79. flowerpower/plugins/io/saver/__init__.py +0 -28
  80. flowerpower/plugins/io/saver/csv.py +0 -36
  81. flowerpower/plugins/io/saver/deltatable.py +0 -186
  82. flowerpower/plugins/io/saver/duckdb.py +0 -19
  83. flowerpower/plugins/io/saver/json.py +0 -36
  84. flowerpower/plugins/io/saver/mqtt.py +0 -28
  85. flowerpower/plugins/io/saver/mssql.py +0 -26
  86. flowerpower/plugins/io/saver/mysql.py +0 -26
  87. flowerpower/plugins/io/saver/oracle.py +0 -26
  88. flowerpower/plugins/io/saver/parquet.py +0 -36
  89. flowerpower/plugins/io/saver/postgres.py +0 -26
  90. flowerpower/plugins/io/saver/pydala.py +0 -20
  91. flowerpower/plugins/io/saver/sqlite.py +0 -24
  92. flowerpower/plugins/mqtt/cfg.py +0 -17
  93. flowerpower/plugins/mqtt/manager.py +0 -962
  94. flowerpower/settings/job_queue.py +0 -87
  95. flowerpower/utils/scheduler.py +0 -311
  96. flowerpower-0.11.6.20.dist-info/METADATA +0 -537
  97. flowerpower-0.11.6.20.dist-info/RECORD +0 -102
  98. {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/WHEEL +0 -0
  99. {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/entry_points.txt +0 -0
  100. {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/licenses/LICENSE +0 -0
  101. {flowerpower-0.11.6.20.dist-info → flowerpower-0.21.0.dist-info}/top_level.txt +0 -0
flowerpower/flowerpower.py
@@ -2,83 +2,268 @@ import datetime as dt
 import os
 import posixpath
 from pathlib import Path
+from typing import Any, Callable, Optional, TYPE_CHECKING
+from functools import wraps

 import rich
+from fsspec_utils import (AbstractFileSystem, BaseStorageOptions,
+                          DirFileSystem, filesystem)
 from loguru import logger

 from . import settings
 from .cfg import ProjectConfig
-from .fs import (AbstractFileSystem, BaseStorageOptions, DirFileSystem,
-                 get_filesystem)
-from .job_queue import JobQueueManager
+from .cfg.pipeline import ExecutorConfig, WithAdapterConfig, RunConfig
+from .cfg.pipeline.adapter import AdapterConfig as PipelineAdapterConfig
+from .cfg.project.adapter import AdapterConfig as ProjectAdapterConfig
 from .pipeline import PipelineManager
 from .utils.logging import setup_logging

-setup_logging(level=settings.LOG_LEVEL)
+setup_logging()
+
+def handle_errors(func):
+    """Decorator to handle exceptions, log them, and re-raise as RuntimeError."""
+    @wraps(func)
+    def wrapper(self, *args, **kwargs):
+        try:
+            return func(self, *args, **kwargs)
+        except Exception as e:
+            # Extract operation name from function name for better logging
+            operation_name = func.__name__.replace('_', ' ').title()
+            # For methods like 'run', we want to log the pipeline name if available
+            if 'name' in kwargs and func.__name__ in ['run']:
+                logger.error(f"Failed to {operation_name.lower()} pipeline '{kwargs.get('name')}': {e}")
+                raise RuntimeError(f"Pipeline {operation_name.lower()} failed for '{kwargs.get('name')}': {e}") from e
+            else:
+                logger.error(f"Failed to {operation_name.lower()}: {e}")
+                raise RuntimeError(f"{operation_name} failed: {e}") from e
+    return wrapper


 class FlowerPowerProject:
     def __init__(
         self,
         pipeline_manager: PipelineManager,
-        job_queue_manager: JobQueueManager | None = None,
     ):
         """
         Initialize a FlowerPower project.
         Args:
             pipeline_manager (PipelineManager | None): Instance of PipelineManager to manage pipelines.
-            job_queue_manager (JobQueueManager | None): Instance of JobQueueManager to manage job queues.
         """
         self.pipeline_manager = pipeline_manager
-        self.job_queue_manager = job_queue_manager
         self.name = self.pipeline_manager.project_cfg.name
-        self._base_dir = self.pipeline_manager._base_dir
-        self._fs = self.pipeline_manager._fs
-        self._storage_options = self.pipeline_manager._storage_options
-        self._cfg_dir = self.pipeline_manager._cfg_dir
-        self._pipelines_dir = self.pipeline_manager._pipelines_dir
-        self.job_queue_type = (
-            self.job_queue_manager.cfg.type
-            if self.job_queue_manager is not None
-            else None
-        )
-        self.job_queue_backend = (
-            self.job_queue_manager.cfg.backend
-            if self.job_queue_manager is not None
-            else None
+
+    def _validate_pipeline_name(self, name: str) -> None:
+        """Validate the pipeline name argument."""
+        if not name or not isinstance(name, str):
+            raise ValueError("Pipeline 'name' must be a non-empty string")
+        if name.strip() != name:
+            raise ValueError(
+                "Pipeline 'name' cannot have leading or trailing whitespace"
+            )
+
+    def _inject_dependencies(self):
+        """Inject dependencies between managers for proper architecture.
+
+        This method establishes the correct dependency flow:
+        - Project context is properly established for pipeline execution
+        """
+        # Store project reference for pipeline context
+        # This will be used when creating Pipeline instances
+        self.pipeline_manager._project_context = self
+
+    def _merge_run_config_with_kwargs(self, run_config: RunConfig, kwargs: dict) -> RunConfig:
+        """Merge kwargs into a RunConfig object.
+
+        This helper method updates the RunConfig object with values from kwargs,
+        handling different types of attributes appropriately.
+
+        Args:
+            run_config: The RunConfig object to update
+            kwargs: Dictionary of additional parameters to merge
+
+        Returns:
+            RunConfig: Updated RunConfig object
+        """
+        # Handle dictionary-like attributes with update or deep merge
+        if 'inputs' in kwargs and kwargs['inputs'] is not None:
+            if run_config.inputs is None:
+                run_config.inputs = kwargs['inputs']
+            else:
+                run_config.inputs.update(kwargs['inputs'])
+
+        if 'config' in kwargs and kwargs['config'] is not None:
+            if run_config.config is None:
+                run_config.config = kwargs['config']
+            else:
+                run_config.config.update(kwargs['config'])
+
+        if 'cache' in kwargs and kwargs['cache'] is not None:
+            run_config.cache = kwargs['cache']
+
+        if 'adapter' in kwargs and kwargs['adapter'] is not None:
+            if run_config.adapter is None:
+                run_config.adapter = kwargs['adapter']
+            else:
+                run_config.adapter.update(kwargs['adapter'])
+
+        # Handle executor_cfg - convert string/dict to ExecutorConfig if needed
+        if 'executor_cfg' in kwargs and kwargs['executor_cfg'] is not None:
+            executor_cfg = kwargs['executor_cfg']
+            if isinstance(executor_cfg, str):
+                run_config.executor = ExecutorConfig(type=executor_cfg)
+            elif isinstance(executor_cfg, dict):
+                run_config.executor = ExecutorConfig.from_dict(executor_cfg)
+            elif isinstance(executor_cfg, ExecutorConfig):
+                run_config.executor = executor_cfg
+
+        # Handle adapter configurations
+        if 'with_adapter_cfg' in kwargs and kwargs['with_adapter_cfg'] is not None:
+            with_adapter_cfg = kwargs['with_adapter_cfg']
+            if isinstance(with_adapter_cfg, dict):
+                run_config.with_adapter = WithAdapterConfig.from_dict(with_adapter_cfg)
+            elif isinstance(with_adapter_cfg, WithAdapterConfig):
+                run_config.with_adapter = with_adapter_cfg
+
+        if 'pipeline_adapter_cfg' in kwargs and kwargs['pipeline_adapter_cfg'] is not None:
+            run_config.pipeline_adapter_cfg = kwargs['pipeline_adapter_cfg']
+
+        if 'project_adapter_cfg' in kwargs and kwargs['project_adapter_cfg'] is not None:
+            run_config.project_adapter_cfg = kwargs['project_adapter_cfg']
+
+        # Handle simple attributes
+        simple_attrs = [
+            'final_vars', 'reload', 'log_level', 'max_retries', 'retry_delay',
+            'jitter_factor', 'retry_exceptions', 'on_success', 'on_failure'
+        ]
+
+        for attr in simple_attrs:
+            if attr in kwargs and kwargs[attr] is not None:
+                setattr(run_config, attr, kwargs[attr])
+
+        return run_config
+
+    # --- Convenience Methods for Pipeline Operations ---
+
+    @handle_errors
+    def run(
+        self,
+        name: str,
+        run_config: RunConfig | None = None,
+        **kwargs
+    ) -> dict[str, Any]:
+        """Execute a pipeline synchronously and return its results.
+
+        This is a convenience method that delegates to the pipeline manager.
+        It provides the same functionality as `self.pipeline_manager.run()`.
+
+        Args:
+            name: Name of the pipeline to run. Must be a valid identifier.
+            run_config: Run configuration object containing all execution parameters.
+                If None, the default configuration from the pipeline will be used.
+            **kwargs: Additional parameters to override the run_config. Supported parameters include:
+                inputs (dict | None): Override pipeline input values. Example: {"data_date": "2025-04-28"}
+                final_vars (list[str] | None): Specify which output variables to return.
+                    Example: ["model", "metrics"]
+                config (dict | None): Configuration for Hamilton pipeline executor.
+                    Example: {"model": "LogisticRegression"}
+                cache (dict | None): Cache configuration for results. Example: {"recompute": ["node1", "final_node"]}
+                executor_cfg (str | dict | ExecutorConfig | None): Execution configuration, can be:
+                    - str: Executor name, e.g. "threadpool", "local"
+                    - dict: Raw config, e.g. {"type": "threadpool", "max_workers": 4}
+                    - ExecutorConfig: Structured config object
+                with_adapter_cfg (dict | WithAdapterConfig | None): Adapter settings for pipeline execution.
+                    Example: {"opentelemetry": True, "tracker": False}
+                pipeline_adapter_cfg (dict | PipelineAdapterConfig | None): Pipeline-specific adapter settings.
+                    Example: {"tracker": {"project_id": "123", "tags": {"env": "prod"}}}
+                project_adapter_cfg (dict | ProjectAdapterConfig | None): Project-level adapter settings.
+                    Example: {"opentelemetry": {"host": "http://localhost:4317"}}
+                adapter (dict[str, Any] | None): Custom adapter instance for pipeline
+                    Example: {"ray_graph_adapter": RayGraphAdapter()}
+                reload (bool): Force reload of pipeline configuration.
+                log_level (str | None): Logging level for the execution. Default None uses project config.
+                    Valid values: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
+                max_retries (int): Maximum number of retries for execution.
+                retry_delay (float): Delay between retries in seconds.
+                jitter_factor (float): Random jitter factor to add to retry delay
+                retry_exceptions (tuple): Exceptions that trigger a retry.
+                on_success (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on successful pipeline execution.
+                on_failure (Callable | tuple[Callable, tuple | None, dict | None] | None): Callback to run on pipeline execution failure.
+
+        Returns:
+            dict[str, Any]: Pipeline execution results, mapping output variable names to their computed values.
+
+        Raises:
+            ValueError: If pipeline name doesn't exist or configuration is invalid
+            ImportError: If pipeline module cannot be imported
+            RuntimeError: If execution fails due to pipeline or adapter errors
+
+        Example:
+            ```python
+            project = FlowerPowerProject.load(".")
+
+            # Simple execution
+            result = project.run("my_pipeline")
+
+            # Run with custom RunConfig
+            from flowerpower.cfg.pipeline.run import RunConfig
+            config = RunConfig(inputs={"date": "2025-04-28"}, final_vars=["result"])
+            result = project.run("ml_pipeline", run_config=config)
+
+            # Complex run with kwargs overrides
+            result = project.run(
+                "ml_pipeline",
+                inputs={"training_date": "2025-04-28"},
+                final_vars=["model", "metrics"],
+                executor_cfg={"type": "threadpool", "max_workers": 4},
+                with_adapter_cfg={"tracker": True},
+                reload=True
+            )
+            ```
+        """
+        # Validate pipeline manager is available
+        if self.pipeline_manager is None:
+            raise RuntimeError(
+                "Pipeline manager is not configured. Cannot execute pipeline. "
+                "Ensure the project was loaded correctly."
+            )
+
+        # Validate required arguments
+        self._validate_pipeline_name(name)
+
+        # Initialize run_config - use provided config or create empty one
+        run_config = run_config or RunConfig()
+
+        # Merge kwargs into run_config
+        if kwargs:
+            run_config = self._merge_run_config_with_kwargs(run_config, kwargs)
+
+        return self.pipeline_manager.run(
+            name=name,
+            run_config=run_config,
         )

     @staticmethod
-    def _check_project_exists(base_dir: str, fs: AbstractFileSystem | None = None):
+    def _check_project_exists(base_dir: str, fs: AbstractFileSystem | None = None) -> tuple[bool, str]:
         if fs is None:
-            fs = get_filesystem(base_dir, dirfs=True)
-        if isinstance(fs, DirFileSystem):
-            if not fs.exists("."):
-                rich.print(
-                    "[red]Project directory does not exist. Please initialize it first.[/red]"
-                )
-                return False
-            if not fs.exists("conf") or not fs.exists("pipelines"):
-                rich.print(
-                    "[red]Project configuration or pipelines directory is missing[/red]"
-                )
-                return False
-        else:
-            if not fs.exists(base_dir):
-                rich.print(
-                    "[red]Project directory does not exist. Please initialize it first.[/red]"
-                )
-                return False
-            if not fs.exists(posixpath.join(base_dir, "conf")) or not fs.exists(
-                posixpath.join(base_dir, "pipelines")
-            ):
-                rich.print(
-                    "[red]Project configuration or pipelines directory is missing[/red]"
-                )
-                return False
+            fs = filesystem(base_dir, dirfs=True)
+
+        # Determine the root path for existence checks
+        # For DirFileSystem, paths are relative to its root, so we check "." for the project root.
+        # For other filesystems, we use the base_dir directly.
+        root_path = "." if isinstance(fs, DirFileSystem) else base_dir
+
+        if not fs.exists(root_path):
+            return False, "Project directory does not exist. Please initialize it first."
+
+        # Check for required subdirectories
+        config_path = posixpath.join(root_path, settings.CONFIG_DIR)
+        pipelines_path = posixpath.join(root_path, settings.PIPELINES_DIR)
+
+        if not fs.exists(config_path) or not fs.exists(pipelines_path):
+            return False, "Project configuration or pipelines directory is missing"

         logger.debug(f"Project exists at {base_dir}")
-        return True
+        return True, ""

     @classmethod
     def load(
@@ -103,7 +288,7 @@ class FlowerPowerProject:
         Raises:
             FileNotFoundError: If the project does not exist at the specified base directory.
         """
-        if log_level:
+        if log_level is not None:
             setup_logging(level=log_level)

         base_dir = base_dir or str(Path.cwd())
@@ -118,14 +303,15 @@ class FlowerPowerProject:
         cached = False
         cache_storage = None
         if not fs:
-            fs = get_filesystem(
+            fs = filesystem(
                 base_dir,
                 storage_options=storage_options,
                 cached=cached,
                 cache_storage=cache_storage,
             )

-        if cls._check_project_exists(base_dir, fs):
+        project_exists, message = cls._check_project_exists(base_dir, fs)
+        if project_exists:
             logger.info(f"Loading FlowerPower project from {base_dir}")
             pipeline_manager = PipelineManager(
                 base_dir=base_dir,
@@ -134,33 +320,30 @@ class FlowerPowerProject:
                 log_level=log_level,
             )

-            job_queue_manager = JobQueueManager(
-                storage_options=storage_options,
-                fs=fs,
-                log_level=log_level,
-            )
-            return cls(
+            # Create the project instance
+            project = cls(
                 pipeline_manager=pipeline_manager,
-                job_queue_manager=job_queue_manager,
             )
+
+            # Inject dependencies after creation to avoid circular imports
+            project._inject_dependencies()
+
+            return project
         else:
-            logger.error(
-                f"Project does not exist at {base_dir}. Please initialize it first. Use `FlowerPowerProject.init()` to create a new project."
-            )
+            rich.print(f"[red]{message}[/red]")
+            logger.error(message)
             return None

     @classmethod
-    def init(
+    def new(
         cls,
         name: str | None = None,
         base_dir: str | None = None,
         storage_options: dict | BaseStorageOptions | None = {},
         fs: AbstractFileSystem | None = None,
-        job_queue_type: str = settings.JOB_QUEUE_TYPE,
-        cfg_dir: str = settings.CONFIG_DIR,
-        pipelines_dir: str = settings.PIPELINES_DIR,
         hooks_dir: str = settings.HOOKS_DIR,
         log_level: str | None = None,
+        overwrite: bool = False,
     ) -> "FlowerPowerProject":
         """
         Initialize a new FlowerPower project.
@@ -170,14 +353,12 @@ class FlowerPowerProject:
             base_dir (str | None): The base directory where the project will be created. If None, it defaults to the current working directory.
             storage_options (dict | BaseStorageOptions | None): Storage options for the filesystem.
             fs (AbstractFileSystem | None): An instance of AbstractFileSystem to use for file operations.
-            job_queue_type (str): The type of job queue to use for the project.
-            cfg_dir (str): The directory where the project configuration will be stored.
-            pipelines_dir (str): The directory where the project pipelines will be stored.
             hooks_dir (str): The directory where the project hooks will be stored.
+            overwrite (bool): Whether to overwrite an existing project at the specified base directory.
         Returns:
             FlowerPowerProject: An instance of FlowerPowerProject initialized with the new project.
         Raises:
-            FileExistsError: If the project already exists at the specified base directory.
+            FileExistsError: If the project already exists at the specified base directory and overwrite is False.
         """
         if log_level:
             setup_logging(level=log_level)
@@ -190,17 +371,45 @@ class FlowerPowerProject:
            base_dir = posixpath.join(str(Path.cwd()), name)

         if fs is None:
-            fs = get_filesystem(
-                path=base_dir,
+            fs = filesystem(
+                protocol_or_path=base_dir,
                 dirfs=True,
                 storage_options=storage_options,
             )

-        fs.makedirs(f"{cfg_dir}/pipelines", exist_ok=True)
-        fs.makedirs(pipelines_dir, exist_ok=True)
+        # Check if project already exists
+        project_exists, message = cls._check_project_exists(base_dir, fs)
+        if project_exists:
+            if overwrite:
+                # Delete existing project files and directories
+                logger.info(f"Overwriting existing project at {base_dir}")
+
+                # Remove directories recursively
+                config_path = f"{settings.CONFIG_DIR}"
+                pipelines_path = settings.PIPELINES_DIR
+
+                if fs.exists(config_path):
+                    fs.rm(config_path, recursive=True)
+                if fs.exists(pipelines_path):
+                    fs.rm(pipelines_path, recursive=True)
+                if fs.exists(hooks_dir):
+                    fs.rm(hooks_dir, recursive=True)
+
+                # Remove README.md file
+                if fs.exists("README.md"):
+                    fs.rm("README.md")
+            else:
+                error_msg = f"Project already exists at {base_dir}. Use overwrite=True to overwrite the existing project."
+                rich.print(f"[red]{error_msg}[/red]")
+                logger.error(error_msg)
+                raise FileExistsError(error_msg)
+
+        fs.makedirs(f"{settings.CONFIG_DIR}/pipelines", exist_ok=True)
+        fs.makedirs(settings.PIPELINES_DIR, exist_ok=True)
         fs.makedirs(hooks_dir, exist_ok=True)

-        cfg = ProjectConfig.load(name=name, job_queue_type=job_queue_type, fs=fs)
+        # Load project configuration
+        cfg = ProjectConfig.load(name=name, fs=fs)

         with fs.open("README.md", "w") as f:
             f.write(
@@ -208,7 +417,6 @@ class FlowerPowerProject:
                 f"**created on**\n\n*{dt.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*\n\n"
             )
         cfg.save(fs=fs)
-        os.chdir(posixpath.join(base_dir, name))

         rich.print(
             f"\n✨ Initialized FlowerPower project [bold blue]{name}[/bold blue] "
@@ -255,99 +463,90 @@ class FlowerPowerProject:
             base_dir=base_dir,
             storage_options=storage_options,
             fs=fs,
-            log_level=settings.LOG_LEVEL,
+            log_level=log_level,
         )


-class FlowerPower:
-    def __new__(
-        self,
-        name: str | None = None,
-        base_dir: str | None = None,
-        storage_options: dict | BaseStorageOptions | None = {},
-        fs: AbstractFileSystem | None = None,
-        job_queue_type: str = settings.JOB_QUEUE_TYPE,
-        cfg_dir: str = settings.CONFIG_DIR,
-        pipelines_dir: str = settings.PIPELINES_DIR,
-        hooks_dir: str = settings.HOOKS_DIR,
-    ) -> FlowerPowerProject:
-        """
-        Initialize a FlowerPower project.
-
-        Args:
-            name (str | None): The name of the project. If None, it defaults to the current directory name.
-            base_dir (str | None): The base directory where the project will be created. If None, it defaults to the current working directory.
-            storage_options (dict | BaseStorageOptions | None): Storage options for the filesystem.
-            fs (AbstractFileSystem | None): An instance of AbstractFileSystem to use for file operations.
-            job_queue_type (str): The type of job queue to use for the project.
-            cfg_dir (str): The directory where the project configuration will be stored.
-            pipelines_dir (str): The directory where the project pipelines will be stored.
-            hooks_dir (str): The directory where the project hooks will be stored.
-
-        Returns:
-            FlowerPowerProject: An instance of FlowerPowerProject initialized with the new project.
-        """
-        if FlowerPowerProject._check_project_exists(base_dir, fs=fs):
-            return FlowerPowerProject.load(
-                base_dir=base_dir,
-                storage_options=storage_options,
-                fs=fs,
-            )
-        else:
-            return FlowerPowerProject.init(
-                name=name,
-                base_dir=base_dir,
-                storage_options=storage_options,
-                fs=fs,
-                job_queue_type=job_queue_type,
-                cfg_dir=cfg_dir,
-                pipelines_dir=pipelines_dir,
-                hooks_dir=hooks_dir,
-            )
-
-    def __call__(self) -> FlowerPowerProject:
-        """
-        Call the FlowerPower instance to return the current project.
-
-        Returns:
-            FlowerPowerProject: The current FlowerPower project.
-        """
-        return self
-
-
-def init(
+def initialize_project(
     name: str | None = None,
     base_dir: str | None = None,
     storage_options: dict | BaseStorageOptions | None = {},
     fs: AbstractFileSystem | None = None,
-    job_queue_type: str = settings.JOB_QUEUE_TYPE,
-    cfg_dir: str = settings.CONFIG_DIR,
-    pipelines_dir: str = settings.PIPELINES_DIR,
     hooks_dir: str = settings.HOOKS_DIR,
+    log_level: str | None = None,
 ) -> FlowerPowerProject:
     """
-    Initialize a FlowerPower project.
-
+    Initialize a new FlowerPower project.
+
+
+    This is a standalone function that directly calls FlowerPowerProject.new
+    with the same arguments, providing easier, separately importable access.
+
     Args:
         name (str | None): The name of the project. If None, it defaults to the current directory name.
         base_dir (str | None): The base directory where the project will be created. If None, it defaults to the current working directory.
         storage_options (dict | BaseStorageOptions | None): Storage options for the filesystem.
         fs (AbstractFileSystem | None): An instance of AbstractFileSystem to use for file operations.
-        job_queue_type (str): The type of job queue to use for the project.
-        cfg_dir (str): The directory where the project configuration will be stored.
-        pipelines_dir (str): The directory where the project pipelines will be stored.
         hooks_dir (str): The directory where the project hooks will be stored.
-
+        log_level (str | None): The logging level to set for the project.
+
     Returns:
         FlowerPowerProject: An instance of FlowerPowerProject initialized with the new project.
     """
-    return FlowerPowerProject.init(
+    return FlowerPowerProject.new(
         name=name,
         base_dir=base_dir,
         storage_options=storage_options,
         fs=fs,
-        job_queue_type=job_queue_type,
-        cfg_dir=cfg_dir,
-        pipelines_dir=pipelines_dir,
         hooks_dir=hooks_dir,
+        log_level=log_level,
    )
+
+def create_project(
+    name: str | None = None,
+    base_dir: str | None = None,
+    storage_options: dict | BaseStorageOptions | None = {},
+    fs: AbstractFileSystem | None = None,
+    hooks_dir: str = settings.HOOKS_DIR,
+) -> FlowerPowerProject:
+    """
+    Create or load a FlowerPower project.
+
+    If a project exists at the specified base_dir, it will be loaded.
+    Otherwise, a new project will be initialized.
+
+    Args:
+        name (str | None): The name of the project. If None, it defaults to the current directory name.
+        base_dir (str | None): The base directory where the project will be created or loaded from.
+            If None, it defaults to the current working directory.
+        storage_options (dict | BaseStorageOptions | None): Storage options for the filesystem.
+        fs (AbstractFileSystem | None): An instance of AbstractFileSystem to use for file operations.
+        hooks_dir (str): The directory where the project hooks will be stored.
+
+    Returns:
+        FlowerPowerProject: An instance of FlowerPowerProject.
+    """
+    # Note: _check_project_exists expects base_dir to be a string.
+    # If base_dir is None, it will be handled by _check_project_exists or the load/init methods.
+    # We pass fs directly, as _check_project_exists can handle fs being None.
+    project_exists, _ = FlowerPowerProject._check_project_exists(base_dir or str(Path.cwd()), fs=fs)
+
+    if project_exists:
+        return FlowerPowerProject.load(
+            base_dir=base_dir,
+            storage_options=storage_options,
+            fs=fs,
+        )
+    else:
+        error_message = "Project does not exist. Use `initialize_project()` or `FlowerPowerProject.new()` to create it."
+        rich.print(f"[red]{error_message}[/red]")
+        logger.error(error_message)
+        raise FileNotFoundError(error_message)
+
+# Alias for backward compatibility or alternative naming
+FlowerPower = create_project
+
+
+# The standalone init function is removed as it was a direct pass-through
+# to FlowerPowerProject.new(). Users can now use FlowerPowerProject.new() directly
+# or the new create_project() function which handles both loading and initialization.
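
Taken together, the flowerpower.py changes above replace the old FlowerPower class and standalone init() with FlowerPowerProject.new(), FlowerPowerProject.load(), initialize_project(), and create_project(). The sketch below illustrates how that reworked surface would be used, based only on the signatures shown in this diff; the import path assumes the module layout from the file list, and the project and pipeline names ("my_project", "ml_pipeline") are illustrative, not part of the package.

```python
# Usage sketch for the 0.21.0 project API (names below are illustrative).
from flowerpower.flowerpower import FlowerPowerProject, create_project

# Create a fresh project; raises FileExistsError if one already exists
# at the target directory and overwrite=False.
project = FlowerPowerProject.new(name="my_project", overwrite=True)

# Load an existing project; create_project() raises FileNotFoundError
# when no project exists at base_dir.
project = create_project(base_dir="./my_project")

# Run a pipeline; extra kwargs are merged into a RunConfig by
# FlowerPowerProject._merge_run_config_with_kwargs(), mirroring the
# docstring example in run() above.
result = project.run(
    "ml_pipeline",
    inputs={"training_date": "2025-04-28"},
    final_vars=["model", "metrics"],
    executor_cfg={"type": "threadpool", "max_workers": 4},
)
```
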
flowerpower/pipeline/__init__.py
@@ -1,5 +1,7 @@
 from .manager import PipelineManager
+from .pipeline import Pipeline

 __all__ = [
     "PipelineManager",
+    "Pipeline",
 ]