FlowerPower 0.30.0__py3-none-any.whl → 0.31.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38):
  1. flowerpower/cfg/__init__.py +143 -25
  2. flowerpower/cfg/base.py +132 -11
  3. flowerpower/cfg/exceptions.py +53 -0
  4. flowerpower/cfg/pipeline/__init__.py +151 -35
  5. flowerpower/cfg/pipeline/adapter.py +1 -0
  6. flowerpower/cfg/pipeline/builder.py +24 -25
  7. flowerpower/cfg/pipeline/builder_adapter.py +142 -0
  8. flowerpower/cfg/pipeline/builder_executor.py +101 -0
  9. flowerpower/cfg/pipeline/run.py +99 -40
  10. flowerpower/cfg/project/__init__.py +59 -14
  11. flowerpower/cfg/project/adapter.py +6 -0
  12. flowerpower/cli/__init__.py +8 -2
  13. flowerpower/cli/cfg.py +0 -38
  14. flowerpower/cli/pipeline.py +121 -83
  15. flowerpower/cli/utils.py +120 -71
  16. flowerpower/flowerpower.py +94 -120
  17. flowerpower/pipeline/config_manager.py +180 -0
  18. flowerpower/pipeline/executor.py +126 -0
  19. flowerpower/pipeline/lifecycle_manager.py +231 -0
  20. flowerpower/pipeline/manager.py +121 -274
  21. flowerpower/pipeline/pipeline.py +66 -278
  22. flowerpower/pipeline/registry.py +45 -4
  23. flowerpower/utils/__init__.py +19 -0
  24. flowerpower/utils/adapter.py +286 -0
  25. flowerpower/utils/callback.py +73 -67
  26. flowerpower/utils/config.py +306 -0
  27. flowerpower/utils/executor.py +178 -0
  28. flowerpower/utils/filesystem.py +194 -0
  29. flowerpower/utils/misc.py +249 -76
  30. flowerpower/utils/security.py +221 -0
  31. {flowerpower-0.30.0.dist-info → flowerpower-0.31.0.dist-info}/METADATA +2 -2
  32. flowerpower-0.31.0.dist-info/RECORD +53 -0
  33. flowerpower/cfg/pipeline/_schedule.py +0 -32
  34. flowerpower-0.30.0.dist-info/RECORD +0 -42
  35. {flowerpower-0.30.0.dist-info → flowerpower-0.31.0.dist-info}/WHEEL +0 -0
  36. {flowerpower-0.30.0.dist-info → flowerpower-0.31.0.dist-info}/entry_points.txt +0 -0
  37. {flowerpower-0.30.0.dist-info → flowerpower-0.31.0.dist-info}/licenses/LICENSE +0 -0
  38. {flowerpower-0.30.0.dist-info → flowerpower-0.31.0.dist-info}/top_level.txt +0 -0
@@ -20,12 +20,15 @@ from fsspec_utils import AbstractFileSystem, BaseStorageOptions, filesystem
20
20
 
21
21
  from ..settings import CONFIG_DIR, PIPELINES_DIR, CACHE_DIR
22
22
  from ..cfg import PipelineConfig, ProjectConfig
23
- from ..cfg.pipeline.adapter import AdapterConfig as PipelineAdapterConfig
24
- from ..cfg.pipeline.run import ExecutorConfig, RunConfig, WithAdapterConfig
25
- from ..cfg.project.adapter import AdapterConfig as ProjectAdapterConfig
23
+ from ..cfg.pipeline.run import RunConfig
26
24
  from ..utils.logging import setup_logging
25
+ from ..utils.config import merge_run_config_with_kwargs
26
+ from ..utils.filesystem import FilesystemHelper
27
+ from .config_manager import PipelineConfigManager
28
+ from .executor import PipelineExecutor
27
29
  from .io import PipelineIOManager
28
- from .registry import HookType, PipelineRegistry
30
+ from .lifecycle_manager import PipelineLifecycleManager
31
+ from .registry import PipelineRegistry, HookType
29
32
  from .visualizer import PipelineVisualizer
30
33
 
31
34
  setup_logging()
@@ -119,8 +122,36 @@ class PipelineManager:
119
122
  if log_level:
120
123
  setup_logging(level=log_level)
121
124
 
125
+ self._setup_filesystem(base_dir, storage_options, fs, cfg_dir, pipelines_dir)
126
+ self._initialize_managers()
127
+ self._ensure_directories_exist()
128
+ self._add_modules_path()
129
+
130
+ def _setup_filesystem(
131
+ self,
132
+ base_dir: str | None,
133
+ storage_options: dict | Munch | BaseStorageOptions | None,
134
+ fs: AbstractFileSystem | None,
135
+ cfg_dir: str | None,
136
+ pipelines_dir: str | None
137
+ ) -> None:
138
+ """Setup filesystem and configuration directories.
139
+
140
+ Args:
141
+ base_dir: Root directory for the project
142
+ storage_options: Storage options for filesystem
143
+ fs: Pre-configured filesystem instance
144
+ cfg_dir: Configuration directory name
145
+ pipelines_dir: Pipelines directory name
146
+ """
122
147
  self._base_dir = base_dir or str(Path.cwd())
123
- # self._storage_options = storage_options
148
+ self._cfg_dir = cfg_dir
149
+ self._pipelines_dir = pipelines_dir
150
+
151
+ # Setup filesystem helper
152
+ self._fs_helper = FilesystemHelper(self._base_dir, storage_options)
153
+
154
+ # Configure caching if storage options provided
124
155
  if storage_options is not None:
125
156
  cached = True
126
157
  cache_storage = posixpath.join(
@@ -131,58 +162,81 @@ class PipelineManager:
131
162
  else:
132
163
  cached = False
133
164
  cache_storage = None
134
- if not fs:
135
- fs = filesystem(
136
- self._base_dir,
137
- storage_options=storage_options,
138
- cached=cached,
139
- cache_storage=cache_storage,
140
- )
141
- self._fs = fs
165
+
166
+ # Get filesystem instance
167
+ self._fs = fs or self._fs_helper.get_filesystem(cached=cached, cache_storage=cache_storage)
142
168
  self._storage_options = (
143
- storage_options or fs.storage_options
144
- if fs.protocol != "dir"
145
- else fs.fs.storage_options
169
+ storage_options or self._fs.storage_options
170
+ if self._fs.protocol != "dir"
171
+ else self._fs.fs.storage_options
146
172
  )
147
173
 
148
- # Store overrides for ProjectConfig loading
149
- self._cfg_dir = cfg_dir
150
- self._pipelines_dir = pipelines_dir
151
-
152
- self._load_project_cfg(
153
- reload=True
154
- ) # Load project config
155
-
174
+ def _initialize_managers(self) -> None:
175
+ """Initialize all manager components."""
176
+ # Initialize config manager
177
+ self._config_manager = PipelineConfigManager(
178
+ base_dir=self._base_dir,
179
+ fs=self._fs,
180
+ storage_options=self._storage_options,
181
+ cfg_dir=self._cfg_dir
182
+ )
156
183
 
157
- # Ensure essential directories exist (using paths from loaded project_cfg)
158
- try:
159
- self._fs.makedirs(self._cfg_dir, exist_ok=True)
160
- self._fs.makedirs(self._pipelines_dir, exist_ok=True)
161
- except (OSError, PermissionError) as e:
162
- logger.error(f"Error creating essential directories: {e}")
163
- raise RuntimeError(f"Failed to create essential directories: {e}") from e
164
- except Exception as e:
165
- logger.error(f"Unexpected error creating essential directories: {e}")
166
- raise RuntimeError(f"Unexpected filesystem error: {e}") from e
167
-
168
- # Ensure pipeline modules can be imported
169
- self._add_modules_path()
184
+ # Load project configuration
185
+ self._config_manager.load_project_config(reload=True)
170
186
 
171
- # Instantiate components using the loaded project config
187
+ # Initialize registry
172
188
  self.registry = PipelineRegistry(
173
- project_cfg=self.project_cfg,
189
+ project_cfg=self._config_manager.project_config,
174
190
  fs=self._fs,
175
191
  base_dir=self._base_dir,
176
192
  storage_options=self._storage_options,
177
193
  )
178
194
 
179
- # Initialize project context (will be injected by FlowerPowerProject)
195
+ # Initialize specialized managers
196
+ self._executor = PipelineExecutor(
197
+ config_manager=self._config_manager,
198
+ registry=self.registry
199
+ )
200
+ self._lifecycle_manager = PipelineLifecycleManager(registry=self.registry)
201
+
202
+ # Initialize other components
180
203
  self._project_context = None
181
- self.visualizer = PipelineVisualizer(project_cfg=self.project_cfg, fs=self._fs)
204
+ self.visualizer = PipelineVisualizer(
205
+ project_cfg=self._config_manager.project_config,
206
+ fs=self._fs
207
+ )
182
208
  self.io = PipelineIOManager(registry=self.registry)
183
209
 
184
- self._current_pipeline_name: str | None = None
185
- self._pipeline_cfg: PipelineConfig | None = None
210
+ def _ensure_directories_exist(self) -> None:
211
+ """Ensure essential directories exist."""
212
+ self._fs_helper.ensure_directories_exist(
213
+ self._fs,
214
+ self._cfg_dir,
215
+ self._pipelines_dir
216
+ )
217
+
218
+ def _add_modules_path(self) -> None:
219
+ """Add pipeline module paths to Python path.
220
+
221
+ This internal method ensures that pipeline modules can be imported by:
222
+ 1. Syncing filesystem cache if needed
223
+ 2. Adding project root to Python path
224
+ 3. Adding pipelines directory to Python path
225
+ """
226
+ if self._fs.is_cache_fs:
227
+ self._fs.sync_cache()
228
+ project_path = self._fs._mapper.directory
229
+ modules_path = posixpath.join(project_path, self._pipelines_dir)
230
+ else:
231
+ # Use the base directory directly if not using cache
232
+ project_path = self._fs.path
233
+ modules_path = posixpath.join(project_path, self._pipelines_dir)
234
+
235
+ if project_path not in sys.path:
236
+ sys.path.insert(0, project_path)
237
+
238
+ if modules_path not in sys.path:
239
+ sys.path.insert(0, modules_path)
186
240
 
187
241
  def __enter__(self) -> "PipelineManager":
188
242
  """Enter the context manager.
@@ -228,85 +282,10 @@ class PipelineManager:
228
282
  # Add cleanup code if needed
229
283
  pass
230
284
 
231
- def _add_modules_path(self) -> None:
232
- """Add pipeline module paths to Python path.
233
-
234
- This internal method ensures that pipeline modules can be imported by:
235
- 1. Syncing filesystem cache if needed
236
- 2. Adding project root to Python path
237
- 3. Adding pipelines directory to Python path
238
-
239
- Raises:
240
- RuntimeError: If filesystem sync fails or paths are invalid
241
-
242
- Example:
243
- >>> # Internal usage
244
- >>> manager = PipelineManager()
245
- >>> manager._add_modules_path()
246
- >>> import my_pipeline # Now importable
247
- """
248
- if self._fs.is_cache_fs:
249
- self._fs.sync_cache()
250
- project_path = self._fs._mapper.directory
251
- modules_path = posixpath.join(project_path, self._pipelines_dir)
252
-
253
- else:
254
- # Use the base directory directly if not using cache
255
- project_path = self._fs.path
256
- modules_path = posixpath.join(project_path, self._pipelines_dir)
257
-
258
- if project_path not in sys.path:
259
- sys.path.insert(0, project_path)
260
-
261
- if modules_path not in sys.path:
262
- sys.path.insert(0, modules_path)
263
-
264
- def _load_project_cfg(
265
- self, reload: bool = False
266
- ) -> ProjectConfig:
267
- """Load or reload the project configuration.
268
-
269
- This internal method handles loading project-wide settings from the config
270
- directory, applying overrides, and maintaining configuration state.
271
-
272
- Args:
273
- reload: Force reload configuration even if already loaded.
274
- Defaults to False for caching behavior.
275
-
276
- Returns:
277
- ProjectConfig: The loaded project configuration object with any
278
- specified overrides applied.
279
-
280
- Raises:
281
- FileNotFoundError: If project configuration file doesn't exist
282
- ValueError: If configuration format is invalid
283
- RuntimeError: If filesystem operations fail during loading
284
-
285
- Example:
286
- >>> # Internal usage
287
- >>> manager = PipelineManager()
288
- >>> project_cfg = manager._load_project_cfg(reload=True)
289
- >>> print(project_cfg.name)
290
- 'my_project'
291
- """
292
- if hasattr(self, "_project_cfg") and not reload:
293
- return self._project_cfg
294
-
295
- # Pass overrides to ProjectConfig.load
296
- self._project_cfg = ProjectConfig.load(
297
- base_dir=self._base_dir,
298
- fs=self._fs, # Pass pre-configured fs if provided
299
- storage_options=self._storage_options,
300
- )
301
- # Update internal fs reference in case ProjectConfig loaded/created one
302
- return self._project_cfg
303
285
 
304
286
  def load_pipeline(self, name: str, reload: bool = False) -> PipelineConfig:
305
287
  """Load or reload configuration for a specific pipeline.
306
288
 
307
- This internal method handles loading pipeline-specific settings from the config
308
- directory and maintaining the configuration cache state.
309
-
310
289
  Args:
311
290
  name: Name of the pipeline whose configuration to load
312
291
  reload: Force reload configuration even if already loaded.
@@ -314,45 +293,17 @@ class PipelineManager:
314
293
 
315
294
  Returns:
316
295
  PipelineConfig: The loaded pipeline configuration object
317
-
318
- Raises:
319
- FileNotFoundError: If pipeline configuration file doesn't exist
320
- ValueError: If configuration format is invalid
321
- RuntimeError: If filesystem operations fail during loading
322
-
323
- Example:
324
- >>> # Internal usage
325
- >>> manager = PipelineManager()
326
- >>> cfg = manager._load_pipeline_cfg("data_pipeline", reload=True)
327
- >>> print(cfg.run.executor.type)
328
- 'async'
329
296
  """
330
- if name == self._current_pipeline_name and not reload:
331
- return self._pipeline_cfg
332
-
333
- self._current_pipeline_name = name
334
- self._pipeline_cfg = PipelineConfig.load(
335
- base_dir=self._base_dir,
336
- name=name,
337
- fs=self._fs,
338
- storage_options=self._storage_options,
339
- )
340
- return self._pipeline_cfg
297
+ return self._config_manager.load_pipeline_config(name, reload)
341
298
 
342
299
  @property
343
300
  def current_pipeline_name(self) -> str:
344
301
  """Get the name of the currently loaded pipeline.
345
302
 
346
303
  Returns:
347
- str: Name of the currently loaded pipeline, or empty string if none loaded.
348
-
349
- Example:
350
- >>> manager = PipelineManager()
351
- >>> manager._load_pipeline_cfg("example_pipeline")
352
- >>> print(manager.current_pipeline_name)
353
- 'example_pipeline'
304
+ str: Name of the currently loaded pipeline, or None if none loaded.
354
305
  """
355
- return self._current_pipeline_name
306
+ return self._config_manager.current_pipeline_name
356
307
 
357
308
  @property
358
309
  def project_cfg(self) -> ProjectConfig:
@@ -372,9 +323,7 @@ class PipelineManager:
372
323
  >>> print(cfg.name)
373
324
  'my_project'
374
325
  """
375
- if not hasattr(self, "_project_cfg"):
376
- self._load_project_cfg()
377
- return self._project_cfg
326
+ return self._config_manager.project_config
378
327
 
379
328
  @property
380
329
  def pipeline_cfg(self) -> PipelineConfig:
@@ -393,83 +342,10 @@ class PipelineManager:
393
342
  >>> print(cfg.run.executor)
394
343
  'local'
395
344
  """
396
- if not hasattr(self, "_pipeline_cfg"):
397
- logger.warning("Pipeline config not loaded.")
398
- return
399
- return self._pipeline_cfg
345
+ return self._config_manager.pipeline_config
400
346
 
401
347
  # --- Core Execution Method ---
402
348
 
403
- def _merge_run_config_with_kwargs(self, run_config: RunConfig, kwargs: dict) -> RunConfig:
404
- """Merge kwargs into a RunConfig object.
405
-
406
- This helper method updates the RunConfig object with values from kwargs,
407
- handling different types of attributes appropriately.
408
-
409
- Args:
410
- run_config: The RunConfig object to update
411
- kwargs: Dictionary of additional parameters to merge
412
-
413
- Returns:
414
- RunConfig: Updated RunConfig object
415
- """
416
- # Handle dictionary-like attributes with update or deep merge
417
- if 'inputs' in kwargs and kwargs['inputs'] is not None:
418
- if run_config.inputs is None:
419
- run_config.inputs = kwargs['inputs']
420
- else:
421
- run_config.inputs.update(kwargs['inputs'])
422
-
423
- if 'config' in kwargs and kwargs['config'] is not None:
424
- if run_config.config is None:
425
- run_config.config = kwargs['config']
426
- else:
427
- run_config.config.update(kwargs['config'])
428
-
429
- if 'cache' in kwargs and kwargs['cache'] is not None:
430
- run_config.cache = kwargs['cache']
431
-
432
- if 'adapter' in kwargs and kwargs['adapter'] is not None:
433
- if run_config.adapter is None:
434
- run_config.adapter = kwargs['adapter']
435
- else:
436
- run_config.adapter.update(kwargs['adapter'])
437
-
438
- # Handle executor_cfg - convert string/dict to ExecutorConfig if needed
439
- if 'executor_cfg' in kwargs and kwargs['executor_cfg'] is not None:
440
- executor_cfg = kwargs['executor_cfg']
441
- if isinstance(executor_cfg, str):
442
- run_config.executor = ExecutorConfig(type=executor_cfg)
443
- elif isinstance(executor_cfg, dict):
444
- run_config.executor = ExecutorConfig.from_dict(executor_cfg)
445
- elif isinstance(executor_cfg, ExecutorConfig):
446
- run_config.executor = executor_cfg
447
-
448
- # Handle adapter configurations
449
- if 'with_adapter_cfg' in kwargs and kwargs['with_adapter_cfg'] is not None:
450
- with_adapter_cfg = kwargs['with_adapter_cfg']
451
- if isinstance(with_adapter_cfg, dict):
452
- run_config.with_adapter = WithAdapterConfig.from_dict(with_adapter_cfg)
453
- elif isinstance(with_adapter_cfg, WithAdapterConfig):
454
- run_config.with_adapter = with_adapter_cfg
455
-
456
- if 'pipeline_adapter_cfg' in kwargs and kwargs['pipeline_adapter_cfg'] is not None:
457
- run_config.pipeline_adapter_cfg = kwargs['pipeline_adapter_cfg']
458
-
459
- if 'project_adapter_cfg' in kwargs and kwargs['project_adapter_cfg'] is not None:
460
- run_config.project_adapter_cfg = kwargs['project_adapter_cfg']
461
-
462
- # Handle simple attributes
463
- simple_attrs = [
464
- 'final_vars', 'reload', 'log_level', 'max_retries', 'retry_delay',
465
- 'jitter_factor', 'retry_exceptions', 'on_success', 'on_failure'
466
- ]
467
-
468
- for attr in simple_attrs:
469
- if attr in kwargs and kwargs[attr] is not None:
470
- setattr(run_config, attr, kwargs[attr])
471
-
472
- return run_config
473
349
 
474
350
  def run(
475
351
  self,
@@ -547,33 +423,12 @@ class PipelineManager:
547
423
  ... reload=True
548
424
  ... )
549
425
  """
550
- # Initialize run_config - use provided config or load pipeline default
551
- if run_config is None:
552
- run_config = self.load_pipeline(name=name).run
426
+ # Set project context for executor
427
+ if hasattr(self, "_project_context") and self._project_context is not None:
428
+ self._executor._project_context = self._project_context
553
429
 
554
- # Merge kwargs into run_config
555
- if kwargs:
556
- run_config = self._merge_run_config_with_kwargs(run_config, kwargs)
557
-
558
- # Set up logging for this specific run if log_level is provided
559
- if run_config.log_level is not None:
560
- setup_logging(level=run_config.log_level)
561
- else:
562
- # Ensure logging is reset to default if no specific level is provided for this run
563
- setup_logging()
564
-
565
- # Use injected project context, fallback to self for backward compatibility
566
- project_context = getattr(self, "_project_context", self)
567
-
568
- # Get Pipeline instance from registry
569
- pipeline = self.registry.get_pipeline(
570
- name=name, project_context=project_context, reload=run_config.reload
571
- )
572
-
573
- # Execute pipeline using its own run method
574
- return pipeline.run(
575
- run_config=run_config,
576
- )
430
+ # Delegate to executor
431
+ return self._executor.run(name=name, run_config=run_config, **kwargs)
577
432
 
578
433
  # --- Delegated Methods ---
579
434
 
@@ -603,7 +458,7 @@ class PipelineManager:
603
458
  >>> # Overwrite existing pipeline
604
459
  >>> manager.new("data_transformation", overwrite=True)
605
460
  """
606
- self.registry.new(name=name, overwrite=overwrite)
461
+ self._lifecycle_manager.create_pipeline(name=name, overwrite=overwrite)
607
462
 
608
463
  def delete(self, name: str, cfg: bool = True, module: bool = False) -> None:
609
464
  """
@@ -630,7 +485,7 @@ class PipelineManager:
630
485
  >>> # Delete both config and module
631
486
  >>> manager.delete("test_pipeline", module=True)
632
487
  """
633
- self.registry.delete(name=name, cfg=cfg, module=module)
488
+ self._lifecycle_manager.delete_pipeline(name=name, cfg=cfg, module=module)
634
489
 
635
490
  def get_summary(
636
491
  self,
@@ -700,7 +555,7 @@ class PipelineManager:
700
555
  >>> pm = PipelineManager()
701
556
  >>> pm.show_summary()
702
557
  """
703
- return self.registry.show_summary(
558
+ return self._lifecycle_manager.show_summary(
704
559
  name=name,
705
560
  cfg=cfg,
706
561
  code=code,
@@ -709,21 +564,6 @@ class PipelineManager:
709
564
  to_svg=to_svg,
710
565
  )
711
566
 
712
- def show_pipelines(self) -> None:
713
- """Display all available pipelines in a formatted table.
714
-
715
- The table includes pipeline names, types, and enablement status.
716
- Uses rich formatting for terminal display.
717
-
718
- Example:
719
- >>> from flowerpower.pipeline import PipelineManager
720
- >>>
721
- >>> manager = PipelineManager()
722
- >>> manager.show_pipelines()
723
-
724
- """
725
- self.registry.show_pipelines()
726
-
727
567
  def list_pipelines(self) -> list[str]:
728
568
  """Get list of all available pipeline names.
729
569
 
@@ -738,7 +578,14 @@ class PipelineManager:
738
578
  >>> print(pipelines)
739
579
  ['data_ingestion', 'model_training', 'reporting']
740
580
  """
741
- return self.registry.list_pipelines()
581
+ return self._lifecycle_manager.list_pipelines()
582
+
583
+ def show_pipelines(self) -> None:
584
+ """Display all available pipelines in a formatted table.
585
+
586
+ Uses rich formatting for terminal display.
587
+ """
588
+ return self.registry.show_pipelines()
742
589
 
743
590
  @property
744
591
  def pipelines(self) -> list[str]:
@@ -756,7 +603,7 @@ class PipelineManager:
756
603
  >>> print(manager.pipelines)
757
604
  ['data_ingestion', 'model_training', 'reporting']
758
605
  """
759
- return self.registry.pipelines
606
+ return self._lifecycle_manager.pipelines
760
607
 
761
608
  @property
762
609
  def summary(self) -> dict[str, dict | str]:
@@ -776,7 +623,7 @@ class PipelineManager:
776
623
  data_pipeline: batch
777
624
  ml_pipeline: streaming
778
625
  """
779
- return self.registry.summary
626
+ return self._lifecycle_manager.summary
780
627
 
781
628
  def add_hook(
782
629
  self,
@@ -810,7 +657,7 @@ class PipelineManager:
810
657
  ... function_name="my_pre_execute_function"
811
658
  ... )
812
659
  """
813
- self.registry.add_hook(
660
+ self._lifecycle_manager.add_hook(
814
661
  name=name,
815
662
  type=type,
816
663
  to=to,