FlowerPower 0.30.0__py3-none-any.whl → 0.31.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowerpower/cfg/__init__.py +143 -25
- flowerpower/cfg/base.py +132 -11
- flowerpower/cfg/exceptions.py +53 -0
- flowerpower/cfg/pipeline/__init__.py +151 -35
- flowerpower/cfg/pipeline/adapter.py +1 -0
- flowerpower/cfg/pipeline/builder.py +24 -25
- flowerpower/cfg/pipeline/builder_adapter.py +142 -0
- flowerpower/cfg/pipeline/builder_executor.py +101 -0
- flowerpower/cfg/pipeline/run.py +99 -40
- flowerpower/cfg/project/__init__.py +59 -14
- flowerpower/cfg/project/adapter.py +6 -0
- flowerpower/cli/__init__.py +8 -2
- flowerpower/cli/cfg.py +0 -38
- flowerpower/cli/pipeline.py +121 -83
- flowerpower/cli/utils.py +120 -71
- flowerpower/flowerpower.py +94 -120
- flowerpower/pipeline/config_manager.py +180 -0
- flowerpower/pipeline/executor.py +126 -0
- flowerpower/pipeline/lifecycle_manager.py +231 -0
- flowerpower/pipeline/manager.py +121 -274
- flowerpower/pipeline/pipeline.py +66 -278
- flowerpower/pipeline/registry.py +45 -4
- flowerpower/utils/__init__.py +19 -0
- flowerpower/utils/adapter.py +286 -0
- flowerpower/utils/callback.py +73 -67
- flowerpower/utils/config.py +306 -0
- flowerpower/utils/executor.py +178 -0
- flowerpower/utils/filesystem.py +194 -0
- flowerpower/utils/misc.py +312 -138
- flowerpower/utils/security.py +221 -0
- {flowerpower-0.30.0.dist-info → flowerpower-0.31.1.dist-info}/METADATA +2 -2
- flowerpower-0.31.1.dist-info/RECORD +53 -0
- flowerpower/cfg/pipeline/_schedule.py +0 -32
- flowerpower-0.30.0.dist-info/RECORD +0 -42
- {flowerpower-0.30.0.dist-info → flowerpower-0.31.1.dist-info}/WHEEL +0 -0
- {flowerpower-0.30.0.dist-info → flowerpower-0.31.1.dist-info}/entry_points.txt +0 -0
- {flowerpower-0.30.0.dist-info → flowerpower-0.31.1.dist-info}/licenses/LICENSE +0 -0
- {flowerpower-0.30.0.dist-info → flowerpower-0.31.1.dist-info}/top_level.txt +0 -0
flowerpower/flowerpower.py
CHANGED
@@ -17,6 +17,9 @@ from .cfg.pipeline.adapter import AdapterConfig as PipelineAdapterConfig
|
|
17
17
|
from .cfg.project.adapter import AdapterConfig as ProjectAdapterConfig
|
18
18
|
from .pipeline import PipelineManager
|
19
19
|
from .utils.logging import setup_logging
|
20
|
+
from .utils.security import validate_pipeline_name
|
21
|
+
from .utils.config import merge_run_config_with_kwargs
|
22
|
+
from .utils.filesystem import FilesystemHelper
|
20
23
|
|
21
24
|
setup_logging()
|
22
25
|
|
@@ -53,13 +56,8 @@ class FlowerPowerProject:
|
|
53
56
|
self.name = self.pipeline_manager.project_cfg.name
|
54
57
|
|
55
58
|
def _validate_pipeline_name(self, name: str) -> None:
|
56
|
-
"""Validate the pipeline name argument."""
|
57
|
-
|
58
|
-
raise ValueError("Pipeline 'name' must be a non-empty string")
|
59
|
-
if name.strip() != name:
|
60
|
-
raise ValueError(
|
61
|
-
"Pipeline 'name' cannot have leading or trailing whitespace"
|
62
|
-
)
|
59
|
+
"""Validate the pipeline name argument using security utilities."""
|
60
|
+
validate_pipeline_name(name) # Use secure validation function
|
63
61
|
|
64
62
|
def _inject_dependencies(self):
|
65
63
|
"""Inject dependencies between managers for proper architecture.
|
@@ -71,76 +69,6 @@ class FlowerPowerProject:
|
|
71
69
|
# This will be used when creating Pipeline instances
|
72
70
|
self.pipeline_manager._project_context = self
|
73
71
|
|
74
|
-
def _merge_run_config_with_kwargs(self, run_config: RunConfig, kwargs: dict) -> RunConfig:
|
75
|
-
"""Merge kwargs into a RunConfig object.
|
76
|
-
|
77
|
-
This helper method updates the RunConfig object with values from kwargs,
|
78
|
-
handling different types of attributes appropriately.
|
79
|
-
|
80
|
-
Args:
|
81
|
-
run_config: The RunConfig object to update
|
82
|
-
kwargs: Dictionary of additional parameters to merge
|
83
|
-
|
84
|
-
Returns:
|
85
|
-
RunConfig: Updated RunConfig object
|
86
|
-
"""
|
87
|
-
# Handle dictionary-like attributes with update or deep merge
|
88
|
-
if 'inputs' in kwargs and kwargs['inputs'] is not None:
|
89
|
-
if run_config.inputs is None:
|
90
|
-
run_config.inputs = kwargs['inputs']
|
91
|
-
else:
|
92
|
-
run_config.inputs.update(kwargs['inputs'])
|
93
|
-
|
94
|
-
if 'config' in kwargs and kwargs['config'] is not None:
|
95
|
-
if run_config.config is None:
|
96
|
-
run_config.config = kwargs['config']
|
97
|
-
else:
|
98
|
-
run_config.config.update(kwargs['config'])
|
99
|
-
|
100
|
-
if 'cache' in kwargs and kwargs['cache'] is not None:
|
101
|
-
run_config.cache = kwargs['cache']
|
102
|
-
|
103
|
-
if 'adapter' in kwargs and kwargs['adapter'] is not None:
|
104
|
-
if run_config.adapter is None:
|
105
|
-
run_config.adapter = kwargs['adapter']
|
106
|
-
else:
|
107
|
-
run_config.adapter.update(kwargs['adapter'])
|
108
|
-
|
109
|
-
# Handle executor_cfg - convert string/dict to ExecutorConfig if needed
|
110
|
-
if 'executor_cfg' in kwargs and kwargs['executor_cfg'] is not None:
|
111
|
-
executor_cfg = kwargs['executor_cfg']
|
112
|
-
if isinstance(executor_cfg, str):
|
113
|
-
run_config.executor = ExecutorConfig(type=executor_cfg)
|
114
|
-
elif isinstance(executor_cfg, dict):
|
115
|
-
run_config.executor = ExecutorConfig.from_dict(executor_cfg)
|
116
|
-
elif isinstance(executor_cfg, ExecutorConfig):
|
117
|
-
run_config.executor = executor_cfg
|
118
|
-
|
119
|
-
# Handle adapter configurations
|
120
|
-
if 'with_adapter_cfg' in kwargs and kwargs['with_adapter_cfg'] is not None:
|
121
|
-
with_adapter_cfg = kwargs['with_adapter_cfg']
|
122
|
-
if isinstance(with_adapter_cfg, dict):
|
123
|
-
run_config.with_adapter = WithAdapterConfig.from_dict(with_adapter_cfg)
|
124
|
-
elif isinstance(with_adapter_cfg, WithAdapterConfig):
|
125
|
-
run_config.with_adapter = with_adapter_cfg
|
126
|
-
|
127
|
-
if 'pipeline_adapter_cfg' in kwargs and kwargs['pipeline_adapter_cfg'] is not None:
|
128
|
-
run_config.pipeline_adapter_cfg = kwargs['pipeline_adapter_cfg']
|
129
|
-
|
130
|
-
if 'project_adapter_cfg' in kwargs and kwargs['project_adapter_cfg'] is not None:
|
131
|
-
run_config.project_adapter_cfg = kwargs['project_adapter_cfg']
|
132
|
-
|
133
|
-
# Handle simple attributes
|
134
|
-
simple_attrs = [
|
135
|
-
'final_vars', 'reload', 'log_level', 'max_retries', 'retry_delay',
|
136
|
-
'jitter_factor', 'retry_exceptions', 'on_success', 'on_failure'
|
137
|
-
]
|
138
|
-
|
139
|
-
for attr in simple_attrs:
|
140
|
-
if attr in kwargs and kwargs[attr] is not None:
|
141
|
-
setattr(run_config, attr, kwargs[attr])
|
142
|
-
|
143
|
-
return run_config
|
144
72
|
|
145
73
|
# --- Convenience Methods for Pipeline Operations ---
|
146
74
|
|
@@ -235,7 +163,7 @@ class FlowerPowerProject:
|
|
235
163
|
|
236
164
|
# Merge kwargs into run_config
|
237
165
|
if kwargs:
|
238
|
-
run_config =
|
166
|
+
run_config = merge_run_config_with_kwargs(run_config, kwargs)
|
239
167
|
|
240
168
|
return self.pipeline_manager.run(
|
241
169
|
name=name,
|
@@ -363,6 +291,38 @@ class FlowerPowerProject:
|
|
363
291
|
if log_level:
|
364
292
|
setup_logging(level=log_level)
|
365
293
|
|
294
|
+
# Initialize project parameters
|
295
|
+
name, base_dir = cls._resolve_project_params(name, base_dir)
|
296
|
+
|
297
|
+
# Setup filesystem
|
298
|
+
fs = cls._setup_filesystem(base_dir, storage_options, fs)
|
299
|
+
|
300
|
+
# Handle existing project
|
301
|
+
cls._handle_existing_project(base_dir, fs, hooks_dir, overwrite)
|
302
|
+
|
303
|
+
# Create project structure
|
304
|
+
cls._create_project_structure(fs, hooks_dir)
|
305
|
+
|
306
|
+
# Initialize project configuration
|
307
|
+
cls._initialize_project_config(name, fs)
|
308
|
+
|
309
|
+
# Print success message and getting started guide
|
310
|
+
cls._print_success_message(name, base_dir)
|
311
|
+
|
312
|
+
return cls.load(
|
313
|
+
base_dir=base_dir,
|
314
|
+
storage_options=storage_options,
|
315
|
+
fs=fs,
|
316
|
+
log_level=log_level,
|
317
|
+
)
|
318
|
+
|
319
|
+
@classmethod
|
320
|
+
def _resolve_project_params(
|
321
|
+
cls,
|
322
|
+
name: str | None,
|
323
|
+
base_dir: str | None
|
324
|
+
) -> tuple[str, str]:
|
325
|
+
"""Resolve project name and base directory."""
|
366
326
|
if name is None:
|
367
327
|
name = str(Path.cwd().name)
|
368
328
|
base_dir = posixpath.join(str(Path.cwd().parent), name)
|
@@ -370,54 +330,89 @@ class FlowerPowerProject:
|
|
370
330
|
if base_dir is None:
|
371
331
|
base_dir = posixpath.join(str(Path.cwd()), name)
|
372
332
|
|
333
|
+
return name, base_dir
|
334
|
+
|
335
|
+
@classmethod
def _setup_filesystem(
    cls,
    base_dir: str,
    storage_options: dict | BaseStorageOptions | None,
    fs: AbstractFileSystem | None,
) -> AbstractFileSystem:
    """Return the filesystem used for project operations.

    Args:
        base_dir: Project base directory, passed as ``protocol_or_path``
            when a filesystem has to be created.
        storage_options: Options forwarded to ``filesystem`` when a
            filesystem has to be created.
        fs: An existing filesystem. When given it is returned unchanged.

    Returns:
        ``fs`` if it was provided, otherwise a new filesystem created with
        ``dirfs=True`` for ``base_dir``.
    """
    # A caller-supplied filesystem always wins; nothing is created then.
    if fs is not None:
        return fs

    return filesystem(
        protocol_or_path=base_dir,
        dirfs=True,
        storage_options=storage_options,
    )
|
350
|
+
|
351
|
+
@classmethod
|
352
|
+
def _handle_existing_project(
|
353
|
+
cls,
|
354
|
+
base_dir: str,
|
355
|
+
fs: AbstractFileSystem,
|
356
|
+
hooks_dir: str,
|
357
|
+
overwrite: bool
|
358
|
+
) -> None:
|
359
|
+
"""Handle existing project directory."""
|
360
|
+
project_exists, _ = cls._check_project_exists(base_dir, fs)
|
379
361
|
|
380
|
-
# Check if project already exists
|
381
|
-
project_exists, message = cls._check_project_exists(base_dir, fs)
|
382
362
|
if project_exists:
|
383
363
|
if overwrite:
|
384
|
-
# Delete existing project files and directories
|
385
364
|
logger.info(f"Overwriting existing project at {base_dir}")
|
386
|
-
|
387
|
-
#
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
fs.rm(hooks_dir, recursive=True)
|
397
|
-
|
398
|
-
# Remove README.md file
|
399
|
-
if fs.exists("README.md"):
|
400
|
-
fs.rm("README.md")
|
365
|
+
|
366
|
+
# Use FilesystemHelper to clean existing files
|
367
|
+
fs_helper = FilesystemHelper(base_dir)
|
368
|
+
fs_helper.clean_directory(
|
369
|
+
fs,
|
370
|
+
f"{settings.CONFIG_DIR}",
|
371
|
+
settings.PIPELINES_DIR,
|
372
|
+
hooks_dir,
|
373
|
+
"README.md"
|
374
|
+
)
|
401
375
|
else:
|
402
376
|
error_msg = f"Project already exists at {base_dir}. Use overwrite=True to overwrite the existing project."
|
403
377
|
rich.print(f"[red]{error_msg}[/red]")
|
404
378
|
logger.error(error_msg)
|
405
379
|
raise FileExistsError(error_msg)
|
406
380
|
|
381
|
+
@classmethod
def _create_project_structure(
    cls,
    fs: AbstractFileSystem,
    hooks_dir: str,
) -> None:
    """Create the project's directory layout on ``fs``.

    Creates, in this order, the ``pipelines`` folder below the configured
    config directory, the pipelines source directory and the hooks
    directory. Directories that already exist are left untouched.

    Args:
        fs: Filesystem on which the directories are created.
        hooks_dir: Path of the hooks directory.
    """
    required_dirs = (
        f"{settings.CONFIG_DIR}/pipelines",
        settings.PIPELINES_DIR,
        hooks_dir,
    )
    for directory in required_dirs:
        fs.makedirs(directory, exist_ok=True)
|
410
391
|
|
392
|
+
@classmethod
def _initialize_project_config(
    cls,
    name: str,
    fs: AbstractFileSystem,
) -> ProjectConfig:
    """Load the project configuration, write a README and save the config.

    Args:
        name: Project name; also used (underscores replaced by spaces,
            upper-cased) as the README heading.
        fs: Filesystem the README and the configuration are written to.

    Returns:
        The ``ProjectConfig`` obtained from ``ProjectConfig.load`` after it
        has been saved.
    """
    project_cfg = ProjectConfig.load(name=name, fs=fs)

    # README body: heading with the project name plus a creation timestamp.
    readme_text = (
        f"# FlowerPower project {name.replace('_', ' ').upper()}\n\n"
        f"**created on**\n\n*{dt.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*\n\n"
    )
    with fs.open("README.md", "w") as readme:
        readme.write(readme_text)

    project_cfg.save(fs=fs)
    return project_cfg
|
420
412
|
|
413
|
+
@classmethod
|
414
|
+
def _print_success_message(cls, name: str, base_dir: str) -> None:
|
415
|
+
"""Print success message and getting started guide."""
|
421
416
|
rich.print(
|
422
417
|
f"\n✨ Initialized FlowerPower project [bold blue]{name}[/bold blue] "
|
423
418
|
f"at [italic green]{base_dir}[/italic green]\n"
|
@@ -459,13 +454,6 @@ class FlowerPowerProject:
|
|
459
454
|
)
|
460
455
|
)
|
461
456
|
|
462
|
-
return cls.load(
|
463
|
-
base_dir=base_dir,
|
464
|
-
storage_options=storage_options,
|
465
|
-
fs=fs,
|
466
|
-
log_level=log_level,
|
467
|
-
)
|
468
|
-
|
469
457
|
|
470
458
|
def initialize_project(
|
471
459
|
name: str | None = None,
|
@@ -509,23 +497,9 @@ def create_project(
|
|
509
497
|
fs: AbstractFileSystem | None = None,
|
510
498
|
hooks_dir: str = settings.HOOKS_DIR,
|
511
499
|
) -> FlowerPowerProject:
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
If a project exists at the specified base_dir, it will be loaded.
|
516
|
-
Otherwise, a new project will be initialized.
|
517
|
-
|
518
|
-
Args:
|
519
|
-
name (str | None): The name of the project. If None, it defaults to the current directory name.
|
520
|
-
base_dir (str | None): The base directory where the project will be created or loaded from.
|
521
|
-
If None, it defaults to the current working directory.
|
522
|
-
storage_options (dict | BaseStorageOptions | None): Storage options for the filesystem.
|
523
|
-
fs (AbstractFileSystem | None): An instance of AbstractFileSystem to use for file operations.
|
524
|
-
hooks_dir (str): The directory where the project hooks will be stored.
|
525
|
-
|
526
|
-
Returns:
|
527
|
-
FlowerPowerProject: An instance of FlowerPowerProject.
|
528
|
-
"""
|
500
|
+
# Note: _check_project_exists expects base_dir to be a string.
|
501
|
+
# If base_dir is None, it will be handled by _check_project_exists or the load/init methods.
|
502
|
+
# We pass fs directly, as _check_project_exists can handle fs being None.
|
529
503
|
# Note: _check_project_exists expects base_dir to be a string.
|
530
504
|
# If base_dir is None, it will be handled by _check_project_exists or the load/init methods.
|
531
505
|
# We pass fs directly, as _check_project_exists can handle fs being None.
|
@@ -0,0 +1,180 @@
|
|
1
|
+
"""Configuration management for pipelines."""
|
2
|
+
|
3
|
+
import os
|
4
|
+
from typing import TYPE_CHECKING, Optional
|
5
|
+
from fsspec_utils import AbstractFileSystem
|
6
|
+
from ..utils.misc import get_filesystem
|
7
|
+
from ..cfg import ProjectConfig, PipelineConfig
|
8
|
+
from ..settings import CONFIG_DIR
|
9
|
+
|
10
|
+
|
11
|
+
if TYPE_CHECKING:
|
12
|
+
from fsspec_utils import AbstractFileSystem
|
13
|
+
|
14
|
+
|
15
|
+
class PipelineConfigManager:
    """Load and cache the project configuration and one pipeline configuration.

    The manager keeps at most one ``ProjectConfig`` and one ``PipelineConfig``
    in memory. A cached pipeline configuration is reused only while the
    requested pipeline name equals the name that was loaded last; requesting
    another name (or passing ``reload=True``) reads the files again.
    """

    def __init__(
        self,
        base_dir: str,
        fs: AbstractFileSystem,
        storage_options: dict,
        cfg_dir: str = CONFIG_DIR,
    ):
        """Initialize the configuration manager.

        Args:
            base_dir: Base directory for the project.
            fs: Filesystem instance; stored on the manager.
            storage_options: Storage options for the filesystem; stored on
                the manager.
            cfg_dir: Configuration directory name, relative to ``base_dir``.
        """
        self._base_dir = base_dir
        self._fs = fs
        self._storage_options = storage_options
        self._cfg_dir = cfg_dir
        self._project_cfg: Optional[ProjectConfig] = None
        self._pipeline_cfg: Optional[PipelineConfig] = None
        self._current_pipeline_name: Optional[str] = None

    def load_project_config(self, reload: bool = False) -> ProjectConfig:
        """Load the project configuration from ``<cfg_dir>/project.yml``.

        Args:
            reload: Read the file again even if a configuration is cached.

        Returns:
            ProjectConfig: The loaded (or cached) project configuration.
        """
        if self._project_cfg is None or reload:
            # NOTE(review): the filesystem is rebuilt from base_dir here; the
            # ``fs`` and ``storage_options`` passed to __init__ are not used
            # for loading -- confirm this is intended.
            fs = get_filesystem(fs=None, fs_type=self._base_dir)
            self._project_cfg = ProjectConfig.from_yaml(
                path=f"{self._cfg_dir}/project.yml",
                fs=fs,
            )

            # Make <base_dir>/pipelines importable.
            self._add_modules_path(["pipelines"])

        return self._project_cfg

    def load_pipeline_config(self, name: str, reload: bool = False) -> PipelineConfig:
        """Load the configuration of the pipeline called ``name``.

        The file is searched for in this order:
        ``<cfg_dir>/pipelines/<name>.yml``, ``<cfg_dir>/pipelines/<name>.yaml``,
        ``<cfg_dir>/<name>.yml``, ``<cfg_dir>/<name>.yaml``.

        Args:
            name: Name of the pipeline to load.
            reload: Read the files again even if this pipeline is cached.
                The project configuration is reloaded with the same flag.

        Returns:
            PipelineConfig: The loaded (or cached) pipeline configuration.

        Raises:
            FileNotFoundError: If none of the candidate files exists.
        """
        is_cached = (
            self._pipeline_cfg is not None
            and self._current_pipeline_name == name
        )
        if is_cached and not reload:
            return self._pipeline_cfg

        # fsspec paths are "/"-separated (the project config path above is
        # built the same way), so join with posixpath rather than os.path.
        import posixpath

        # Ensure project config is loaded first.
        self.load_project_config(reload=reload)

        # The filesystem is derived from base_dir; storage options are not
        # passed to get_filesystem here.
        fs = get_filesystem(fs=None, fs_type=self._base_dir)

        possible_paths = [
            # Preferred location: pipelines/ subdirectory, .yml then .yaml.
            posixpath.join(self._cfg_dir, "pipelines", f"{name}.yml"),
            posixpath.join(self._cfg_dir, "pipelines", f"{name}.yaml"),
            # Fallback to old paths for backward compatibility.
            posixpath.join(self._cfg_dir, f"{name}.yml"),
            posixpath.join(self._cfg_dir, f"{name}.yaml"),
        ]

        cfg_path = None
        probe_errors: list[str] = []
        for candidate in possible_paths:
            try:
                if fs.exists(candidate):
                    cfg_path = candidate
                    break
            except Exception as exc:
                # Probing stays best-effort (a failing candidate must not
                # abort the search), but the failure is kept so it can be
                # reported instead of being silently dropped.
                probe_errors.append(f"{candidate}: {exc!r}")

        if cfg_path is None:
            message = (
                f"Pipeline configuration not found. Searched for: {possible_paths}"
            )
            if probe_errors:
                message += f". Errors while probing: {probe_errors}"
            raise FileNotFoundError(message)

        self._pipeline_cfg = PipelineConfig.from_yaml(
            name=name,
            path=cfg_path,
            fs=fs,
        )
        # Remember which pipeline the cached configuration belongs to.
        self._current_pipeline_name = name

        return self._pipeline_cfg

    @property
    def project_config(self) -> ProjectConfig:
        """Get the current project configuration.

        Returns:
            ProjectConfig: The current project configuration.

        Raises:
            ValueError: If project configuration has not been loaded.
        """
        if self._project_cfg is None:
            raise ValueError("Project configuration not loaded. Call load_project_config() first.")
        return self._project_cfg

    @property
    def pipeline_config(self) -> PipelineConfig:
        """Get the current pipeline configuration.

        Returns:
            PipelineConfig: The current pipeline configuration.

        Raises:
            ValueError: If pipeline configuration has not been loaded.
        """
        if self._pipeline_cfg is None:
            raise ValueError("Pipeline configuration not loaded. Call load_pipeline_config() first.")
        return self._pipeline_cfg

    @property
    def current_pipeline_name(self) -> Optional[str]:
        """Get the name of the currently loaded pipeline.

        Returns:
            str | None: Name of the current pipeline, or None if none loaded.
        """
        return self._current_pipeline_name

    def _add_modules_path(self, python_path: list[str]) -> None:
        """Prepend directories below ``base_dir`` to ``sys.path``.

        Args:
            python_path: Paths relative to ``base_dir``. Entries that are
                already on ``sys.path`` are skipped.
        """
        import sys
        from pathlib import Path

        for path in python_path:
            entry = str(Path(self._base_dir) / path)
            if entry not in sys.path:
                sys.path.insert(0, entry)
|
@@ -0,0 +1,126 @@
|
|
1
|
+
"""Pipeline execution handling."""
|
2
|
+
|
3
|
+
from typing import TYPE_CHECKING, Any, Optional
|
4
|
+
from fsspec_utils import AbstractFileSystem
|
5
|
+
|
6
|
+
from ..cfg.pipeline.run import RunConfig
|
7
|
+
from ..utils.config import merge_run_config_with_kwargs
|
8
|
+
from ..utils.logging import setup_logging
|
9
|
+
|
10
|
+
if TYPE_CHECKING:
|
11
|
+
from .config_manager import PipelineConfigManager
|
12
|
+
from .registry import PipelineRegistry
|
13
|
+
|
14
|
+
|
15
|
+
class PipelineExecutor:
    """Run pipelines by name, synchronously or asynchronously.

    For every run the executor:
    - loads the pipeline configuration through the configuration manager,
    - picks the run configuration (explicit argument or pipeline default),
    - merges keyword overrides into it,
    - configures logging when the run configuration carries a log level,
    - fetches the pipeline object from the registry and delegates to it.
    """

    def __init__(
        self,
        config_manager: "PipelineConfigManager",
        registry: "PipelineRegistry",
        project_context: Optional[Any] = None,
    ):
        """Initialize the pipeline executor.

        Args:
            config_manager: Configuration manager for accessing pipeline configs.
            registry: Pipeline registry for accessing pipeline objects.
            project_context: Optional project context, forwarded to the
                registry when a pipeline object is requested.
        """
        self._config_manager = config_manager
        self._registry = registry
        self._project_context = project_context

    def _prepare_run(
        self,
        name: str,
        run_config: Optional["RunConfig"],
        overrides: dict[str, Any],
    ) -> tuple[Any, "RunConfig"]:
        """Resolve the pipeline object and effective run configuration.

        Shared by ``run`` and ``run_async`` so both follow the same steps.

        Args:
            name: Name of the pipeline.
            run_config: Explicit run configuration, or None to use the
                pipeline's default.
            overrides: Keyword overrides to merge into the run configuration.

        Returns:
            A ``(pipeline, run_config)`` pair ready for execution.
        """
        pipeline_config = self._config_manager.load_pipeline_config(name=name)

        # Fall back to the pipeline's default run configuration.
        # NOTE(review): in that case the default object itself is handed to
        # merge_run_config_with_kwargs; if that helper updates its argument
        # in place, overrides would persist on the pipeline config -- confirm.
        run_config = run_config or pipeline_config.run

        if overrides:
            run_config = merge_run_config_with_kwargs(run_config, overrides)

        # Only touch the logging setup when this run asks for a level.
        if run_config.log_level is not None:
            setup_logging(level=run_config.log_level)

        pipeline = self._registry.get_pipeline(
            name=name,
            project_context=self._project_context,
        )
        return pipeline, run_config

    def run(
        self,
        name: str,
        run_config: Optional["RunConfig"] = None,
        **kwargs
    ) -> dict[str, Any]:
        """Execute a pipeline synchronously and return its results.

        Args:
            name: Name of the pipeline to run.
            run_config: Run configuration object containing the execution
                parameters. If None, the default configuration from the
                pipeline is used.
            **kwargs: Additional parameters to override the run_config.

        Returns:
            dict[str, Any]: Results of pipeline execution.

        Raises:
            Exception: Errors raised while loading the configuration, by the
                registry, or by the pipeline itself propagate unchanged.
        """
        pipeline, effective_config = self._prepare_run(name, run_config, kwargs)
        return pipeline.run(run_config=effective_config)

    async def run_async(
        self,
        name: str,
        run_config: Optional["RunConfig"] = None,
        **kwargs
    ) -> dict[str, Any]:
        """Execute a pipeline asynchronously and return its results.

        Args:
            name: Name of the pipeline to run.
            run_config: Run configuration object. If None, the default
                configuration from the pipeline is used.
            **kwargs: Additional parameters to override the run_config.

        Returns:
            dict[str, Any]: Results of pipeline execution.
        """
        pipeline, effective_config = self._prepare_run(name, run_config, kwargs)
        return await pipeline.run_async(run_config=effective_config)
|