FlowerPower 0.20.0__py3-none-any.whl → 0.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowerpower/__init__.py +2 -6
- flowerpower/cfg/__init__.py +4 -11
- flowerpower/cfg/base.py +29 -25
- flowerpower/cfg/pipeline/__init__.py +3 -3
- flowerpower/cfg/pipeline/_schedule.py +32 -0
- flowerpower/cfg/pipeline/adapter.py +0 -5
- flowerpower/cfg/pipeline/builder.py +377 -0
- flowerpower/cfg/pipeline/run.py +36 -0
- flowerpower/cfg/project/__init__.py +8 -21
- flowerpower/cfg/project/adapter.py +0 -12
- flowerpower/cli/__init__.py +2 -21
- flowerpower/cli/mqtt.py +0 -6
- flowerpower/cli/pipeline.py +10 -4
- flowerpower/flowerpower.py +275 -585
- flowerpower/pipeline/base.py +19 -10
- flowerpower/pipeline/io.py +52 -46
- flowerpower/pipeline/manager.py +149 -89
- flowerpower/pipeline/pipeline.py +159 -87
- flowerpower/pipeline/registry.py +68 -33
- flowerpower/pipeline/visualizer.py +4 -4
- flowerpower/plugins/{_io → io}/__init__.py +1 -1
- flowerpower/plugins/mqtt/__init__.py +7 -11
- flowerpower/settings/__init__.py +0 -2
- flowerpower/settings/{backend.py → _backend.py} +0 -19
- flowerpower/settings/logging.py +1 -1
- flowerpower/utils/logging.py +24 -12
- flowerpower/utils/misc.py +17 -0
- flowerpower-0.21.0.dist-info/METADATA +463 -0
- flowerpower-0.21.0.dist-info/RECORD +44 -0
- flowerpower/cfg/pipeline/schedule.py +0 -74
- flowerpower/cfg/project/job_queue.py +0 -111
- flowerpower/cli/job_queue.py +0 -1329
- flowerpower/job_queue/__init__.py +0 -205
- flowerpower/job_queue/base.py +0 -611
- flowerpower/job_queue/rq/__init__.py +0 -10
- flowerpower/job_queue/rq/_trigger.py +0 -37
- flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +0 -226
- flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -228
- flowerpower/job_queue/rq/manager.py +0 -1893
- flowerpower/job_queue/rq/setup.py +0 -154
- flowerpower/job_queue/rq/utils.py +0 -69
- flowerpower/mqtt.py +0 -12
- flowerpower/plugins/mqtt/cfg.py +0 -17
- flowerpower/plugins/mqtt/manager.py +0 -962
- flowerpower/settings/job_queue.py +0 -31
- flowerpower-0.20.0.dist-info/METADATA +0 -693
- flowerpower-0.20.0.dist-info/RECORD +0 -58
- {flowerpower-0.20.0.dist-info → flowerpower-0.21.0.dist-info}/WHEEL +0 -0
- {flowerpower-0.20.0.dist-info → flowerpower-0.21.0.dist-info}/entry_points.txt +0 -0
- {flowerpower-0.20.0.dist-info → flowerpower-0.21.0.dist-info}/licenses/LICENSE +0 -0
- {flowerpower-0.20.0.dist-info → flowerpower-0.21.0.dist-info}/top_level.txt +0 -0
flowerpower/__init__.py
CHANGED
@@ -1,22 +1,18 @@
 import importlib.metadata
 
 from .cfg import Config, PipelineConfig, ProjectConfig
-from .flowerpower import FlowerPower, FlowerPowerProject
-from .flowerpower import init as init_project  # noqa: E402
-from .job_queue import JobQueueManager  # noqa: E402
+from .flowerpower import FlowerPower, FlowerPowerProject, create_project
 from .pipeline import PipelineManager
 
 __version__ = importlib.metadata.version("FlowerPower")
 
 __all__ = [
     "__version__",
-    "
+    "create_project",
     "FlowerPower",
     "FlowerPowerProject",
     "PipelineManager",
-    "JobQueueManager",
     "Config",
     "ProjectConfig",
     "PipelineConfig",
-    "PipelineConfig",
 ]
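Note: the 0.20.0 entry point `init` (re-exported as `init_project`) is replaced by `create_project`, and `JobQueueManager` leaves the public API entirely. A minimal sketch of the new import surface — the exact `create_project` signature is not shown in this diff, so the keyword arguments below are assumptions:

# Hypothetical 0.21.0 usage; only the names create_project, FlowerPower,
# and FlowerPowerProject are confirmed by the import lines above.
from flowerpower import FlowerPowerProject, create_project

# name/base_dir parameters are assumed from the old init helper's docs.
project: FlowerPowerProject = create_project(name="my_project", base_dir=".")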
flowerpower/cfg/__init__.py
CHANGED
@@ -4,6 +4,7 @@ import msgspec
 from fsspec_utils import AbstractFileSystem, BaseStorageOptions, filesystem
 from munch import Munch
 
+from ..settings import CONFIG_DIR, PIPELINES_DIR
 from .base import BaseConfig
 from .pipeline import PipelineConfig, init_pipeline_config
 from .project import ProjectConfig, init_project_config
@@ -49,7 +50,6 @@ class Config(BaseConfig):
         base_dir: str = ".",
         name: str | None = None,
         pipeline_name: str | None = None,
-        job_queue_type: str | None = None,
         fs: AbstractFileSystem | None = None,
         storage_options: dict | BaseStorageOptions | None = {},
     ):
@@ -59,7 +59,6 @@ class Config(BaseConfig):
             base_dir (str, optional): Base directory for configurations. Defaults to ".".
             name (str | None, optional): Project name. Defaults to None.
             pipeline_name (str | None, optional): Pipeline name. Defaults to None.
-            job_queue_type (str | None, optional): Type of job queue to use. Defaults to None.
             fs (AbstractFileSystem | None, optional): Filesystem to use. Defaults to None.
             storage_options (dict | Munch, optional): Options for filesystem. Defaults to empty Munch.
 
@@ -72,7 +71,6 @@ class Config(BaseConfig):
                 base_dir="my_project",
                 name="test_project",
                 pipeline_name="etl",
-                job_queue_type="rq"
             )
             ```
         """
@@ -83,7 +81,6 @@ class Config(BaseConfig):
         project = ProjectConfig.load(
             base_dir=base_dir,
             name=name,
-            job_queue_type=job_queue_type,
             fs=fs,
             storage_options=storage_options,
         )
@@ -127,11 +124,11 @@ class Config(BaseConfig):
             self.base_dir, cached=True, dirfs=True, **storage_options
         )
 
-        if not self.fs.exists(
-            self.fs.makedirs(
+        if not self.fs.exists(CONFIG_DIR):
+            self.fs.makedirs(CONFIG_DIR)
 
         if pipeline:
-            self.fs.makedirs(
+            self.fs.makedirs(PIPELINES_DIR, exist_ok=True)
             h_params = self.pipeline.pop("h_params") if self.pipeline.h_params else None
             self.pipeline.to_yaml(
                 path=f"conf/pipelines/{self.pipeline.name}.yml", fs=self.fs
@@ -210,7 +207,6 @@ def init_config(
     base_dir: str = ".",
     name: str | None = None,
     pipeline_name: str | None = None,
-    job_queue_type: str | None = None,
    fs: AbstractFileSystem | None = None,
    storage_options: dict | BaseStorageOptions | None = {},
 ):
@@ -223,7 +219,6 @@ def init_config(
        base_dir (str, optional): Base directory for configurations. Defaults to ".".
        name (str | None, optional): Project name. Defaults to None.
        pipeline_name (str | None, optional): Pipeline name. Defaults to None.
-       job_queue_type (str | None, optional): Type of job queue to use. Defaults to None.
        fs (AbstractFileSystem | None, optional): Filesystem to use. Defaults to None.
        storage_options (dict | Munch, optional): Options for filesystem. Defaults to empty Munch.
 
@@ -236,7 +231,6 @@ def init_config(
            base_dir="my_project",
            name="test_project",
            pipeline_name="data-pipeline",
-           job_queue_type="rq"
        )
        ```
    """
@@ -249,7 +243,6 @@ def init_config(
    project_cfg = init_project_config(
        base_dir=base_dir,
        name=name,
-       job_queue_type=job_queue_type,
        fs=fs,
        storage_options=storage_options,
    )
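With `job_queue_type` removed from both `Config.load` and `init_config`, the call shape reduces to the following (taken directly from the updated docstring example above):

from flowerpower.cfg import Config

# 0.21.0: no job_queue_type keyword anymore; the conf/ and conf/pipelines/
# directories now come from settings.CONFIG_DIR / settings.PIPELINES_DIR.
config = Config.load(
    base_dir="my_project",
    name="test_project",
    pipeline_name="etl",
)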
flowerpower/cfg/base.py
CHANGED
@@ -2,7 +2,8 @@ import copy
 from typing import Any, Self
 
 import msgspec
-from
+from fsspec_utils import AbstractFileSystem, filesystem
+from ..utils.misc import get_filesystem
 
 
 class BaseConfig(msgspec.Struct, kw_only=True):
@@ -20,12 +21,10 @@ class BaseConfig(msgspec.Struct, kw_only=True):
         Raises:
             NotImplementedError: If the filesystem does not support writing files.
         """
-
-        fs = filesystem("file")
+        fs = get_filesystem(fs)
         try:
             with fs.open(path, "wb") as f:
                 f.write(msgspec.yaml.encode(self, order="deterministic"))
-                # yaml.dump(self.to_dict(), f, default_flow_style=False)
         except NotImplementedError:
             raise NotImplementedError("The filesystem does not support writing files.")
 
@@ -54,29 +53,42 @@ class BaseConfig(msgspec.Struct, kw_only=True):
             An instance of the class with the values from the YAML file.
 
         """
-
-        fs = filesystem("file")
+        fs = get_filesystem(fs)
         with fs.open(path) as f:
-            # data = yaml.full_load(f)
-            # return cls.from_dict(data)
             return msgspec.yaml.decode(f.read(), type=cls, strict=False)
 
-    def
+    def _apply_dict_updates(self, target: Self, d: dict[str, Any]) -> None:
+        """
+        Helper method to apply dictionary updates to a target instance.
+
+        Args:
+            target: The target instance to apply updates to.
+            d: The dictionary containing updates to apply.
+        """
         for k, v in d.items():
-            if hasattr(
-                current_value = getattr(
+            if hasattr(target, k):
+                current_value = getattr(target, k)
                 if isinstance(current_value, dict) and isinstance(v, dict):
                     current_value.update(v)
                 else:
-                    setattr(
+                    setattr(target, k, v)
             else:
-
+                # Use object.__setattr__ to bypass msgspec.Struct's restrictions
+                object.__setattr__(target, k, v)
+
+    def update(self, d: dict[str, Any]) -> None:
+        """
+        Updates this instance with values from the provided dictionary.
+
+        Args:
+            d: The dictionary containing updates to apply.
+        """
+        self._apply_dict_updates(self, d)
 
     def merge_dict(self, d: dict[str, Any]) -> Self:
        """
        Creates a copy of this instance and updates the copy with values
-       from the provided dictionary
-       its default value. The original instance (self) is not modified.
+       from the provided dictionary. The original instance (self) is not modified.
 
        Args:
            d: The dictionary to get values from.
@@ -84,16 +96,8 @@ class BaseConfig(msgspec.Struct, kw_only=True):
        Returns:
            A new instance of the struct with updated values.
        """
-       self_copy = copy.
-
-       if hasattr(self_copy, k):
-           current_value = getattr(self_copy, k)
-           if isinstance(current_value, dict) and isinstance(v, dict):
-               current_value.update(v)
-           else:
-               setattr(self_copy, k, v)
-       else:
-           setattr(self_copy, k, v)
+       self_copy = copy.deepcopy(self)
+       self._apply_dict_updates(self_copy, d)
        return self_copy
 
    def merge(self, source: Self) -> Self:
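The refactor routes both `update` (in-place) and `merge_dict` (copy-then-update) through the shared `_apply_dict_updates` helper. A short sketch of the resulting semantics, assuming a trivial `BaseConfig` subclass (`ExampleConfig` is illustrative, not part of the package):

import msgspec
from flowerpower.cfg.base import BaseConfig

class ExampleConfig(BaseConfig):
    name: str | None = None
    params: dict = msgspec.field(default_factory=dict)

cfg = ExampleConfig(name="etl", params={"a": 1})
merged = cfg.merge_dict({"params": {"b": 2}})  # deep-copies self, then applies updates
assert merged.params == {"a": 1, "b": 2}       # nested dicts are update()d, not replaced
assert cfg.params == {"a": 1}                  # the original instance is unmodified
cfg.update({"name": "etl_v2"})                 # update() mutates cfg in place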
flowerpower/cfg/pipeline/__init__.py
CHANGED
@@ -9,7 +9,7 @@ from .adapter import AdapterConfig
 from .run import ExecutorConfig as ExecutorConfig
 from .run import RunConfig
 from .run import WithAdapterConfig as WithAdapterConfig
-from .schedule import ScheduleConfig
+#from .schedule import ScheduleConfig
 
 
 class PipelineConfig(BaseConfig):
@@ -22,7 +22,7 @@ class PipelineConfig(BaseConfig):
     Attributes:
         name (str | None): The name of the pipeline.
         run (RunConfig): Configuration for pipeline execution.
-        schedule (ScheduleConfig): Configuration for pipeline scheduling.
+        schedule (ScheduleConfig): Configuration for pipeline scheduling. DEPRECATED.
         params (dict): Pipeline parameters.
         adapter (AdapterConfig): Configuration for the pipeline adapter.
         h_params (dict): Hamilton-formatted parameters.
@@ -45,7 +45,7 @@ class PipelineConfig(BaseConfig):
 
     name: str | None = msgspec.field(default=None)
     run: RunConfig = msgspec.field(default_factory=RunConfig)
-
+    #: ScheduleConfig = msgspec.field(default_factory=ScheduleConfig)
     params: dict = msgspec.field(default_factory=dict)
     adapter: AdapterConfig = msgspec.field(default_factory=AdapterConfig)
     h_params: dict = msgspec.field(default_factory=dict)
flowerpower/cfg/pipeline/_schedule.py
ADDED
@@ -0,0 +1,32 @@
+import datetime as dt
+
+import msgspec
+from munch import munchify
+
+from ..base import BaseConfig
+
+
+class ScheduleConfig(BaseConfig):
+    cron: str | dict | None = msgspec.field(default=None)
+    interval: str | int | dict | None = msgspec.field(default=None)
+    date: str | None = msgspec.field(default=None)
+
+    def __post_init__(self):
+        if isinstance(self.date, str):
+            try:
+                self.date = dt.datetime.fromisoformat(self.date)
+            except ValueError:
+                raise ValueError(
+                    f"Invalid date format: {self.date}. Expected ISO format."
+                )
+        if isinstance(self.cron, dict):
+            self.cron = munchify(self.cron)
+        if isinstance(self.interval, dict):
+            self.interval = munchify(self.interval)
+
+
+# class ScheduleConfig(BaseConfig):
+#     run: ScheduleRunConfig = msgspec.field(default_factory=ScheduleRunConfig)
+#     trigger: ScheduleTriggerConfig = msgspec.field(
+#         default_factory=ScheduleTriggerConfig
+#     )
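`_schedule.py` preserves the old schedule model under a private name after `schedule.py` was deleted (+0 -74 in the file list above). The `__post_init__` coercions in action — a short illustration of the added class:

from flowerpower.cfg.pipeline._schedule import ScheduleConfig

cfg = ScheduleConfig(cron={"minute": "*/5"}, date="2025-01-01T00:00:00")
cfg.cron.minute  # dicts are munchified, so attribute access works -> "*/5"
cfg.date         # ISO strings are parsed -> datetime.datetime(2025, 1, 1, 0, 0)

ScheduleConfig(date="not-a-date")  # raises ValueError: Invalid date format ...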
flowerpower/cfg/pipeline/adapter.py
CHANGED
@@ -39,17 +39,12 @@ class MLFlowConfig(BaseConfig):
         self.run_tags = munchify(self.run_tags)
 
 
-# class OpenLineageConfig(BaseConfig):
-#     namespace : str | None = msgspec.field(default=None)
-#     job_name : str | None = msgspec.field(default=None)
-
 
 class AdapterConfig(BaseConfig):
     hamilton_tracker: HamiltonTracerConfig = msgspec.field(
         default_factory=HamiltonTracerConfig
     )
     mlflow: MLFlowConfig = msgspec.field(default_factory=MLFlowConfig)
-    # openlineage: OpenLineageConfig | dict = msgspec.field(default_factory=OpenLineageConfig)
 
     def __post_init__(self):
         if isinstance(self.hamilton_tracker, dict):
flowerpower/cfg/pipeline/builder.py
ADDED
@@ -0,0 +1,377 @@
+import copy
+from typing import Any, Callable, Optional, Union
+
+from fsspec_utils import AbstractFileSystem, BaseStorageOptions, filesystem
+
+from ... import settings
+from ..base import BaseConfig
+from .adapter import AdapterConfig as PipelineAdapterConfig
+from .run import ExecutorConfig, RunConfig, WithAdapterConfig
+from ..project.adapter import AdapterConfig as ProjectAdapterConfig
+
+
+class RunConfigBuilder:
+    """A fluent builder for creating RunConfig objects.
+
+    This builder provides a clean interface for constructing RunConfig objects
+    with proper configuration merging from project and pipeline defaults.
+    """
+
+    def __init__(
+        self,
+        pipeline_name: str,
+        base_dir: str | None = None,
+        fs: AbstractFileSystem | None = None,
+        storage_options: dict | BaseStorageOptions | None = {}
+    ):
+        """Initialize the RunConfigBuilder.
+
+        Args:
+            pipeline_name: Name of the pipeline to build config for
+            base_dir: Base directory for the project (defaults to current directory)
+            fs: Optional filesystem instance
+            storage_options: Options for filesystem access
+        """
+        self.pipeline_name = pipeline_name
+        self.base_dir = base_dir or "."
+        self._fs = fs
+        self._storage_options = storage_options
+
+        # Initialize with empty config
+        self._config = RunConfig()
+
+        # Load defaults from pipeline and project configs
+        self._load_defaults()
+
+    def _load_defaults(self):
+        """Load default configuration from pipeline and project YAML files."""
+        if self._fs is None:
+            self._fs = filesystem(
+                self.base_dir,
+                cached=False,
+                dirfs=True,
+                storage_options=self._storage_options
+            )
+
+        # Load pipeline configuration
+        try:
+            from .. import PipelineConfig
+            pipeline_cfg = PipelineConfig.load(
+                base_dir=self.base_dir,
+                name=self.pipeline_name,
+                fs=self._fs,
+                storage_options=self._storage_options
+            )
+            if pipeline_cfg and pipeline_cfg.run:
+                self._config = copy.deepcopy(pipeline_cfg.run)
+        except Exception:
+            # If pipeline config doesn't exist, use defaults
+            pass
+
+        # Load project configuration for adapter defaults
+        try:
+            from .. import ProjectConfig
+            project_cfg = ProjectConfig.load(
+                base_dir=self.base_dir,
+                fs=self._fs,
+                storage_options=self._storage_options
+            )
+            if project_cfg and project_cfg.adapter:
+                # Store project adapter config for merging
+                self._project_adapter_cfg = project_cfg.adapter
+            else:
+                self._project_adapter_cfg = ProjectAdapterConfig()
+        except Exception:
+            self._project_adapter_cfg = ProjectAdapterConfig()
+
+    def with_inputs(self, inputs: dict) -> "RunConfigBuilder":
+        """Set pipeline input values.
+
+        Args:
+            inputs: Dictionary of input values to override defaults
+
+        Returns:
+            Self for method chaining
+        """
+        if inputs:
+            if self._config.inputs is None:
+                self._config.inputs = {}
+            self._config.inputs.update(inputs)
+        return self
+
+    def with_final_vars(self, final_vars: list[str]) -> "RunConfigBuilder":
+        """Set the final output variables.
+
+        Args:
+            final_vars: List of variable names to return from execution
+
+        Returns:
+            Self for method chaining
+        """
+        self._config.final_vars = final_vars
+        return self
+
+    def with_config(self, config: dict) -> "RunConfigBuilder":
+        """Set Hamilton driver configuration.
+
+        Args:
+            config: Dictionary of configuration values for Hamilton
+
+        Returns:
+            Self for method chaining
+        """
+        if config:
+            if self._config.config is None:
+                self._config.config = {}
+            self._config.config.update(config)
+        return self
+
+    def with_cache(self, cache: Union[dict, bool]) -> "RunConfigBuilder":
+        """Set cache configuration.
+
+        Args:
+            cache: Cache configuration (dict) or enable/disable flag (bool)
+
+        Returns:
+            Self for method chaining
+        """
+        self._config.cache = cache
+        return self
+
+    def with_executor(self, executor_type: str, **kwargs) -> "RunConfigBuilder":
+        """Set executor configuration.
+
+        Args:
+            executor_type: Type of executor ('synchronous', 'threadpool', 'processpool', 'ray', 'dask')
+            **kwargs: Additional executor configuration options
+
+        Returns:
+            Self for method chaining
+        """
+        if not self._config.executor:
+            self._config.executor = ExecutorConfig()
+
+        self._config.executor.type = executor_type
+
+        # Apply additional executor options
+        for key, value in kwargs.items():
+            if hasattr(self._config.executor, key):
+                setattr(self._config.executor, key, value)
+
+        return self
+
+    def with_adapter(self, adapter_name: str, **kwargs) -> "RunConfigBuilder":
+        """Enable and configure a specific adapter.
+
+        Args:
+            adapter_name: Name of the adapter ('hamilton_tracker', 'mlflow', 'opentelemetry', etc.)
+            **kwargs: Adapter-specific configuration options
+
+        Returns:
+            Self for method chaining
+        """
+        if not self._config.with_adapter:
+            self._config.with_adapter = WithAdapterConfig()
+
+        # Enable the adapter
+        if hasattr(self._config.with_adapter, adapter_name):
+            setattr(self._config.with_adapter, adapter_name, True)
+
+        # Store adapter configuration for merging
+        if not hasattr(self, '_adapter_configs'):
+            self._adapter_configs = {}
+        self._adapter_configs[adapter_name] = kwargs
+
+        return self
+
+    def with_retries(
+        self,
+        max_attempts: int = 3,
+        delay: float = 1.0,
+        jitter: float = 0.1,
+        exceptions: Optional[list] = None
+    ) -> "RunConfigBuilder":
+        """Configure retry behavior.
+
+        Args:
+            max_attempts: Maximum number of retry attempts
+            delay: Base delay between retries in seconds
+            jitter: Random jitter factor to add to retry delay
+            exceptions: List of exception types that should trigger retries
+
+        Returns:
+            Self for method chaining
+        """
+        self._config.max_retries = max_attempts
+        self._config.retry_delay = delay
+        self._config.jitter_factor = jitter
+
+        if exceptions:
+            self._config.retry_exceptions = exceptions
+
+        return self
+
+    def with_callbacks(
+        self,
+        on_success: Optional[Callable] = None,
+        on_failure: Optional[Callable] = None
+    ) -> "RunConfigBuilder":
+        """Set success and failure callbacks.
+
+        Args:
+            on_success: Callback function to execute on successful completion
+            on_failure: Callback function to execute on failure
+
+        Returns:
+            Self for method chaining
+        """
+        if on_success:
+            self._config.on_success = on_success
+        if on_failure:
+            self._config.on_failure = on_failure
+
+        return self
+
+    def with_log_level(self, log_level: str) -> "RunConfigBuilder":
+        """Set the log level for execution.
+
+        Args:
+            log_level: Log level ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL')
+
+        Returns:
+            Self for method chaining
+        """
+        self._config.log_level = log_level
+        return self
+
+    def with_reload(self, reload: bool = True) -> "RunConfigBuilder":
+        """Set whether to reload the pipeline module.
+
+        Args:
+            reload: Whether to force reload of the pipeline module
+
+        Returns:
+            Self for method chaining
+        """
+        self._config.reload = reload
+        return self
+
+    def with_pipeline_adapter_config(self, config: dict) -> "RunConfigBuilder":
+        """Set pipeline-specific adapter configuration.
+
+        Args:
+            config: Pipeline adapter configuration dictionary
+
+        Returns:
+            Self for method chaining
+        """
+        if config:
+            if self._config.pipeline_adapter_cfg is None:
+                self._config.pipeline_adapter_cfg = {}
+            self._config.pipeline_adapter_cfg.update(config)
+        return self
+
+    def with_project_adapter_config(self, config: dict) -> "RunConfigBuilder":
+        """Set project-level adapter configuration.
+
+        Args:
+            config: Project adapter configuration dictionary
+
+        Returns:
+            Self for method chaining
+        """
+        if config:
+            if self._config.project_adapter_cfg is None:
+                self._config.project_adapter_cfg = {}
+            self._config.project_adapter_cfg.update(config)
+        return self
+
+    def with_custom_adapter(self, name: str, adapter: Any) -> "RunConfigBuilder":
+        """Add a custom adapter instance.
+
+        Args:
+            name: Name/identifier for the adapter
+            adapter: Adapter instance
+
+        Returns:
+            Self for method chaining
+        """
+        if self._config.adapter is None:
+            self._config.adapter = {}
+        self._config.adapter[name] = adapter
+        return self
+
+    def build(self) -> RunConfig:
+        """Build the final RunConfig object.
+
+        This method merges all configurations and validates the final result.
+
+        Returns:
+            Fully configured RunConfig object
+
+        Raises:
+            ValueError: If configuration is invalid
+        """
+        # Create a deep copy to avoid modifying the internal state
+        final_config = copy.deepcopy(self._config)
+
+        # Merge adapter configurations
+        if hasattr(self, '_adapter_configs') and self._adapter_configs:
+            self._merge_adapter_configs(final_config)
+
+        # Validate configuration
+        self._validate_config(final_config)
+
+        return final_config
+
+    def _merge_adapter_configs(self, config: RunConfig):
+        """Merge adapter configurations from builder with project/pipeline configs."""
+        if not config.pipeline_adapter_cfg:
+            config.pipeline_adapter_cfg = {}
+
+        if not config.project_adapter_cfg:
+            config.project_adapter_cfg = {}
+
+        # Merge project adapter defaults
+        for adapter_name, adapter_config in self._adapter_configs.items():
+            if adapter_name in ['hamilton_tracker', 'mlflow', 'opentelemetry']:
+                # Merge with project config
+                if hasattr(self._project_adapter_cfg, adapter_name):
+                    project_config = getattr(self._project_adapter_cfg, adapter_name).to_dict()
+                    adapter_config = {**project_config, **adapter_config}
+
+            # Store in pipeline adapter config
+            if adapter_name not in config.pipeline_adapter_cfg:
+                config.pipeline_adapter_cfg[adapter_name] = {}
+            config.pipeline_adapter_cfg[adapter_name].update(adapter_config)
+
+    def _validate_config(self, config: RunConfig):
+        """Validate the final configuration.
+
+        Args:
+            config: RunConfig object to validate
+
+        Raises:
+            ValueError: If configuration is invalid
+        """
+        # Validate retry configuration
+        if config.max_retries < 0:
+            raise ValueError("max_retries must be non-negative")
+
+        if config.retry_delay < 0:
+            raise ValueError("retry_delay must be non-negative")
+
+        if config.jitter_factor is not None and config.jitter_factor < 0:
+            raise ValueError("jitter_factor must be non-negative")
+
+        # Validate executor configuration
+        if config.executor and config.executor.type:
+            valid_executors = ['synchronous', 'threadpool', 'processpool', 'ray', 'dask']
+            if config.executor.type not in valid_executors:
+                raise ValueError(f"Invalid executor type: {config.executor.type}")
+
+        # Validate log level
+        if config.log_level:
+            valid_levels = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
+            if config.log_level.upper() not in valid_levels:
+                raise ValueError(f"Invalid log level: {config.log_level}")