FlowerPower: flowerpower-0.30.0-py3-none-any.whl → flowerpower-0.31.1-py3-none-any.whl
This diff compares the publicly released contents of two package versions as they appear in their public registry. It is provided for informational purposes only.
- flowerpower/cfg/__init__.py +143 -25
- flowerpower/cfg/base.py +132 -11
- flowerpower/cfg/exceptions.py +53 -0
- flowerpower/cfg/pipeline/__init__.py +151 -35
- flowerpower/cfg/pipeline/adapter.py +1 -0
- flowerpower/cfg/pipeline/builder.py +24 -25
- flowerpower/cfg/pipeline/builder_adapter.py +142 -0
- flowerpower/cfg/pipeline/builder_executor.py +101 -0
- flowerpower/cfg/pipeline/run.py +99 -40
- flowerpower/cfg/project/__init__.py +59 -14
- flowerpower/cfg/project/adapter.py +6 -0
- flowerpower/cli/__init__.py +8 -2
- flowerpower/cli/cfg.py +0 -38
- flowerpower/cli/pipeline.py +121 -83
- flowerpower/cli/utils.py +120 -71
- flowerpower/flowerpower.py +94 -120
- flowerpower/pipeline/config_manager.py +180 -0
- flowerpower/pipeline/executor.py +126 -0
- flowerpower/pipeline/lifecycle_manager.py +231 -0
- flowerpower/pipeline/manager.py +121 -274
- flowerpower/pipeline/pipeline.py +66 -278
- flowerpower/pipeline/registry.py +45 -4
- flowerpower/utils/__init__.py +19 -0
- flowerpower/utils/adapter.py +286 -0
- flowerpower/utils/callback.py +73 -67
- flowerpower/utils/config.py +306 -0
- flowerpower/utils/executor.py +178 -0
- flowerpower/utils/filesystem.py +194 -0
- flowerpower/utils/misc.py +312 -138
- flowerpower/utils/security.py +221 -0
- {flowerpower-0.30.0.dist-info → flowerpower-0.31.1.dist-info}/METADATA +2 -2
- flowerpower-0.31.1.dist-info/RECORD +53 -0
- flowerpower/cfg/pipeline/_schedule.py +0 -32
- flowerpower-0.30.0.dist-info/RECORD +0 -42
- {flowerpower-0.30.0.dist-info → flowerpower-0.31.1.dist-info}/WHEEL +0 -0
- {flowerpower-0.30.0.dist-info → flowerpower-0.31.1.dist-info}/entry_points.txt +0 -0
- {flowerpower-0.30.0.dist-info → flowerpower-0.31.1.dist-info}/licenses/LICENSE +0 -0
- {flowerpower-0.30.0.dist-info → flowerpower-0.31.1.dist-info}/top_level.txt +0 -0
flowerpower/utils/config.py (new file)
@@ -0,0 +1,306 @@
"""
Configuration utilities for FlowerPower.

This module provides shared configuration handling utilities to avoid code duplication.
"""

from typing import Any, Dict
from dataclasses import fields

from ..cfg.pipeline.run import (
    RunConfig,
    ExecutorConfig,
    WithAdapterConfig,
)
from .security import validate_config_dict, validate_callback_function
def _merge_inputs(run_config: RunConfig, value):
    """Merge inputs into run config."""
    validate_config_dict(value)
    if run_config.inputs is None:
        run_config.inputs = value
    else:
        run_config.inputs.update(value)


def _merge_config(run_config: RunConfig, value):
    """Merge config into run config."""
    validate_config_dict(value)
    if run_config.config is None:
        run_config.config = value
    else:
        run_config.config.update(value)


def _set_cache(run_config: RunConfig, value):
    """Set cache in run config."""
    run_config.cache = value


def _merge_adapter(run_config: RunConfig, value):
    """Merge adapter into run config."""
    if run_config.adapter is None:
        run_config.adapter = value
    else:
        run_config.adapter.update(value)


def _set_executor_cfg(run_config: RunConfig, value):
    """Set executor config."""
    if isinstance(value, str):
        run_config.executor = ExecutorConfig(type=value)
    elif isinstance(value, dict):
        run_config.executor = ExecutorConfig.from_dict(value)
    elif isinstance(value, ExecutorConfig):
        run_config.executor = value


def _set_with_adapter_cfg(run_config: RunConfig, value):
    """Set with adapter config."""
    if isinstance(value, dict):
        run_config.with_adapter = WithAdapterConfig.from_dict(value)
    elif isinstance(value, WithAdapterConfig):
        run_config.with_adapter = value


def _set_pipeline_adapter_cfg(run_config: RunConfig, value):
    """Set pipeline adapter config."""
    run_config.pipeline_adapter_cfg = value


def _set_project_adapter_cfg(run_config: RunConfig, value):
    """Set project adapter config."""
    run_config.project_adapter_cfg = value


_attr_handlers = {
    'inputs': _merge_inputs,
    'config': _merge_config,
    'cache': _set_cache,
    'adapter': _merge_adapter,
    'executor_cfg': _set_executor_cfg,
    'with_adapter_cfg': _set_with_adapter_cfg,
    'pipeline_adapter_cfg': _set_pipeline_adapter_cfg,
    'project_adapter_cfg': _set_project_adapter_cfg,
}


def merge_run_config_with_kwargs(run_config: RunConfig, kwargs: Dict[str, Any]) -> RunConfig:
    """Merge kwargs into a RunConfig object.

    This utility function updates the RunConfig object with values from kwargs,
    handling different types of attributes appropriately.

    Args:
        run_config: The RunConfig object to update
        kwargs: Dictionary of additional parameters to merge

    Returns:
        RunConfig: Updated RunConfig object
    """
    # Handle complex attributes with specific logic
    for attr, handler in _attr_handlers.items():
        if attr in kwargs and kwargs[attr] is not None:
            handler(run_config, kwargs[attr])

    # Handle simple attributes
    simple_attrs = [
        'final_vars', 'reload', 'log_level', 'max_retries', 'retry_delay',
        'jitter_factor', 'retry_exceptions', 'on_success', 'on_failure'
    ]

    for attr in simple_attrs:
        if attr in kwargs and kwargs[attr] is not None:
            value = kwargs[attr]
            # Validate callbacks for security
            if attr in ['on_success', 'on_failure']:
                validate_callback_function(value)
            setattr(run_config, attr, value)

    return run_config


class RunConfigBuilder:
    """Builder pattern for constructing RunConfig objects with fluent interface."""

    def __init__(self, base_config: RunConfig | None = None):
        self.config = base_config or RunConfig()

    def with_inputs(self, inputs: Dict[str, Any] | None) -> 'RunConfigBuilder':
        """Set inputs configuration."""
        if inputs is not None:
            validate_config_dict(inputs)
            self.config.inputs = inputs
        return self

    def with_config(self, config: Dict[str, Any] | None) -> 'RunConfigBuilder':
        """Set pipeline configuration."""
        if config is not None:
            validate_config_dict(config)
            self.config.config = config
        return self

    def with_cache(self, cache: bool | None) -> 'RunConfigBuilder':
        """Set caching configuration."""
        if cache is not None:
            self.config.cache = cache
        return self

    def with_adapter(self, adapter: Dict[str, Any] | None) -> 'RunConfigBuilder':
        """Set adapter configuration."""
        if adapter is not None:
            if self.config.adapter is None:
                self.config.adapter = adapter
            else:
                self.config.adapter.update(adapter)
        return self

    def with_executor(self, executor_cfg: str | Dict[str, Any] | ExecutorConfig | None) -> 'RunConfigBuilder':
        """Set executor configuration."""
        if executor_cfg is not None:
            if isinstance(executor_cfg, str):
                self.config.executor = ExecutorConfig(type=executor_cfg)
            elif isinstance(executor_cfg, dict):
                self.config.executor = ExecutorConfig.from_dict(executor_cfg)
            elif isinstance(executor_cfg, ExecutorConfig):
                self.config.executor = executor_cfg
        return self

    def with_retry_config(self, max_retries: int | None = None, retry_delay: float | None = None,
                          jitter_factor: float | None = None, retry_exceptions: tuple | None = None) -> 'RunConfigBuilder':
        """Set retry configuration."""
        if max_retries is not None:
            self.config.max_retries = max_retries
        if retry_delay is not None:
            self.config.retry_delay = retry_delay
        if jitter_factor is not None:
            self.config.jitter_factor = jitter_factor
        if retry_exceptions is not None:
            self.config.retry_exceptions = retry_exceptions
        return self

    def with_logging(self, log_level: str | None = None) -> 'RunConfigBuilder':
        """Set logging configuration."""
        if log_level is not None:
            self.config.log_level = log_level
        return self

    def with_callbacks(self, on_success: str | None = None, on_failure: str | None = None) -> 'RunConfigBuilder':
        """Set callback configurations."""
        if on_success is not None:
            validate_callback_function(on_success)
            self.config.on_success = on_success
        if on_failure is not None:
            validate_callback_function(on_failure)
            self.config.on_failure = on_failure
        return self

    # Additional methods for backward compatibility with tests
    def with_final_vars(self, final_vars: list[str] | None) -> 'RunConfigBuilder':
        """Set final variables."""
        if final_vars is not None:
            self.config.final_vars = final_vars
        return self

    def with_executor_cfg(self, executor_cfg: str | Dict[str, Any] | ExecutorConfig | None) -> 'RunConfigBuilder':
        """Set executor configuration (alias for with_executor)."""
        return self.with_executor(executor_cfg)

    def with_with_adapter_cfg(self, with_adapter_cfg: Dict[str, Any] | WithAdapterConfig | None) -> 'RunConfigBuilder':
        """Set with_adapter configuration."""
        if with_adapter_cfg is not None:
            if isinstance(with_adapter_cfg, dict):
                self.config.with_adapter = WithAdapterConfig.from_dict(with_adapter_cfg)
            elif isinstance(with_adapter_cfg, WithAdapterConfig):
                self.config.with_adapter = with_adapter_cfg
        return self

    def with_pipeline_adapter_cfg(self, pipeline_adapter_cfg: Any | None) -> 'RunConfigBuilder':
        """Set pipeline adapter configuration."""
        if pipeline_adapter_cfg is not None:
            self.config.pipeline_adapter_cfg = pipeline_adapter_cfg
        return self

    def with_project_adapter_cfg(self, project_adapter_cfg: Any | None) -> 'RunConfigBuilder':
        """Set project adapter configuration."""
        if project_adapter_cfg is not None:
            self.config.project_adapter_cfg = project_adapter_cfg
        return self

    def with_reload(self, reload: bool | None) -> 'RunConfigBuilder':
        """Set reload flag."""
        if reload is not None:
            self.config.reload = reload
        return self

    def with_log_level(self, log_level: str | None) -> 'RunConfigBuilder':
        """Set log level (alias for with_logging)."""
        return self.with_logging(log_level)

    def with_max_retries(self, max_retries: int | None) -> 'RunConfigBuilder':
        """Set max retries."""
        if max_retries is not None:
            self.config.max_retries = max_retries
        return self

    def with_retry_delay(self, retry_delay: float | None) -> 'RunConfigBuilder':
        """Set retry delay."""
        if retry_delay is not None:
            self.config.retry_delay = retry_delay
        return self

    def with_jitter_factor(self, jitter_factor: float | None) -> 'RunConfigBuilder':
        """Set jitter factor."""
        if jitter_factor is not None:
            self.config.jitter_factor = jitter_factor
        return self

    def with_retry_exceptions(self, retry_exceptions: list | None) -> 'RunConfigBuilder':
        """Set retry exceptions."""
        if retry_exceptions is not None:
            self.config.retry_exceptions = retry_exceptions
        return self

    def with_on_success(self, on_success: Any | None) -> 'RunConfigBuilder':
        """Set on_success callback."""
        if on_success is not None:
            self.config.on_success = on_success
        return self

    def with_on_failure(self, on_failure: Any | None) -> 'RunConfigBuilder':
        """Set on_failure callback."""
        if on_failure is not None:
            self.config.on_failure = on_failure
        return self

    def reset(self) -> 'RunConfigBuilder':
        """Reset builder to default values."""
        self.config = RunConfig()
        return self

    @classmethod
    def from_config(cls, config: RunConfig) -> 'RunConfigBuilder':
        """Create builder from existing config."""
        return cls(base_config=config)

    def build(self) -> RunConfig:
        """Build and return the RunConfig object."""
        # Create a new copy to ensure immutability
        return RunConfig(
            inputs=self.config.inputs,
            final_vars=self.config.final_vars,
            config=self.config.config,
            cache=self.config.cache,
            executor=self.config.executor,
            with_adapter=self.config.with_adapter,
            pipeline_adapter_cfg=self.config.pipeline_adapter_cfg,
            project_adapter_cfg=self.config.project_adapter_cfg,
            adapter=self.config.adapter,
            reload=self.config.reload,
            log_level=self.config.log_level,
            max_retries=self.config.max_retries,
            retry_delay=self.config.retry_delay,
            jitter_factor=self.config.jitter_factor,
            retry_exceptions=self.config.retry_exceptions,
            on_success=self.config.on_success,
            on_failure=self.config.on_failure,
        )
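To make the new API concrete, here is a minimal usage sketch (not part of the diff). It assumes flowerpower.utils.config is importable as packaged above; the input values are invented for illustration.

# Hypothetical sketch: constructing a RunConfig via the new fluent builder.
from flowerpower.utils.config import RunConfigBuilder, merge_run_config_with_kwargs

run_config = (
    RunConfigBuilder()
    .with_inputs({"date": "2024-01-01"})   # validated via validate_config_dict, then assigned
    .with_executor("threadpool")           # plain strings are wrapped in ExecutorConfig(type=...)
    .with_retry_config(max_retries=3, retry_delay=1.0)
    .build()                               # build() returns a fresh RunConfig copy
)

# The kwargs-style merge used elsewhere in the codebase routes each key
# through _attr_handlers or the simple-attribute path shown above:
run_config = merge_run_config_with_kwargs(run_config, {"log_level": "DEBUG"})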
flowerpower/utils/executor.py (new file)
@@ -0,0 +1,178 @@
"""
Executor utilities for FlowerPower pipeline management.

This module provides factory methods for creating executor instances
with proper error handling and dependency management.
"""

from typing import Any, Dict, Optional, Union

from loguru import logger

# Lazy imports to avoid circular dependencies


class ExecutorFactory:
    """
    Factory class for creating executor instances.

    This class centralizes executor type selection and instance creation
    to reduce complexity in the Pipeline class.
    """

    def __init__(self):
        """Initialize the executor factory."""
        self._executor_cache: Dict[str, Any] = {}

    def create_executor(
        self,
        executor_cfg: Union[str, Dict[str, Any], Any, None]
    ) -> Any:
        """
        Create an executor instance based on configuration.

        Args:
            executor_cfg: Executor configuration (string, dict, or ExecutorConfig)

        Returns:
            Executor instance
        """
        # Normalize configuration
        executor_cfg = self._normalize_config(executor_cfg)

        # Create executor based on type
        executor_type = executor_cfg.type or "synchronous"
        cache_key = f"{executor_type}_{hash(str(executor_cfg.to_dict()))}"

        if cache_key in self._executor_cache:
            return self._executor_cache[cache_key]

        executor = self._create_executor_by_type(executor_cfg)
        self._executor_cache[cache_key] = executor
        return executor

    def _normalize_config(
        self,
        executor_cfg: Union[str, Dict[str, Any], Any, None]
    ) -> Any:
        """Normalize executor configuration to ExecutorConfig instance."""
        from ..cfg.pipeline.run import ExecutorConfig

        if executor_cfg is None:
            return ExecutorConfig()

        if isinstance(executor_cfg, str):
            return ExecutorConfig(type=executor_cfg)
        elif isinstance(executor_cfg, dict):
            return ExecutorConfig.from_dict(executor_cfg)
        elif not isinstance(executor_cfg, ExecutorConfig):
            raise TypeError(
                "executor_cfg must be a string, dictionary, or ExecutorConfig instance."
            )

        return executor_cfg

    def _create_executor_by_type(self, executor_cfg: Any) -> Any:
        """Create executor based on type."""
        executor_type = executor_cfg.type or "synchronous"

        if executor_type in ("synchronous", None):
            return self._create_synchronous_executor()
        elif executor_type == "threadpool":
            return self._create_threadpool_executor(executor_cfg)
        elif executor_type == "processpool":
            return self._create_processpool_executor(executor_cfg)
        elif executor_type == "ray":
            return self._create_ray_executor(executor_cfg)
        elif executor_type == "dask":
            return self._create_dask_executor(executor_cfg)
        else:
            logger.warning(
                f"Unknown executor type: {executor_type}. Using local executor."
            )
            return self._create_synchronous_executor()

    def _create_synchronous_executor(self) -> Any:
        """Create synchronous/local executor."""
        from hamilton.execution.executors import SynchronousLocalTaskExecutor
        return SynchronousLocalTaskExecutor()

    def _create_threadpool_executor(self, executor_cfg: Any) -> Any:
        """Create thread pool executor."""
        try:
            from hamilton.plugins.h_threadpool import ThreadPoolExecutor

            # Extract max workers from config
            if executor_cfg.max_workers is not None:
                return ThreadPoolExecutor(max_workers=executor_cfg.max_workers)
            return ThreadPoolExecutor()
        except ImportError:
            logger.warning(
                "ThreadPool executor dependencies not installed. Using local executor."
            )
            return self._create_synchronous_executor()

    def _create_processpool_executor(self, executor_cfg: Any) -> Any:
        """Create process pool executor."""
        try:
            from hamilton.execution.executors import ProcessPoolExecutor

            # Extract max workers from config
            if executor_cfg.max_workers is not None:
                return ProcessPoolExecutor(max_workers=executor_cfg.max_workers)
            return ProcessPoolExecutor()
        except ImportError:
            logger.warning(
                "ProcessPool executor dependencies not installed. Using local executor."
            )
            return self._create_synchronous_executor()

    def _create_ray_executor(self, executor_cfg: Any) -> Any:
        """Create Ray executor."""
        try:
            from hamilton.plugins.h_ray import RayTaskExecutor

            # Extract configuration
            config = {}
            if executor_cfg.num_cpus is not None:
                config['num_cpus'] = executor_cfg.num_cpus
            if config:
                return RayTaskExecutor(**config)
            return RayTaskExecutor()
        except ImportError:
            logger.warning(
                "Ray executor dependencies not installed. Using local executor."
            )
            return self._create_synchronous_executor()

    def _create_dask_executor(self, executor_cfg: Any) -> Any:
        """Create Dask executor."""
        try:
            from hamilton.plugins.h_dask import DaskExecutor

            # Extract configuration
            config = {}
            if executor_cfg.num_cpus is not None:
                config['num_cpus'] = executor_cfg.num_cpus
            if config:
                return DaskExecutor(**config)
            return DaskExecutor()
        except ImportError:
            logger.warning(
                "Dask executor dependencies not installed. Using local executor."
            )
            return self._create_synchronous_executor()

    def clear_cache(self) -> None:
        """Clear the executor cache."""
        self._executor_cache.clear()


def create_executor_factory() -> ExecutorFactory:
    """
    Factory function to create an ExecutorFactory instance.

    Returns:
        ExecutorFactory: Configured factory instance
    """
    return ExecutorFactory()
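A short, hedged sketch of the factory in use (not part of the diff). It assumes the module path above, and that Hamilton's plugin extras may be absent, in which case the ImportError fallbacks log a warning and return the local executor.

# Hypothetical sketch: creating executors through ExecutorFactory.
from flowerpower.utils.executor import ExecutorFactory

factory = ExecutorFactory()

# Strings, dicts, and ExecutorConfig instances are all accepted;
# _normalize_config converts each to an ExecutorConfig first.
executor = factory.create_executor("threadpool")
executor2 = factory.create_executor({"type": "threadpool", "max_workers": 4})

# Identical normalized configs are served from the internal cache.
factory.clear_cache()  # drop the cached executor instances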
flowerpower/utils/filesystem.py (new file)
@@ -0,0 +1,194 @@
"""
Filesystem utilities for FlowerPower pipeline management.

This module provides helper classes and functions for common filesystem operations
used throughout the FlowerPower codebase.
"""

from pathlib import Path
from typing import Any, Dict, Optional

from fsspec_utils import AbstractFileSystem, filesystem
from loguru import logger
from .security import validate_file_path


class FilesystemHelper:
    """
    Helper class for filesystem operations with caching and error handling.

    This class provides centralized filesystem operations with proper error handling
    and logging for common operations like directory creation, path resolution,
    and filesystem initialization.
    """

    def __init__(self, base_dir: str, storage_options: Optional[Dict[str, Any]] = None):
        """
        Initialize the filesystem helper.

        Args:
            base_dir: Base directory for filesystem operations
            storage_options: Storage options for filesystem access
        """
        # Validate base directory for security
        validate_file_path(base_dir, allow_relative=True)
        self._base_dir = base_dir
        self._storage_options = storage_options or {}
        self._fs_cache: Dict[str, AbstractFileSystem] = {}

    def get_filesystem(self, cached: bool = False, cache_storage: Optional[str] = None) -> AbstractFileSystem:
        """
        Get a filesystem instance with optional caching.

        Args:
            cached: Whether to use cached filesystem
            cache_storage: Storage path for cached filesystem

        Returns:
            AbstractFileSystem: Configured filesystem instance
        """
        cache_key = f"{self._base_dir}_{cached}_{cache_storage}"

        if cache_key not in self._fs_cache:
            if cached and cache_storage:
                # Ensure cache storage directory exists
                cache_path = Path(cache_storage)
                cache_path.mkdir(parents=True, exist_ok=True)

            self._fs_cache[cache_key] = filesystem(
                self._base_dir,
                storage_options=self._storage_options,
                cached=cached,
                cache_storage=cache_storage,
            )

        return self._fs_cache[cache_key]

    def ensure_directories_exist(
        self,
        fs: AbstractFileSystem,
        *directories: str,
        exist_ok: bool = True
    ) -> None:
        """
        Ensure that the specified directories exist.

        Args:
            fs: Filesystem instance
            *directories: Directory paths to create
            exist_ok: Whether to ignore existing directories

        Raises:
            RuntimeError: If directory creation fails
        """
        for directory in directories:
            # Validate directory path for security
            validate_file_path(directory, allow_relative=True)
            try:
                fs.makedirs(directory, exist_ok=exist_ok)
            except (OSError, PermissionError) as e:
                logger.error(f"Error creating directory {directory}: {e}")
                raise RuntimeError(f"Failed to create directory {directory}: {e}") from e
            except Exception as e:
                logger.error(f"Unexpected error creating directory {directory}: {e}")
                raise RuntimeError(f"Unexpected filesystem error for {directory}: {e}") from e

    def resolve_path(self, fs: AbstractFileSystem, *path_parts: str) -> str:
        """
        Resolve a path in the filesystem.

        Args:
            fs: Filesystem instance
            *path_parts: Path components to join

        Returns:
            str: Resolved path
        """
        if hasattr(fs, 'path'):
            base_path = fs.path
        else:
            base_path = self._base_dir

        resolved_path = fs.join(base_path, *path_parts)
        # Validate resolved path for security
        validate_file_path(resolved_path, allow_relative=True)
        return resolved_path

    def clean_directory(
        self,
        fs: AbstractFileSystem,
        *paths: str,
        recursive: bool = True
    ) -> None:
        """
        Clean specified paths if they exist.

        Args:
            fs: Filesystem instance
            *paths: Paths to clean
            recursive: Whether to remove recursively
        """
        for path in paths:
            # Validate path for security before cleaning
            validate_file_path(path, allow_relative=True)
            if fs.exists(path):
                try:
                    fs.rm(path, recursive=recursive)
                except Exception as e:
                    logger.warning(f"Failed to clean path {path}: {e}")

    def sync_filesystem(self, fs: AbstractFileSystem) -> None:
        """
        Sync filesystem cache if applicable.

        Args:
            fs: Filesystem instance to sync
        """
        if hasattr(fs, 'is_cache_fs') and fs.is_cache_fs:
            fs.sync_cache()

            # Log sync information if available
            if hasattr(fs, '_mapper') and hasattr(fs, 'cache_path'):
                logger.debug(
                    f"Synced filesystem cache: {fs._mapper.directory} -> {fs.cache_path}"
                )

    def get_project_path(self, fs: AbstractFileSystem) -> str:
        """
        Get the project path for the filesystem.

        Args:
            fs: Filesystem instance

        Returns:
            str: Project path
        """
        if hasattr(fs, 'is_cache_fs') and fs.is_cache_fs:
            project_path = fs._mapper.directory
        else:
            project_path = getattr(fs, 'path', self._base_dir)

        # Validate project path for security
        validate_file_path(project_path, allow_relative=True)
        return project_path

    def clear_cache(self) -> None:
        """Clear the filesystem cache."""
        self._fs_cache.clear()


def create_filesystem_helper(
    base_dir: str,
    storage_options: Optional[Dict[str, Any]] = None
) -> FilesystemHelper:
    """
    Factory function to create a FilesystemHelper instance.

    Args:
        base_dir: Base directory for filesystem operations
        storage_options: Storage options for filesystem access

    Returns:
        FilesystemHelper: Configured helper instance
    """
    return FilesystemHelper(base_dir, storage_options)
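Finally, a usage sketch for the helper (not part of the diff). The directory and file names are invented; the calls mirror the methods defined above and assume fsspec_utils.filesystem accepts the keyword arguments the helper passes.

# Hypothetical sketch: common filesystem operations via FilesystemHelper.
from flowerpower.utils.filesystem import FilesystemHelper

helper = FilesystemHelper(".", storage_options={})
fs = helper.get_filesystem()  # non-cached instance, memoized per (base_dir, cached, cache_storage)

# Every path is passed through validate_file_path before use.
helper.ensure_directories_exist(fs, "conf/pipelines", "pipelines")
config_path = helper.resolve_path(fs, "conf", "project.yml")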