FlowerPower 0.30.0__tar.gz → 0.31.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flowerpower-0.30.0/src/FlowerPower.egg-info → flowerpower-0.31.1}/PKG-INFO +2 -2
- {flowerpower-0.30.0 → flowerpower-0.31.1}/pyproject.toml +72 -2
- {flowerpower-0.30.0 → flowerpower-0.31.1/src/FlowerPower.egg-info}/PKG-INFO +2 -2
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/FlowerPower.egg-info/SOURCES.txt +12 -1
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/FlowerPower.egg-info/requires.txt +1 -1
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/cfg/__init__.py +143 -25
- flowerpower-0.31.1/src/flowerpower/cfg/base.py +262 -0
- flowerpower-0.31.1/src/flowerpower/cfg/exceptions.py +53 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/cfg/pipeline/__init__.py +151 -35
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/cfg/pipeline/adapter.py +1 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/cfg/pipeline/builder.py +24 -25
- flowerpower-0.31.1/src/flowerpower/cfg/pipeline/builder_adapter.py +142 -0
- flowerpower-0.31.1/src/flowerpower/cfg/pipeline/builder_executor.py +101 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/cfg/pipeline/run.py +99 -40
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/cfg/project/__init__.py +59 -14
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/cfg/project/adapter.py +6 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/cli/__init__.py +8 -2
- flowerpower-0.31.1/src/flowerpower/cli/cfg.py +3 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/cli/pipeline.py +121 -83
- flowerpower-0.31.1/src/flowerpower/cli/utils.py +197 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/flowerpower.py +94 -120
- flowerpower-0.31.1/src/flowerpower/pipeline/config_manager.py +180 -0
- flowerpower-0.31.1/src/flowerpower/pipeline/executor.py +126 -0
- flowerpower-0.31.1/src/flowerpower/pipeline/lifecycle_manager.py +231 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/pipeline/manager.py +121 -274
- flowerpower-0.31.1/src/flowerpower/pipeline/pipeline.py +431 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/pipeline/registry.py +45 -4
- flowerpower-0.31.1/src/flowerpower/utils/__init__.py +19 -0
- flowerpower-0.31.1/src/flowerpower/utils/adapter.py +286 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/utils/callback.py +73 -67
- flowerpower-0.31.1/src/flowerpower/utils/config.py +306 -0
- flowerpower-0.31.1/src/flowerpower/utils/executor.py +178 -0
- flowerpower-0.31.1/src/flowerpower/utils/filesystem.py +194 -0
- flowerpower-0.31.1/src/flowerpower/utils/misc.py +421 -0
- flowerpower-0.31.1/src/flowerpower/utils/security.py +221 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/tests/test_flowerpower_project.py +1 -1
- flowerpower-0.30.0/src/flowerpower/cfg/base.py +0 -141
- flowerpower-0.30.0/src/flowerpower/cfg/pipeline/_schedule.py +0 -32
- flowerpower-0.30.0/src/flowerpower/cli/cfg.py +0 -41
- flowerpower-0.30.0/src/flowerpower/cli/utils.py +0 -148
- flowerpower-0.30.0/src/flowerpower/pipeline/pipeline.py +0 -643
- flowerpower-0.30.0/src/flowerpower/utils/misc.py +0 -247
- {flowerpower-0.30.0 → flowerpower-0.31.1}/LICENSE +0 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/README.md +0 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/setup.cfg +0 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/FlowerPower.egg-info/dependency_links.txt +0 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/FlowerPower.egg-info/entry_points.txt +0 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/FlowerPower.egg-info/top_level.txt +0 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/__init__.py +0 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/pipeline/__init__.py +0 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/pipeline/base.py +0 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/pipeline/io.py +0 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/pipeline/visualizer.py +0 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/plugins/io/__init__.py +0 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/settings/__init__.py +0 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/settings/_backend.py +0 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/settings/executor.py +0 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/settings/general.py +0 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/settings/hamilton.py +0 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/settings/logging.py +0 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/settings/retry.py +0 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/utils/logging.py +0 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/utils/monkey.py +0 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/utils/open_telemetry.py +0 -0
- {flowerpower-0.30.0 → flowerpower-0.31.1}/src/flowerpower/utils/templates.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: FlowerPower
|
3
|
-
Version: 0.30.0
|
3
|
+
Version: 0.31.1
|
4
4
|
Summary: A simple workflow framework for building and managing data processing pipelines
|
5
5
|
Author-email: "Volker L." <ligno.blades@gmail.com>
|
6
6
|
Project-URL: Homepage, https://github.com/legout/flowerpower
|
@@ -11,7 +11,7 @@ Description-Content-Type: text/markdown
|
|
11
11
|
License-File: LICENSE
|
12
12
|
Requires-Dist: duration-parser>=1.0.1
|
13
13
|
Requires-Dist: fsspec>=2024.10.0
|
14
|
-
Requires-Dist: fsspec-utils
|
14
|
+
Requires-Dist: fsspec-utils>=0.1.0
|
15
15
|
Requires-Dist: humanize>=4.12.2
|
16
16
|
Requires-Dist: msgspec>=0.19.0
|
17
17
|
Requires-Dist: munch>=4.0.0
|
@@ -4,14 +4,14 @@ description = "A simple workflow framework for building and managing data proces
|
|
4
4
|
authors = [{ name = "Volker L.", email = "ligno.blades@gmail.com" }]
|
5
5
|
readme = "README.md"
|
6
6
|
requires-python = ">= 3.11"
|
7
|
-
version = "0.30.0"
|
7
|
+
version = "0.31.1"
|
8
8
|
keywords = ["hamilton", "workflow", "pipeline", "scheduler", "dask", "ray"]
|
9
9
|
|
10
10
|
dependencies = [
|
11
11
|
#'dill>=0.3.8',
|
12
12
|
'duration-parser>=1.0.1',
|
13
13
|
'fsspec>=2024.10.0',
|
14
|
-
'fsspec-utils
|
14
|
+
'fsspec-utils>=0.1.0',
|
15
15
|
'humanize>=4.12.2',
|
16
16
|
'msgspec>=0.19.0',
|
17
17
|
'munch>=4.0.0',
|
@@ -68,5 +68,75 @@ dev-dependencies = [
|
|
68
68
|
"mkdocstrings>=0.30.0",
|
69
69
|
"mkdocstrings-python>=1.17.0",
|
70
70
|
"repomix>=0.3.4",
|
71
|
+
# Security audit tools
|
72
|
+
"bandit[toml]>=1.7.7",
|
73
|
+
"safety>=3.2.0",
|
74
|
+
"mypy>=1.13.0",
|
75
|
+
"pandas>=2.3.2",
|
76
|
+
"numpy>=2.3.3",
|
77
|
+
"matplotlib>=3.10.6",
|
78
|
+
"seaborn>=0.13.2",
|
71
79
|
]
|
72
80
|
package = true
|
81
|
+
|
82
|
+
# Security configuration
|
83
|
+
[tool.bandit]
|
84
|
+
exclude_dirs = ["tests", "examples"]
|
85
|
+
skips = ["B101"] # Skip assert_used test for test files
|
86
|
+
|
87
|
+
[tool.bandit.assert_used]
|
88
|
+
skips = ["*/test_*.py", "*/tests.py"]
|
89
|
+
|
90
|
+
# MyPy configuration
|
91
|
+
[tool.mypy]
|
92
|
+
python_version = "3.11"
|
93
|
+
warn_return_any = true
|
94
|
+
warn_unused_configs = true
|
95
|
+
disallow_untyped_defs = true
|
96
|
+
disallow_incomplete_defs = true
|
97
|
+
check_untyped_defs = true
|
98
|
+
disallow_untyped_decorators = true
|
99
|
+
no_implicit_optional = true
|
100
|
+
warn_redundant_casts = true
|
101
|
+
warn_unused_ignores = true
|
102
|
+
warn_no_return = true
|
103
|
+
warn_unreachable = true
|
104
|
+
strict_equality = true
|
105
|
+
show_error_codes = true
|
106
|
+
|
107
|
+
[[tool.mypy.overrides]]
|
108
|
+
module = [
|
109
|
+
"hamilton.*",
|
110
|
+
"sf_hamilton.*",
|
111
|
+
"fsspec_utils.*",
|
112
|
+
"loguru.*",
|
113
|
+
"munch.*",
|
114
|
+
"rich.*",
|
115
|
+
"typer.*",
|
116
|
+
]
|
117
|
+
ignore_missing_imports = true
|
118
|
+
|
119
|
+
# Ruff configuration (extended for security)
|
120
|
+
[tool.ruff]
|
121
|
+
line-length = 88
|
122
|
+
target-version = "py311"
|
123
|
+
|
124
|
+
[tool.ruff.lint]
|
125
|
+
select = [
|
126
|
+
"E", # pycodestyle errors
|
127
|
+
"W", # pycodestyle warnings
|
128
|
+
"F", # pyflakes
|
129
|
+
"I", # isort
|
130
|
+
"B", # flake8-bugbear
|
131
|
+
"C4", # flake8-comprehensions
|
132
|
+
"UP", # pyupgrade
|
133
|
+
"S", # flake8-bandit (security)
|
134
|
+
]
|
135
|
+
ignore = [
|
136
|
+
"E501", # line too long, handled by black
|
137
|
+
"B008", # do not perform function calls in argument defaults
|
138
|
+
]
|
139
|
+
|
140
|
+
[tool.ruff.lint.per-file-ignores]
|
141
|
+
"tests/*" = ["S101"] # Allow assert in tests
|
142
|
+
"examples/*" = ["S101"] # Allow assert in examples
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: FlowerPower
|
3
|
-
Version: 0.30.0
|
3
|
+
Version: 0.31.1
|
4
4
|
Summary: A simple workflow framework for building and managing data processing pipelines
|
5
5
|
Author-email: "Volker L." <ligno.blades@gmail.com>
|
6
6
|
Project-URL: Homepage, https://github.com/legout/flowerpower
|
@@ -11,7 +11,7 @@ Description-Content-Type: text/markdown
|
|
11
11
|
License-File: LICENSE
|
12
12
|
Requires-Dist: duration-parser>=1.0.1
|
13
13
|
Requires-Dist: fsspec>=2024.10.0
|
14
|
-
Requires-Dist: fsspec-utils
|
14
|
+
Requires-Dist: fsspec-utils>=0.1.0
|
15
15
|
Requires-Dist: humanize>=4.12.2
|
16
16
|
Requires-Dist: msgspec>=0.19.0
|
17
17
|
Requires-Dist: munch>=4.0.0
|
@@ -11,10 +11,12 @@ src/flowerpower/__init__.py
|
|
11
11
|
src/flowerpower/flowerpower.py
|
12
12
|
src/flowerpower/cfg/__init__.py
|
13
13
|
src/flowerpower/cfg/base.py
|
14
|
+
src/flowerpower/cfg/exceptions.py
|
14
15
|
src/flowerpower/cfg/pipeline/__init__.py
|
15
|
-
src/flowerpower/cfg/pipeline/_schedule.py
|
16
16
|
src/flowerpower/cfg/pipeline/adapter.py
|
17
17
|
src/flowerpower/cfg/pipeline/builder.py
|
18
|
+
src/flowerpower/cfg/pipeline/builder_adapter.py
|
19
|
+
src/flowerpower/cfg/pipeline/builder_executor.py
|
18
20
|
src/flowerpower/cfg/pipeline/run.py
|
19
21
|
src/flowerpower/cfg/project/__init__.py
|
20
22
|
src/flowerpower/cfg/project/adapter.py
|
@@ -24,7 +26,10 @@ src/flowerpower/cli/pipeline.py
|
|
24
26
|
src/flowerpower/cli/utils.py
|
25
27
|
src/flowerpower/pipeline/__init__.py
|
26
28
|
src/flowerpower/pipeline/base.py
|
29
|
+
src/flowerpower/pipeline/config_manager.py
|
30
|
+
src/flowerpower/pipeline/executor.py
|
27
31
|
src/flowerpower/pipeline/io.py
|
32
|
+
src/flowerpower/pipeline/lifecycle_manager.py
|
28
33
|
src/flowerpower/pipeline/manager.py
|
29
34
|
src/flowerpower/pipeline/pipeline.py
|
30
35
|
src/flowerpower/pipeline/registry.py
|
@@ -37,10 +42,16 @@ src/flowerpower/settings/general.py
|
|
37
42
|
src/flowerpower/settings/hamilton.py
|
38
43
|
src/flowerpower/settings/logging.py
|
39
44
|
src/flowerpower/settings/retry.py
|
45
|
+
src/flowerpower/utils/__init__.py
|
46
|
+
src/flowerpower/utils/adapter.py
|
40
47
|
src/flowerpower/utils/callback.py
|
48
|
+
src/flowerpower/utils/config.py
|
49
|
+
src/flowerpower/utils/executor.py
|
50
|
+
src/flowerpower/utils/filesystem.py
|
41
51
|
src/flowerpower/utils/logging.py
|
42
52
|
src/flowerpower/utils/misc.py
|
43
53
|
src/flowerpower/utils/monkey.py
|
44
54
|
src/flowerpower/utils/open_telemetry.py
|
55
|
+
src/flowerpower/utils/security.py
|
45
56
|
src/flowerpower/utils/templates.py
|
46
57
|
tests/test_flowerpower_project.py
|
@@ -6,6 +6,7 @@ from munch import Munch
|
|
6
6
|
|
7
7
|
from ..settings import CONFIG_DIR, PIPELINES_DIR
|
8
8
|
from .base import BaseConfig
|
9
|
+
from .exceptions import ConfigLoadError, ConfigSaveError, ConfigPathError
|
9
10
|
from .pipeline import PipelineConfig, init_pipeline_config
|
10
11
|
from .project import ProjectConfig, init_project_config
|
11
12
|
|
@@ -21,8 +22,9 @@ class Config(BaseConfig):
|
|
21
22
|
pipeline (PipelineConfig): Configuration for the pipeline.
|
22
23
|
project (ProjectConfig): Configuration for the project.
|
23
24
|
fs (AbstractFileSystem | None): Filesystem abstraction for I/O operations.
|
24
|
-
base_dir (str |
|
25
|
-
|
25
|
+
base_dir (str | None): Base directory for the configuration.
|
26
|
+
base_dir_path (pathlib.Path | None): Base directory as a Path object (property).
|
27
|
+
storage_options (Munch): Options for filesystem operations.
|
26
28
|
|
27
29
|
Example:
|
28
30
|
```python
|
@@ -41,8 +43,61 @@ class Config(BaseConfig):
|
|
41
43
|
pipeline: PipelineConfig = msgspec.field(default_factory=PipelineConfig)
|
42
44
|
project: ProjectConfig = msgspec.field(default_factory=ProjectConfig)
|
43
45
|
fs: AbstractFileSystem | None = None
|
44
|
-
base_dir: str |
|
45
|
-
storage_options:
|
46
|
+
base_dir: str | None = None
|
47
|
+
storage_options: Munch = msgspec.field(default_factory=Munch)
|
48
|
+
|
49
|
+
def __post_init__(self):
|
50
|
+
"""Handle conversion of storage_options from dict to Munch if needed."""
|
51
|
+
if isinstance(self.storage_options, dict):
|
52
|
+
self.storage_options = Munch(self.storage_options)
|
53
|
+
|
54
|
+
# Validate storage_options
|
55
|
+
self._validate_storage_options()
|
56
|
+
|
57
|
+
# Validate base_dir if provided
|
58
|
+
if self.base_dir is not None:
|
59
|
+
self._validate_base_dir()
|
60
|
+
|
61
|
+
def _validate_storage_options(self) -> None:
|
62
|
+
"""Validate storage_options parameter.
|
63
|
+
|
64
|
+
Raises:
|
65
|
+
ValueError: If storage_options contains invalid values.
|
66
|
+
"""
|
67
|
+
if self.storage_options is None:
|
68
|
+
self.storage_options = Munch()
|
69
|
+
|
70
|
+
if not isinstance(self.storage_options, (dict, Munch)):
|
71
|
+
raise ValueError(f"storage_options must be a dict or Munch, got {type(self.storage_options)}")
|
72
|
+
|
73
|
+
def _validate_base_dir(self) -> None:
|
74
|
+
"""Validate base_dir parameter.
|
75
|
+
|
76
|
+
Raises:
|
77
|
+
ValueError: If base_dir contains invalid characters or is empty.
|
78
|
+
"""
|
79
|
+
# Convert Path to string if needed
|
80
|
+
base_dir_str = str(self.base_dir) if hasattr(self.base_dir, '__str__') else self.base_dir
|
81
|
+
|
82
|
+
if not isinstance(base_dir_str, str):
|
83
|
+
raise ValueError(f"base_dir must be a string or Path, got {type(self.base_dir)}")
|
84
|
+
|
85
|
+
# Check for directory traversal attempts (but allow absolute paths)
|
86
|
+
if '..' in base_dir_str:
|
87
|
+
raise ValueError(f"Invalid base_dir: {base_dir_str}. Contains path traversal characters.")
|
88
|
+
|
89
|
+
# Check for empty string
|
90
|
+
if not base_dir_str.strip():
|
91
|
+
raise ValueError("base_dir cannot be empty or whitespace only.")
|
92
|
+
|
93
|
+
@property
|
94
|
+
def base_dir_path(self) -> Path | None:
|
95
|
+
"""Get base_dir as a pathlib.Path object.
|
96
|
+
|
97
|
+
Returns:
|
98
|
+
pathlib.Path | None: The base directory as a Path object, or None if base_dir is None.
|
99
|
+
"""
|
100
|
+
return Path(self.base_dir) if self.base_dir is not None else None
|
46
101
|
|
47
102
|
@classmethod
|
48
103
|
def load(
|
@@ -75,21 +130,29 @@ class Config(BaseConfig):
|
|
75
130
|
```
|
76
131
|
"""
|
77
132
|
if fs is None:
|
78
|
-
|
79
|
-
|
133
|
+
# Use cached filesystem for better performance
|
134
|
+
storage_options_hash = cls._hash_storage_options(storage_options)
|
135
|
+
fs = cls._get_cached_filesystem(base_dir, storage_options_hash)
|
136
|
+
|
137
|
+
try:
|
138
|
+
project = ProjectConfig.load(
|
139
|
+
base_dir=base_dir,
|
140
|
+
name=name,
|
141
|
+
fs=fs,
|
142
|
+
storage_options=storage_options,
|
80
143
|
)
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
144
|
+
except ConfigLoadError as e:
|
145
|
+
raise ConfigLoadError(f"Failed to load project configuration: {e}", path=base_dir, original_error=e)
|
146
|
+
|
147
|
+
try:
|
148
|
+
pipeline = PipelineConfig.load(
|
149
|
+
base_dir=base_dir,
|
150
|
+
name=pipeline_name,
|
151
|
+
fs=fs,
|
152
|
+
storage_options=storage_options,
|
153
|
+
)
|
154
|
+
except ConfigLoadError as e:
|
155
|
+
raise ConfigLoadError(f"Failed to load pipeline configuration: {e}", path=base_dir, original_error=e)
|
93
156
|
|
94
157
|
return cls(
|
95
158
|
base_dir=base_dir,
|
@@ -120,9 +183,9 @@ class Config(BaseConfig):
|
|
120
183
|
```
|
121
184
|
"""
|
122
185
|
if fs is None and self.fs is None:
|
123
|
-
|
124
|
-
|
125
|
-
)
|
186
|
+
# Use cached filesystem for better performance
|
187
|
+
storage_options_hash = self._hash_storage_options(storage_options)
|
188
|
+
self.fs = self._get_cached_filesystem(self.base_dir, storage_options_hash)
|
126
189
|
|
127
190
|
if not self.fs.exists(CONFIG_DIR):
|
128
191
|
self.fs.makedirs(CONFIG_DIR)
|
@@ -130,13 +193,22 @@ class Config(BaseConfig):
|
|
130
193
|
if pipeline:
|
131
194
|
self.fs.makedirs(PIPELINES_DIR, exist_ok=True)
|
132
195
|
h_params = self.pipeline.pop("h_params") if self.pipeline.h_params else None
|
133
|
-
|
134
|
-
|
135
|
-
|
196
|
+
# Validate pipeline name to prevent directory traversal
|
197
|
+
if self.pipeline.name and ('..' in self.pipeline.name or '/' in self.pipeline.name or '\\' in self.pipeline.name):
|
198
|
+
raise ConfigPathError(f"Invalid pipeline name: {self.pipeline.name}. Contains path traversal characters.", path=self.pipeline.name)
|
199
|
+
try:
|
200
|
+
self.pipeline.to_yaml(
|
201
|
+
path=f"conf/pipelines/{self.pipeline.name}.yml", fs=self.fs
|
202
|
+
)
|
203
|
+
except ConfigSaveError as e:
|
204
|
+
raise ConfigSaveError(f"Failed to save pipeline configuration: {e}", path=f"conf/pipelines/{self.pipeline.name}.yml", original_error=e)
|
136
205
|
if h_params:
|
137
206
|
self.pipeline.h_params = h_params
|
138
207
|
if project:
|
139
|
-
|
208
|
+
try:
|
209
|
+
self.project.to_yaml("conf/project.yml", self.fs)
|
210
|
+
except ConfigSaveError as e:
|
211
|
+
raise ConfigSaveError(f"Failed to save project configuration: {e}", path="conf/project.yml", original_error=e)
|
140
212
|
|
141
213
|
|
142
214
|
def load(
|
@@ -247,3 +319,49 @@ def init_config(
|
|
247
319
|
storage_options=storage_options,
|
248
320
|
)
|
249
321
|
return Config(pipeline=pipeline_cfg, project=project_cfg, fs=fs, base_dir=base_dir)
|
322
|
+
|
323
|
+
|
324
|
+
# Helper methods for centralized load/save logic
|
325
|
+
@classmethod
|
326
|
+
def _load_config(
|
327
|
+
cls,
|
328
|
+
config_class: type[BaseConfig],
|
329
|
+
base_dir: str,
|
330
|
+
name: str | None,
|
331
|
+
fs: AbstractFileSystem,
|
332
|
+
storage_options: dict | BaseStorageOptions | None,
|
333
|
+
) -> BaseConfig:
|
334
|
+
"""Centralized configuration loading logic.
|
335
|
+
|
336
|
+
Args:
|
337
|
+
config_class: The configuration class to load.
|
338
|
+
base_dir: Base directory for configurations.
|
339
|
+
name: Configuration name.
|
340
|
+
fs: Filesystem instance.
|
341
|
+
storage_options: Options for filesystem.
|
342
|
+
|
343
|
+
Returns:
|
344
|
+
Loaded configuration instance.
|
345
|
+
"""
|
346
|
+
return config_class.load(
|
347
|
+
base_dir=base_dir,
|
348
|
+
name=name,
|
349
|
+
fs=fs,
|
350
|
+
storage_options=storage_options,
|
351
|
+
)
|
352
|
+
|
353
|
+
|
354
|
+
def _save_pipeline_config(self) -> None:
|
355
|
+
"""Save pipeline configuration with proper handling of h_params."""
|
356
|
+
self.fs.makedirs(PIPELINES_DIR, exist_ok=True)
|
357
|
+
h_params = self.pipeline.pop("h_params") if self.pipeline.h_params else None
|
358
|
+
self.pipeline.to_yaml(
|
359
|
+
path=f"conf/pipelines/{self.pipeline.name}.yml", fs=self.fs
|
360
|
+
)
|
361
|
+
if h_params:
|
362
|
+
self.pipeline.h_params = h_params
|
363
|
+
|
364
|
+
|
365
|
+
def _save_project_config(self) -> None:
|
366
|
+
"""Save project configuration."""
|
367
|
+
self.project.to_yaml("conf/project.yml", self.fs)
|
@@ -0,0 +1,262 @@
|
|
1
|
+
import copy
|
2
|
+
from pathlib import Path
|
3
|
+
from typing import Any, Self, Optional
|
4
|
+
from functools import lru_cache
|
5
|
+
|
6
|
+
import msgspec
|
7
|
+
from fsspec_utils import AbstractFileSystem, filesystem
|
8
|
+
from ..utils.misc import get_filesystem
|
9
|
+
from ..utils.security import validate_file_path as security_validate_file_path
|
10
|
+
from .exceptions import ConfigLoadError, ConfigSaveError, ConfigPathError
|
11
|
+
|
12
|
+
|
13
|
+
def validate_file_path(path: str) -> str:
|
14
|
+
"""
|
15
|
+
Validate a file path to prevent directory traversal attacks.
|
16
|
+
|
17
|
+
Args:
|
18
|
+
path: The file path to validate
|
19
|
+
|
20
|
+
Returns:
|
21
|
+
str: The validated path
|
22
|
+
|
23
|
+
Raises:
|
24
|
+
ConfigPathError: If the path contains directory traversal attempts
|
25
|
+
"""
|
26
|
+
try:
|
27
|
+
# Use the comprehensive security validation
|
28
|
+
validated_path = security_validate_file_path(
|
29
|
+
path,
|
30
|
+
allow_absolute=False, # Config files should be relative
|
31
|
+
allow_relative=True
|
32
|
+
)
|
33
|
+
return str(validated_path)
|
34
|
+
except Exception as e:
|
35
|
+
# Convert security errors to config path errors for consistency
|
36
|
+
raise ConfigPathError(f"Invalid file path: {path}. {str(e)}", path=path) from e
|
37
|
+
|
38
|
+
|
39
|
+
class BaseConfig(msgspec.Struct, kw_only=True):
|
40
|
+
# Class-level cache for filesystem instances
|
41
|
+
_fs_cache = {}
|
42
|
+
|
43
|
+
@classmethod
|
44
|
+
@lru_cache(maxsize=32)
|
45
|
+
def _get_cached_filesystem(cls, base_dir: str, storage_options_hash: int) -> AbstractFileSystem:
|
46
|
+
"""Get a cached filesystem instance.
|
47
|
+
|
48
|
+
Args:
|
49
|
+
base_dir: Base directory for the filesystem.
|
50
|
+
storage_options_hash: Hash of storage options for cache key.
|
51
|
+
|
52
|
+
Returns:
|
53
|
+
Cached filesystem instance.
|
54
|
+
"""
|
55
|
+
cache_key = (base_dir, storage_options_hash)
|
56
|
+
if cache_key not in cls._fs_cache:
|
57
|
+
cls._fs_cache[cache_key] = filesystem(base_dir, cached=True, dirfs=True)
|
58
|
+
return cls._fs_cache[cache_key]
|
59
|
+
|
60
|
+
@classmethod
|
61
|
+
def _hash_storage_options(cls, storage_options: dict | None) -> int:
|
62
|
+
"""Create a hash of storage options for caching.
|
63
|
+
|
64
|
+
Args:
|
65
|
+
storage_options: Storage options to hash.
|
66
|
+
|
67
|
+
Returns:
|
68
|
+
Hash of storage options.
|
69
|
+
"""
|
70
|
+
if not storage_options:
|
71
|
+
return hash(())
|
72
|
+
|
73
|
+
# Convert to frozenset of items for consistent hashing
|
74
|
+
try:
|
75
|
+
return hash(frozenset(sorted(storage_options.items())))
|
76
|
+
except TypeError:
|
77
|
+
# If items are not hashable, use string representation
|
78
|
+
return hash(str(sorted(storage_options.items())))
|
79
|
+
def to_dict(self) -> dict[str, Any]:
|
80
|
+
# Convert to dictionary, handling special cases like type objects
|
81
|
+
result = {}
|
82
|
+
for field in self.__struct_fields__:
|
83
|
+
value = getattr(self, field)
|
84
|
+
if isinstance(value, type):
|
85
|
+
# Convert type objects to string representation
|
86
|
+
result[field] = str(value)
|
87
|
+
elif hasattr(value, '__struct_fields__'):
|
88
|
+
# Recursively convert nested msgspec structs
|
89
|
+
result[field] = value.to_dict()
|
90
|
+
elif isinstance(value, list):
|
91
|
+
# Handle lists that might contain type objects (like exception classes)
|
92
|
+
converted_list = []
|
93
|
+
for item in value:
|
94
|
+
if isinstance(item, type):
|
95
|
+
converted_list.append(str(item))
|
96
|
+
else:
|
97
|
+
converted_list.append(item)
|
98
|
+
result[field] = converted_list
|
99
|
+
else:
|
100
|
+
result[field] = value
|
101
|
+
return result
|
102
|
+
|
103
|
+
def to_yaml(self, path: str, fs: AbstractFileSystem | None = None) -> None:
|
104
|
+
"""
|
105
|
+
Converts the instance to a YAML file.
|
106
|
+
|
107
|
+
Args:
|
108
|
+
path: The path to the YAML file.
|
109
|
+
fs: An optional filesystem instance to use for file operations.
|
110
|
+
|
111
|
+
Raises:
|
112
|
+
ConfigSaveError: If saving the configuration fails.
|
113
|
+
ConfigPathError: If the path contains directory traversal attempts.
|
114
|
+
"""
|
115
|
+
# Validate the path to prevent directory traversal
|
116
|
+
try:
|
117
|
+
validated_path = validate_file_path(path)
|
118
|
+
except ConfigPathError as e:
|
119
|
+
raise ConfigSaveError(f"Path validation failed: {e}", path=path, original_error=e)
|
120
|
+
|
121
|
+
# Use cached filesystem if available
|
122
|
+
if fs is None:
|
123
|
+
# Use cached filesystem if available
|
124
|
+
if fs is None:
|
125
|
+
fs = get_filesystem(fs)
|
126
|
+
try:
|
127
|
+
with fs.open(validated_path, "w") as f:
|
128
|
+
f.write(msgspec.yaml.encode(self, order="deterministic").decode('utf-8'))
|
129
|
+
except NotImplementedError as e:
|
130
|
+
raise ConfigSaveError("The filesystem does not support writing files.", path=validated_path, original_error=e)
|
131
|
+
except Exception as e:
|
132
|
+
raise ConfigSaveError(f"Failed to write configuration to {validated_path}", path=validated_path, original_error=e)
|
133
|
+
|
134
|
+
@classmethod
|
135
|
+
def from_dict(cls, data: dict[str, Any]) -> "BaseConfig":
|
136
|
+
"""
|
137
|
+
Converts a dictionary to an instance of the class.
|
138
|
+
Args:
|
139
|
+
data: The dictionary to convert.
|
140
|
+
|
141
|
+
Returns:
|
142
|
+
An instance of the class with the values from the dictionary.
|
143
|
+
"""
|
144
|
+
return msgspec.convert(data, cls)
|
145
|
+
|
146
|
+
@classmethod
|
147
|
+
def from_yaml(cls, path: str, fs: AbstractFileSystem | None = None) -> "BaseConfig":
|
148
|
+
"""
|
149
|
+
Loads a YAML file and converts it to an instance of the class.
|
150
|
+
|
151
|
+
Args:
|
152
|
+
path: The path to the YAML file.
|
153
|
+
fs: An optional filesystem instance to use for file operations.
|
154
|
+
|
155
|
+
Returns:
|
156
|
+
An instance of the class with the values from the YAML file.
|
157
|
+
|
158
|
+
Raises:
|
159
|
+
ConfigLoadError: If loading the configuration fails.
|
160
|
+
ConfigPathError: If the path contains directory traversal attempts.
|
161
|
+
"""
|
162
|
+
# Validate the path to prevent directory traversal
|
163
|
+
try:
|
164
|
+
validated_path = validate_file_path(path)
|
165
|
+
except ConfigPathError as e:
|
166
|
+
raise ConfigLoadError(f"Path validation failed: {e}", path=path, original_error=e)
|
167
|
+
|
168
|
+
fs = get_filesystem(fs)
|
169
|
+
try:
|
170
|
+
with fs.open(validated_path) as f:
|
171
|
+
return msgspec.yaml.decode(f.read(), type=cls, strict=True)
|
172
|
+
except Exception as e:
|
173
|
+
raise ConfigLoadError(f"Failed to load configuration from {validated_path}", path=validated_path, original_error=e)
|
174
|
+
|
175
|
+
def _apply_dict_updates(self, target: Self, d: dict[str, Any]) -> None:
|
176
|
+
"""
|
177
|
+
Helper method to apply dictionary updates to a target instance.
|
178
|
+
|
179
|
+
Args:
|
180
|
+
target: The target instance to apply updates to.
|
181
|
+
d: The dictionary containing updates to apply.
|
182
|
+
"""
|
183
|
+
for k, v in d.items():
|
184
|
+
if hasattr(target, k):
|
185
|
+
current_value = getattr(target, k)
|
186
|
+
if isinstance(current_value, dict) and isinstance(v, dict):
|
187
|
+
# For dictionaries, update in-place to avoid deep copy
|
188
|
+
current_value.update(v)
|
189
|
+
elif hasattr(current_value, '__struct_fields__'):
|
190
|
+
# For nested msgspec structs, create a new instance with merged values
|
191
|
+
setattr(target, k, current_value.merge_dict(v))
|
192
|
+
else:
|
193
|
+
# For primitive values, direct assignment is fine
|
194
|
+
setattr(target, k, v)
|
195
|
+
else:
|
196
|
+
# Use object.__setattr__ to bypass msgspec.Struct's restrictions
|
197
|
+
object.__setattr__(target, k, v)
|
198
|
+
|
199
|
+
def update(self, d: dict[str, Any]) -> None:
|
200
|
+
"""
|
201
|
+
Updates this instance with values from the provided dictionary.
|
202
|
+
|
203
|
+
Args:
|
204
|
+
d: The dictionary containing updates to apply.
|
205
|
+
"""
|
206
|
+
self._apply_dict_updates(self, d)
|
207
|
+
|
208
|
+
def merge_dict(self, d: dict[str, Any]) -> Self:
|
209
|
+
"""
|
210
|
+
Creates a copy of this instance and updates the copy with values
|
211
|
+
from the provided dictionary. The original instance (self) is not modified.
|
212
|
+
|
213
|
+
Args:
|
214
|
+
d: The dictionary to get values from.
|
215
|
+
|
216
|
+
Returns:
|
217
|
+
A new instance of the struct with updated values.
|
218
|
+
"""
|
219
|
+
# Use shallow copy for better performance
|
220
|
+
self_copy = copy.copy(self)
|
221
|
+
self._apply_dict_updates(self_copy, d)
|
222
|
+
return self_copy
|
223
|
+
|
224
|
+
def merge(self, source: Self) -> Self:
|
225
|
+
"""
|
226
|
+
Creates a copy of this instance and updates the copy with values
|
227
|
+
from the source struct, only if the source field's value is not
|
228
|
+
its default value. The original instance (self) is not modified.
|
229
|
+
|
230
|
+
Args:
|
231
|
+
source: The msgspec.Struct instance of the same type to get values from.
|
232
|
+
|
233
|
+
Returns:
|
234
|
+
A new instance of the struct with updated values.
|
235
|
+
|
236
|
+
Raises:
|
237
|
+
TypeError: If source is not of the same type as self.
|
238
|
+
"""
|
239
|
+
if type(self) is not type(source):
|
240
|
+
raise TypeError(
|
241
|
+
f"Source must be an instance of {type(self).__name__}, not {type(source).__name__}"
|
242
|
+
)
|
243
|
+
|
244
|
+
updated_instance = copy.copy(self)
|
245
|
+
|
246
|
+
# Get default values if they exist
|
247
|
+
defaults = getattr(source, "__struct_defaults__", {})
|
248
|
+
|
249
|
+
for field in source.__struct_fields__:
|
250
|
+
source_value = getattr(source, field)
|
251
|
+
has_explicit_default = field in defaults
|
252
|
+
is_default_value = False
|
253
|
+
|
254
|
+
if has_explicit_default:
|
255
|
+
is_default_value = source_value == defaults[field]
|
256
|
+
else:
|
257
|
+
is_default_value = source_value is None
|
258
|
+
|
259
|
+
if not is_default_value:
|
260
|
+
setattr(updated_instance, field, source_value)
|
261
|
+
|
262
|
+
return updated_instance
|