FlowerPower 0.30.0__py3-none-any.whl → 0.31.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. flowerpower/cfg/__init__.py +143 -25
  2. flowerpower/cfg/base.py +132 -11
  3. flowerpower/cfg/exceptions.py +53 -0
  4. flowerpower/cfg/pipeline/__init__.py +151 -35
  5. flowerpower/cfg/pipeline/adapter.py +1 -0
  6. flowerpower/cfg/pipeline/builder.py +24 -25
  7. flowerpower/cfg/pipeline/builder_adapter.py +142 -0
  8. flowerpower/cfg/pipeline/builder_executor.py +101 -0
  9. flowerpower/cfg/pipeline/run.py +99 -40
  10. flowerpower/cfg/project/__init__.py +59 -14
  11. flowerpower/cfg/project/adapter.py +6 -0
  12. flowerpower/cli/__init__.py +8 -2
  13. flowerpower/cli/cfg.py +0 -38
  14. flowerpower/cli/pipeline.py +121 -83
  15. flowerpower/cli/utils.py +120 -71
  16. flowerpower/flowerpower.py +94 -120
  17. flowerpower/pipeline/config_manager.py +180 -0
  18. flowerpower/pipeline/executor.py +126 -0
  19. flowerpower/pipeline/lifecycle_manager.py +231 -0
  20. flowerpower/pipeline/manager.py +121 -274
  21. flowerpower/pipeline/pipeline.py +66 -278
  22. flowerpower/pipeline/registry.py +45 -4
  23. flowerpower/utils/__init__.py +19 -0
  24. flowerpower/utils/adapter.py +286 -0
  25. flowerpower/utils/callback.py +73 -67
  26. flowerpower/utils/config.py +306 -0
  27. flowerpower/utils/executor.py +178 -0
  28. flowerpower/utils/filesystem.py +194 -0
  29. flowerpower/utils/misc.py +312 -138
  30. flowerpower/utils/security.py +221 -0
  31. {flowerpower-0.30.0.dist-info → flowerpower-0.31.1.dist-info}/METADATA +2 -2
  32. flowerpower-0.31.1.dist-info/RECORD +53 -0
  33. flowerpower/cfg/pipeline/_schedule.py +0 -32
  34. flowerpower-0.30.0.dist-info/RECORD +0 -42
  35. {flowerpower-0.30.0.dist-info → flowerpower-0.31.1.dist-info}/WHEEL +0 -0
  36. {flowerpower-0.30.0.dist-info → flowerpower-0.31.1.dist-info}/entry_points.txt +0 -0
  37. {flowerpower-0.30.0.dist-info → flowerpower-0.31.1.dist-info}/licenses/LICENSE +0 -0
  38. {flowerpower-0.30.0.dist-info → flowerpower-0.31.1.dist-info}/top_level.txt +0 -0
flowerpower/cfg/__init__.py CHANGED
@@ -6,6 +6,7 @@ from munch import Munch
6
6
 
7
7
  from ..settings import CONFIG_DIR, PIPELINES_DIR
8
8
  from .base import BaseConfig
9
+ from .exceptions import ConfigLoadError, ConfigSaveError, ConfigPathError
9
10
  from .pipeline import PipelineConfig, init_pipeline_config
10
11
  from .project import ProjectConfig, init_project_config
11
12
 
@@ -21,8 +22,9 @@ class Config(BaseConfig):
21
22
  pipeline (PipelineConfig): Configuration for the pipeline.
22
23
  project (ProjectConfig): Configuration for the project.
23
24
  fs (AbstractFileSystem | None): Filesystem abstraction for I/O operations.
24
- base_dir (str | Path | None): Base directory for the configuration.
25
- storage_options (dict | Munch): Options for filesystem operations.
25
+ base_dir (str | None): Base directory for the configuration.
26
+ base_dir_path (pathlib.Path | None): Base directory as a Path object (property).
27
+ storage_options (Munch): Options for filesystem operations.
26
28
 
27
29
  Example:
28
30
  ```python
@@ -41,8 +43,61 @@ class Config(BaseConfig):
41
43
  pipeline: PipelineConfig = msgspec.field(default_factory=PipelineConfig)
42
44
  project: ProjectConfig = msgspec.field(default_factory=ProjectConfig)
43
45
  fs: AbstractFileSystem | None = None
44
- base_dir: str | Path | None = None
45
- storage_options: dict | Munch = msgspec.field(default_factory=Munch)
46
+ base_dir: str | None = None
47
+ storage_options: Munch = msgspec.field(default_factory=Munch)
48
+
49
+ def __post_init__(self):
50
+ """Handle conversion of storage_options from dict to Munch if needed."""
51
+ if isinstance(self.storage_options, dict):
52
+ self.storage_options = Munch(self.storage_options)
53
+
54
+ # Validate storage_options
55
+ self._validate_storage_options()
56
+
57
+ # Validate base_dir if provided
58
+ if self.base_dir is not None:
59
+ self._validate_base_dir()
60
+
61
+ def _validate_storage_options(self) -> None:
62
+ """Validate storage_options parameter.
63
+
64
+ Raises:
65
+ ValueError: If storage_options contains invalid values.
66
+ """
67
+ if self.storage_options is None:
68
+ self.storage_options = Munch()
69
+
70
+ if not isinstance(self.storage_options, (dict, Munch)):
71
+ raise ValueError(f"storage_options must be a dict or Munch, got {type(self.storage_options)}")
72
+
73
+ def _validate_base_dir(self) -> None:
74
+ """Validate base_dir parameter.
75
+
76
+ Raises:
77
+ ValueError: If base_dir contains invalid characters or is empty.
78
+ """
79
+ # Convert Path to string if needed
80
+ base_dir_str = str(self.base_dir) if hasattr(self.base_dir, '__str__') else self.base_dir
81
+
82
+ if not isinstance(base_dir_str, str):
83
+ raise ValueError(f"base_dir must be a string or Path, got {type(self.base_dir)}")
84
+
85
+ # Check for directory traversal attempts (but allow absolute paths)
86
+ if '..' in base_dir_str:
87
+ raise ValueError(f"Invalid base_dir: {base_dir_str}. Contains path traversal characters.")
88
+
89
+ # Check for empty string
90
+ if not base_dir_str.strip():
91
+ raise ValueError("base_dir cannot be empty or whitespace only.")
92
+
93
+ @property
94
+ def base_dir_path(self) -> Path | None:
95
+ """Get base_dir as a pathlib.Path object.
96
+
97
+ Returns:
98
+ pathlib.Path | None: The base directory as a Path object, or None if base_dir is None.
99
+ """
100
+ return Path(self.base_dir) if self.base_dir is not None else None
46
101
 
47
102
  @classmethod
48
103
  def load(
@@ -75,21 +130,29 @@ class Config(BaseConfig):
75
130
  ```
76
131
  """
77
132
  if fs is None:
78
- fs = filesystem(
79
- base_dir, cached=True, dirfs=True, storage_options=storage_options
133
+ # Use cached filesystem for better performance
134
+ storage_options_hash = cls._hash_storage_options(storage_options)
135
+ fs = cls._get_cached_filesystem(base_dir, storage_options_hash)
136
+
137
+ try:
138
+ project = ProjectConfig.load(
139
+ base_dir=base_dir,
140
+ name=name,
141
+ fs=fs,
142
+ storage_options=storage_options,
80
143
  )
81
- project = ProjectConfig.load(
82
- base_dir=base_dir,
83
- name=name,
84
- fs=fs,
85
- storage_options=storage_options,
86
- )
87
- pipeline = PipelineConfig.load(
88
- base_dir=base_dir,
89
- name=pipeline_name,
90
- fs=fs,
91
- storage_options=storage_options,
92
- )
144
+ except ConfigLoadError as e:
145
+ raise ConfigLoadError(f"Failed to load project configuration: {e}", path=base_dir, original_error=e)
146
+
147
+ try:
148
+ pipeline = PipelineConfig.load(
149
+ base_dir=base_dir,
150
+ name=pipeline_name,
151
+ fs=fs,
152
+ storage_options=storage_options,
153
+ )
154
+ except ConfigLoadError as e:
155
+ raise ConfigLoadError(f"Failed to load pipeline configuration: {e}", path=base_dir, original_error=e)
93
156
 
94
157
  return cls(
95
158
  base_dir=base_dir,
@@ -120,9 +183,9 @@ class Config(BaseConfig):
120
183
  ```
121
184
  """
122
185
  if fs is None and self.fs is None:
123
- self.fs = filesystem(
124
- self.base_dir, cached=True, dirfs=True, **storage_options
125
- )
186
+ # Use cached filesystem for better performance
187
+ storage_options_hash = self._hash_storage_options(storage_options)
188
+ self.fs = self._get_cached_filesystem(self.base_dir, storage_options_hash)
126
189
 
127
190
  if not self.fs.exists(CONFIG_DIR):
128
191
  self.fs.makedirs(CONFIG_DIR)
@@ -130,13 +193,22 @@ class Config(BaseConfig):
130
193
  if pipeline:
131
194
  self.fs.makedirs(PIPELINES_DIR, exist_ok=True)
132
195
  h_params = self.pipeline.pop("h_params") if self.pipeline.h_params else None
133
- self.pipeline.to_yaml(
134
- path=f"conf/pipelines/{self.pipeline.name}.yml", fs=self.fs
135
- )
196
+ # Validate pipeline name to prevent directory traversal
197
+ if self.pipeline.name and ('..' in self.pipeline.name or '/' in self.pipeline.name or '\\' in self.pipeline.name):
198
+ raise ConfigPathError(f"Invalid pipeline name: {self.pipeline.name}. Contains path traversal characters.", path=self.pipeline.name)
199
+ try:
200
+ self.pipeline.to_yaml(
201
+ path=f"conf/pipelines/{self.pipeline.name}.yml", fs=self.fs
202
+ )
203
+ except ConfigSaveError as e:
204
+ raise ConfigSaveError(f"Failed to save pipeline configuration: {e}", path=f"conf/pipelines/{self.pipeline.name}.yml", original_error=e)
136
205
  if h_params:
137
206
  self.pipeline.h_params = h_params
138
207
  if project:
139
- self.project.to_yaml("conf/project.yml", self.fs)
208
+ try:
209
+ self.project.to_yaml("conf/project.yml", self.fs)
210
+ except ConfigSaveError as e:
211
+ raise ConfigSaveError(f"Failed to save project configuration: {e}", path="conf/project.yml", original_error=e)
140
212
 
141
213
 
142
214
  def load(
@@ -247,3 +319,49 @@ def init_config(
247
319
  storage_options=storage_options,
248
320
  )
249
321
  return Config(pipeline=pipeline_cfg, project=project_cfg, fs=fs, base_dir=base_dir)
322
+
323
+
324
+ # Helper methods for centralized load/save logic
325
+ @classmethod
326
+ def _load_config(
327
+ cls,
328
+ config_class: type[BaseConfig],
329
+ base_dir: str,
330
+ name: str | None,
331
+ fs: AbstractFileSystem,
332
+ storage_options: dict | BaseStorageOptions | None,
333
+ ) -> BaseConfig:
334
+ """Centralized configuration loading logic.
335
+
336
+ Args:
337
+ config_class: The configuration class to load.
338
+ base_dir: Base directory for configurations.
339
+ name: Configuration name.
340
+ fs: Filesystem instance.
341
+ storage_options: Options for filesystem.
342
+
343
+ Returns:
344
+ Loaded configuration instance.
345
+ """
346
+ return config_class.load(
347
+ base_dir=base_dir,
348
+ name=name,
349
+ fs=fs,
350
+ storage_options=storage_options,
351
+ )
352
+
353
+
354
+ def _save_pipeline_config(self) -> None:
355
+ """Save pipeline configuration with proper handling of h_params."""
356
+ self.fs.makedirs(PIPELINES_DIR, exist_ok=True)
357
+ h_params = self.pipeline.pop("h_params") if self.pipeline.h_params else None
358
+ self.pipeline.to_yaml(
359
+ path=f"conf/pipelines/{self.pipeline.name}.yml", fs=self.fs
360
+ )
361
+ if h_params:
362
+ self.pipeline.h_params = h_params
363
+
364
+
365
+ def _save_project_config(self) -> None:
366
+ """Save project configuration."""
367
+ self.project.to_yaml("conf/project.yml", self.fs)
flowerpower/cfg/base.py CHANGED
@@ -1,14 +1,104 @@
1
1
  import copy
2
- from typing import Any, Self
2
+ from pathlib import Path
3
+ from typing import Any, Self, Optional
4
+ from functools import lru_cache
3
5
 
4
6
  import msgspec
5
7
  from fsspec_utils import AbstractFileSystem, filesystem
6
8
  from ..utils.misc import get_filesystem
9
+ from ..utils.security import validate_file_path as security_validate_file_path
10
+ from .exceptions import ConfigLoadError, ConfigSaveError, ConfigPathError
11
+
12
+
13
+ def validate_file_path(path: str) -> str:
14
+ """
15
+ Validate a file path to prevent directory traversal attacks.
16
+
17
+ Args:
18
+ path: The file path to validate
19
+
20
+ Returns:
21
+ str: The validated path
22
+
23
+ Raises:
24
+ ConfigPathError: If the path contains directory traversal attempts
25
+ """
26
+ try:
27
+ # Use the comprehensive security validation
28
+ validated_path = security_validate_file_path(
29
+ path,
30
+ allow_absolute=False, # Config files should be relative
31
+ allow_relative=True
32
+ )
33
+ return str(validated_path)
34
+ except Exception as e:
35
+ # Convert security errors to config path errors for consistency
36
+ raise ConfigPathError(f"Invalid file path: {path}. {str(e)}", path=path) from e
7
37
 
8
38
 
9
39
  class BaseConfig(msgspec.Struct, kw_only=True):
40
+ # Class-level cache for filesystem instances
41
+ _fs_cache = {}
42
+
43
+ @classmethod
44
+ @lru_cache(maxsize=32)
45
+ def _get_cached_filesystem(cls, base_dir: str, storage_options_hash: int) -> AbstractFileSystem:
46
+ """Get a cached filesystem instance.
47
+
48
+ Args:
49
+ base_dir: Base directory for the filesystem.
50
+ storage_options_hash: Hash of storage options for cache key.
51
+
52
+ Returns:
53
+ Cached filesystem instance.
54
+ """
55
+ cache_key = (base_dir, storage_options_hash)
56
+ if cache_key not in cls._fs_cache:
57
+ cls._fs_cache[cache_key] = filesystem(base_dir, cached=True, dirfs=True)
58
+ return cls._fs_cache[cache_key]
59
+
60
+ @classmethod
61
+ def _hash_storage_options(cls, storage_options: dict | None) -> int:
62
+ """Create a hash of storage options for caching.
63
+
64
+ Args:
65
+ storage_options: Storage options to hash.
66
+
67
+ Returns:
68
+ Hash of storage options.
69
+ """
70
+ if not storage_options:
71
+ return hash(())
72
+
73
+ # Convert to frozenset of items for consistent hashing
74
+ try:
75
+ return hash(frozenset(sorted(storage_options.items())))
76
+ except TypeError:
77
+ # If items are not hashable, use string representation
78
+ return hash(str(sorted(storage_options.items())))
10
79
  def to_dict(self) -> dict[str, Any]:
11
- return msgspec.to_builtins(self)
80
+ # Convert to dictionary, handling special cases like type objects
81
+ result = {}
82
+ for field in self.__struct_fields__:
83
+ value = getattr(self, field)
84
+ if isinstance(value, type):
85
+ # Convert type objects to string representation
86
+ result[field] = str(value)
87
+ elif hasattr(value, '__struct_fields__'):
88
+ # Recursively convert nested msgspec structs
89
+ result[field] = value.to_dict()
90
+ elif isinstance(value, list):
91
+ # Handle lists that might contain type objects (like exception classes)
92
+ converted_list = []
93
+ for item in value:
94
+ if isinstance(item, type):
95
+ converted_list.append(str(item))
96
+ else:
97
+ converted_list.append(item)
98
+ result[field] = converted_list
99
+ else:
100
+ result[field] = value
101
+ return result
12
102
 
13
103
  def to_yaml(self, path: str, fs: AbstractFileSystem | None = None) -> None:
14
104
  """
@@ -19,14 +109,27 @@ class BaseConfig(msgspec.Struct, kw_only=True):
19
109
  fs: An optional filesystem instance to use for file operations.
20
110
 
21
111
  Raises:
22
- NotImplementedError: If the filesystem does not support writing files.
112
+ ConfigSaveError: If saving the configuration fails.
113
+ ConfigPathError: If the path contains directory traversal attempts.
23
114
  """
24
- fs = get_filesystem(fs)
115
+ # Validate the path to prevent directory traversal
116
+ try:
117
+ validated_path = validate_file_path(path)
118
+ except ConfigPathError as e:
119
+ raise ConfigSaveError(f"Path validation failed: {e}", path=path, original_error=e)
120
+
121
+ # Use cached filesystem if available
122
+ if fs is None:
123
+ # Use cached filesystem if available
124
+ if fs is None:
125
+ fs = get_filesystem(fs)
25
126
  try:
26
- with fs.open(path, "wb") as f:
27
- f.write(msgspec.yaml.encode(self, order="deterministic"))
28
- except NotImplementedError:
29
- raise NotImplementedError("The filesystem does not support writing files.")
127
+ with fs.open(validated_path, "w") as f:
128
+ f.write(msgspec.yaml.encode(self, order="deterministic").decode('utf-8'))
129
+ except NotImplementedError as e:
130
+ raise ConfigSaveError("The filesystem does not support writing files.", path=validated_path, original_error=e)
131
+ except Exception as e:
132
+ raise ConfigSaveError(f"Failed to write configuration to {validated_path}", path=validated_path, original_error=e)
30
133
 
31
134
  @classmethod
32
135
  def from_dict(cls, data: dict[str, Any]) -> "BaseConfig":
@@ -52,10 +155,22 @@ class BaseConfig(msgspec.Struct, kw_only=True):
52
155
  Returns:
53
156
  An instance of the class with the values from the YAML file.
54
157
 
158
+ Raises:
159
+ ConfigLoadError: If loading the configuration fails.
160
+ ConfigPathError: If the path contains directory traversal attempts.
55
161
  """
162
+ # Validate the path to prevent directory traversal
163
+ try:
164
+ validated_path = validate_file_path(path)
165
+ except ConfigPathError as e:
166
+ raise ConfigLoadError(f"Path validation failed: {e}", path=path, original_error=e)
167
+
56
168
  fs = get_filesystem(fs)
57
- with fs.open(path) as f:
58
- return msgspec.yaml.decode(f.read(), type=cls, strict=False)
169
+ try:
170
+ with fs.open(validated_path) as f:
171
+ return msgspec.yaml.decode(f.read(), type=cls, strict=True)
172
+ except Exception as e:
173
+ raise ConfigLoadError(f"Failed to load configuration from {validated_path}", path=validated_path, original_error=e)
59
174
 
60
175
  def _apply_dict_updates(self, target: Self, d: dict[str, Any]) -> None:
61
176
  """
@@ -69,8 +184,13 @@ class BaseConfig(msgspec.Struct, kw_only=True):
69
184
  if hasattr(target, k):
70
185
  current_value = getattr(target, k)
71
186
  if isinstance(current_value, dict) and isinstance(v, dict):
187
+ # For dictionaries, update in-place to avoid deep copy
72
188
  current_value.update(v)
189
+ elif hasattr(current_value, '__struct_fields__'):
190
+ # For nested msgspec structs, create a new instance with merged values
191
+ setattr(target, k, current_value.merge_dict(v))
73
192
  else:
193
+ # For primitive values, direct assignment is fine
74
194
  setattr(target, k, v)
75
195
  else:
76
196
  # Use object.__setattr__ to bypass msgspec.Struct's restrictions
@@ -96,7 +216,8 @@ class BaseConfig(msgspec.Struct, kw_only=True):
96
216
  Returns:
97
217
  A new instance of the struct with updated values.
98
218
  """
99
- self_copy = copy.deepcopy(self)
219
+ # Use shallow copy for better performance
220
+ self_copy = copy.copy(self)
100
221
  self._apply_dict_updates(self_copy, d)
101
222
  return self_copy
102
223
 
flowerpower/cfg/exceptions.py ADDED
@@ -0,0 +1,53 @@
1
+ """
2
+ Custom exceptions for the cfg module.
3
+ """
4
+
5
+ from typing import Any, Dict, Optional
6
+
7
+
8
+ class ConfigError(Exception):
9
+ """Base exception for configuration-related errors."""
10
+ pass
11
+
12
+
13
+ class ConfigLoadError(ConfigError):
14
+ """Exception raised when configuration loading fails."""
15
+
16
+ def __init__(self, message: str, path: Optional[str] = None, original_error: Optional[Exception] = None):
17
+ self.path = path
18
+ self.original_error = original_error
19
+ super().__init__(message)
20
+
21
+
22
+ class ConfigSaveError(ConfigError):
23
+ """Exception raised when configuration saving fails."""
24
+
25
+ def __init__(self, message: str, path: Optional[str] = None, original_error: Optional[Exception] = None):
26
+ self.path = path
27
+ self.original_error = original_error
28
+ super().__init__(message)
29
+
30
+
31
+ class ConfigValidationError(ConfigError):
32
+ """Exception raised when configuration validation fails."""
33
+
34
+ def __init__(self, message: str, field: Optional[str] = None, value: Any = None):
35
+ self.field = field
36
+ self.value = value
37
+ super().__init__(message)
38
+
39
+
40
+ class ConfigSecurityError(ConfigError):
41
+ """Exception raised for security-related configuration errors."""
42
+
43
+ def __init__(self, message: str, details: Optional[Dict[str, Any]] = None):
44
+ self.details = details or {}
45
+ super().__init__(message)
46
+
47
+
48
+ class ConfigPathError(ConfigSecurityError):
49
+ """Exception raised for path-related security errors."""
50
+
51
+ def __init__(self, message: str, path: Optional[str] = None):
52
+ super().__init__(message, {"path": path})
53
+ self.path = path