flowyml 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159) hide show
  1. flowyml/__init__.py +207 -0
  2. flowyml/assets/__init__.py +22 -0
  3. flowyml/assets/artifact.py +40 -0
  4. flowyml/assets/base.py +209 -0
  5. flowyml/assets/dataset.py +100 -0
  6. flowyml/assets/featureset.py +301 -0
  7. flowyml/assets/metrics.py +104 -0
  8. flowyml/assets/model.py +82 -0
  9. flowyml/assets/registry.py +157 -0
  10. flowyml/assets/report.py +315 -0
  11. flowyml/cli/__init__.py +5 -0
  12. flowyml/cli/experiment.py +232 -0
  13. flowyml/cli/init.py +256 -0
  14. flowyml/cli/main.py +327 -0
  15. flowyml/cli/run.py +75 -0
  16. flowyml/cli/stack_cli.py +532 -0
  17. flowyml/cli/ui.py +33 -0
  18. flowyml/core/__init__.py +68 -0
  19. flowyml/core/advanced_cache.py +274 -0
  20. flowyml/core/approval.py +64 -0
  21. flowyml/core/cache.py +203 -0
  22. flowyml/core/checkpoint.py +148 -0
  23. flowyml/core/conditional.py +373 -0
  24. flowyml/core/context.py +155 -0
  25. flowyml/core/error_handling.py +419 -0
  26. flowyml/core/executor.py +354 -0
  27. flowyml/core/graph.py +185 -0
  28. flowyml/core/parallel.py +452 -0
  29. flowyml/core/pipeline.py +764 -0
  30. flowyml/core/project.py +253 -0
  31. flowyml/core/resources.py +424 -0
  32. flowyml/core/scheduler.py +630 -0
  33. flowyml/core/scheduler_config.py +32 -0
  34. flowyml/core/step.py +201 -0
  35. flowyml/core/step_grouping.py +292 -0
  36. flowyml/core/templates.py +226 -0
  37. flowyml/core/versioning.py +217 -0
  38. flowyml/integrations/__init__.py +1 -0
  39. flowyml/integrations/keras.py +134 -0
  40. flowyml/monitoring/__init__.py +1 -0
  41. flowyml/monitoring/alerts.py +57 -0
  42. flowyml/monitoring/data.py +102 -0
  43. flowyml/monitoring/llm.py +160 -0
  44. flowyml/monitoring/monitor.py +57 -0
  45. flowyml/monitoring/notifications.py +246 -0
  46. flowyml/registry/__init__.py +5 -0
  47. flowyml/registry/model_registry.py +491 -0
  48. flowyml/registry/pipeline_registry.py +55 -0
  49. flowyml/stacks/__init__.py +27 -0
  50. flowyml/stacks/base.py +77 -0
  51. flowyml/stacks/bridge.py +288 -0
  52. flowyml/stacks/components.py +155 -0
  53. flowyml/stacks/gcp.py +499 -0
  54. flowyml/stacks/local.py +112 -0
  55. flowyml/stacks/migration.py +97 -0
  56. flowyml/stacks/plugin_config.py +78 -0
  57. flowyml/stacks/plugins.py +401 -0
  58. flowyml/stacks/registry.py +226 -0
  59. flowyml/storage/__init__.py +26 -0
  60. flowyml/storage/artifacts.py +246 -0
  61. flowyml/storage/materializers/__init__.py +20 -0
  62. flowyml/storage/materializers/base.py +133 -0
  63. flowyml/storage/materializers/keras.py +185 -0
  64. flowyml/storage/materializers/numpy.py +94 -0
  65. flowyml/storage/materializers/pandas.py +142 -0
  66. flowyml/storage/materializers/pytorch.py +135 -0
  67. flowyml/storage/materializers/sklearn.py +110 -0
  68. flowyml/storage/materializers/tensorflow.py +152 -0
  69. flowyml/storage/metadata.py +931 -0
  70. flowyml/tracking/__init__.py +1 -0
  71. flowyml/tracking/experiment.py +211 -0
  72. flowyml/tracking/leaderboard.py +191 -0
  73. flowyml/tracking/runs.py +145 -0
  74. flowyml/ui/__init__.py +15 -0
  75. flowyml/ui/backend/Dockerfile +31 -0
  76. flowyml/ui/backend/__init__.py +0 -0
  77. flowyml/ui/backend/auth.py +163 -0
  78. flowyml/ui/backend/main.py +187 -0
  79. flowyml/ui/backend/routers/__init__.py +0 -0
  80. flowyml/ui/backend/routers/assets.py +45 -0
  81. flowyml/ui/backend/routers/execution.py +179 -0
  82. flowyml/ui/backend/routers/experiments.py +49 -0
  83. flowyml/ui/backend/routers/leaderboard.py +118 -0
  84. flowyml/ui/backend/routers/notifications.py +72 -0
  85. flowyml/ui/backend/routers/pipelines.py +110 -0
  86. flowyml/ui/backend/routers/plugins.py +192 -0
  87. flowyml/ui/backend/routers/projects.py +85 -0
  88. flowyml/ui/backend/routers/runs.py +66 -0
  89. flowyml/ui/backend/routers/schedules.py +222 -0
  90. flowyml/ui/backend/routers/traces.py +84 -0
  91. flowyml/ui/frontend/Dockerfile +20 -0
  92. flowyml/ui/frontend/README.md +315 -0
  93. flowyml/ui/frontend/dist/assets/index-DFNQnrUj.js +448 -0
  94. flowyml/ui/frontend/dist/assets/index-pWI271rZ.css +1 -0
  95. flowyml/ui/frontend/dist/index.html +16 -0
  96. flowyml/ui/frontend/index.html +15 -0
  97. flowyml/ui/frontend/nginx.conf +26 -0
  98. flowyml/ui/frontend/package-lock.json +3545 -0
  99. flowyml/ui/frontend/package.json +33 -0
  100. flowyml/ui/frontend/postcss.config.js +6 -0
  101. flowyml/ui/frontend/src/App.jsx +21 -0
  102. flowyml/ui/frontend/src/app/assets/page.jsx +397 -0
  103. flowyml/ui/frontend/src/app/dashboard/page.jsx +295 -0
  104. flowyml/ui/frontend/src/app/experiments/[experimentId]/page.jsx +255 -0
  105. flowyml/ui/frontend/src/app/experiments/page.jsx +360 -0
  106. flowyml/ui/frontend/src/app/leaderboard/page.jsx +133 -0
  107. flowyml/ui/frontend/src/app/pipelines/page.jsx +454 -0
  108. flowyml/ui/frontend/src/app/plugins/page.jsx +48 -0
  109. flowyml/ui/frontend/src/app/projects/page.jsx +292 -0
  110. flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +682 -0
  111. flowyml/ui/frontend/src/app/runs/page.jsx +470 -0
  112. flowyml/ui/frontend/src/app/schedules/page.jsx +585 -0
  113. flowyml/ui/frontend/src/app/settings/page.jsx +314 -0
  114. flowyml/ui/frontend/src/app/tokens/page.jsx +456 -0
  115. flowyml/ui/frontend/src/app/traces/page.jsx +246 -0
  116. flowyml/ui/frontend/src/components/Layout.jsx +108 -0
  117. flowyml/ui/frontend/src/components/PipelineGraph.jsx +295 -0
  118. flowyml/ui/frontend/src/components/header/Header.jsx +72 -0
  119. flowyml/ui/frontend/src/components/plugins/AddPluginDialog.jsx +121 -0
  120. flowyml/ui/frontend/src/components/plugins/InstalledPlugins.jsx +124 -0
  121. flowyml/ui/frontend/src/components/plugins/PluginBrowser.jsx +167 -0
  122. flowyml/ui/frontend/src/components/plugins/PluginManager.jsx +60 -0
  123. flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +145 -0
  124. flowyml/ui/frontend/src/components/ui/Badge.jsx +26 -0
  125. flowyml/ui/frontend/src/components/ui/Button.jsx +34 -0
  126. flowyml/ui/frontend/src/components/ui/Card.jsx +44 -0
  127. flowyml/ui/frontend/src/components/ui/CodeSnippet.jsx +38 -0
  128. flowyml/ui/frontend/src/components/ui/CollapsibleCard.jsx +53 -0
  129. flowyml/ui/frontend/src/components/ui/DataView.jsx +175 -0
  130. flowyml/ui/frontend/src/components/ui/EmptyState.jsx +49 -0
  131. flowyml/ui/frontend/src/components/ui/ExecutionStatus.jsx +122 -0
  132. flowyml/ui/frontend/src/components/ui/KeyValue.jsx +25 -0
  133. flowyml/ui/frontend/src/components/ui/ProjectSelector.jsx +134 -0
  134. flowyml/ui/frontend/src/contexts/ProjectContext.jsx +79 -0
  135. flowyml/ui/frontend/src/contexts/ThemeContext.jsx +54 -0
  136. flowyml/ui/frontend/src/index.css +11 -0
  137. flowyml/ui/frontend/src/layouts/MainLayout.jsx +23 -0
  138. flowyml/ui/frontend/src/main.jsx +10 -0
  139. flowyml/ui/frontend/src/router/index.jsx +39 -0
  140. flowyml/ui/frontend/src/services/pluginService.js +90 -0
  141. flowyml/ui/frontend/src/utils/api.js +47 -0
  142. flowyml/ui/frontend/src/utils/cn.js +6 -0
  143. flowyml/ui/frontend/tailwind.config.js +31 -0
  144. flowyml/ui/frontend/vite.config.js +21 -0
  145. flowyml/ui/utils.py +77 -0
  146. flowyml/utils/__init__.py +67 -0
  147. flowyml/utils/config.py +308 -0
  148. flowyml/utils/debug.py +240 -0
  149. flowyml/utils/environment.py +346 -0
  150. flowyml/utils/git.py +319 -0
  151. flowyml/utils/logging.py +61 -0
  152. flowyml/utils/performance.py +314 -0
  153. flowyml/utils/stack_config.py +296 -0
  154. flowyml/utils/validation.py +270 -0
  155. flowyml-1.1.0.dist-info/METADATA +372 -0
  156. flowyml-1.1.0.dist-info/RECORD +159 -0
  157. flowyml-1.1.0.dist-info/WHEEL +4 -0
  158. flowyml-1.1.0.dist-info/entry_points.txt +3 -0
  159. flowyml-1.1.0.dist-info/licenses/LICENSE +17 -0
@@ -0,0 +1,253 @@
1
+ """Multi-tenancy and project organization."""
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Any, TYPE_CHECKING
6
+ from datetime import datetime
7
+
8
+ if TYPE_CHECKING:
9
+ from flowyml.core.pipeline import Pipeline
10
+
11
+
12
+ class Project:
13
+ """Project for organizing pipelines, runs, and artifacts.
14
+
15
+ Provides multi-tenancy and better organization.
16
+
17
+ Examples:
18
+ >>> from flowyml import Project, Pipeline, step
19
+ >>> # Create project
20
+ >>> project = Project("recommendation_system")
21
+ >>> # Create pipeline in project
22
+ >>> pipeline = project.create_pipeline("training")
23
+ >>> pipeline.add_step(...)
24
+ >>> result = pipeline.run()
25
+ >>> # List all runs in project
26
+ >>> runs = project.list_runs()
27
+ >>> # Get artifacts
28
+ >>> artifacts = project.get_artifacts()
29
+ """
30
+
31
+ def __init__(
32
+ self,
33
+ name: str,
34
+ description: str = "",
35
+ projects_dir: str = ".flowyml/projects",
36
+ ):
37
+ self.name = name
38
+ self.description = description
39
+
40
+ # Project directory
41
+ self.project_dir = Path(projects_dir) / name
42
+ self.project_dir.mkdir(parents=True, exist_ok=True)
43
+
44
+ # Sub-directories
45
+ self.pipelines_dir = self.project_dir / "pipelines"
46
+ self.pipelines_dir.mkdir(exist_ok=True)
47
+
48
+ self.runs_dir = self.project_dir / "runs"
49
+ self.runs_dir.mkdir(exist_ok=True)
50
+
51
+ self.artifacts_dir = self.project_dir / "artifacts"
52
+ self.artifacts_dir.mkdir(exist_ok=True)
53
+
54
+ # Metadata
55
+ self.metadata_file = self.project_dir / "project.json"
56
+ self._load_or_create_metadata()
57
+
58
+ # Storage
59
+ from flowyml.storage.metadata import SQLiteMetadataStore
60
+
61
+ db_path = str(self.project_dir / "metadata.db")
62
+ self.metadata_store = SQLiteMetadataStore(db_path)
63
+
64
+ def _load_or_create_metadata(self) -> None:
65
+ """Load or create project metadata."""
66
+ if self.metadata_file.exists():
67
+ with open(self.metadata_file) as f:
68
+ self.metadata = json.load(f)
69
+ else:
70
+ self.metadata = {
71
+ "name": self.name,
72
+ "description": self.description,
73
+ "created_at": datetime.now().isoformat(),
74
+ "pipelines": [],
75
+ "tags": {},
76
+ }
77
+ self._save_metadata()
78
+
79
+ def _save_metadata(self) -> None:
80
+ """Save project metadata."""
81
+ with open(self.metadata_file, "w") as f:
82
+ json.dump(self.metadata, f, indent=2)
83
+
84
+ def create_pipeline(self, name: str, **kwargs) -> "Pipeline":
85
+ """Create a pipeline in this project.
86
+
87
+ Args:
88
+ name: Pipeline name
89
+ **kwargs: Additional arguments for Pipeline
90
+
91
+ Returns:
92
+ Pipeline instance
93
+ """
94
+ from flowyml.core.pipeline import Pipeline
95
+
96
+ # Create pipeline with project-specific settings
97
+ pipeline = Pipeline(
98
+ name=name,
99
+ cache_dir=str(self.artifacts_dir / "cache"),
100
+ **kwargs,
101
+ )
102
+
103
+ # Override runs directory to project runs
104
+ pipeline.runs_dir = self.runs_dir
105
+
106
+ # Use project metadata store
107
+ pipeline.metadata_store = self.metadata_store
108
+
109
+ # Register pipeline
110
+ if name not in self.metadata["pipelines"]:
111
+ self.metadata["pipelines"].append(name)
112
+ self._save_metadata()
113
+
114
+ return pipeline
115
+
116
+ def list_runs(
117
+ self,
118
+ pipeline_name: str | None = None,
119
+ limit: int = 100,
120
+ ) -> list[dict[str, Any]]:
121
+ """List all runs in this project.
122
+
123
+ Args:
124
+ pipeline_name: Filter by pipeline name
125
+ limit: Maximum number of runs
126
+
127
+ Returns:
128
+ List of run metadata
129
+ """
130
+ if pipeline_name:
131
+ runs = self.metadata_store.query(pipeline_name=pipeline_name)
132
+ else:
133
+ runs = self.metadata_store.list_runs(limit=limit)
134
+
135
+ return runs
136
+
137
+ def get_artifacts(
138
+ self,
139
+ artifact_type: str | None = None,
140
+ limit: int = 100,
141
+ ) -> list[dict[str, Any]]:
142
+ """Get artifacts in this project.
143
+
144
+ Args:
145
+ artifact_type: Filter by type
146
+ limit: Maximum number of artifacts
147
+
148
+ Returns:
149
+ List of artifact metadata
150
+ """
151
+ filters = {}
152
+ if artifact_type:
153
+ filters["type"] = artifact_type
154
+
155
+ return self.metadata_store.list_assets(limit=limit, **filters)
156
+
157
+ def get_pipelines(self) -> list[str]:
158
+ """Get list of pipelines in this project."""
159
+ return self.metadata["pipelines"]
160
+
161
+ def get_stats(self) -> dict[str, Any]:
162
+ """Get project statistics."""
163
+ stats = self.metadata_store.get_statistics()
164
+ stats["project_name"] = self.name
165
+ stats["pipelines"] = len(self.metadata["pipelines"])
166
+ return stats
167
+
168
+ def add_tag(self, key: str, value: str) -> None:
169
+ """Add a tag to the project."""
170
+ self.metadata["tags"][key] = value
171
+ self._save_metadata()
172
+
173
+ def get_tags(self) -> dict[str, str]:
174
+ """Get project tags."""
175
+ return self.metadata["tags"]
176
+
177
+ def export_metadata(self, output_file: str) -> None:
178
+ """Export project metadata."""
179
+ export_data = {
180
+ "project": self.metadata,
181
+ "runs": self.list_runs(),
182
+ "artifacts": self.get_artifacts(),
183
+ "stats": self.get_stats(),
184
+ }
185
+
186
+ with open(output_file, "w") as f:
187
+ json.dump(export_data, f, indent=2)
188
+
189
+ def __repr__(self) -> str:
190
+ return f"Project(name='{self.name}', pipelines={len(self.metadata['pipelines'])})"
191
+
192
+
193
class ProjectManager:
    """Manage multiple projects stored under one directory.

    Examples:
        >>> from flowyml import ProjectManager
        >>> manager = ProjectManager()
        >>> # Create projects
        >>> rec_sys = manager.create_project("recommendation_system")
        >>> fraud = manager.create_project("fraud_detection")
        >>> # List all projects
        >>> projects = manager.list_projects()
        >>> # Get project
        >>> project = manager.get_project("recommendation_system")
    """

    def __init__(self, projects_dir: str = ".flowyml/projects"):
        """Remember the projects root and create it if it does not exist."""
        self.projects_dir = Path(projects_dir)
        self.projects_dir.mkdir(parents=True, exist_ok=True)

    def create_project(self, name: str, description: str = "") -> "Project":
        """Create a new project (or open it if its directory already exists)."""
        # Pass the projects directory itself; Project appends the name.
        return Project(name, description, str(self.projects_dir))

    def get_project(self, name: str) -> "Project | None":
        """Get an existing project, or ``None`` if its directory is missing."""
        project_dir = self.projects_dir / name
        if not project_dir.exists():
            return None
        # Pass the projects directory itself; Project appends the name.
        return Project(name, projects_dir=str(self.projects_dir))

    def list_projects(self) -> list[dict[str, Any]]:
        """List the metadata of all projects, ordered by directory name.

        Only direct sub-directories containing a ``project.json`` are
        considered. Unreadable or malformed metadata files are skipped with
        a logged warning instead of being dropped silently.
        """
        import logging

        projects = []
        if not self.projects_dir.exists():
            return projects

        # Sort for a deterministic listing; iterdir() order is filesystem-dependent.
        for project_dir in sorted(self.projects_dir.iterdir()):
            if not project_dir.is_dir():
                continue
            metadata_file = project_dir / "project.json"
            if not metadata_file.exists():
                continue
            try:
                with open(metadata_file, encoding="utf-8") as f:
                    metadata = json.load(f)
            except (OSError, ValueError) as exc:
                # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
                logging.getLogger(__name__).warning(
                    "Skipping unreadable project metadata %s: %s",
                    metadata_file,
                    exc,
                )
                continue
            projects.append(metadata)
        return projects

    def delete_project(self, name: str, confirm: bool = False) -> None:
        """Delete a project directory (requires confirmation).

        Args:
            name: Name of the project to delete.
            confirm: Must be ``True`` for anything to be deleted.

        Raises:
            ValueError: If ``name`` does not resolve to a location strictly
                inside the projects directory (e.g. ``""``, ``"."``,
                ``".."`` or ``"../other"``). Without this check such names
                would remove the whole projects root or an unrelated path.
        """
        if not confirm:
            return

        project_dir = self.projects_dir / name
        root = self.projects_dir.resolve()
        if root not in project_dir.resolve().parents:
            msg = f"Invalid project name: {name!r}"
            raise ValueError(msg)

        if project_dir.exists():
            import shutil

            shutil.rmtree(project_dir)
@@ -0,0 +1,424 @@
1
+ """Resource specification for pipeline steps.
2
+
3
+ This module provides orchestrator-agnostic resource specification for flowyml pipeline steps,
4
+ including CPU, memory, GPU, storage, and node affinity requirements.
5
+ """
6
+
7
+ from dataclasses import dataclass, field
8
+ from typing import Any, Optional
9
+ import re
10
+
11
+
12
@dataclass
class GPUConfig:
    """GPU configuration specification.

    Args:
        gpu_type: GPU type/model (e.g., 'nvidia-tesla-v100', 'nvidia-a100')
        count: Number of GPUs required
        memory: GPU memory per device (e.g., '16Gi', '32Gi')

    Examples:
        >>> gpu = GPUConfig(gpu_type="nvidia-tesla-v100", count=2, memory="16Gi")
        >>> gpu = GPUConfig(gpu_type="nvidia-a100", count=4)
    """

    gpu_type: str
    count: int = 1
    memory: Optional[str] = None

    def __post_init__(self):
        """Validate GPU configuration.

        Raises:
            ValueError: If ``count`` is below 1, or ``memory`` is set but is
                not a number followed by one of Ki/Mi/Gi/Ti/K/M/G/T.
        """
        if self.count < 1:
            msg = f"GPU count must be >= 1, got {self.count}"
            raise ValueError(msg)
        if self.memory and not self._is_valid_memory(self.memory):
            msg = f"Invalid GPU memory format: {self.memory}"
            raise ValueError(msg)

    @staticmethod
    def _is_valid_memory(memory: str) -> bool:
        """Check if memory string is valid (e.g., '16Gi', '32768Mi')."""
        # fullmatch rather than match + "$": "$" also matches just before a
        # trailing newline, which would let "16Gi\n" through.
        return re.fullmatch(r"\d+(\.\d+)?(Ki|Mi|Gi|Ti|K|M|G|T)", memory) is not None

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary representation."""
        return {
            "type": self.gpu_type,
            "count": self.count,
            "memory": self.memory,
        }

    @staticmethod
    def _type_rank(gpu_type: str) -> int:
        """Return the preference rank of a GPU type (lower is better).

        Matching is a case-insensitive substring test against the known
        names; unknown types all share the worst rank, 999.
        """
        # Prefer A100 > V100 > T4 > other. "nvidia-tesla-t4" is listed
        # explicitly because "nvidia-t4" is not a substring of it.
        gpu_hierarchy = (
            "nvidia-a100",
            "nvidia-tesla-a100",
            "nvidia-tesla-v100",
            "nvidia-v100",
            "nvidia-t4",
            "nvidia-tesla-t4",
        )
        lowered = gpu_type.lower()
        for rank, known in enumerate(gpu_hierarchy):
            if known in lowered:
                return rank
        return 999

    def merge_with(self, other: "GPUConfig") -> "GPUConfig":
        """Merge with another GPU config, taking max count and best GPU type.

        Args:
            other: Another GPUConfig to merge with

        Returns:
            New GPUConfig with merged specifications. On equal type rank the
            type of ``self`` is kept; memory is the larger of the two when
            both are set, otherwise whichever one is set.
        """
        best_type = self.gpu_type
        if self._type_rank(other.gpu_type) < self._type_rank(self.gpu_type):
            best_type = other.gpu_type

        # Take max memory if both specified, else the one that is present.
        if self.memory and other.memory:
            max_memory = self._compare_memory(self.memory, other.memory)
        else:
            max_memory = self.memory or other.memory or None

        return GPUConfig(
            gpu_type=best_type,
            count=max(self.count, other.count),
            memory=max_memory,
        )

    @staticmethod
    def _compare_memory(mem1: str, mem2: str) -> str:
        """Return the larger memory specification (``mem1`` on a tie)."""
        multipliers = {
            "Ki": 1024,
            "Mi": 1024**2,
            "Gi": 1024**3,
            "Ti": 1024**4,
            "K": 1000,
            "M": 1000**2,
            "G": 1000**3,
            "T": 1000**4,
            "": 1,
        }

        def to_bytes(mem: str) -> int:
            # Unparseable strings compare as 0 bytes rather than raising.
            match = re.match(r"^(\d+(?:\.\d+)?)(Ki|Mi|Gi|Ti|K|M|G|T)?$", mem)
            if not match:
                return 0
            value, unit = float(match.group(1)), match.group(2) or ""
            return int(value * multipliers.get(unit, 1))

        return mem1 if to_bytes(mem1) >= to_bytes(mem2) else mem2
126
+
127
+
128
@dataclass
class NodeAffinity:
    """Node selection rules attached to a step.

    Args:
        required: Node labels that must match (hard constraints)
        preferred: Node labels that should match if possible (soft constraints)
        tolerations: Tolerations for node taints

    Examples:
        >>> affinity = NodeAffinity(
        ...     required={"cloud.google.com/gke-nodepool": "gpu-pool"}, preferred={"instance-type": "n1-standard-8"}
        ... )
    """

    required: dict[str, str] = field(default_factory=dict)
    preferred: dict[str, str] = field(default_factory=dict)
    tolerations: list[dict[str, str]] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return the three rule collections keyed by field name.

        The returned dictionary references the instance's own containers;
        no copies are made.
        """
        return dict(
            required=self.required,
            preferred=self.preferred,
            tolerations=self.tolerations,
        )

    def merge_with(self, other: "NodeAffinity") -> "NodeAffinity":
        """Combine this affinity with another one into a new object.

        Args:
            other: Another NodeAffinity to merge with

        Returns:
            New NodeAffinity whose label maps contain the keys of both
            inputs (``other`` wins when a key appears in both) and whose
            tolerations are those of ``self`` followed by any toleration of
            ``other`` not already present.
        """
        # Label maps: start from our labels, then overlay the other's.
        all_required = dict(self.required)
        all_required.update(other.required)

        all_preferred = dict(self.preferred)
        all_preferred.update(other.preferred)

        # Tolerations: keep order, drop exact duplicates.
        all_tolerations = [*self.tolerations]
        for toleration in other.tolerations:
            if toleration in all_tolerations:
                continue
            all_tolerations.append(toleration)

        return NodeAffinity(all_required, all_preferred, all_tolerations)
181
+
182
+
183
@dataclass
class ResourceRequirements:
    """Resource requirements for a pipeline step.

    Orchestrator-agnostic resource specification that can be translated to
    platform-specific formats (Kubernetes, Vertex AI, SageMaker, etc.).

    Args:
        cpu: CPU cores (e.g., "2", "500m", "2.5")
        memory: RAM amount (e.g., "4Gi", "8192Mi", "16G")
        storage: Ephemeral storage (e.g., "100Gi", "50G")
        gpu: GPU configuration
        node_affinity: Node selection rules

    Examples:
        >>> # Simple CPU/memory
        >>> resources = ResourceRequirements(cpu="2", memory="4Gi")

        >>> # With GPU
        >>> resources = ResourceRequirements(cpu="4", memory="16Gi", gpu=GPUConfig(gpu_type="nvidia-tesla-v100", count=2))

        >>> # With node affinity
        >>> resources = ResourceRequirements(
        ...     cpu="8",
        ...     memory="32Gi",
        ...     node_affinity=NodeAffinity(
        ...         required={"gpu": "true"}, tolerations=[{"key": "nvidia.com/gpu", "operator": "Exists"}]
        ...     ),
        ... )
    """

    cpu: Optional[str] = None
    memory: Optional[str] = None
    storage: Optional[str] = None
    # Forward references, so this class does not depend on definition order.
    gpu: Optional["GPUConfig"] = None
    node_affinity: Optional["NodeAffinity"] = None

    def __post_init__(self):
        """Validate resource specifications.

        Raises:
            ValueError: If ``cpu``, ``memory`` or ``storage`` is set but does
                not match the accepted format.
        """
        if self.cpu and not self._is_valid_cpu(self.cpu):
            msg = f"Invalid CPU format: {self.cpu}"
            raise ValueError(msg)
        if self.memory and not self._is_valid_memory(self.memory):
            msg = f"Invalid memory format: {self.memory}"
            raise ValueError(msg)
        if self.storage and not self._is_valid_memory(self.storage):
            msg = f"Invalid storage format: {self.storage}"
            raise ValueError(msg)

    @staticmethod
    def _is_valid_cpu(cpu: str) -> bool:
        """Check if CPU string is valid (e.g., '2', '500m', '2.5')."""
        # fullmatch rather than match + "$": "$" also matches just before a
        # trailing newline, so "500m\n" used to pass validation.
        return re.fullmatch(r"\d+(\.\d+)?m?", cpu) is not None

    @staticmethod
    def _is_valid_memory(memory: str) -> bool:
        """Check if memory string is valid (e.g., '16Gi', '32768Mi')."""
        return re.fullmatch(r"\d+(\.\d+)?(Ki|Mi|Gi|Ti|K|M|G|T|B)?", memory) is not None

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary representation, omitting unset fields."""
        result = {}
        if self.cpu:
            result["cpu"] = self.cpu
        if self.memory:
            result["memory"] = self.memory
        if self.storage:
            result["storage"] = self.storage
        if self.gpu:
            result["gpu"] = self.gpu.to_dict()
        if self.node_affinity:
            result["node_affinity"] = self.node_affinity.to_dict()
        return result

    def has_gpu(self) -> bool:
        """Check if GPU resources are requested."""
        return self.gpu is not None

    def get_gpu_count(self) -> int:
        """Get total number of GPUs requested."""
        return self.gpu.count if self.gpu else 0

    @staticmethod
    def _compare_cpu(cpu1: str, cpu2: str) -> str:
        """Return the larger CPU requirement (``cpu1`` on a tie).

        Args:
            cpu1: First CPU specification (e.g., "2", "500m")
            cpu2: Second CPU specification

        Returns:
            The larger CPU specification, in its original format
        """

        def to_millicores(cpu: str) -> float:
            # The validator accepts fractional millicores such as "2.5m", so
            # parse with float(); int("2.5") would raise ValueError.
            # Rounding absorbs float noise so "0.1" and "100m" compare equal.
            if cpu.endswith("m"):
                return round(float(cpu[:-1]), 3)
            return round(float(cpu) * 1000, 3)

        return cpu1 if to_millicores(cpu1) >= to_millicores(cpu2) else cpu2

    @staticmethod
    def _compare_memory(mem1: str, mem2: str) -> str:
        """Return the larger memory requirement (``mem1`` on a tie).

        Args:
            mem1: First memory specification (e.g., "4Gi", "8192Mi")
            mem2: Second memory specification

        Returns:
            The larger memory specification (in original format)
        """
        multipliers = {
            "Ki": 1024,
            "Mi": 1024**2,
            "Gi": 1024**3,
            "Ti": 1024**4,
            "K": 1000,
            "M": 1000**2,
            "G": 1000**3,
            "T": 1000**4,
            "B": 1,
        }

        def to_bytes(mem: str) -> int:
            # Unparseable strings compare as 0 bytes rather than raising.
            match = re.match(r"^(\d+(?:\.\d+)?)(Ki|Mi|Gi|Ti|K|M|G|T|B)?$", mem)
            if not match:
                return 0
            value, unit = float(match.group(1)), match.group(2) or "B"
            return int(value * multipliers.get(unit, 1))

        # Return whichever is larger, but keep original format
        return mem1 if to_bytes(mem1) >= to_bytes(mem2) else mem2

    @staticmethod
    def _merge_optional(mine, theirs, combine):
        """Combine two optional values; fall back to whichever one is set."""
        if mine and theirs:
            return combine(mine, theirs)
        return mine or theirs or None

    def merge_with(self, other: "ResourceRequirements") -> "ResourceRequirements":
        """Merge with another ResourceRequirements, taking maximum of each.

        This is used when grouping steps to aggregate their resource needs.
        Strategy:
        - CPU: Take maximum
        - Memory: Take maximum
        - Storage: Take maximum
        - GPU: Merge configs (max count, best type)
        - Node affinity: Merge constraints

        A field set on only one side is carried over unchanged.

        Args:
            other: Another ResourceRequirements to merge with

        Returns:
            New ResourceRequirements with merged specifications
        """
        pick = self._merge_optional
        return ResourceRequirements(
            cpu=pick(self.cpu, other.cpu, self._compare_cpu),
            memory=pick(self.memory, other.memory, self._compare_memory),
            storage=pick(self.storage, other.storage, self._compare_memory),
            gpu=pick(self.gpu, other.gpu, lambda a, b: a.merge_with(b)),
            node_affinity=pick(
                self.node_affinity,
                other.node_affinity,
                lambda a, b: a.merge_with(b),
            ),
        )
391
+
392
+
393
def resources(
    cpu: Optional[str] = None,
    memory: Optional[str] = None,
    storage: Optional[str] = None,
    gpu: Optional[GPUConfig] = None,
    node_affinity: Optional[NodeAffinity] = None,
) -> ResourceRequirements:
    """Build a ResourceRequirements object from keyword-style arguments.

    Shorthand factory: every argument is forwarded unchanged to
    ``ResourceRequirements``, whose own validation runs on construction.

    Args:
        cpu: CPU cores (e.g., "2", "500m")
        memory: RAM amount (e.g., "4Gi", "8192Mi")
        storage: Ephemeral storage (e.g., "100Gi")
        gpu: GPU configuration
        node_affinity: Node selection rules

    Returns:
        Validated ResourceRequirements object

    Examples:
        >>> req = resources(cpu="2", memory="4Gi")
        >>> req = resources(cpu="4", memory="16Gi", gpu=GPUConfig(gpu_type="nvidia-v100", count=2))
    """
    spec = {
        "cpu": cpu,
        "memory": memory,
        "storage": storage,
        "gpu": gpu,
        "node_affinity": node_affinity,
    }
    return ResourceRequirements(**spec)