flowyml 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159) hide show
  1. flowyml/__init__.py +207 -0
  2. flowyml/assets/__init__.py +22 -0
  3. flowyml/assets/artifact.py +40 -0
  4. flowyml/assets/base.py +209 -0
  5. flowyml/assets/dataset.py +100 -0
  6. flowyml/assets/featureset.py +301 -0
  7. flowyml/assets/metrics.py +104 -0
  8. flowyml/assets/model.py +82 -0
  9. flowyml/assets/registry.py +157 -0
  10. flowyml/assets/report.py +315 -0
  11. flowyml/cli/__init__.py +5 -0
  12. flowyml/cli/experiment.py +232 -0
  13. flowyml/cli/init.py +256 -0
  14. flowyml/cli/main.py +327 -0
  15. flowyml/cli/run.py +75 -0
  16. flowyml/cli/stack_cli.py +532 -0
  17. flowyml/cli/ui.py +33 -0
  18. flowyml/core/__init__.py +68 -0
  19. flowyml/core/advanced_cache.py +274 -0
  20. flowyml/core/approval.py +64 -0
  21. flowyml/core/cache.py +203 -0
  22. flowyml/core/checkpoint.py +148 -0
  23. flowyml/core/conditional.py +373 -0
  24. flowyml/core/context.py +155 -0
  25. flowyml/core/error_handling.py +419 -0
  26. flowyml/core/executor.py +354 -0
  27. flowyml/core/graph.py +185 -0
  28. flowyml/core/parallel.py +452 -0
  29. flowyml/core/pipeline.py +764 -0
  30. flowyml/core/project.py +253 -0
  31. flowyml/core/resources.py +424 -0
  32. flowyml/core/scheduler.py +630 -0
  33. flowyml/core/scheduler_config.py +32 -0
  34. flowyml/core/step.py +201 -0
  35. flowyml/core/step_grouping.py +292 -0
  36. flowyml/core/templates.py +226 -0
  37. flowyml/core/versioning.py +217 -0
  38. flowyml/integrations/__init__.py +1 -0
  39. flowyml/integrations/keras.py +134 -0
  40. flowyml/monitoring/__init__.py +1 -0
  41. flowyml/monitoring/alerts.py +57 -0
  42. flowyml/monitoring/data.py +102 -0
  43. flowyml/monitoring/llm.py +160 -0
  44. flowyml/monitoring/monitor.py +57 -0
  45. flowyml/monitoring/notifications.py +246 -0
  46. flowyml/registry/__init__.py +5 -0
  47. flowyml/registry/model_registry.py +491 -0
  48. flowyml/registry/pipeline_registry.py +55 -0
  49. flowyml/stacks/__init__.py +27 -0
  50. flowyml/stacks/base.py +77 -0
  51. flowyml/stacks/bridge.py +288 -0
  52. flowyml/stacks/components.py +155 -0
  53. flowyml/stacks/gcp.py +499 -0
  54. flowyml/stacks/local.py +112 -0
  55. flowyml/stacks/migration.py +97 -0
  56. flowyml/stacks/plugin_config.py +78 -0
  57. flowyml/stacks/plugins.py +401 -0
  58. flowyml/stacks/registry.py +226 -0
  59. flowyml/storage/__init__.py +26 -0
  60. flowyml/storage/artifacts.py +246 -0
  61. flowyml/storage/materializers/__init__.py +20 -0
  62. flowyml/storage/materializers/base.py +133 -0
  63. flowyml/storage/materializers/keras.py +185 -0
  64. flowyml/storage/materializers/numpy.py +94 -0
  65. flowyml/storage/materializers/pandas.py +142 -0
  66. flowyml/storage/materializers/pytorch.py +135 -0
  67. flowyml/storage/materializers/sklearn.py +110 -0
  68. flowyml/storage/materializers/tensorflow.py +152 -0
  69. flowyml/storage/metadata.py +931 -0
  70. flowyml/tracking/__init__.py +1 -0
  71. flowyml/tracking/experiment.py +211 -0
  72. flowyml/tracking/leaderboard.py +191 -0
  73. flowyml/tracking/runs.py +145 -0
  74. flowyml/ui/__init__.py +15 -0
  75. flowyml/ui/backend/Dockerfile +31 -0
  76. flowyml/ui/backend/__init__.py +0 -0
  77. flowyml/ui/backend/auth.py +163 -0
  78. flowyml/ui/backend/main.py +187 -0
  79. flowyml/ui/backend/routers/__init__.py +0 -0
  80. flowyml/ui/backend/routers/assets.py +45 -0
  81. flowyml/ui/backend/routers/execution.py +179 -0
  82. flowyml/ui/backend/routers/experiments.py +49 -0
  83. flowyml/ui/backend/routers/leaderboard.py +118 -0
  84. flowyml/ui/backend/routers/notifications.py +72 -0
  85. flowyml/ui/backend/routers/pipelines.py +110 -0
  86. flowyml/ui/backend/routers/plugins.py +192 -0
  87. flowyml/ui/backend/routers/projects.py +85 -0
  88. flowyml/ui/backend/routers/runs.py +66 -0
  89. flowyml/ui/backend/routers/schedules.py +222 -0
  90. flowyml/ui/backend/routers/traces.py +84 -0
  91. flowyml/ui/frontend/Dockerfile +20 -0
  92. flowyml/ui/frontend/README.md +315 -0
  93. flowyml/ui/frontend/dist/assets/index-DFNQnrUj.js +448 -0
  94. flowyml/ui/frontend/dist/assets/index-pWI271rZ.css +1 -0
  95. flowyml/ui/frontend/dist/index.html +16 -0
  96. flowyml/ui/frontend/index.html +15 -0
  97. flowyml/ui/frontend/nginx.conf +26 -0
  98. flowyml/ui/frontend/package-lock.json +3545 -0
  99. flowyml/ui/frontend/package.json +33 -0
  100. flowyml/ui/frontend/postcss.config.js +6 -0
  101. flowyml/ui/frontend/src/App.jsx +21 -0
  102. flowyml/ui/frontend/src/app/assets/page.jsx +397 -0
  103. flowyml/ui/frontend/src/app/dashboard/page.jsx +295 -0
  104. flowyml/ui/frontend/src/app/experiments/[experimentId]/page.jsx +255 -0
  105. flowyml/ui/frontend/src/app/experiments/page.jsx +360 -0
  106. flowyml/ui/frontend/src/app/leaderboard/page.jsx +133 -0
  107. flowyml/ui/frontend/src/app/pipelines/page.jsx +454 -0
  108. flowyml/ui/frontend/src/app/plugins/page.jsx +48 -0
  109. flowyml/ui/frontend/src/app/projects/page.jsx +292 -0
  110. flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +682 -0
  111. flowyml/ui/frontend/src/app/runs/page.jsx +470 -0
  112. flowyml/ui/frontend/src/app/schedules/page.jsx +585 -0
  113. flowyml/ui/frontend/src/app/settings/page.jsx +314 -0
  114. flowyml/ui/frontend/src/app/tokens/page.jsx +456 -0
  115. flowyml/ui/frontend/src/app/traces/page.jsx +246 -0
  116. flowyml/ui/frontend/src/components/Layout.jsx +108 -0
  117. flowyml/ui/frontend/src/components/PipelineGraph.jsx +295 -0
  118. flowyml/ui/frontend/src/components/header/Header.jsx +72 -0
  119. flowyml/ui/frontend/src/components/plugins/AddPluginDialog.jsx +121 -0
  120. flowyml/ui/frontend/src/components/plugins/InstalledPlugins.jsx +124 -0
  121. flowyml/ui/frontend/src/components/plugins/PluginBrowser.jsx +167 -0
  122. flowyml/ui/frontend/src/components/plugins/PluginManager.jsx +60 -0
  123. flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +145 -0
  124. flowyml/ui/frontend/src/components/ui/Badge.jsx +26 -0
  125. flowyml/ui/frontend/src/components/ui/Button.jsx +34 -0
  126. flowyml/ui/frontend/src/components/ui/Card.jsx +44 -0
  127. flowyml/ui/frontend/src/components/ui/CodeSnippet.jsx +38 -0
  128. flowyml/ui/frontend/src/components/ui/CollapsibleCard.jsx +53 -0
  129. flowyml/ui/frontend/src/components/ui/DataView.jsx +175 -0
  130. flowyml/ui/frontend/src/components/ui/EmptyState.jsx +49 -0
  131. flowyml/ui/frontend/src/components/ui/ExecutionStatus.jsx +122 -0
  132. flowyml/ui/frontend/src/components/ui/KeyValue.jsx +25 -0
  133. flowyml/ui/frontend/src/components/ui/ProjectSelector.jsx +134 -0
  134. flowyml/ui/frontend/src/contexts/ProjectContext.jsx +79 -0
  135. flowyml/ui/frontend/src/contexts/ThemeContext.jsx +54 -0
  136. flowyml/ui/frontend/src/index.css +11 -0
  137. flowyml/ui/frontend/src/layouts/MainLayout.jsx +23 -0
  138. flowyml/ui/frontend/src/main.jsx +10 -0
  139. flowyml/ui/frontend/src/router/index.jsx +39 -0
  140. flowyml/ui/frontend/src/services/pluginService.js +90 -0
  141. flowyml/ui/frontend/src/utils/api.js +47 -0
  142. flowyml/ui/frontend/src/utils/cn.js +6 -0
  143. flowyml/ui/frontend/tailwind.config.js +31 -0
  144. flowyml/ui/frontend/vite.config.js +21 -0
  145. flowyml/ui/utils.py +77 -0
  146. flowyml/utils/__init__.py +67 -0
  147. flowyml/utils/config.py +308 -0
  148. flowyml/utils/debug.py +240 -0
  149. flowyml/utils/environment.py +346 -0
  150. flowyml/utils/git.py +319 -0
  151. flowyml/utils/logging.py +61 -0
  152. flowyml/utils/performance.py +314 -0
  153. flowyml/utils/stack_config.py +296 -0
  154. flowyml/utils/validation.py +270 -0
  155. flowyml-1.1.0.dist-info/METADATA +372 -0
  156. flowyml-1.1.0.dist-info/RECORD +159 -0
  157. flowyml-1.1.0.dist-info/WHEEL +4 -0
  158. flowyml-1.1.0.dist-info/entry_points.txt +3 -0
  159. flowyml-1.1.0.dist-info/licenses/LICENSE +17 -0
flowyml/core/step.py ADDED
@@ -0,0 +1,201 @@
1
+ """Step Decorator - Define pipeline steps with automatic context injection."""
2
+
3
+ import hashlib
4
+ import inspect
5
+ import json
6
+ from typing import Any, Union
7
+ from collections.abc import Callable
8
+ from dataclasses import dataclass, field
9
+
10
+ # Import resource types
11
+ try:
12
+ from flowyml.core.resources import ResourceRequirements
13
+ except ImportError:
14
+ ResourceRequirements = None # Type: ignore
15
+
16
+
17
+ @dataclass
18
+ class StepConfig:
19
+ """Configuration for a pipeline step."""
20
+
21
+ name: str
22
+ func: Callable
23
+ inputs: list[str] = field(default_factory=list)
24
+ outputs: list[str] = field(default_factory=list)
25
+ cache: bool | str | Callable = "code_hash"
26
+ retry: int = 0
27
+ timeout: int | None = None
28
+ resources: Union[dict[str, Any], "ResourceRequirements", None] = None
29
+ tags: dict[str, str] = field(default_factory=dict)
30
+ condition: Callable | None = None
31
+ execution_group: str | None = None
32
+
33
+ def __hash__(self):
34
+ """Make StepConfig hashable."""
35
+ return hash(self.name)
36
+
37
+
38
+ class Step:
39
+ """A pipeline step that can be executed with automatic context injection."""
40
+
41
+ def __init__(
42
+ self,
43
+ func: Callable,
44
+ name: str | None = None,
45
+ inputs: list[str] | None = None,
46
+ outputs: list[str] | None = None,
47
+ cache: bool | str | Callable = "code_hash",
48
+ retry: int = 0,
49
+ timeout: int | None = None,
50
+ resources: Union[dict[str, Any], "ResourceRequirements", None] = None,
51
+ tags: dict[str, str] | None = None,
52
+ condition: Callable | None = None,
53
+ execution_group: str | None = None,
54
+ ):
55
+ self.func = func
56
+ self.name = name or func.__name__
57
+ self.inputs = inputs or []
58
+ self.outputs = outputs or []
59
+ self.cache = cache
60
+ self.retry = retry
61
+ self.timeout = timeout
62
+
63
+ # Store resources (accept both dict for backward compatibility and ResourceRequirements)
64
+ self.resources = resources
65
+
66
+ self.tags = tags or {}
67
+ self.condition = condition
68
+ self.execution_group = execution_group
69
+
70
+ # Capture source code for UI display
71
+ try:
72
+ self.source_code = inspect.getsource(func)
73
+ except (OSError, TypeError):
74
+ self.source_code = "# Source code not available"
75
+
76
+ self.config = StepConfig(
77
+ name=self.name,
78
+ func=func,
79
+ inputs=self.inputs,
80
+ outputs=self.outputs,
81
+ cache=self.cache,
82
+ retry=self.retry,
83
+ timeout=self.timeout,
84
+ resources=self.resources,
85
+ tags=self.tags,
86
+ condition=self.condition,
87
+ execution_group=self.execution_group,
88
+ )
89
+
90
+ def __call__(self, *args, **kwargs):
91
+ """Execute the step function."""
92
+ # Check condition if present
93
+ if self.condition:
94
+ # We might need to inject context into condition too,
95
+ # but for now assume it takes no args or same args as step?
96
+ # This is tricky without context injection logic here.
97
+ # The executor handles execution, so maybe we just store it here.
98
+ pass
99
+
100
+ return self.func(*args, **kwargs)
101
+
102
+ def get_code_hash(self) -> str:
103
+ """Compute hash of the step's source code."""
104
+ try:
105
+ source = inspect.getsource(self.func)
106
+ return hashlib.md5(source.encode()).hexdigest()
107
+ except (OSError, TypeError):
108
+ # Fallback for dynamically defined functions or when source is unavailable
109
+ return hashlib.md5(self.name.encode()).hexdigest()[:16]
110
+
111
+ def get_input_hash(self, inputs: dict[str, Any]) -> str:
112
+ """Generate hash of inputs for caching."""
113
+ input_str = json.dumps(inputs, sort_keys=True, default=str)
114
+ return hashlib.sha256(input_str.encode()).hexdigest()[:16]
115
+
116
+ def get_cache_key(self, inputs: dict[str, Any] | None = None) -> str:
117
+ """Generate cache key based on caching strategy.
118
+
119
+ Args:
120
+ inputs: Input data for the step
121
+
122
+ Returns:
123
+ Cache key string
124
+ """
125
+ if self.cache == "code_hash":
126
+ return f"{self.name}:{self.get_code_hash()}"
127
+ elif self.cache == "input_hash" and inputs:
128
+ return f"{self.name}:{self.get_input_hash(inputs)}"
129
+ elif callable(self.cache) and inputs:
130
+ return self.cache(inputs, {})
131
+ else:
132
+ return f"{self.name}:no-cache"
133
+
134
+ def __repr__(self) -> str:
135
+ return f"Step(name='{self.name}', inputs={self.inputs}, outputs={self.outputs})"
136
+
137
+
138
def step(
    _func: Callable | None = None,
    *,
    inputs: list[str] | None = None,
    outputs: list[str] | None = None,
    cache: bool | str | Callable = "code_hash",
    retry: int = 0,
    timeout: int | None = None,
    resources: Union[dict[str, Any], "ResourceRequirements", None] = None,
    tags: dict[str, str] | None = None,
    name: str | None = None,
    condition: Callable | None = None,
    execution_group: str | None = None,
):
    """Decorator that turns a function into a pipeline Step.

    Works both bare (``@step``) and parameterized (``@step(inputs=...)``).

    Args:
        _func: Function being decorated (when used as bare @step)
        inputs: List of input asset names
        outputs: List of output asset names
        cache: Caching strategy ("code_hash", "input_hash", callable, or False)
        retry: Number of retry attempts on failure
        timeout: Maximum execution time in seconds
        resources: Resource requirements (ResourceRequirements object or dict for backward compat)
        tags: Metadata tags for the step
        name: Optional custom name for the step
        condition: Optional callable that returns True if step should run
        execution_group: Optional group name for executing multiple steps together

    Example:
        >>> @step
        ... def simple_step():
        ...     ...
        >>> @step(inputs=["data/train"], outputs=["model/trained"])
        ... def train_model(train_data):
        ...     ...
        >>> # With resource requirements
        >>> from flowyml.core.resources import ResourceRequirements, GPUConfig
        >>> @step(resources=ResourceRequirements(cpu="4", memory="16Gi", gpu=GPUConfig(gpu_type="nvidia-v100", count=2)))
        ... def gpu_train(data):
        ...     ...
    """

    def decorator(func: Callable) -> Step:
        # Forward every option unchanged to the Step wrapper.
        return Step(
            func=func,
            name=name,
            inputs=inputs,
            outputs=outputs,
            cache=cache,
            retry=retry,
            timeout=timeout,
            resources=resources,
            tags=tags,
            condition=condition,
            execution_group=execution_group,
        )

    # Bare @step hands us the function directly; @step(...) returns the
    # decorator for Python to apply.
    return decorator if _func is None else decorator(_func)
@@ -0,0 +1,292 @@
1
+ """Step Grouping - Analyze and group pipeline steps for efficient execution.
2
+
3
+ This module provides functionality to group multiple pipeline steps that should execute
4
+ together in the same environment (e.g., Docker container, remote worker). It analyzes
5
+ the DAG to ensure only consecutive steps are grouped and aggregates their resource
6
+ requirements intelligently.
7
+ """
8
+
9
+ from collections import defaultdict
10
+ from dataclasses import dataclass
11
+
12
+ from flowyml.core.graph import DAG
13
+ from flowyml.core.step import Step
14
+ from flowyml.core.resources import ResourceRequirements
15
+
16
+
17
@dataclass
class StepGroup:
    """A bundle of steps that execute together in one environment.

    Args:
        group_name: Name identifier for this group
        steps: List of Step objects in this group
        aggregated_resources: Combined resource requirements for the group
        execution_order: Ordered list of step names (topological order within group)
    """

    group_name: str
    steps: list[Step]
    aggregated_resources: ResourceRequirements | None
    execution_order: list[str]

    def __repr__(self) -> str:
        member_names = [member.name for member in self.steps]
        return f"StepGroup(name='{self.group_name}', steps={member_names})"
38
class StepGroupAnalyzer:
    """Analyzes pipeline DAG to create valid step groups.

    This analyzer ensures that:
    1. Only steps with the same execution_group name are grouped
    2. Grouped steps can execute consecutively (no gaps in DAG)
    3. Resources are aggregated intelligently (max CPU, memory, etc.)
    4. Execution order within groups is preserved
    """

    def analyze_groups(self, dag: DAG, steps: list[Step]) -> list[StepGroup]:
        """Analyze DAG and create valid step groups.

        Args:
            dag: Pipeline DAG
            steps: List of all pipeline steps

        Returns:
            List of StepGroup objects (excludes ungrouped steps)
        """
        # Bucket steps by declared execution_group; steps without one are
        # excluded here and execute individually.
        groups_dict: dict[str, list[Step]] = defaultdict(list)

        for step in steps:
            if step.execution_group:
                groups_dict[step.execution_group].append(step)

        step_groups = []
        for group_name, group_steps in groups_dict.items():
            # A declared group may span non-adjacent parts of the DAG
            # (e.g. parallel branches); split it into runs of steps that
            # really can execute back-to-back.
            subgroups = self._split_into_consecutive_groups(group_steps, dag)

            for i, subgroup in enumerate(subgroups):
                # If the original group was split, append an index so each
                # resulting group gets a unique name.
                final_name = group_name if len(subgroups) == 1 else f"{group_name}_{i}"

                exec_order = self._get_execution_order(subgroup, dag)
                aggregated = self._aggregate_resources(subgroup)

                step_groups.append(
                    StepGroup(
                        group_name=final_name,
                        steps=subgroup,
                        aggregated_resources=aggregated,
                        execution_order=exec_order,
                    ),
                )

        return step_groups

    def _split_into_consecutive_groups(
        self,
        steps: list[Step],
        dag: DAG,
    ) -> list[list[Step]]:
        """Split steps into subgroups that can execute consecutively.

        This handles cases where steps with the same execution_group are not
        actually consecutive in the DAG (e.g., parallel branches).

        Args:
            steps: Steps with the same execution_group
            dag: Pipeline DAG

        Returns:
            List of step sublists, where each sublist can execute consecutively
        """
        if len(steps) <= 1:
            return [steps] if steps else []

        step_map = {s.name: s for s in steps}
        step_names = set(step_map.keys())

        try:
            all_nodes = dag.topological_sort()
        except ValueError:
            # DAG has cycles; degrade gracefully by treating every step as
            # its own singleton group.
            return [[s] for s in steps]

        # Project the global topological order onto just this group's steps.
        ordered_steps = [step_map[node.name] for node in all_nodes if node.name in step_names]

        # Split into consecutive sequences: two steps stay in the same run
        # only if no other group step sits between them in the DAG.
        subgroups: list[list[Step]] = []
        current_group: list[Step] = []

        for step in ordered_steps:
            if not current_group:
                # Start a new run.
                current_group.append(step)
            elif self._are_consecutive(current_group[-1], step, dag, step_names):
                current_group.append(step)
            else:
                # Gap detected: close the current run and start a new one.
                subgroups.append(current_group)
                current_group = [step]

        if current_group:
            subgroups.append(current_group)

        return subgroups

    def _are_consecutive(
        self,
        step1: Step,
        step2: Step,
        dag: DAG,
        group_step_names: set[str],
    ) -> bool:
        """Check if two steps can execute consecutively in a group.

        Steps are consecutive if:
        - step2 depends on step1 (directly or transitively) OR they're independent
        - All intermediate dependencies are NOT in this group

        Args:
            step1: First step
            step2: Second step
            dag: Pipeline DAG
            group_step_names: Set of all step names in this group

        Returns:
            True if steps can execute consecutively
        """
        step2_deps = dag.get_all_dependencies(step2.name)

        # step2 depends on nothing from this group: the two steps are
        # independent (parallel branches in the same group are OK).
        group_deps = step2_deps & group_step_names
        if not group_deps:
            return True

        if step1.name in step2_deps:
            # step2 depends on step1; consecutive only if no OTHER group
            # member sits between them in the dependency chain.
            intermediate = group_deps - {step1.name}
            return len(intermediate) == 0

        # step2 depends on some group member but not on step1: not consecutive.
        return False

    def _get_execution_order(self, steps: list[Step], dag: DAG) -> list[str]:
        """Get topological execution order for steps in a group.

        Args:
            steps: Steps in the group
            dag: Pipeline DAG

        Returns:
            Ordered list of step names
        """
        step_names = {s.name for s in steps}

        try:
            all_nodes = dag.topological_sort()
        except ValueError:
            # FIX: previously this call was unguarded, so on a cyclic DAG the
            # ValueError escaped analyze_groups even though
            # _split_into_consecutive_groups had already degraded to singleton
            # groups. Fall back to the given order instead of re-raising.
            return [s.name for s in steps]

        # Filter the global order down to only this group's steps.
        return [node.name for node in all_nodes if node.name in step_names]

    def _aggregate_resources(self, steps: list[Step]) -> ResourceRequirements | None:
        """Aggregate resource requirements from multiple steps.

        Strategy:
        - CPU: Take maximum
        - Memory: Take maximum
        - GPU: Merge configs (max count, best type)
        - Storage: Take maximum
        - Node affinity: Merge required/preferred labels

        Args:
            steps: Steps to aggregate resources from

        Returns:
            Aggregated ResourceRequirements or None if no steps have resources
        """
        # Plain-dict resources (legacy form) are deliberately ignored here:
        # only ResourceRequirements objects know how to merge.
        resource_reqs = [s.resources for s in steps if s.resources and isinstance(s.resources, ResourceRequirements)]

        if not resource_reqs:
            return None

        # Fold the requirements together pairwise.
        # NOTE(review): assumes merge_with returns a new object rather than
        # mutating its receiver — confirm in flowyml.core.resources.
        aggregated = resource_reqs[0]
        for req in resource_reqs[1:]:
            aggregated = aggregated.merge_with(req)

        return aggregated
245
def get_execution_units(dag: DAG, steps: list[Step]) -> list[Step | StepGroup]:
    """Get ordered execution units (individual steps or groups).

    This is a convenience function that analyzes groups and returns a mixed list
    of ungrouped steps and StepGroups in topological order. Each group appears
    exactly once, at the position of its first step in that order.

    Args:
        dag: Pipeline DAG
        steps: All pipeline steps

    Returns:
        List of execution units (Step or StepGroup) in execution order
    """
    analyzer = StepGroupAnalyzer()
    step_groups = analyzer.analyze_groups(dag, steps)

    # Map each grouped step's name to its StepGroup.
    step_to_group: dict[str, StepGroup] = {}
    for group in step_groups:
        for step in group.steps:
            step_to_group[step.name] = group

    # FIX: index steps by name once instead of linearly scanning the step
    # list for every DAG node (was O(n^2)). setdefault keeps the FIRST
    # occurrence on duplicate names, matching the old next(...) behavior.
    step_by_name: dict[str, Step] = {}
    for s in steps:
        step_by_name.setdefault(s.name, s)

    execution_units: list[Step | StepGroup] = []
    processed_groups: set[str] = set()

    for node in dag.topological_sort():
        step = step_by_name.get(node.name)
        if step is None:
            # DAG node with no matching step object: skip it.
            continue

        group = step_to_group.get(step.name)
        if group is None:
            # Ungrouped step executes on its own.
            execution_units.append(step)
        elif group.group_name not in processed_groups:
            # Emit each group only once, when its first member is reached.
            execution_units.append(group)
            processed_groups.add(group.group_name)

    return execution_units