flowyml 1.7.1__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowyml/assets/base.py +15 -0
- flowyml/assets/dataset.py +570 -17
- flowyml/assets/metrics.py +5 -0
- flowyml/assets/model.py +1052 -15
- flowyml/cli/main.py +709 -0
- flowyml/cli/stack_cli.py +138 -25
- flowyml/core/__init__.py +17 -0
- flowyml/core/executor.py +231 -37
- flowyml/core/image_builder.py +129 -0
- flowyml/core/log_streamer.py +227 -0
- flowyml/core/orchestrator.py +59 -4
- flowyml/core/pipeline.py +65 -13
- flowyml/core/routing.py +558 -0
- flowyml/core/scheduler.py +88 -5
- flowyml/core/step.py +9 -1
- flowyml/core/step_grouping.py +49 -35
- flowyml/core/types.py +407 -0
- flowyml/integrations/keras.py +247 -82
- flowyml/monitoring/alerts.py +10 -0
- flowyml/monitoring/notifications.py +104 -25
- flowyml/monitoring/slack_blocks.py +323 -0
- flowyml/plugins/__init__.py +251 -0
- flowyml/plugins/alerters/__init__.py +1 -0
- flowyml/plugins/alerters/slack.py +168 -0
- flowyml/plugins/base.py +752 -0
- flowyml/plugins/config.py +478 -0
- flowyml/plugins/deployers/__init__.py +22 -0
- flowyml/plugins/deployers/gcp_cloud_run.py +200 -0
- flowyml/plugins/deployers/sagemaker.py +306 -0
- flowyml/plugins/deployers/vertex.py +290 -0
- flowyml/plugins/integration.py +369 -0
- flowyml/plugins/manager.py +510 -0
- flowyml/plugins/model_registries/__init__.py +22 -0
- flowyml/plugins/model_registries/mlflow.py +159 -0
- flowyml/plugins/model_registries/sagemaker.py +489 -0
- flowyml/plugins/model_registries/vertex.py +386 -0
- flowyml/plugins/orchestrators/__init__.py +13 -0
- flowyml/plugins/orchestrators/sagemaker.py +443 -0
- flowyml/plugins/orchestrators/vertex_ai.py +461 -0
- flowyml/plugins/registries/__init__.py +13 -0
- flowyml/plugins/registries/ecr.py +321 -0
- flowyml/plugins/registries/gcr.py +313 -0
- flowyml/plugins/registry.py +454 -0
- flowyml/plugins/stack.py +494 -0
- flowyml/plugins/stack_config.py +537 -0
- flowyml/plugins/stores/__init__.py +13 -0
- flowyml/plugins/stores/gcs.py +460 -0
- flowyml/plugins/stores/s3.py +453 -0
- flowyml/plugins/trackers/__init__.py +11 -0
- flowyml/plugins/trackers/mlflow.py +316 -0
- flowyml/plugins/validators/__init__.py +3 -0
- flowyml/plugins/validators/deepchecks.py +119 -0
- flowyml/registry/__init__.py +2 -1
- flowyml/registry/model_environment.py +109 -0
- flowyml/registry/model_registry.py +241 -96
- flowyml/serving/__init__.py +17 -0
- flowyml/serving/model_server.py +628 -0
- flowyml/stacks/__init__.py +60 -0
- flowyml/stacks/aws.py +93 -0
- flowyml/stacks/base.py +62 -0
- flowyml/stacks/components.py +12 -0
- flowyml/stacks/gcp.py +44 -9
- flowyml/stacks/plugins.py +115 -0
- flowyml/stacks/registry.py +2 -1
- flowyml/storage/sql.py +401 -12
- flowyml/tracking/experiment.py +8 -5
- flowyml/ui/backend/Dockerfile +87 -16
- flowyml/ui/backend/auth.py +12 -2
- flowyml/ui/backend/main.py +149 -5
- flowyml/ui/backend/routers/ai_context.py +226 -0
- flowyml/ui/backend/routers/assets.py +23 -4
- flowyml/ui/backend/routers/auth.py +96 -0
- flowyml/ui/backend/routers/deployments.py +660 -0
- flowyml/ui/backend/routers/model_explorer.py +597 -0
- flowyml/ui/backend/routers/plugins.py +103 -51
- flowyml/ui/backend/routers/projects.py +91 -8
- flowyml/ui/backend/routers/runs.py +132 -1
- flowyml/ui/backend/routers/schedules.py +54 -29
- flowyml/ui/backend/routers/templates.py +319 -0
- flowyml/ui/backend/routers/websocket.py +2 -2
- flowyml/ui/frontend/Dockerfile +55 -6
- flowyml/ui/frontend/dist/assets/index-B5AsPTSz.css +1 -0
- flowyml/ui/frontend/dist/assets/index-dFbZ8wD8.js +753 -0
- flowyml/ui/frontend/dist/index.html +2 -2
- flowyml/ui/frontend/dist/logo.png +0 -0
- flowyml/ui/frontend/nginx.conf +65 -4
- flowyml/ui/frontend/package-lock.json +1415 -74
- flowyml/ui/frontend/package.json +4 -0
- flowyml/ui/frontend/public/logo.png +0 -0
- flowyml/ui/frontend/src/App.jsx +10 -7
- flowyml/ui/frontend/src/app/assets/page.jsx +890 -321
- flowyml/ui/frontend/src/app/auth/Login.jsx +90 -0
- flowyml/ui/frontend/src/app/dashboard/page.jsx +8 -8
- flowyml/ui/frontend/src/app/deployments/page.jsx +786 -0
- flowyml/ui/frontend/src/app/model-explorer/page.jsx +1031 -0
- flowyml/ui/frontend/src/app/pipelines/page.jsx +12 -2
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectExperimentsList.jsx +19 -6
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectMetricsPanel.jsx +1 -1
- flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +601 -101
- flowyml/ui/frontend/src/app/runs/page.jsx +8 -2
- flowyml/ui/frontend/src/app/settings/page.jsx +267 -253
- flowyml/ui/frontend/src/components/ArtifactViewer.jsx +62 -2
- flowyml/ui/frontend/src/components/AssetDetailsPanel.jsx +424 -29
- flowyml/ui/frontend/src/components/AssetTreeHierarchy.jsx +119 -11
- flowyml/ui/frontend/src/components/DatasetViewer.jsx +753 -0
- flowyml/ui/frontend/src/components/Layout.jsx +6 -0
- flowyml/ui/frontend/src/components/PipelineGraph.jsx +79 -29
- flowyml/ui/frontend/src/components/RunDetailsPanel.jsx +36 -6
- flowyml/ui/frontend/src/components/RunMetaPanel.jsx +113 -0
- flowyml/ui/frontend/src/components/TrainingHistoryChart.jsx +514 -0
- flowyml/ui/frontend/src/components/TrainingMetricsPanel.jsx +175 -0
- flowyml/ui/frontend/src/components/ai/AIAssistantButton.jsx +71 -0
- flowyml/ui/frontend/src/components/ai/AIAssistantPanel.jsx +420 -0
- flowyml/ui/frontend/src/components/header/Header.jsx +22 -0
- flowyml/ui/frontend/src/components/plugins/PluginManager.jsx +4 -4
- flowyml/ui/frontend/src/components/plugins/{ZenMLIntegration.jsx → StackImport.jsx} +38 -12
- flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +36 -13
- flowyml/ui/frontend/src/contexts/AIAssistantContext.jsx +245 -0
- flowyml/ui/frontend/src/contexts/AuthContext.jsx +108 -0
- flowyml/ui/frontend/src/hooks/useAIContext.js +156 -0
- flowyml/ui/frontend/src/hooks/useWebGPU.js +54 -0
- flowyml/ui/frontend/src/layouts/MainLayout.jsx +6 -0
- flowyml/ui/frontend/src/router/index.jsx +47 -20
- flowyml/ui/frontend/src/services/pluginService.js +3 -1
- flowyml/ui/server_manager.py +5 -5
- flowyml/ui/utils.py +157 -39
- flowyml/utils/config.py +37 -15
- flowyml/utils/model_introspection.py +123 -0
- flowyml/utils/observability.py +30 -0
- flowyml-1.8.0.dist-info/METADATA +174 -0
- {flowyml-1.7.1.dist-info → flowyml-1.8.0.dist-info}/RECORD +134 -73
- {flowyml-1.7.1.dist-info → flowyml-1.8.0.dist-info}/WHEEL +1 -1
- flowyml/ui/frontend/dist/assets/index-BqDQvp63.js +0 -630
- flowyml/ui/frontend/dist/assets/index-By4trVyv.css +0 -1
- flowyml-1.7.1.dist-info/METADATA +0 -477
- {flowyml-1.7.1.dist-info → flowyml-1.8.0.dist-info}/entry_points.txt +0 -0
- {flowyml-1.7.1.dist-info → flowyml-1.8.0.dist-info}/licenses/LICENSE +0 -0
flowyml/cli/stack_cli.py
CHANGED
|
@@ -98,23 +98,28 @@ def stack() -> None:
|
|
|
98
98
|
@click.option("--config", "-c", help="Path to flowyml.yaml")
|
|
99
99
|
def list_stacks(config: str | None) -> None:
|
|
100
100
|
"""List all configured stacks."""
|
|
101
|
-
from flowyml.
|
|
101
|
+
from flowyml.plugins.stack_config import get_stack_manager
|
|
102
|
+
from flowyml.plugins.config import get_config
|
|
102
103
|
|
|
103
|
-
|
|
104
|
-
|
|
104
|
+
# Initialize config if path provided
|
|
105
|
+
if config:
|
|
106
|
+
get_config(config)
|
|
107
|
+
|
|
108
|
+
manager = get_stack_manager()
|
|
109
|
+
stacks = manager.list_stacks()
|
|
105
110
|
|
|
106
111
|
if not stacks:
|
|
107
|
-
click.echo("No stacks configured. Create a flowyml.yaml file.")
|
|
112
|
+
click.echo("No stacks configured. Create a flowyml.yaml file with 'stacks:' section.")
|
|
108
113
|
return
|
|
109
114
|
|
|
110
|
-
|
|
115
|
+
active = manager.active_stack_name
|
|
111
116
|
|
|
112
|
-
click.echo("\
|
|
117
|
+
click.echo("\n📦 Configured stacks:")
|
|
113
118
|
for stack_name in stacks:
|
|
114
|
-
marker = " (
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
click.echo(f" • {stack_name}{marker} [{
|
|
119
|
+
marker = " ✓ (active)" if stack_name == active else ""
|
|
120
|
+
stack = manager.get_stack(stack_name)
|
|
121
|
+
orch_type = stack.orchestrator.get("type", "local") if stack and stack.orchestrator else "local"
|
|
122
|
+
click.echo(f" • {stack_name}{marker} [orchestrator: {orch_type}]")
|
|
118
123
|
click.echo()
|
|
119
124
|
|
|
120
125
|
|
|
@@ -123,35 +128,143 @@ def list_stacks(config: str | None) -> None:
|
|
|
123
128
|
@click.option("--config", "-c", help="Path to flowyml.yaml")
|
|
124
129
|
def show_stack(stack_name: str, config: str | None) -> None:
|
|
125
130
|
"""Show detailed stack configuration."""
|
|
126
|
-
from flowyml.
|
|
127
|
-
import
|
|
131
|
+
from flowyml.plugins.stack_config import get_stack_manager
|
|
132
|
+
from flowyml.plugins.config import get_config
|
|
128
133
|
|
|
129
|
-
|
|
130
|
-
|
|
134
|
+
# Initialize config if path provided
|
|
135
|
+
if config:
|
|
136
|
+
get_config(config)
|
|
131
137
|
|
|
132
|
-
|
|
138
|
+
manager = get_stack_manager()
|
|
139
|
+
stack = manager.get_stack(stack_name)
|
|
140
|
+
|
|
141
|
+
if not stack:
|
|
133
142
|
click.echo(f"Stack '{stack_name}' not found", err=True)
|
|
143
|
+
available = manager.list_stacks()
|
|
144
|
+
if available:
|
|
145
|
+
click.echo(f"Available stacks: {', '.join(available)}")
|
|
134
146
|
sys.exit(1)
|
|
135
147
|
|
|
136
|
-
|
|
137
|
-
|
|
148
|
+
is_active = stack_name == manager.active_stack_name
|
|
149
|
+
status = " (active)" if is_active else ""
|
|
150
|
+
|
|
151
|
+
click.echo(f"\n📦 Stack: {stack_name}{status}")
|
|
152
|
+
click.echo("─" * 40)
|
|
153
|
+
|
|
154
|
+
# Show components
|
|
155
|
+
if stack.orchestrator:
|
|
156
|
+
click.echo(f"\n🎯 Orchestrator: {stack.orchestrator.get('type', 'unknown')}")
|
|
157
|
+
for k, v in stack.orchestrator.items():
|
|
158
|
+
if k != "type":
|
|
159
|
+
click.echo(f" {k}: {v}")
|
|
160
|
+
|
|
161
|
+
if stack.artifact_store:
|
|
162
|
+
click.echo(f"\n💾 Artifact Store: {stack.artifact_store.get('type', 'unknown')}")
|
|
163
|
+
for k, v in stack.artifact_store.items():
|
|
164
|
+
if k != "type":
|
|
165
|
+
click.echo(f" {k}: {v}")
|
|
166
|
+
|
|
167
|
+
if stack.experiment_tracker:
|
|
168
|
+
click.echo(f"\n📊 Experiment Tracker: {stack.experiment_tracker.get('type', 'unknown')}")
|
|
169
|
+
|
|
170
|
+
if stack.model_registry:
|
|
171
|
+
click.echo(f"\n📝 Model Registry: {stack.model_registry.get('type', 'unknown')}")
|
|
172
|
+
|
|
173
|
+
if stack.model_deployer:
|
|
174
|
+
click.echo(f"\n🚀 Model Deployer: {stack.model_deployer.get('type', 'unknown')}")
|
|
175
|
+
|
|
176
|
+
if stack.container_registry:
|
|
177
|
+
click.echo(f"\n🐳 Container Registry: {stack.container_registry.get('type', 'unknown')}")
|
|
178
|
+
|
|
179
|
+
if stack.artifact_routing:
|
|
180
|
+
click.echo("\n📍 Artifact Routing:")
|
|
181
|
+
for type_name, rule in stack.artifact_routing.rules.items():
|
|
182
|
+
click.echo(f" {type_name}: store={rule.store}, register={rule.register}")
|
|
183
|
+
|
|
184
|
+
click.echo()
|
|
138
185
|
|
|
139
186
|
|
|
140
187
|
@stack.command("set-default")
|
|
141
188
|
@click.argument("stack_name")
|
|
142
189
|
@click.option("--config", "-c", help="Path to flowyml.yaml")
|
|
143
|
-
def
|
|
144
|
-
"""Set the
|
|
145
|
-
|
|
190
|
+
def set_active_stack(stack_name: str, config: str | None) -> None:
|
|
191
|
+
"""Set the active stack (alias for switch)."""
|
|
192
|
+
switch_stack_impl(stack_name, config)
|
|
146
193
|
|
|
147
|
-
registry = get_registry()
|
|
148
194
|
|
|
149
|
-
|
|
150
|
-
|
|
195
|
+
@stack.command("switch")
|
|
196
|
+
@click.argument("stack_name")
|
|
197
|
+
@click.option("--config", "-c", help="Path to flowyml.yaml")
|
|
198
|
+
@click.option("--validate/--no-validate", default=True, help="Validate stack configuration after switching")
|
|
199
|
+
def switch_stack(stack_name: str, config: str | None, validate: bool) -> None:
|
|
200
|
+
"""Switch to a different stack."""
|
|
201
|
+
switch_stack_impl(stack_name, config, validate)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def switch_stack_impl(stack_name: str, config: str | None, validate: bool = False) -> None:
|
|
205
|
+
from flowyml.plugins.stack_config import get_stack_manager
|
|
206
|
+
from flowyml.plugins.config import get_config
|
|
207
|
+
|
|
208
|
+
from rich.console import Console
|
|
209
|
+
|
|
210
|
+
console = Console()
|
|
211
|
+
|
|
212
|
+
# Initialize config if path provided
|
|
213
|
+
if config:
|
|
214
|
+
get_config(config)
|
|
215
|
+
|
|
216
|
+
manager = get_stack_manager()
|
|
217
|
+
|
|
218
|
+
if manager.set_active_stack(stack_name):
|
|
219
|
+
console.print(f"[bold green]✅ Active stack set to '{stack_name}'[/bold green]")
|
|
220
|
+
|
|
221
|
+
if validate:
|
|
222
|
+
stack = manager.get_stack(stack_name)
|
|
223
|
+
console.print(f"🔍 Validating stack '{stack_name}' environment...")
|
|
224
|
+
|
|
225
|
+
# Check for remote requirements
|
|
226
|
+
if stack.orchestrator and stack.orchestrator.get("type") != "local":
|
|
227
|
+
console.print("[yellow]Remote stack detected. Checking Docker configuration...[/yellow]")
|
|
228
|
+
if not stack.container_registry:
|
|
229
|
+
console.print(
|
|
230
|
+
"[bold red]❌ Warning: Remote stack usually requires a Container Registry for automatic builds.[/bold red]",
|
|
231
|
+
)
|
|
232
|
+
console.print(
|
|
233
|
+
" Please configure 'container_registry' in your stack or ensure you provide pre-built image URIs.",
|
|
234
|
+
)
|
|
235
|
+
else:
|
|
236
|
+
console.print(f"[bold red]❌ Stack '{stack_name}' not found[/bold red]")
|
|
237
|
+
available = manager.list_stacks()
|
|
238
|
+
if available:
|
|
239
|
+
console.print(f"Available stacks: {', '.join(available)}")
|
|
240
|
+
sys.exit(1)
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
@stack.command("register")
|
|
244
|
+
@click.argument("stack_name")
|
|
245
|
+
@click.option("--file", "-f", "config_file", required=True, help="Path to stack config file")
|
|
246
|
+
def register_stack(stack_name: str, config_file: str) -> None:
|
|
247
|
+
"""Register a new stack from a config file."""
|
|
248
|
+
import yaml
|
|
249
|
+
from flowyml.plugins.stack_config import get_stack_manager, StackConfig
|
|
250
|
+
|
|
251
|
+
config_path = Path(config_file)
|
|
252
|
+
if not config_path.exists():
|
|
253
|
+
click.echo(f"Config file not found: {config_file}", err=True)
|
|
151
254
|
sys.exit(1)
|
|
152
255
|
|
|
153
|
-
|
|
154
|
-
|
|
256
|
+
try:
|
|
257
|
+
with open(config_path) as f:
|
|
258
|
+
stack_data = yaml.safe_load(f)
|
|
259
|
+
|
|
260
|
+
stack_config = StackConfig.from_dict(stack_name, stack_data)
|
|
261
|
+
manager = get_stack_manager()
|
|
262
|
+
manager.register_stack(stack_name, stack_config)
|
|
263
|
+
|
|
264
|
+
click.echo(f"✅ Registered stack '{stack_name}' from {config_file}")
|
|
265
|
+
except Exception as e:
|
|
266
|
+
click.echo(f"❌ Error registering stack: {e}", err=True)
|
|
267
|
+
sys.exit(1)
|
|
155
268
|
|
|
156
269
|
|
|
157
270
|
@cli.command()
|
flowyml/core/__init__.py
CHANGED
|
@@ -27,6 +27,15 @@ from flowyml.core.resources import (
|
|
|
27
27
|
NodeAffinity,
|
|
28
28
|
resources,
|
|
29
29
|
)
|
|
30
|
+
from flowyml.core.types import (
|
|
31
|
+
Artifact,
|
|
32
|
+
Model,
|
|
33
|
+
Dataset,
|
|
34
|
+
Metrics,
|
|
35
|
+
Parameters,
|
|
36
|
+
is_artifact_type,
|
|
37
|
+
get_artifact_type_name,
|
|
38
|
+
)
|
|
30
39
|
|
|
31
40
|
__all__ = [
|
|
32
41
|
# Context
|
|
@@ -65,4 +74,12 @@ __all__ = [
|
|
|
65
74
|
"GPUConfig",
|
|
66
75
|
"NodeAffinity",
|
|
67
76
|
"resources",
|
|
77
|
+
# Artifact Types
|
|
78
|
+
"Artifact",
|
|
79
|
+
"Model",
|
|
80
|
+
"Dataset",
|
|
81
|
+
"Metrics",
|
|
82
|
+
"Parameters",
|
|
83
|
+
"is_artifact_type",
|
|
84
|
+
"get_artifact_type_name",
|
|
68
85
|
]
|
flowyml/core/executor.py
CHANGED
|
@@ -12,6 +12,7 @@ import ctypes
|
|
|
12
12
|
import requests
|
|
13
13
|
import os
|
|
14
14
|
import inspect
|
|
15
|
+
import psutil
|
|
15
16
|
|
|
16
17
|
|
|
17
18
|
class StopExecutionError(Exception):
|
|
@@ -87,10 +88,16 @@ class MonitorThread(threading.Thread):
|
|
|
87
88
|
# Fallback to environment variable or default
|
|
88
89
|
self.api_url = os.getenv("FLOWYML_SERVER_URL", "http://localhost:8080")
|
|
89
90
|
|
|
90
|
-
def stop(self):
|
|
91
|
+
def stop(self, error: str | None = None):
|
|
92
|
+
"""Stop the monitor thread.
|
|
93
|
+
|
|
94
|
+
Args:
|
|
95
|
+
error: Optional error message to send as final log entry
|
|
96
|
+
"""
|
|
97
|
+
self._final_error = error
|
|
91
98
|
self._stop_event.set()
|
|
92
99
|
|
|
93
|
-
def _flush_logs(self):
|
|
100
|
+
def _flush_logs(self, level: str = "INFO"):
|
|
94
101
|
"""Send captured logs to the server."""
|
|
95
102
|
if not self.log_capture:
|
|
96
103
|
return
|
|
@@ -105,7 +112,20 @@ class MonitorThread(threading.Thread):
|
|
|
105
112
|
f"{self.api_url}/api/runs/{self.run_id}/steps/{self.step_name}/logs",
|
|
106
113
|
json={
|
|
107
114
|
"content": content,
|
|
108
|
-
"level":
|
|
115
|
+
"level": level,
|
|
116
|
+
"timestamp": datetime.now().isoformat(),
|
|
117
|
+
},
|
|
118
|
+
timeout=2,
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
def _send_error(self, error: str):
|
|
122
|
+
"""Send error message to the server."""
|
|
123
|
+
with contextlib.suppress(Exception):
|
|
124
|
+
requests.post(
|
|
125
|
+
f"{self.api_url}/api/runs/{self.run_id}/steps/{self.step_name}/logs",
|
|
126
|
+
json={
|
|
127
|
+
"content": f"ERROR: {error}",
|
|
128
|
+
"level": "ERROR",
|
|
109
129
|
"timestamp": datetime.now().isoformat(),
|
|
110
130
|
},
|
|
111
131
|
timeout=2,
|
|
@@ -114,10 +134,25 @@ class MonitorThread(threading.Thread):
|
|
|
114
134
|
def run(self):
|
|
115
135
|
while not self._stop_event.is_set():
|
|
116
136
|
try:
|
|
117
|
-
#
|
|
137
|
+
# Collect metrics
|
|
138
|
+
process = psutil.Process()
|
|
139
|
+
with process.oneshot():
|
|
140
|
+
cpu_percent = process.cpu_percent(interval=None)
|
|
141
|
+
memory_info = process.memory_info()
|
|
142
|
+
memory_mb = memory_info.rss / 1024 / 1024
|
|
143
|
+
|
|
144
|
+
# Send heartbeat with metrics
|
|
118
145
|
response = requests.post(
|
|
119
146
|
f"{self.api_url}/api/runs/{self.run_id}/steps/{self.step_name}/heartbeat",
|
|
120
|
-
json={
|
|
147
|
+
json={
|
|
148
|
+
"step_name": self.step_name,
|
|
149
|
+
"status": "running",
|
|
150
|
+
"metrics": {
|
|
151
|
+
"cpu_percent": cpu_percent,
|
|
152
|
+
"memory_mb": memory_mb,
|
|
153
|
+
"timestamp": datetime.now().isoformat(),
|
|
154
|
+
},
|
|
155
|
+
},
|
|
121
156
|
timeout=2,
|
|
122
157
|
)
|
|
123
158
|
if response.status_code == 200:
|
|
@@ -137,6 +172,10 @@ class MonitorThread(threading.Thread):
|
|
|
137
172
|
# Final log flush
|
|
138
173
|
self._flush_logs()
|
|
139
174
|
|
|
175
|
+
# Send error if there was one
|
|
176
|
+
if hasattr(self, "_final_error") and self._final_error:
|
|
177
|
+
self._send_error(self._final_error)
|
|
178
|
+
|
|
140
179
|
|
|
141
180
|
# Keep HeartbeatThread as an alias for backwards compatibility
|
|
142
181
|
HeartbeatThread = MonitorThread
|
|
@@ -171,6 +210,10 @@ class Executor:
|
|
|
171
210
|
inputs: dict[str, Any],
|
|
172
211
|
context_params: dict[str, Any],
|
|
173
212
|
cache_store: Any | None = None,
|
|
213
|
+
artifact_store: Any | None = None,
|
|
214
|
+
run_id: str | None = None,
|
|
215
|
+
project_name: str = "default",
|
|
216
|
+
all_outputs: dict[str, Any] | None = None,
|
|
174
217
|
) -> ExecutionResult:
|
|
175
218
|
"""Execute a single step.
|
|
176
219
|
|
|
@@ -179,6 +222,10 @@ class Executor:
|
|
|
179
222
|
inputs: Input data for the step
|
|
180
223
|
context_params: Parameters from context
|
|
181
224
|
cache_store: Cache store for caching
|
|
225
|
+
artifact_store: Artifact store for logging results
|
|
226
|
+
run_id: Unique ID for this pipeline run
|
|
227
|
+
project_name: Name of the project
|
|
228
|
+
all_outputs: Collection of all step outputs for conditional evaluation
|
|
182
229
|
|
|
183
230
|
Returns:
|
|
184
231
|
ExecutionResult with output or error
|
|
@@ -226,21 +273,43 @@ class LocalExecutor(Executor):
|
|
|
226
273
|
artifact_store: Any | None = None,
|
|
227
274
|
run_id: str | None = None,
|
|
228
275
|
project_name: str = "default",
|
|
276
|
+
all_outputs: dict[str, Any] | None = None,
|
|
229
277
|
) -> ExecutionResult:
|
|
230
|
-
"""Execute step locally with retry, caching, and materialization.
|
|
278
|
+
"""Execute step locally with retry, caching, and materialization.
|
|
279
|
+
|
|
280
|
+
Args:
|
|
281
|
+
step: Step to execute
|
|
282
|
+
inputs: Input data for the step
|
|
283
|
+
context_params: Parameters from context
|
|
284
|
+
cache_store: Cache store for caching
|
|
285
|
+
artifact_store: Artifact store for logging results
|
|
286
|
+
run_id: Unique ID for this pipeline run
|
|
287
|
+
project_name: Name of the project
|
|
288
|
+
all_outputs: Collection of all step outputs for conditional evaluation
|
|
289
|
+
|
|
290
|
+
Returns:
|
|
291
|
+
ExecutionResult with output or error
|
|
292
|
+
"""
|
|
231
293
|
start_time = time.time()
|
|
232
294
|
retries = 0
|
|
233
295
|
|
|
234
296
|
# Check condition
|
|
235
297
|
if step.condition:
|
|
236
298
|
try:
|
|
237
|
-
#
|
|
238
|
-
# For simplicity, let's try to inspect the condition function
|
|
239
|
-
# or just pass what we can.
|
|
240
|
-
# A simple approach: pass nothing if it takes no args, or kwargs if it does.
|
|
241
|
-
# But inspect is safer.
|
|
299
|
+
# Prepare kwargs for condition: inputs + context_params + all_outputs
|
|
242
300
|
sig = inspect.signature(step.condition)
|
|
243
|
-
kwargs = {**
|
|
301
|
+
kwargs = {**context_params}
|
|
302
|
+
|
|
303
|
+
# Add all outputs so far (paths like 'data/processed')
|
|
304
|
+
if all_outputs:
|
|
305
|
+
kwargs.update(all_outputs)
|
|
306
|
+
# Also flatten dict outputs to allow access to keys like 'quality_score'
|
|
307
|
+
for val in all_outputs.values():
|
|
308
|
+
if isinstance(val, dict):
|
|
309
|
+
kwargs.update({k: v for k, v in val.items() if k not in kwargs})
|
|
310
|
+
|
|
311
|
+
# Add direct inputs (might override all_outputs if paths match)
|
|
312
|
+
kwargs.update(inputs)
|
|
244
313
|
|
|
245
314
|
# Filter kwargs to only what condition accepts
|
|
246
315
|
cond_kwargs = {k: v for k, v in kwargs.items() if k in sig.parameters}
|
|
@@ -314,7 +383,12 @@ class LocalExecutor(Executor):
|
|
|
314
383
|
)
|
|
315
384
|
monitor_thread.start()
|
|
316
385
|
|
|
317
|
-
|
|
386
|
+
# Filter kwargs to only what the function accepts
|
|
387
|
+
func_sig = inspect.signature(step.func)
|
|
388
|
+
# Handle *args/**kwargs if needed, but for now strict matching is safer for steps
|
|
389
|
+
filtered_kwargs = {k: v for k, v in kwargs.items() if k in func_sig.parameters}
|
|
390
|
+
|
|
391
|
+
result = step.func(**filtered_kwargs)
|
|
318
392
|
except StopExecution:
|
|
319
393
|
duration = time.time() - start_time
|
|
320
394
|
return ExecutionResult(
|
|
@@ -335,22 +409,63 @@ class LocalExecutor(Executor):
|
|
|
335
409
|
|
|
336
410
|
sys.stderr = original_stderr
|
|
337
411
|
|
|
338
|
-
# Stop monitor thread
|
|
339
|
-
if monitor_thread:
|
|
412
|
+
# Stop monitor thread (only if not already stopped in exception handler)
|
|
413
|
+
if monitor_thread and not monitor_thread._stop_event.is_set():
|
|
340
414
|
monitor_thread.stop()
|
|
341
415
|
monitor_thread.join()
|
|
342
416
|
|
|
343
417
|
# Materialize output if artifact store is available
|
|
418
|
+
# Only upload if the result is an Asset with upload=True
|
|
344
419
|
artifact_uri = None
|
|
345
420
|
if artifact_store and result is not None and run_id:
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
421
|
+
# Check if result is an Asset and respects upload flag
|
|
422
|
+
should_upload = True
|
|
423
|
+
try:
|
|
424
|
+
from flowyml.assets.base import Asset
|
|
425
|
+
|
|
426
|
+
if isinstance(result, Asset):
|
|
427
|
+
should_upload = getattr(result, "upload", False)
|
|
428
|
+
except ImportError:
|
|
429
|
+
pass
|
|
430
|
+
|
|
431
|
+
if should_upload:
|
|
432
|
+
with contextlib.suppress(Exception):
|
|
433
|
+
artifact_uri = artifact_store.materialize(
|
|
434
|
+
obj=result,
|
|
435
|
+
name="output", # Default name for single output
|
|
436
|
+
run_id=run_id,
|
|
437
|
+
step_name=step.name,
|
|
438
|
+
project_name=project_name,
|
|
439
|
+
)
|
|
440
|
+
|
|
441
|
+
# Type-based artifact routing
|
|
442
|
+
routing_result = None
|
|
443
|
+
try:
|
|
444
|
+
from flowyml.core.routing import route_artifact, should_route
|
|
445
|
+
|
|
446
|
+
if should_route(result):
|
|
447
|
+
# Get return type annotation if available
|
|
448
|
+
return_type = None
|
|
449
|
+
try:
|
|
450
|
+
from flowyml.core.routing import get_step_return_type
|
|
451
|
+
|
|
452
|
+
return_type = get_step_return_type(step.func)
|
|
453
|
+
except Exception:
|
|
454
|
+
pass
|
|
455
|
+
|
|
456
|
+
routing_result = route_artifact(
|
|
457
|
+
output=result,
|
|
351
458
|
step_name=step.name,
|
|
459
|
+
run_id=run_id or "local",
|
|
460
|
+
return_type=return_type,
|
|
352
461
|
project_name=project_name,
|
|
353
462
|
)
|
|
463
|
+
if routing_result and routing_result.store_uri:
|
|
464
|
+
artifact_uri = routing_result.store_uri
|
|
465
|
+
except ImportError:
|
|
466
|
+
pass # Routing module not available
|
|
467
|
+
except Exception:
|
|
468
|
+
pass # Routing failed, continue with normal flow
|
|
354
469
|
|
|
355
470
|
# Cache result
|
|
356
471
|
if cache_store and step.cache:
|
|
@@ -374,6 +489,7 @@ class LocalExecutor(Executor):
|
|
|
374
489
|
|
|
375
490
|
except Exception as e:
|
|
376
491
|
last_error = str(e)
|
|
492
|
+
error_traceback = traceback.format_exc()
|
|
377
493
|
retries += 1
|
|
378
494
|
|
|
379
495
|
if attempt < max_retries:
|
|
@@ -382,12 +498,17 @@ class LocalExecutor(Executor):
|
|
|
382
498
|
time.sleep(wait_time)
|
|
383
499
|
continue
|
|
384
500
|
|
|
385
|
-
# All retries exhausted
|
|
501
|
+
# All retries exhausted - send error to logs
|
|
502
|
+
if monitor_thread:
|
|
503
|
+
monitor_thread.stop(error=f"{last_error}\n{error_traceback}")
|
|
504
|
+
monitor_thread.join()
|
|
505
|
+
monitor_thread = None # Prevent double-stop in finally
|
|
506
|
+
|
|
386
507
|
duration = time.time() - start_time
|
|
387
508
|
return ExecutionResult(
|
|
388
509
|
step_name=step.name,
|
|
389
510
|
success=False,
|
|
390
|
-
error=f"{last_error}\n{
|
|
511
|
+
error=f"{last_error}\n{error_traceback}",
|
|
391
512
|
duration_seconds=duration,
|
|
392
513
|
retries=retries,
|
|
393
514
|
)
|
|
@@ -438,11 +559,41 @@ class LocalExecutor(Executor):
|
|
|
438
559
|
# Find the step object
|
|
439
560
|
step = next(s for s in step_group.steps if s.name == step_name)
|
|
440
561
|
|
|
441
|
-
# Prepare inputs for this step
|
|
562
|
+
# Prepare inputs for this step - map input names to function parameters
|
|
442
563
|
step_inputs = {}
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
564
|
+
|
|
565
|
+
# Get function signature to properly map inputs to parameters
|
|
566
|
+
sig = inspect.signature(step.func)
|
|
567
|
+
params = list(sig.parameters.values())
|
|
568
|
+
# Filter out self/cls
|
|
569
|
+
params = [p for p in params if p.name not in ("self", "cls")]
|
|
570
|
+
assigned_params = set()
|
|
571
|
+
|
|
572
|
+
if step.inputs:
|
|
573
|
+
for i, input_name in enumerate(step.inputs):
|
|
574
|
+
if input_name not in step_outputs:
|
|
575
|
+
continue
|
|
576
|
+
|
|
577
|
+
val = step_outputs[input_name]
|
|
578
|
+
|
|
579
|
+
# Check if input name matches a parameter directly
|
|
580
|
+
param_match = next((p for p in params if p.name == input_name), None)
|
|
581
|
+
|
|
582
|
+
if param_match:
|
|
583
|
+
step_inputs[param_match.name] = val
|
|
584
|
+
assigned_params.add(param_match.name)
|
|
585
|
+
elif i < len(params):
|
|
586
|
+
# Positional fallback - use the parameter at the same position
|
|
587
|
+
target_param = params[i]
|
|
588
|
+
if target_param.name not in assigned_params:
|
|
589
|
+
step_inputs[target_param.name] = val
|
|
590
|
+
assigned_params.add(target_param.name)
|
|
591
|
+
|
|
592
|
+
# Auto-map parameters from available outputs by name
|
|
593
|
+
for param in params:
|
|
594
|
+
if param.name in step_outputs and param.name not in step_inputs:
|
|
595
|
+
step_inputs[param.name] = step_outputs[param.name]
|
|
596
|
+
assigned_params.add(param.name)
|
|
446
597
|
|
|
447
598
|
# Inject context parameters for this specific step
|
|
448
599
|
if context is not None:
|
|
@@ -515,33 +666,76 @@ class DistributedExecutor(Executor):
|
|
|
515
666
|
inputs: dict[str, Any],
|
|
516
667
|
context_params: dict[str, Any],
|
|
517
668
|
cache_store: Any | None = None,
|
|
669
|
+
artifact_store: Any | None = None,
|
|
670
|
+
run_id: str | None = None,
|
|
671
|
+
project_name: str = "default",
|
|
672
|
+
all_outputs: dict[str, Any] | None = None,
|
|
518
673
|
) -> ExecutionResult:
|
|
519
|
-
"""Execute step in distributed manner.
|
|
674
|
+
"""Execute step in distributed manner.
|
|
675
|
+
|
|
676
|
+
Args:
|
|
677
|
+
step: Step to execute
|
|
678
|
+
inputs: Input data for the step
|
|
679
|
+
context_params: Parameters from context
|
|
680
|
+
cache_store: Cache store for caching
|
|
681
|
+
artifact_store: Artifact store for logging results
|
|
682
|
+
run_id: Unique ID for this pipeline run
|
|
683
|
+
project_name: Name of the project
|
|
684
|
+
all_outputs: Collection of all step outputs for conditional evaluation
|
|
685
|
+
|
|
686
|
+
Returns:
|
|
687
|
+
ExecutionResult with output or error
|
|
688
|
+
"""
|
|
520
689
|
# Placeholder - would use Ray, Dask, or similar
|
|
521
690
|
# For now, fall back to local execution
|
|
522
691
|
local_executor = LocalExecutor()
|
|
523
|
-
return local_executor.execute_step(
|
|
692
|
+
return local_executor.execute_step(
|
|
693
|
+
step,
|
|
694
|
+
inputs,
|
|
695
|
+
context_params,
|
|
696
|
+
cache_store,
|
|
697
|
+
artifact_store,
|
|
698
|
+
run_id,
|
|
699
|
+
project_name,
|
|
700
|
+
all_outputs,
|
|
701
|
+
)
|
|
524
702
|
|
|
525
703
|
def execute_step_group(
|
|
526
704
|
self,
|
|
527
705
|
step_group, # StepGroup
|
|
528
706
|
inputs: dict[str, Any],
|
|
529
|
-
|
|
707
|
+
context: Any | None = None, # Context object for per-step injection
|
|
708
|
+
context_params: dict[str, Any] | None = None, # Deprecated: use context instead
|
|
530
709
|
cache_store: Any | None = None,
|
|
531
710
|
artifact_store: Any | None = None,
|
|
532
711
|
run_id: str | None = None,
|
|
533
712
|
project_name: str = "default",
|
|
534
713
|
) -> list[ExecutionResult]:
|
|
535
|
-
"""Execute step group in distributed manner.
|
|
714
|
+
"""Execute step group in distributed manner.
|
|
715
|
+
|
|
716
|
+
Args:
|
|
717
|
+
step_group: StepGroup to execute
|
|
718
|
+
inputs: Input data available to the group
|
|
719
|
+
context: Context object for per-step parameter injection (preferred)
|
|
720
|
+
context_params: Parameters from context (deprecated, use context instead)
|
|
721
|
+
cache_store: Cache store for caching
|
|
722
|
+
artifact_store: Artifact store for materialization
|
|
723
|
+
run_id: Run identifier
|
|
724
|
+
project_name: Project name
|
|
725
|
+
|
|
726
|
+
Returns:
|
|
727
|
+
List of ExecutionResult (one per step)
|
|
728
|
+
"""
|
|
536
729
|
# Placeholder - in real implementation, would send entire group to remote worker
|
|
537
730
|
# For now, fall back to local execution
|
|
538
731
|
local_executor = LocalExecutor()
|
|
539
732
|
return local_executor.execute_step_group(
|
|
540
|
-
step_group,
|
|
541
|
-
inputs,
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
733
|
+
step_group=step_group,
|
|
734
|
+
inputs=inputs,
|
|
735
|
+
context=context,
|
|
736
|
+
context_params=context_params,
|
|
737
|
+
cache_store=cache_store,
|
|
738
|
+
artifact_store=artifact_store,
|
|
739
|
+
run_id=run_id,
|
|
740
|
+
project_name=project_name,
|
|
547
741
|
)
|