flowyml 1.2.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. flowyml/__init__.py +3 -0
  2. flowyml/assets/base.py +10 -0
  3. flowyml/assets/metrics.py +6 -0
  4. flowyml/cli/main.py +108 -2
  5. flowyml/cli/run.py +9 -2
  6. flowyml/core/execution_status.py +52 -0
  7. flowyml/core/hooks.py +106 -0
  8. flowyml/core/observability.py +210 -0
  9. flowyml/core/orchestrator.py +274 -0
  10. flowyml/core/pipeline.py +193 -231
  11. flowyml/core/project.py +34 -2
  12. flowyml/core/remote_orchestrator.py +109 -0
  13. flowyml/core/resources.py +34 -17
  14. flowyml/core/retry_policy.py +80 -0
  15. flowyml/core/scheduler.py +9 -9
  16. flowyml/core/scheduler_config.py +2 -3
  17. flowyml/core/step.py +18 -1
  18. flowyml/core/submission_result.py +53 -0
  19. flowyml/integrations/keras.py +95 -22
  20. flowyml/monitoring/alerts.py +2 -2
  21. flowyml/stacks/__init__.py +15 -0
  22. flowyml/stacks/aws.py +599 -0
  23. flowyml/stacks/azure.py +295 -0
  24. flowyml/stacks/bridge.py +9 -9
  25. flowyml/stacks/components.py +24 -2
  26. flowyml/stacks/gcp.py +158 -11
  27. flowyml/stacks/local.py +5 -0
  28. flowyml/stacks/plugins.py +2 -2
  29. flowyml/stacks/registry.py +21 -0
  30. flowyml/storage/artifacts.py +15 -5
  31. flowyml/storage/materializers/__init__.py +2 -0
  32. flowyml/storage/materializers/base.py +33 -0
  33. flowyml/storage/materializers/cloudpickle.py +74 -0
  34. flowyml/storage/metadata.py +3 -881
  35. flowyml/storage/remote.py +590 -0
  36. flowyml/storage/sql.py +911 -0
  37. flowyml/ui/backend/dependencies.py +28 -0
  38. flowyml/ui/backend/main.py +43 -80
  39. flowyml/ui/backend/routers/assets.py +483 -17
  40. flowyml/ui/backend/routers/client.py +46 -0
  41. flowyml/ui/backend/routers/execution.py +13 -2
  42. flowyml/ui/backend/routers/experiments.py +97 -14
  43. flowyml/ui/backend/routers/metrics.py +168 -0
  44. flowyml/ui/backend/routers/pipelines.py +77 -12
  45. flowyml/ui/backend/routers/projects.py +33 -7
  46. flowyml/ui/backend/routers/runs.py +221 -12
  47. flowyml/ui/backend/routers/schedules.py +5 -21
  48. flowyml/ui/backend/routers/stats.py +14 -0
  49. flowyml/ui/backend/routers/traces.py +37 -53
  50. flowyml/ui/frontend/dist/assets/index-DcYwrn2j.css +1 -0
  51. flowyml/ui/frontend/dist/assets/index-Dlz_ygOL.js +592 -0
  52. flowyml/ui/frontend/dist/index.html +2 -2
  53. flowyml/ui/frontend/src/App.jsx +4 -1
  54. flowyml/ui/frontend/src/app/assets/page.jsx +260 -230
  55. flowyml/ui/frontend/src/app/dashboard/page.jsx +38 -7
  56. flowyml/ui/frontend/src/app/experiments/page.jsx +61 -314
  57. flowyml/ui/frontend/src/app/observability/page.jsx +277 -0
  58. flowyml/ui/frontend/src/app/pipelines/page.jsx +79 -402
  59. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectArtifactsList.jsx +151 -0
  60. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectExperimentsList.jsx +145 -0
  61. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectHeader.jsx +45 -0
  62. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectHierarchy.jsx +467 -0
  63. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectMetricsPanel.jsx +253 -0
  64. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectPipelinesList.jsx +105 -0
  65. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectRelations.jsx +189 -0
  66. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectRunsList.jsx +136 -0
  67. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectTabs.jsx +95 -0
  68. flowyml/ui/frontend/src/app/projects/[projectId]/page.jsx +326 -0
  69. flowyml/ui/frontend/src/app/projects/page.jsx +13 -3
  70. flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +79 -10
  71. flowyml/ui/frontend/src/app/runs/page.jsx +82 -424
  72. flowyml/ui/frontend/src/app/settings/page.jsx +1 -0
  73. flowyml/ui/frontend/src/app/tokens/page.jsx +62 -16
  74. flowyml/ui/frontend/src/components/AssetDetailsPanel.jsx +373 -0
  75. flowyml/ui/frontend/src/components/AssetLineageGraph.jsx +291 -0
  76. flowyml/ui/frontend/src/components/AssetStatsDashboard.jsx +302 -0
  77. flowyml/ui/frontend/src/components/AssetTreeHierarchy.jsx +477 -0
  78. flowyml/ui/frontend/src/components/ExperimentDetailsPanel.jsx +227 -0
  79. flowyml/ui/frontend/src/components/NavigationTree.jsx +401 -0
  80. flowyml/ui/frontend/src/components/PipelineDetailsPanel.jsx +239 -0
  81. flowyml/ui/frontend/src/components/PipelineGraph.jsx +67 -3
  82. flowyml/ui/frontend/src/components/ProjectSelector.jsx +115 -0
  83. flowyml/ui/frontend/src/components/RunDetailsPanel.jsx +298 -0
  84. flowyml/ui/frontend/src/components/header/Header.jsx +48 -1
  85. flowyml/ui/frontend/src/components/plugins/ZenMLIntegration.jsx +106 -0
  86. flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +52 -26
  87. flowyml/ui/frontend/src/components/ui/DataView.jsx +35 -17
  88. flowyml/ui/frontend/src/components/ui/ErrorBoundary.jsx +118 -0
  89. flowyml/ui/frontend/src/contexts/ProjectContext.jsx +2 -2
  90. flowyml/ui/frontend/src/contexts/ToastContext.jsx +116 -0
  91. flowyml/ui/frontend/src/layouts/MainLayout.jsx +5 -1
  92. flowyml/ui/frontend/src/router/index.jsx +4 -0
  93. flowyml/ui/frontend/src/utils/date.js +10 -0
  94. flowyml/ui/frontend/src/utils/downloads.js +11 -0
  95. flowyml/utils/config.py +6 -0
  96. flowyml/utils/stack_config.py +45 -3
  97. {flowyml-1.2.0.dist-info → flowyml-1.4.0.dist-info}/METADATA +44 -4
  98. flowyml-1.4.0.dist-info/RECORD +200 -0
  99. {flowyml-1.2.0.dist-info → flowyml-1.4.0.dist-info}/licenses/LICENSE +1 -1
  100. flowyml/ui/frontend/dist/assets/index-DFNQnrUj.js +0 -448
  101. flowyml/ui/frontend/dist/assets/index-pWI271rZ.css +0 -1
  102. flowyml-1.2.0.dist-info/RECORD +0 -159
  103. {flowyml-1.2.0.dist-info → flowyml-1.4.0.dist-info}/WHEEL +0 -0
  104. {flowyml-1.2.0.dist-info → flowyml-1.4.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,295 @@
1
+ """Azure Stack Components and Preset Stack."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import subprocess
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ from flowyml.stacks.base import Stack
10
+ from flowyml.stacks.components import ArtifactStore, ContainerRegistry, DockerConfig, ResourceConfig
11
+ from flowyml.core.remote_orchestrator import RemoteOrchestrator
12
+ from flowyml.stacks.plugins import register_component
13
+ from flowyml.storage.metadata import SQLiteMetadataStore
14
+ from flowyml.core.submission_result import SubmissionResult
15
+
16
+
17
@register_component(name="azure_blob")
class AzureBlobArtifactStore(ArtifactStore):
    """Artifact store backed by Azure Blob Storage."""

    def __init__(
        self,
        name: str = "azure_blob",
        account_url: str | None = None,
        container_name: str | None = None,
        credential: Any | None = None,
    ):
        """Configure the store.

        Args:
            name: Component name.
            account_url: Storage-account URL, e.g. "https://acct.blob.core.windows.net".
            container_name: Target blob container.
            credential: Optional Azure credential; defaults to DefaultAzureCredential.
        """
        super().__init__(name)
        self.account_url = account_url
        self.container_name = container_name
        self.credential = credential

    def _client(self):
        """Return a BlobServiceClient, lazily importing the Azure SDK."""
        from azure.storage.blob import BlobServiceClient

        cred = self.credential
        if cred is None:
            from azure.identity import DefaultAzureCredential

            cred = DefaultAzureCredential(exclude_shared_token_cache_credential=True)

        return BlobServiceClient(account_url=self.account_url, credential=cred)

    def validate(self) -> bool:
        """Check configuration and reachability of the configured container.

        Raises:
            ValueError: If configuration is incomplete or the container is unreachable.
        """
        if not self.account_url or not self.container_name:
            raise ValueError("account_url and container_name are required for AzureBlobArtifactStore")
        try:
            container = self._client().get_container_client(self.container_name)
            container.get_container_properties()
        except Exception as exc:
            raise ValueError(f"Unable to access container '{self.container_name}': {exc}") from exc
        return True

    def save(self, artifact: Any, path: str) -> str:
        """Upload an artifact and return its blob URL.

        A str/Path that points at an existing local file is streamed from disk;
        bytes are uploaded as-is; anything else is stringified and encoded.
        """
        blob_name = path.lstrip("/")
        container = self._client().get_container_client(self.container_name)
        is_local_file = isinstance(artifact, (str, Path)) and Path(artifact).exists()
        if is_local_file:
            with open(Path(artifact), "rb") as fh:
                container.upload_blob(name=blob_name, data=fh, overwrite=True)
        else:
            payload = artifact if isinstance(artifact, bytes) else str(artifact).encode()
            container.upload_blob(name=blob_name, data=payload, overwrite=True)
        return f"{self.account_url}/{self.container_name}/{blob_name}"

    def load(self, path: str) -> bytes:
        """Download a blob's full content as bytes."""
        blob_name = path.lstrip("/")
        blob = self._client().get_blob_client(self.container_name, blob_name)
        return blob.download_blob().readall()

    def exists(self, path: str) -> bool:
        """Return True if the blob exists in the container."""
        blob_name = path.lstrip("/")
        blob = self._client().get_blob_client(self.container_name, blob_name)
        return blob.exists()

    def to_dict(self) -> dict[str, Any]:
        """Serialize the component configuration."""
        return {
            "name": self.name,
            "type": "azure_blob",
            "account_url": self.account_url,
            "container_name": self.container_name,
        }
84
+
85
+
86
@register_component(name="acr")
class ACRContainerRegistry(ContainerRegistry):
    """Azure Container Registry integration."""

    def __init__(
        self,
        name: str = "acr",
        registry_name: str | None = None,
        login_server: str | None = None,
        username: str | None = None,
        password: str | None = None,
    ):
        """Configure the registry.

        Args:
            name: Component name.
            registry_name: ACR registry name; used to derive the login server
                (``<registry_name>.azurecr.io``) when login_server is not given.
            login_server: Explicit login server hostname.
            username: Optional admin username for docker login.
            password: Optional admin password for docker login.
        """
        super().__init__(name)
        self.registry_name = registry_name
        self.login_server = login_server or (f"{registry_name}.azurecr.io" if registry_name else None)
        self.username = username
        self.password = password

    def validate(self) -> bool:
        """Ensure enough configuration exists to resolve a login server.

        Raises:
            ValueError: If neither registry_name nor login_server is set.
        """
        if not self.registry_name and not self.login_server:
            raise ValueError("registry_name or login_server is required for ACRContainerRegistry")
        return True

    def _login(self) -> None:
        """Authenticate the local docker daemon against the registry.

        Security: the password is piped to ``docker login --password-stdin``
        instead of being passed as a ``-p`` argument, so it never appears in
        the host's process list. Without explicit credentials, falls back to
        ``az acr login``, which uses the active Azure CLI session.
        """
        if self.username and self.password:
            subprocess.run(
                ["docker", "login", self.login_server, "-u", self.username, "--password-stdin"],
                input=self.password,
                text=True,
                check=True,
            )
        else:
            subprocess.run(["az", "acr", "login", "--name", self.registry_name], check=True)

    def push_image(self, image_name: str, tag: str = "latest") -> str:
        """Tag a local image with the registry URI and push it.

        Returns:
            The fully qualified image URI that was pushed.
        """
        full_uri = self.get_image_uri(image_name, tag)
        self._login()
        subprocess.run(["docker", "tag", f"{image_name}:{tag}", full_uri], check=True)
        subprocess.run(["docker", "push", full_uri], check=True)
        return full_uri

    def pull_image(self, image_name: str, tag: str = "latest") -> None:
        """Pull an image from the registry."""
        full_uri = self.get_image_uri(image_name, tag)
        self._login()
        subprocess.run(["docker", "pull", full_uri], check=True)

    def get_image_uri(self, image_name: str, tag: str = "latest") -> str:
        """Return the fully qualified image URI on this registry."""
        return f"{self.login_server}/{image_name}:{tag}"

    def to_dict(self) -> dict[str, Any]:
        """Serialize the component configuration."""
        return {
            "name": self.name,
            "type": "acr",
            "login_server": self.login_server,
            "registry_name": self.registry_name,
        }
140
+
141
+
142
@register_component(name="azure_ml")
class AzureMLOrchestrator(RemoteOrchestrator):
    """Submit pipeline runs to Azure ML managed compute."""

    def __init__(
        self,
        name: str = "azure_ml",
        subscription_id: str | None = None,
        resource_group: str | None = None,
        workspace_name: str | None = None,
        compute: str | None = None,
        experiment_name: str = "flowyml",
        credential: Any | None = None,
        environment_name: str | None = None,
    ):
        """Configure the orchestrator.

        Args:
            name: Component name.
            subscription_id: Azure subscription id.
            resource_group: Resource group of the workspace.
            workspace_name: Azure ML workspace name.
            compute: Compute target to submit jobs to.
            experiment_name: Azure ML experiment grouping for submitted jobs.
            credential: Optional Azure credential; defaults to DefaultAzureCredential.
            environment_name: Fallback Azure ML environment used when a job is
                submitted without a docker_config.
        """
        super().__init__(name)
        self.subscription_id = subscription_id
        self.resource_group = resource_group
        self.workspace_name = workspace_name
        self.compute = compute
        self.experiment_name = experiment_name
        self.credential = credential
        # Bug fix: run_pipeline() reads self.environment_name when no
        # docker_config is provided, but this attribute was never set,
        # raising AttributeError. Default None keeps prior call sites valid.
        self.environment_name = environment_name

    def _client(self):
        """Build an MLClient from the configured or default credential."""
        from azure.ai.ml import MLClient
        from azure.identity import DefaultAzureCredential

        credential = self.credential or DefaultAzureCredential(exclude_shared_token_cache_credential=True)
        return MLClient(
            credential,
            subscription_id=self.subscription_id,
            resource_group_name=self.resource_group,
            workspace_name=self.workspace_name,
        )

    def validate(self) -> bool:
        """Verify all required workspace settings are present.

        Raises:
            ValueError: If any required field is missing.
        """
        if not all([self.subscription_id, self.resource_group, self.workspace_name, self.compute]):
            raise ValueError(
                "subscription_id, resource_group, workspace_name, and compute are required for AzureMLOrchestrator",
            )
        return True

    def run_pipeline(
        self,
        pipeline: Any,
        run_id: str,
        resources: ResourceConfig | None = None,
        docker_config: DockerConfig | None = None,
        inputs: dict[str, Any] | None = None,
        context: dict[str, Any] | None = None,
        **kwargs,
    ) -> SubmissionResult:
        """Submit pipeline to Azure ML.

        Args:
            pipeline: Pipeline to run.
            run_id: Unique run identifier.
            resources: Resource configuration (currently unused here).
            docker_config: Docker configuration; its image is used as the job
                environment when provided, else self.environment_name.
            inputs: Input data (currently unused here).
            context: Context variables (currently unused here).
            **kwargs: Extra options; "job_name" overrides the display name.

        Returns:
            SubmissionResult with job name and optional wait function.
        """
        from azure.ai.ml import command

        ml_client = self._client()
        job_name = kwargs.get("job_name") or f"{pipeline.name}-{run_id[:8]}"

        # Build command job
        job = command(
            code=".",  # Use current directory or specify path
            command="python -m flowyml.cli.run",
            environment=docker_config.image if docker_config else self.environment_name,
            compute=self.compute,
            display_name=job_name,
            experiment_name=self.experiment_name,
        )

        submitted = ml_client.jobs.create_or_update(job)

        def wait_for_completion():
            # Streams logs until the remote job terminates.
            ml_client.jobs.stream(submitted.name)

        return SubmissionResult(
            job_id=submitted.name,
            wait_for_completion=wait_for_completion,
            metadata={"studio_url": submitted.studio_url},
        )

    def get_run_status(self, run_id: str) -> str:
        """Return the raw Azure ML status string for a submitted job."""
        client = self._client()
        job = client.jobs.get(run_id)
        return job.status

    def to_dict(self) -> dict[str, Any]:
        """Serialize the component configuration."""
        return {
            "name": self.name,
            "type": "azure_ml",
            "subscription_id": self.subscription_id,
            "resource_group": self.resource_group,
            "workspace_name": self.workspace_name,
            "compute": self.compute,
        }
238
+
239
+
240
class AzureMLStack(Stack):
    """Managed Azure ML stack built from AzureML orchestrator, Blob storage, and ACR."""

    def __init__(
        self,
        name: str = "azure",
        subscription_id: str | None = None,
        resource_group: str | None = None,
        workspace_name: str | None = None,
        compute: str | None = None,
        account_url: str | None = None,
        container_name: str | None = None,
        registry_name: str | None = None,
        login_server: str | None = None,
        metadata_store: Any | None = None,
    ):
        """Assemble the Azure preset stack from its individual components.

        Args:
            name: Stack name.
            subscription_id: Azure subscription id for the orchestrator.
            resource_group: Resource group of the Azure ML workspace.
            workspace_name: Azure ML workspace name.
            compute: Compute target for submitted jobs.
            account_url: Blob storage account URL for artifacts.
            container_name: Blob container for artifacts.
            registry_name: ACR registry name.
            login_server: Explicit ACR login server.
            metadata_store: Optional metadata store; defaults to SQLite.
        """
        # Construct the three Azure-specific components.
        blob_store = AzureBlobArtifactStore(account_url=account_url, container_name=container_name)
        registry = ACRContainerRegistry(registry_name=registry_name, login_server=login_server)
        ml_orchestrator = AzureMLOrchestrator(
            subscription_id=subscription_id,
            resource_group=resource_group,
            workspace_name=workspace_name,
            compute=compute,
        )

        super().__init__(
            name=name,
            executor=None,
            artifact_store=blob_store,
            metadata_store=metadata_store if metadata_store is not None else SQLiteMetadataStore(),
            container_registry=registry,
            orchestrator=ml_orchestrator,
        )

        self.subscription_id = subscription_id
        self.workspace_name = workspace_name

    def validate(self) -> bool:
        """Validate each Azure component; component validators raise on error."""
        for component in (self.orchestrator, self.artifact_store, self.container_registry):
            component.validate()
        return True

    def to_dict(self) -> dict[str, Any]:
        """Serialize the stack together with its component configurations."""
        return {
            "name": self.name,
            "type": "azure",
            "subscription_id": self.subscription_id,
            "workspace_name": self.workspace_name,
            "orchestrator": self.orchestrator.to_dict(),
            "artifact_store": self.artifact_store.to_dict(),
            "container_registry": self.container_registry.to_dict(),
        }
flowyml/stacks/bridge.py CHANGED
@@ -6,7 +6,7 @@ frameworks (ZenML, Airflow, Prefect, etc.) using rule-based adaptation.
6
6
 
7
7
  import inspect
8
8
  from dataclasses import dataclass, field
9
- from typing import Any, Optional
9
+ from typing import Any
10
10
  import logging
11
11
 
12
12
  from flowyml.stacks.components import (
@@ -25,8 +25,8 @@ class AdaptationRule:
25
25
  """Rule for adapting an external component."""
26
26
 
27
27
  # Matching criteria
28
- source_type: Optional[str] = None # e.g., "zenml.orchestrators.base.BaseOrchestrator"
29
- name_pattern: Optional[str] = None # e.g., ".*Orchestrator"
28
+ source_type: str | None = None # e.g., "zenml.orchestrators.base.BaseOrchestrator"
29
+ name_pattern: str | None = None # e.g., ".*Orchestrator"
30
30
  has_methods: list[str] = field(default_factory=list) # e.g., ["run", "prepare_pipeline"]
31
31
 
32
32
  # Adaptation logic
@@ -49,7 +49,7 @@ class GenericBridge:
49
49
  self,
50
50
  external_class: Any,
51
51
  name: str,
52
- config: Optional[dict[str, Any]] = None,
52
+ config: dict[str, Any] | None = None,
53
53
  ) -> type[StackComponent]:
54
54
  """Dynamically create a wrapper class based on rules.
55
55
 
@@ -75,7 +75,7 @@ class GenericBridge:
75
75
  else:
76
76
  return self._create_generic_wrapper(external_class, name, component_type, rule)
77
77
 
78
- def _find_matching_rule(self, external_class: Any) -> Optional[AdaptationRule]:
78
+ def _find_matching_rule(self, external_class: Any) -> AdaptationRule | None:
79
79
  """Find the first rule that matches the external class."""
80
80
  for rule in self.rules:
81
81
  # Check source type
@@ -120,7 +120,7 @@ class GenericBridge:
120
120
  self,
121
121
  external_class: Any,
122
122
  name: str,
123
- rule: Optional[AdaptationRule],
123
+ rule: AdaptationRule | None,
124
124
  ) -> type[Orchestrator]:
125
125
  """Create a wrapper for an Orchestrator."""
126
126
 
@@ -174,7 +174,7 @@ class GenericBridge:
174
174
  self,
175
175
  external_class: Any,
176
176
  name: str,
177
- rule: Optional[AdaptationRule],
177
+ rule: AdaptationRule | None,
178
178
  ) -> type[ArtifactStore]:
179
179
  """Create a wrapper for an Artifact Store."""
180
180
 
@@ -216,7 +216,7 @@ class GenericBridge:
216
216
  self,
217
217
  external_class: Any,
218
218
  name: str,
219
- rule: Optional[AdaptationRule],
219
+ rule: AdaptationRule | None,
220
220
  ) -> type[ContainerRegistry]:
221
221
  """Create a wrapper for a Container Registry."""
222
222
 
@@ -259,7 +259,7 @@ class GenericBridge:
259
259
  external_class: Any,
260
260
  name: str,
261
261
  comp_type: ComponentType,
262
- rule: Optional[AdaptationRule],
262
+ rule: AdaptationRule | None,
263
263
  ) -> type[StackComponent]:
264
264
  """Create a generic wrapper."""
265
265
 
@@ -99,8 +99,30 @@ class Orchestrator(StackComponent):
99
99
  return ComponentType.ORCHESTRATOR
100
100
 
101
101
  @abstractmethod
102
- def run_pipeline(self, pipeline: Any, **kwargs) -> Any:
103
- """Run a pipeline on this orchestrator."""
102
+ def run_pipeline(
103
+ self,
104
+ pipeline: Any,
105
+ run_id: str,
106
+ resources: "ResourceConfig | None" = None,
107
+ docker_config: "DockerConfig | None" = None,
108
+ inputs: dict[str, Any] | None = None,
109
+ context: dict[str, Any] | None = None,
110
+ **kwargs,
111
+ ) -> Any:
112
+ """Run a pipeline on this orchestrator.
113
+
114
+ Args:
115
+ pipeline: The pipeline to run.
116
+ run_id: The unique run identifier.
117
+ resources: Resource configuration.
118
+ docker_config: Docker configuration.
119
+ inputs: Input data.
120
+ context: Context variables.
121
+ **kwargs: Additional arguments.
122
+
123
+ Returns:
124
+ The run result or job ID.
125
+ """
104
126
  pass
105
127
 
106
128
  @abstractmethod
flowyml/stacks/gcp.py CHANGED
@@ -6,17 +6,22 @@ on Google Cloud Platform using Vertex AI, Cloud Storage, and Container Registry.
6
6
 
7
7
  from typing import Any
8
8
 
9
+ import subprocess
9
10
  from flowyml.stacks.base import Stack
10
11
  from flowyml.stacks.components import (
11
- Orchestrator,
12
12
  ArtifactStore,
13
13
  ContainerRegistry,
14
14
  ResourceConfig,
15
15
  DockerConfig,
16
16
  )
17
+ from flowyml.core.remote_orchestrator import RemoteOrchestrator
18
+ from flowyml.stacks.plugins import register_component
19
+ from flowyml.core.submission_result import SubmissionResult
20
+ from flowyml.core.execution_status import ExecutionStatus
17
21
 
18
22
 
19
- class VertexAIOrchestrator(Orchestrator):
23
+ @register_component(name="vertex_ai")
24
+ class VertexAIOrchestrator(RemoteOrchestrator):
20
25
  """Vertex AI orchestrator for running pipelines on Google Cloud.
21
26
 
22
27
  This orchestrator submits pipeline jobs to Vertex AI Pipelines,
@@ -76,28 +81,35 @@ class VertexAIOrchestrator(Orchestrator):
76
81
  def run_pipeline(
77
82
  self,
78
83
  pipeline: Any,
84
+ run_id: str,
79
85
  resources: ResourceConfig | None = None,
80
86
  docker_config: DockerConfig | None = None,
87
+ inputs: dict[str, Any] | None = None,
88
+ context: dict[str, Any] | None = None,
81
89
  **kwargs,
82
- ) -> str:
90
+ ) -> "SubmissionResult":
83
91
  """Run pipeline on Vertex AI.
84
92
 
85
93
  Args:
86
94
  pipeline: Pipeline to run
95
+ run_id: Run identifier
87
96
  resources: Resource configuration
88
97
  docker_config: Docker configuration
98
+ inputs: Input data
99
+ context: Context variables
89
100
  **kwargs: Additional arguments
90
101
 
91
102
  Returns:
92
- Job ID
103
+ SubmissionResult with job resource name
93
104
  """
94
105
  from google.cloud import aiplatform
106
+ import time
95
107
 
96
108
  # Initialize Vertex AI
97
109
  aiplatform.init(project=self.project_id, location=self.region)
98
110
 
99
111
  # Create custom job
100
- job_display_name = f"{pipeline.name}-{pipeline.run_id}"
112
+ job_display_name = f"{pipeline.name}-{run_id[:8]}"
101
113
 
102
114
  # Build worker pool specs
103
115
  worker_pool_specs = self._build_worker_pool_specs(
@@ -114,16 +126,66 @@ class VertexAIOrchestrator(Orchestrator):
114
126
  encryption_spec_key_name=self.encryption_key,
115
127
  )
116
128
 
117
- job.run(sync=False)
118
-
119
- return job.resource_name
129
+ # Submit job asynchronously
130
+ job.submit()
131
+
132
+ job_id = job.resource_name
133
+
134
+ # Create wait function
135
+ def wait_for_completion():
136
+ """Poll job status until completion."""
137
+ while True:
138
+ status = self.get_run_status(job_id)
139
+ if status.is_finished:
140
+ if not status.is_successful:
141
+ raise RuntimeError(f"Vertex AI job {job_id} failed with status: {status}")
142
+ break
143
+ time.sleep(15) # Poll every 15 seconds
144
+
145
+ return SubmissionResult(
146
+ job_id=job_id,
147
+ wait_for_completion=wait_for_completion,
148
+ metadata={
149
+ "platform": "vertex_ai",
150
+ "project": self.project_id,
151
+ "region": self.region,
152
+ "job_name": job_display_name,
153
+ },
154
+ )
120
155
 
121
- def get_run_status(self, run_id: str) -> str:
156
+ def get_run_status(self, job_id: str) -> "ExecutionStatus":
122
157
  """Get status of a Vertex AI job."""
123
158
  from google.cloud import aiplatform
124
159
 
125
- job = aiplatform.CustomJob(run_id)
126
- return job.state.name
160
+ try:
161
+ job = aiplatform.CustomJob(job_id)
162
+ state = job.state.name
163
+
164
+ # Map Vertex AI states to ExecutionStatus
165
+ status_map = {
166
+ "JOB_STATE_QUEUED": ExecutionStatus.PROVISIONING,
167
+ "JOB_STATE_PENDING": ExecutionStatus.PROVISIONING,
168
+ "JOB_STATE_RUNNING": ExecutionStatus.RUNNING,
169
+ "JOB_STATE_SUCCEEDED": ExecutionStatus.COMPLETED,
170
+ "JOB_STATE_FAILED": ExecutionStatus.FAILED,
171
+ "JOB_STATE_CANCELLING": ExecutionStatus.STOPPING,
172
+ "JOB_STATE_CANCELLED": ExecutionStatus.CANCELLED,
173
+ }
174
+ return status_map.get(state, ExecutionStatus.RUNNING)
175
+ except Exception as e:
176
+ print(f"Error fetching job status: {e}")
177
+ return ExecutionStatus.FAILED
178
+
179
+ def stop_run(self, job_id: str, graceful: bool = True) -> None:
180
+ """Cancel a Vertex AI job."""
181
+ from google.cloud import aiplatform
182
+
183
+ try:
184
+ job = aiplatform.CustomJob(job_id)
185
+ job.cancel()
186
+ except Exception as e:
187
+ print(f"Error cancelling job {job_id}: {e}")
188
+ raise
127
189
 
128
190
  def _build_worker_pool_specs(
129
191
  self,
@@ -171,6 +233,7 @@ class VertexAIOrchestrator(Orchestrator):
171
233
  }
172
234
 
173
235
 
236
+ @register_component(name="gcs")
174
237
  class GCSArtifactStore(ArtifactStore):
175
238
  """Google Cloud Storage artifact store.
176
239
 
@@ -280,6 +343,7 @@ class GCSArtifactStore(ArtifactStore):
280
343
  }
281
344
 
282
345
 
346
+ @register_component(name="gcr")
283
347
  class GCRContainerRegistry(ContainerRegistry):
284
348
  """Google Container Registry integration.
285
349
 
@@ -478,6 +542,8 @@ class GCPStack(Stack):
478
542
 
479
543
  self.project_id = project_id
480
544
  self.region = region
545
+ self.vertex_endpoints = VertexEndpointManager(project_id=project_id, region=region)
546
+ self.cloud_run = CloudRunDeployer(project_id=project_id, region=region)
481
547
 
482
548
  def validate(self) -> bool:
483
549
  """Validate all GCP stack components."""
@@ -497,3 +563,84 @@ class GCPStack(Stack):
497
563
  "artifact_store": self.artifact_store.to_dict(),
498
564
  "container_registry": self.container_registry.to_dict(),
499
565
  }
566
+
567
+
568
+ class VertexEndpointManager:
569
+ """Deploy trained models as Vertex AI endpoints."""
570
+
571
+ def __init__(self, project_id: str | None, region: str = "us-central1"):
572
+ self.project_id = project_id
573
+ self.region = region
574
+
575
+ def deploy_model(
576
+ self,
577
+ model_display_name: str,
578
+ artifact_uri: str,
579
+ serving_image: str,
580
+ endpoint_display_name: str | None = None,
581
+ machine_type: str = "n1-standard-4",
582
+ ) -> str:
583
+ from google.cloud import aiplatform
584
+
585
+ aiplatform.init(project=self.project_id, location=self.region)
586
+ model = aiplatform.Model.upload(
587
+ display_name=model_display_name,
588
+ artifact_uri=artifact_uri,
589
+ serving_container_image_uri=serving_image,
590
+ )
591
+ endpoint = model.deploy(
592
+ machine_type=machine_type,
593
+ endpoint=aiplatform.Endpoint.create(
594
+ display_name=endpoint_display_name or f"{model_display_name}-endpoint",
595
+ ),
596
+ )
597
+ return endpoint.resource_name
598
+
599
+
600
+ class CloudRunDeployer:
601
+ """Deploy container images to Cloud Run."""
602
+
603
+ def __init__(self, project_id: str | None, region: str = "us-central1"):
604
+ self.project_id = project_id
605
+ self.region = region
606
+
607
+ def deploy_service(
608
+ self,
609
+ service_name: str,
610
+ image: str,
611
+ env: dict[str, str] | None = None,
612
+ allow_unauthenticated: bool = True,
613
+ ) -> str:
614
+ command = [
615
+ "gcloud",
616
+ "run",
617
+ "deploy",
618
+ service_name,
619
+ f"--image={image}",
620
+ f"--region={self.region}",
621
+ f"--project={self.project_id}",
622
+ ]
623
+ if allow_unauthenticated:
624
+ command.append("--allow-unauthenticated")
625
+
626
+ env = env or {}
627
+ for key, value in env.items():
628
+ command.append(f"--set-env-vars={key}={value}")
629
+
630
+ subprocess.run(command, check=True)
631
+ url_result = subprocess.run(
632
+ [
633
+ "gcloud",
634
+ "run",
635
+ "services",
636
+ "describe",
637
+ service_name,
638
+ f"--region={self.region}",
639
+ f"--project={self.project_id}",
640
+ "--format=value(status.url)",
641
+ ],
642
+ check=True,
643
+ capture_output=True,
644
+ text=True,
645
+ )
646
+ return url_result.stdout.strip()
flowyml/stacks/local.py CHANGED
@@ -46,12 +46,17 @@ class LocalStack(Stack):
46
46
  artifact_store = LocalArtifactStore(artifact_path)
47
47
  metadata_store = SQLiteMetadataStore(metadata_path)
48
48
 
49
+ from flowyml.core.orchestrator import LocalOrchestrator
50
+
51
+ orchestrator = LocalOrchestrator()
52
+
49
53
  # Initialize base stack
50
54
  super().__init__(
51
55
  name=name,
52
56
  executor=executor,
53
57
  artifact_store=artifact_store,
54
58
  metadata_store=metadata_store,
59
+ orchestrator=orchestrator,
55
60
  )
56
61
 
57
62
  # Ensure directories exist
flowyml/stacks/plugins.py CHANGED
@@ -14,7 +14,7 @@ import inspect
14
14
  import subprocess
15
15
  import sys
16
16
  from dataclasses import dataclass, field
17
- from typing import Any, Optional, Protocol, runtime_checkable
17
+ from typing import Any, Protocol, runtime_checkable
18
18
 
19
19
  from flowyml.stacks.components import (
20
20
  StackComponent,
@@ -48,7 +48,7 @@ class PluginBridge(Protocol):
48
48
  self,
49
49
  component_class: Any,
50
50
  name: str,
51
- config: Optional[dict[str, Any]] = None,
51
+ config: dict[str, Any] | None = None,
52
52
  ) -> type[StackComponent]:
53
53
  """Wrap an external component class into a flowyml component."""
54
54
  ...