flowyml 1.2.0__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowyml/__init__.py +3 -0
- flowyml/assets/base.py +10 -0
- flowyml/assets/metrics.py +6 -0
- flowyml/cli/main.py +108 -2
- flowyml/cli/run.py +9 -2
- flowyml/core/execution_status.py +52 -0
- flowyml/core/hooks.py +106 -0
- flowyml/core/observability.py +210 -0
- flowyml/core/orchestrator.py +274 -0
- flowyml/core/pipeline.py +193 -231
- flowyml/core/project.py +34 -2
- flowyml/core/remote_orchestrator.py +109 -0
- flowyml/core/resources.py +34 -17
- flowyml/core/retry_policy.py +80 -0
- flowyml/core/scheduler.py +9 -9
- flowyml/core/scheduler_config.py +2 -3
- flowyml/core/step.py +18 -1
- flowyml/core/submission_result.py +53 -0
- flowyml/integrations/keras.py +95 -22
- flowyml/monitoring/alerts.py +2 -2
- flowyml/stacks/__init__.py +15 -0
- flowyml/stacks/aws.py +599 -0
- flowyml/stacks/azure.py +295 -0
- flowyml/stacks/bridge.py +9 -9
- flowyml/stacks/components.py +24 -2
- flowyml/stacks/gcp.py +158 -11
- flowyml/stacks/local.py +5 -0
- flowyml/stacks/plugins.py +2 -2
- flowyml/stacks/registry.py +21 -0
- flowyml/storage/artifacts.py +15 -5
- flowyml/storage/materializers/__init__.py +2 -0
- flowyml/storage/materializers/base.py +33 -0
- flowyml/storage/materializers/cloudpickle.py +74 -0
- flowyml/storage/metadata.py +3 -881
- flowyml/storage/remote.py +590 -0
- flowyml/storage/sql.py +911 -0
- flowyml/ui/backend/dependencies.py +28 -0
- flowyml/ui/backend/main.py +43 -80
- flowyml/ui/backend/routers/assets.py +483 -17
- flowyml/ui/backend/routers/client.py +46 -0
- flowyml/ui/backend/routers/execution.py +13 -2
- flowyml/ui/backend/routers/experiments.py +97 -14
- flowyml/ui/backend/routers/metrics.py +168 -0
- flowyml/ui/backend/routers/pipelines.py +77 -12
- flowyml/ui/backend/routers/projects.py +33 -7
- flowyml/ui/backend/routers/runs.py +221 -12
- flowyml/ui/backend/routers/schedules.py +5 -21
- flowyml/ui/backend/routers/stats.py +14 -0
- flowyml/ui/backend/routers/traces.py +37 -53
- flowyml/ui/frontend/dist/assets/index-DcYwrn2j.css +1 -0
- flowyml/ui/frontend/dist/assets/index-Dlz_ygOL.js +592 -0
- flowyml/ui/frontend/dist/index.html +2 -2
- flowyml/ui/frontend/src/App.jsx +4 -1
- flowyml/ui/frontend/src/app/assets/page.jsx +260 -230
- flowyml/ui/frontend/src/app/dashboard/page.jsx +38 -7
- flowyml/ui/frontend/src/app/experiments/page.jsx +61 -314
- flowyml/ui/frontend/src/app/observability/page.jsx +277 -0
- flowyml/ui/frontend/src/app/pipelines/page.jsx +79 -402
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectArtifactsList.jsx +151 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectExperimentsList.jsx +145 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectHeader.jsx +45 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectHierarchy.jsx +467 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectMetricsPanel.jsx +253 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectPipelinesList.jsx +105 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectRelations.jsx +189 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectRunsList.jsx +136 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectTabs.jsx +95 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/page.jsx +326 -0
- flowyml/ui/frontend/src/app/projects/page.jsx +13 -3
- flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +79 -10
- flowyml/ui/frontend/src/app/runs/page.jsx +82 -424
- flowyml/ui/frontend/src/app/settings/page.jsx +1 -0
- flowyml/ui/frontend/src/app/tokens/page.jsx +62 -16
- flowyml/ui/frontend/src/components/AssetDetailsPanel.jsx +373 -0
- flowyml/ui/frontend/src/components/AssetLineageGraph.jsx +291 -0
- flowyml/ui/frontend/src/components/AssetStatsDashboard.jsx +302 -0
- flowyml/ui/frontend/src/components/AssetTreeHierarchy.jsx +477 -0
- flowyml/ui/frontend/src/components/ExperimentDetailsPanel.jsx +227 -0
- flowyml/ui/frontend/src/components/NavigationTree.jsx +401 -0
- flowyml/ui/frontend/src/components/PipelineDetailsPanel.jsx +239 -0
- flowyml/ui/frontend/src/components/PipelineGraph.jsx +67 -3
- flowyml/ui/frontend/src/components/ProjectSelector.jsx +115 -0
- flowyml/ui/frontend/src/components/RunDetailsPanel.jsx +298 -0
- flowyml/ui/frontend/src/components/header/Header.jsx +48 -1
- flowyml/ui/frontend/src/components/plugins/ZenMLIntegration.jsx +106 -0
- flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +52 -26
- flowyml/ui/frontend/src/components/ui/DataView.jsx +35 -17
- flowyml/ui/frontend/src/components/ui/ErrorBoundary.jsx +118 -0
- flowyml/ui/frontend/src/contexts/ProjectContext.jsx +2 -2
- flowyml/ui/frontend/src/contexts/ToastContext.jsx +116 -0
- flowyml/ui/frontend/src/layouts/MainLayout.jsx +5 -1
- flowyml/ui/frontend/src/router/index.jsx +4 -0
- flowyml/ui/frontend/src/utils/date.js +10 -0
- flowyml/ui/frontend/src/utils/downloads.js +11 -0
- flowyml/utils/config.py +6 -0
- flowyml/utils/stack_config.py +45 -3
- {flowyml-1.2.0.dist-info → flowyml-1.4.0.dist-info}/METADATA +44 -4
- flowyml-1.4.0.dist-info/RECORD +200 -0
- {flowyml-1.2.0.dist-info → flowyml-1.4.0.dist-info}/licenses/LICENSE +1 -1
- flowyml/ui/frontend/dist/assets/index-DFNQnrUj.js +0 -448
- flowyml/ui/frontend/dist/assets/index-pWI271rZ.css +0 -1
- flowyml-1.2.0.dist-info/RECORD +0 -159
- {flowyml-1.2.0.dist-info → flowyml-1.4.0.dist-info}/WHEEL +0 -0
- {flowyml-1.2.0.dist-info → flowyml-1.4.0.dist-info}/entry_points.txt +0 -0
flowyml/stacks/azure.py
ADDED
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
"""Azure Stack Components and Preset Stack."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import subprocess
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from flowyml.stacks.base import Stack
|
|
10
|
+
from flowyml.stacks.components import ArtifactStore, ContainerRegistry, DockerConfig, ResourceConfig
|
|
11
|
+
from flowyml.core.remote_orchestrator import RemoteOrchestrator
|
|
12
|
+
from flowyml.stacks.plugins import register_component
|
|
13
|
+
from flowyml.storage.metadata import SQLiteMetadataStore
|
|
14
|
+
from flowyml.core.submission_result import SubmissionResult
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@register_component(name="azure_blob")
|
|
18
|
+
class AzureBlobArtifactStore(ArtifactStore):
|
|
19
|
+
"""Artifact store backed by Azure Blob Storage."""
|
|
20
|
+
|
|
21
|
+
def __init__(
|
|
22
|
+
self,
|
|
23
|
+
name: str = "azure_blob",
|
|
24
|
+
account_url: str | None = None,
|
|
25
|
+
container_name: str | None = None,
|
|
26
|
+
credential: Any | None = None,
|
|
27
|
+
):
|
|
28
|
+
super().__init__(name)
|
|
29
|
+
self.account_url = account_url
|
|
30
|
+
self.container_name = container_name
|
|
31
|
+
self.credential = credential
|
|
32
|
+
|
|
33
|
+
def _client(self):
|
|
34
|
+
from azure.storage.blob import BlobServiceClient
|
|
35
|
+
|
|
36
|
+
credential = self.credential
|
|
37
|
+
if credential is None:
|
|
38
|
+
from azure.identity import DefaultAzureCredential
|
|
39
|
+
|
|
40
|
+
credential = DefaultAzureCredential(exclude_shared_token_cache_credential=True)
|
|
41
|
+
|
|
42
|
+
return BlobServiceClient(account_url=self.account_url, credential=credential)
|
|
43
|
+
|
|
44
|
+
def validate(self) -> bool:
|
|
45
|
+
if not self.account_url or not self.container_name:
|
|
46
|
+
raise ValueError("account_url and container_name are required for AzureBlobArtifactStore")
|
|
47
|
+
try:
|
|
48
|
+
client = self._client()
|
|
49
|
+
container_client = client.get_container_client(self.container_name)
|
|
50
|
+
container_client.get_container_properties()
|
|
51
|
+
except Exception as exc:
|
|
52
|
+
raise ValueError(f"Unable to access container '{self.container_name}': {exc}") from exc
|
|
53
|
+
return True
|
|
54
|
+
|
|
55
|
+
def save(self, artifact: Any, path: str) -> str:
|
|
56
|
+
blob_name = path.lstrip("/")
|
|
57
|
+
client = self._client().get_container_client(self.container_name)
|
|
58
|
+
if isinstance(artifact, (str, Path)) and Path(artifact).exists():
|
|
59
|
+
with open(Path(artifact), "rb") as f:
|
|
60
|
+
client.upload_blob(name=blob_name, data=f, overwrite=True)
|
|
61
|
+
else:
|
|
62
|
+
data = artifact if isinstance(artifact, bytes) else str(artifact).encode()
|
|
63
|
+
client.upload_blob(name=blob_name, data=data, overwrite=True)
|
|
64
|
+
return f"{self.account_url}/{self.container_name}/{blob_name}"
|
|
65
|
+
|
|
66
|
+
def load(self, path: str) -> bytes:
|
|
67
|
+
blob_name = path.lstrip("/")
|
|
68
|
+
client = self._client().get_blob_client(self.container_name, blob_name)
|
|
69
|
+
downloader = client.download_blob()
|
|
70
|
+
return downloader.readall()
|
|
71
|
+
|
|
72
|
+
def exists(self, path: str) -> bool:
|
|
73
|
+
blob_name = path.lstrip("/")
|
|
74
|
+
client = self._client().get_blob_client(self.container_name, blob_name)
|
|
75
|
+
return client.exists()
|
|
76
|
+
|
|
77
|
+
def to_dict(self) -> dict[str, Any]:
|
|
78
|
+
return {
|
|
79
|
+
"name": self.name,
|
|
80
|
+
"type": "azure_blob",
|
|
81
|
+
"account_url": self.account_url,
|
|
82
|
+
"container_name": self.container_name,
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@register_component(name="acr")
|
|
87
|
+
class ACRContainerRegistry(ContainerRegistry):
|
|
88
|
+
"""Azure Container Registry integration."""
|
|
89
|
+
|
|
90
|
+
def __init__(
|
|
91
|
+
self,
|
|
92
|
+
name: str = "acr",
|
|
93
|
+
registry_name: str | None = None,
|
|
94
|
+
login_server: str | None = None,
|
|
95
|
+
username: str | None = None,
|
|
96
|
+
password: str | None = None,
|
|
97
|
+
):
|
|
98
|
+
super().__init__(name)
|
|
99
|
+
self.registry_name = registry_name
|
|
100
|
+
self.login_server = login_server or (f"{registry_name}.azurecr.io" if registry_name else None)
|
|
101
|
+
self.username = username
|
|
102
|
+
self.password = password
|
|
103
|
+
|
|
104
|
+
def validate(self) -> bool:
|
|
105
|
+
if not self.registry_name and not self.login_server:
|
|
106
|
+
raise ValueError("registry_name or login_server is required for ACRContainerRegistry")
|
|
107
|
+
return True
|
|
108
|
+
|
|
109
|
+
def _login(self) -> None:
|
|
110
|
+
if self.username and self.password:
|
|
111
|
+
subprocess.run(
|
|
112
|
+
["docker", "login", self.login_server, "-u", self.username, "-p", self.password],
|
|
113
|
+
check=True,
|
|
114
|
+
)
|
|
115
|
+
else:
|
|
116
|
+
subprocess.run(["az", "acr", "login", "--name", self.registry_name], check=True)
|
|
117
|
+
|
|
118
|
+
def push_image(self, image_name: str, tag: str = "latest") -> str:
|
|
119
|
+
full_uri = self.get_image_uri(image_name, tag)
|
|
120
|
+
self._login()
|
|
121
|
+
subprocess.run(["docker", "tag", f"{image_name}:{tag}", full_uri], check=True)
|
|
122
|
+
subprocess.run(["docker", "push", full_uri], check=True)
|
|
123
|
+
return full_uri
|
|
124
|
+
|
|
125
|
+
def pull_image(self, image_name: str, tag: str = "latest") -> None:
|
|
126
|
+
full_uri = self.get_image_uri(image_name, tag)
|
|
127
|
+
self._login()
|
|
128
|
+
subprocess.run(["docker", "pull", full_uri], check=True)
|
|
129
|
+
|
|
130
|
+
def get_image_uri(self, image_name: str, tag: str = "latest") -> str:
|
|
131
|
+
return f"{self.login_server}/{image_name}:{tag}"
|
|
132
|
+
|
|
133
|
+
def to_dict(self) -> dict[str, Any]:
|
|
134
|
+
return {
|
|
135
|
+
"name": self.name,
|
|
136
|
+
"type": "acr",
|
|
137
|
+
"login_server": self.login_server,
|
|
138
|
+
"registry_name": self.registry_name,
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
@register_component(name="azure_ml")
|
|
143
|
+
class AzureMLOrchestrator(RemoteOrchestrator):
|
|
144
|
+
"""Submit pipeline runs to Azure ML managed compute."""
|
|
145
|
+
|
|
146
|
+
def __init__(
|
|
147
|
+
self,
|
|
148
|
+
name: str = "azure_ml",
|
|
149
|
+
subscription_id: str | None = None,
|
|
150
|
+
resource_group: str | None = None,
|
|
151
|
+
workspace_name: str | None = None,
|
|
152
|
+
compute: str | None = None,
|
|
153
|
+
experiment_name: str = "flowyml",
|
|
154
|
+
credential: Any | None = None,
|
|
155
|
+
):
|
|
156
|
+
super().__init__(name)
|
|
157
|
+
self.subscription_id = subscription_id
|
|
158
|
+
self.resource_group = resource_group
|
|
159
|
+
self.workspace_name = workspace_name
|
|
160
|
+
self.compute = compute
|
|
161
|
+
self.experiment_name = experiment_name
|
|
162
|
+
self.credential = credential
|
|
163
|
+
|
|
164
|
+
def _client(self):
|
|
165
|
+
from azure.ai.ml import MLClient
|
|
166
|
+
from azure.identity import DefaultAzureCredential
|
|
167
|
+
|
|
168
|
+
credential = self.credential or DefaultAzureCredential(exclude_shared_token_cache_credential=True)
|
|
169
|
+
return MLClient(
|
|
170
|
+
credential,
|
|
171
|
+
subscription_id=self.subscription_id,
|
|
172
|
+
resource_group_name=self.resource_group,
|
|
173
|
+
workspace_name=self.workspace_name,
|
|
174
|
+
)
|
|
175
|
+
|
|
176
|
+
def validate(self) -> bool:
|
|
177
|
+
if not all([self.subscription_id, self.resource_group, self.workspace_name, self.compute]):
|
|
178
|
+
raise ValueError(
|
|
179
|
+
"subscription_id, resource_group, workspace_name, and compute are required for AzureMLOrchestrator",
|
|
180
|
+
)
|
|
181
|
+
return True
|
|
182
|
+
|
|
183
|
+
def run_pipeline(
|
|
184
|
+
self,
|
|
185
|
+
pipeline: Any,
|
|
186
|
+
run_id: str,
|
|
187
|
+
resources: ResourceConfig | None = None,
|
|
188
|
+
docker_config: DockerConfig | None = None,
|
|
189
|
+
inputs: dict[str, Any] | None = None,
|
|
190
|
+
context: dict[str, Any] | None = None,
|
|
191
|
+
**kwargs,
|
|
192
|
+
) -> SubmissionResult:
|
|
193
|
+
"""Submit pipeline to Azure ML.
|
|
194
|
+
|
|
195
|
+
Returns:
|
|
196
|
+
SubmissionResult with job name and optional wait function.
|
|
197
|
+
"""
|
|
198
|
+
from azure.ai.ml import command
|
|
199
|
+
|
|
200
|
+
ml_client = self._client()
|
|
201
|
+
job_name = kwargs.get("job_name") or f"{pipeline.name}-{run_id[:8]}"
|
|
202
|
+
|
|
203
|
+
# Build command job
|
|
204
|
+
job = command(
|
|
205
|
+
code=".", # Use current directory or specify path
|
|
206
|
+
command="python -m flowyml.cli.run",
|
|
207
|
+
environment=docker_config.image if docker_config else self.environment_name,
|
|
208
|
+
compute=self.compute,
|
|
209
|
+
display_name=job_name,
|
|
210
|
+
experiment_name=self.experiment_name,
|
|
211
|
+
)
|
|
212
|
+
|
|
213
|
+
submitted = ml_client.jobs.create_or_update(job)
|
|
214
|
+
|
|
215
|
+
def wait_for_completion():
|
|
216
|
+
ml_client.jobs.stream(submitted.name)
|
|
217
|
+
|
|
218
|
+
return SubmissionResult(
|
|
219
|
+
job_id=submitted.name,
|
|
220
|
+
wait_for_completion=wait_for_completion,
|
|
221
|
+
metadata={"studio_url": submitted.studio_url},
|
|
222
|
+
)
|
|
223
|
+
|
|
224
|
+
def get_run_status(self, run_id: str) -> str:
|
|
225
|
+
client = self._client()
|
|
226
|
+
job = client.jobs.get(run_id)
|
|
227
|
+
return job.status
|
|
228
|
+
|
|
229
|
+
def to_dict(self) -> dict[str, Any]:
|
|
230
|
+
return {
|
|
231
|
+
"name": self.name,
|
|
232
|
+
"type": "azure_ml",
|
|
233
|
+
"subscription_id": self.subscription_id,
|
|
234
|
+
"resource_group": self.resource_group,
|
|
235
|
+
"workspace_name": self.workspace_name,
|
|
236
|
+
"compute": self.compute,
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
class AzureMLStack(Stack):
    """Managed Azure ML stack built from AzureML orchestrator, Blob storage, and ACR."""

    def __init__(
        self,
        name: str = "azure",
        subscription_id: str | None = None,
        resource_group: str | None = None,
        workspace_name: str | None = None,
        compute: str | None = None,
        account_url: str | None = None,
        container_name: str | None = None,
        registry_name: str | None = None,
        login_server: str | None = None,
        metadata_store: Any | None = None,
    ):
        # Assemble the three Azure-backed components; fall back to a local
        # SQLite metadata store when none is injected.
        orchestrator = AzureMLOrchestrator(
            subscription_id=subscription_id,
            resource_group=resource_group,
            workspace_name=workspace_name,
            compute=compute,
        )
        blob_store = AzureBlobArtifactStore(
            account_url=account_url,
            container_name=container_name,
        )
        registry = ACRContainerRegistry(
            registry_name=registry_name,
            login_server=login_server,
        )

        super().__init__(
            name=name,
            executor=None,
            artifact_store=blob_store,
            metadata_store=metadata_store if metadata_store is not None else SQLiteMetadataStore(),
            container_registry=registry,
            orchestrator=orchestrator,
        )

        self.subscription_id = subscription_id
        self.workspace_name = workspace_name

    def validate(self) -> bool:
        """Validate every component; each raises ValueError when misconfigured."""
        for component in (self.orchestrator, self.artifact_store, self.container_registry):
            component.validate()
        return True

    def to_dict(self) -> dict[str, Any]:
        """Serialize the stack and all of its components to a plain dict."""
        return {
            "name": self.name,
            "type": "azure",
            "subscription_id": self.subscription_id,
            "workspace_name": self.workspace_name,
            "orchestrator": self.orchestrator.to_dict(),
            "artifact_store": self.artifact_store.to_dict(),
            "container_registry": self.container_registry.to_dict(),
        }
|
flowyml/stacks/bridge.py
CHANGED
|
@@ -6,7 +6,7 @@ frameworks (ZenML, Airflow, Prefect, etc.) using rule-based adaptation.
|
|
|
6
6
|
|
|
7
7
|
import inspect
|
|
8
8
|
from dataclasses import dataclass, field
|
|
9
|
-
from typing import Any
|
|
9
|
+
from typing import Any
|
|
10
10
|
import logging
|
|
11
11
|
|
|
12
12
|
from flowyml.stacks.components import (
|
|
@@ -25,8 +25,8 @@ class AdaptationRule:
|
|
|
25
25
|
"""Rule for adapting an external component."""
|
|
26
26
|
|
|
27
27
|
# Matching criteria
|
|
28
|
-
source_type:
|
|
29
|
-
name_pattern:
|
|
28
|
+
source_type: str | None = None # e.g., "zenml.orchestrators.base.BaseOrchestrator"
|
|
29
|
+
name_pattern: str | None = None # e.g., ".*Orchestrator"
|
|
30
30
|
has_methods: list[str] = field(default_factory=list) # e.g., ["run", "prepare_pipeline"]
|
|
31
31
|
|
|
32
32
|
# Adaptation logic
|
|
@@ -49,7 +49,7 @@ class GenericBridge:
|
|
|
49
49
|
self,
|
|
50
50
|
external_class: Any,
|
|
51
51
|
name: str,
|
|
52
|
-
config:
|
|
52
|
+
config: dict[str, Any] | None = None,
|
|
53
53
|
) -> type[StackComponent]:
|
|
54
54
|
"""Dynamically create a wrapper class based on rules.
|
|
55
55
|
|
|
@@ -75,7 +75,7 @@ class GenericBridge:
|
|
|
75
75
|
else:
|
|
76
76
|
return self._create_generic_wrapper(external_class, name, component_type, rule)
|
|
77
77
|
|
|
78
|
-
def _find_matching_rule(self, external_class: Any) ->
|
|
78
|
+
def _find_matching_rule(self, external_class: Any) -> AdaptationRule | None:
|
|
79
79
|
"""Find the first rule that matches the external class."""
|
|
80
80
|
for rule in self.rules:
|
|
81
81
|
# Check source type
|
|
@@ -120,7 +120,7 @@ class GenericBridge:
|
|
|
120
120
|
self,
|
|
121
121
|
external_class: Any,
|
|
122
122
|
name: str,
|
|
123
|
-
rule:
|
|
123
|
+
rule: AdaptationRule | None,
|
|
124
124
|
) -> type[Orchestrator]:
|
|
125
125
|
"""Create a wrapper for an Orchestrator."""
|
|
126
126
|
|
|
@@ -174,7 +174,7 @@ class GenericBridge:
|
|
|
174
174
|
self,
|
|
175
175
|
external_class: Any,
|
|
176
176
|
name: str,
|
|
177
|
-
rule:
|
|
177
|
+
rule: AdaptationRule | None,
|
|
178
178
|
) -> type[ArtifactStore]:
|
|
179
179
|
"""Create a wrapper for an Artifact Store."""
|
|
180
180
|
|
|
@@ -216,7 +216,7 @@ class GenericBridge:
|
|
|
216
216
|
self,
|
|
217
217
|
external_class: Any,
|
|
218
218
|
name: str,
|
|
219
|
-
rule:
|
|
219
|
+
rule: AdaptationRule | None,
|
|
220
220
|
) -> type[ContainerRegistry]:
|
|
221
221
|
"""Create a wrapper for a Container Registry."""
|
|
222
222
|
|
|
@@ -259,7 +259,7 @@ class GenericBridge:
|
|
|
259
259
|
external_class: Any,
|
|
260
260
|
name: str,
|
|
261
261
|
comp_type: ComponentType,
|
|
262
|
-
rule:
|
|
262
|
+
rule: AdaptationRule | None,
|
|
263
263
|
) -> type[StackComponent]:
|
|
264
264
|
"""Create a generic wrapper."""
|
|
265
265
|
|
flowyml/stacks/components.py
CHANGED
|
@@ -99,8 +99,30 @@ class Orchestrator(StackComponent):
|
|
|
99
99
|
return ComponentType.ORCHESTRATOR
|
|
100
100
|
|
|
101
101
|
@abstractmethod
|
|
102
|
-
def run_pipeline(
|
|
103
|
-
|
|
102
|
+
def run_pipeline(
|
|
103
|
+
self,
|
|
104
|
+
pipeline: Any,
|
|
105
|
+
run_id: str,
|
|
106
|
+
resources: "ResourceConfig | None" = None,
|
|
107
|
+
docker_config: "DockerConfig | None" = None,
|
|
108
|
+
inputs: dict[str, Any] | None = None,
|
|
109
|
+
context: dict[str, Any] | None = None,
|
|
110
|
+
**kwargs,
|
|
111
|
+
) -> Any:
|
|
112
|
+
"""Run a pipeline on this orchestrator.
|
|
113
|
+
|
|
114
|
+
Args:
|
|
115
|
+
pipeline: The pipeline to run.
|
|
116
|
+
run_id: The unique run identifier.
|
|
117
|
+
resources: Resource configuration.
|
|
118
|
+
docker_config: Docker configuration.
|
|
119
|
+
inputs: Input data.
|
|
120
|
+
context: Context variables.
|
|
121
|
+
**kwargs: Additional arguments.
|
|
122
|
+
|
|
123
|
+
Returns:
|
|
124
|
+
The run result or job ID.
|
|
125
|
+
"""
|
|
104
126
|
pass
|
|
105
127
|
|
|
106
128
|
@abstractmethod
|
flowyml/stacks/gcp.py
CHANGED
|
@@ -6,17 +6,22 @@ on Google Cloud Platform using Vertex AI, Cloud Storage, and Container Registry.
|
|
|
6
6
|
|
|
7
7
|
from typing import Any
|
|
8
8
|
|
|
9
|
+
import subprocess
|
|
9
10
|
from flowyml.stacks.base import Stack
|
|
10
11
|
from flowyml.stacks.components import (
|
|
11
|
-
Orchestrator,
|
|
12
12
|
ArtifactStore,
|
|
13
13
|
ContainerRegistry,
|
|
14
14
|
ResourceConfig,
|
|
15
15
|
DockerConfig,
|
|
16
16
|
)
|
|
17
|
+
from flowyml.core.remote_orchestrator import RemoteOrchestrator
|
|
18
|
+
from flowyml.stacks.plugins import register_component
|
|
19
|
+
from flowyml.core.submission_result import SubmissionResult
|
|
20
|
+
from flowyml.core.execution_status import ExecutionStatus
|
|
17
21
|
|
|
18
22
|
|
|
19
|
-
|
|
23
|
+
@register_component(name="vertex_ai")
|
|
24
|
+
class VertexAIOrchestrator(RemoteOrchestrator):
|
|
20
25
|
"""Vertex AI orchestrator for running pipelines on Google Cloud.
|
|
21
26
|
|
|
22
27
|
This orchestrator submits pipeline jobs to Vertex AI Pipelines,
|
|
@@ -76,28 +81,35 @@ class VertexAIOrchestrator(Orchestrator):
|
|
|
76
81
|
def run_pipeline(
|
|
77
82
|
self,
|
|
78
83
|
pipeline: Any,
|
|
84
|
+
run_id: str,
|
|
79
85
|
resources: ResourceConfig | None = None,
|
|
80
86
|
docker_config: DockerConfig | None = None,
|
|
87
|
+
inputs: dict[str, Any] | None = None,
|
|
88
|
+
context: dict[str, Any] | None = None,
|
|
81
89
|
**kwargs,
|
|
82
|
-
) ->
|
|
90
|
+
) -> "SubmissionResult":
|
|
83
91
|
"""Run pipeline on Vertex AI.
|
|
84
92
|
|
|
85
93
|
Args:
|
|
86
94
|
pipeline: Pipeline to run
|
|
95
|
+
run_id: Run identifier
|
|
87
96
|
resources: Resource configuration
|
|
88
97
|
docker_config: Docker configuration
|
|
98
|
+
inputs: Input data
|
|
99
|
+
context: Context variables
|
|
89
100
|
**kwargs: Additional arguments
|
|
90
101
|
|
|
91
102
|
Returns:
|
|
92
|
-
|
|
103
|
+
SubmissionResult with job resource name
|
|
93
104
|
"""
|
|
94
105
|
from google.cloud import aiplatform
|
|
106
|
+
import time
|
|
95
107
|
|
|
96
108
|
# Initialize Vertex AI
|
|
97
109
|
aiplatform.init(project=self.project_id, location=self.region)
|
|
98
110
|
|
|
99
111
|
# Create custom job
|
|
100
|
-
job_display_name = f"{pipeline.name}-{
|
|
112
|
+
job_display_name = f"{pipeline.name}-{run_id[:8]}"
|
|
101
113
|
|
|
102
114
|
# Build worker pool specs
|
|
103
115
|
worker_pool_specs = self._build_worker_pool_specs(
|
|
@@ -114,16 +126,66 @@ class VertexAIOrchestrator(Orchestrator):
|
|
|
114
126
|
encryption_spec_key_name=self.encryption_key,
|
|
115
127
|
)
|
|
116
128
|
|
|
117
|
-
job
|
|
118
|
-
|
|
119
|
-
|
|
129
|
+
# Submit job asynchronously
|
|
130
|
+
job.submit()
|
|
131
|
+
|
|
132
|
+
job_id = job.resource_name
|
|
133
|
+
|
|
134
|
+
# Create wait function
|
|
135
|
+
def wait_for_completion():
|
|
136
|
+
"""Poll job status until completion."""
|
|
137
|
+
while True:
|
|
138
|
+
status = self.get_run_status(job_id)
|
|
139
|
+
if status.is_finished:
|
|
140
|
+
if not status.is_successful:
|
|
141
|
+
raise RuntimeError(f"Vertex AI job {job_id} failed with status: {status}")
|
|
142
|
+
break
|
|
143
|
+
time.sleep(15) # Poll every 15 seconds
|
|
144
|
+
|
|
145
|
+
return SubmissionResult(
|
|
146
|
+
job_id=job_id,
|
|
147
|
+
wait_for_completion=wait_for_completion,
|
|
148
|
+
metadata={
|
|
149
|
+
"platform": "vertex_ai",
|
|
150
|
+
"project": self.project_id,
|
|
151
|
+
"region": self.region,
|
|
152
|
+
"job_name": job_display_name,
|
|
153
|
+
},
|
|
154
|
+
)
|
|
120
155
|
|
|
121
|
-
def get_run_status(self,
|
|
156
|
+
def get_run_status(self, job_id: str) -> "ExecutionStatus":
|
|
122
157
|
"""Get status of a Vertex AI job."""
|
|
123
158
|
from google.cloud import aiplatform
|
|
124
159
|
|
|
125
|
-
|
|
126
|
-
|
|
160
|
+
try:
|
|
161
|
+
job = aiplatform.CustomJob(job_id)
|
|
162
|
+
state = job.state.name
|
|
163
|
+
|
|
164
|
+
# Map Vertex AI states to ExecutionStatus
|
|
165
|
+
status_map = {
|
|
166
|
+
"JOB_STATE_QUEUED": ExecutionStatus.PROVISIONING,
|
|
167
|
+
"JOB_STATE_PENDING": ExecutionStatus.PROVISIONING,
|
|
168
|
+
"JOB_STATE_RUNNING": ExecutionStatus.RUNNING,
|
|
169
|
+
"JOB_STATE_SUCCEEDED": ExecutionStatus.COMPLETED,
|
|
170
|
+
"JOB_STATE_FAILED": ExecutionStatus.FAILED,
|
|
171
|
+
"JOB_STATE_CANCELLING": ExecutionStatus.STOPPING,
|
|
172
|
+
"JOB_STATE_CANCELLED": ExecutionStatus.CANCELLED,
|
|
173
|
+
}
|
|
174
|
+
return status_map.get(state, ExecutionStatus.RUNNING)
|
|
175
|
+
except Exception as e:
|
|
176
|
+
print(f"Error fetching job status: {e}")
|
|
177
|
+
return ExecutionStatus.FAILED
|
|
178
|
+
|
|
179
|
+
def stop_run(self, job_id: str, graceful: bool = True) -> None:
|
|
180
|
+
"""Cancel a Vertex AI job."""
|
|
181
|
+
from google.cloud import aiplatform
|
|
182
|
+
|
|
183
|
+
try:
|
|
184
|
+
job = aiplatform.CustomJob(job_id)
|
|
185
|
+
job.cancel()
|
|
186
|
+
except Exception as e:
|
|
187
|
+
print(f"Error cancelling job {job_id}: {e}")
|
|
188
|
+
raise
|
|
127
189
|
|
|
128
190
|
def _build_worker_pool_specs(
|
|
129
191
|
self,
|
|
@@ -171,6 +233,7 @@ class VertexAIOrchestrator(Orchestrator):
|
|
|
171
233
|
}
|
|
172
234
|
|
|
173
235
|
|
|
236
|
+
@register_component(name="gcs")
|
|
174
237
|
class GCSArtifactStore(ArtifactStore):
|
|
175
238
|
"""Google Cloud Storage artifact store.
|
|
176
239
|
|
|
@@ -280,6 +343,7 @@ class GCSArtifactStore(ArtifactStore):
|
|
|
280
343
|
}
|
|
281
344
|
|
|
282
345
|
|
|
346
|
+
@register_component(name="gcr")
|
|
283
347
|
class GCRContainerRegistry(ContainerRegistry):
|
|
284
348
|
"""Google Container Registry integration.
|
|
285
349
|
|
|
@@ -478,6 +542,8 @@ class GCPStack(Stack):
|
|
|
478
542
|
|
|
479
543
|
self.project_id = project_id
|
|
480
544
|
self.region = region
|
|
545
|
+
self.vertex_endpoints = VertexEndpointManager(project_id=project_id, region=region)
|
|
546
|
+
self.cloud_run = CloudRunDeployer(project_id=project_id, region=region)
|
|
481
547
|
|
|
482
548
|
def validate(self) -> bool:
|
|
483
549
|
"""Validate all GCP stack components."""
|
|
@@ -497,3 +563,84 @@ class GCPStack(Stack):
|
|
|
497
563
|
"artifact_store": self.artifact_store.to_dict(),
|
|
498
564
|
"container_registry": self.container_registry.to_dict(),
|
|
499
565
|
}
|
|
566
|
+
|
|
567
|
+
|
|
568
|
+
class VertexEndpointManager:
|
|
569
|
+
"""Deploy trained models as Vertex AI endpoints."""
|
|
570
|
+
|
|
571
|
+
def __init__(self, project_id: str | None, region: str = "us-central1"):
|
|
572
|
+
self.project_id = project_id
|
|
573
|
+
self.region = region
|
|
574
|
+
|
|
575
|
+
def deploy_model(
|
|
576
|
+
self,
|
|
577
|
+
model_display_name: str,
|
|
578
|
+
artifact_uri: str,
|
|
579
|
+
serving_image: str,
|
|
580
|
+
endpoint_display_name: str | None = None,
|
|
581
|
+
machine_type: str = "n1-standard-4",
|
|
582
|
+
) -> str:
|
|
583
|
+
from google.cloud import aiplatform
|
|
584
|
+
|
|
585
|
+
aiplatform.init(project=self.project_id, location=self.region)
|
|
586
|
+
model = aiplatform.Model.upload(
|
|
587
|
+
display_name=model_display_name,
|
|
588
|
+
artifact_uri=artifact_uri,
|
|
589
|
+
serving_container_image_uri=serving_image,
|
|
590
|
+
)
|
|
591
|
+
endpoint = model.deploy(
|
|
592
|
+
machine_type=machine_type,
|
|
593
|
+
endpoint=aiplatform.Endpoint.create(
|
|
594
|
+
display_name=endpoint_display_name or f"{model_display_name}-endpoint",
|
|
595
|
+
),
|
|
596
|
+
)
|
|
597
|
+
return endpoint.resource_name
|
|
598
|
+
|
|
599
|
+
|
|
600
|
+
class CloudRunDeployer:
|
|
601
|
+
"""Deploy container images to Cloud Run."""
|
|
602
|
+
|
|
603
|
+
def __init__(self, project_id: str | None, region: str = "us-central1"):
|
|
604
|
+
self.project_id = project_id
|
|
605
|
+
self.region = region
|
|
606
|
+
|
|
607
|
+
def deploy_service(
|
|
608
|
+
self,
|
|
609
|
+
service_name: str,
|
|
610
|
+
image: str,
|
|
611
|
+
env: dict[str, str] | None = None,
|
|
612
|
+
allow_unauthenticated: bool = True,
|
|
613
|
+
) -> str:
|
|
614
|
+
command = [
|
|
615
|
+
"gcloud",
|
|
616
|
+
"run",
|
|
617
|
+
"deploy",
|
|
618
|
+
service_name,
|
|
619
|
+
f"--image={image}",
|
|
620
|
+
f"--region={self.region}",
|
|
621
|
+
f"--project={self.project_id}",
|
|
622
|
+
]
|
|
623
|
+
if allow_unauthenticated:
|
|
624
|
+
command.append("--allow-unauthenticated")
|
|
625
|
+
|
|
626
|
+
env = env or {}
|
|
627
|
+
for key, value in env.items():
|
|
628
|
+
command.append(f"--set-env-vars={key}={value}")
|
|
629
|
+
|
|
630
|
+
subprocess.run(command, check=True)
|
|
631
|
+
url_result = subprocess.run(
|
|
632
|
+
[
|
|
633
|
+
"gcloud",
|
|
634
|
+
"run",
|
|
635
|
+
"services",
|
|
636
|
+
"describe",
|
|
637
|
+
service_name,
|
|
638
|
+
f"--region={self.region}",
|
|
639
|
+
f"--project={self.project_id}",
|
|
640
|
+
"--format=value(status.url)",
|
|
641
|
+
],
|
|
642
|
+
check=True,
|
|
643
|
+
capture_output=True,
|
|
644
|
+
text=True,
|
|
645
|
+
)
|
|
646
|
+
return url_result.stdout.strip()
|
flowyml/stacks/local.py
CHANGED
|
@@ -46,12 +46,17 @@ class LocalStack(Stack):
|
|
|
46
46
|
artifact_store = LocalArtifactStore(artifact_path)
|
|
47
47
|
metadata_store = SQLiteMetadataStore(metadata_path)
|
|
48
48
|
|
|
49
|
+
from flowyml.core.orchestrator import LocalOrchestrator
|
|
50
|
+
|
|
51
|
+
orchestrator = LocalOrchestrator()
|
|
52
|
+
|
|
49
53
|
# Initialize base stack
|
|
50
54
|
super().__init__(
|
|
51
55
|
name=name,
|
|
52
56
|
executor=executor,
|
|
53
57
|
artifact_store=artifact_store,
|
|
54
58
|
metadata_store=metadata_store,
|
|
59
|
+
orchestrator=orchestrator,
|
|
55
60
|
)
|
|
56
61
|
|
|
57
62
|
# Ensure directories exist
|
flowyml/stacks/plugins.py
CHANGED
|
@@ -14,7 +14,7 @@ import inspect
|
|
|
14
14
|
import subprocess
|
|
15
15
|
import sys
|
|
16
16
|
from dataclasses import dataclass, field
|
|
17
|
-
from typing import Any,
|
|
17
|
+
from typing import Any, Protocol, runtime_checkable
|
|
18
18
|
|
|
19
19
|
from flowyml.stacks.components import (
|
|
20
20
|
StackComponent,
|
|
@@ -48,7 +48,7 @@ class PluginBridge(Protocol):
|
|
|
48
48
|
self,
|
|
49
49
|
component_class: Any,
|
|
50
50
|
name: str,
|
|
51
|
-
config:
|
|
51
|
+
config: dict[str, Any] | None = None,
|
|
52
52
|
) -> type[StackComponent]:
|
|
53
53
|
"""Wrap an external component class into a flowyml component."""
|
|
54
54
|
...
|