flowyml 1.1.0__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. flowyml/__init__.py +3 -0
  2. flowyml/assets/base.py +10 -0
  3. flowyml/assets/metrics.py +6 -0
  4. flowyml/cli/main.py +108 -2
  5. flowyml/cli/run.py +9 -2
  6. flowyml/core/execution_status.py +52 -0
  7. flowyml/core/hooks.py +106 -0
  8. flowyml/core/observability.py +210 -0
  9. flowyml/core/orchestrator.py +274 -0
  10. flowyml/core/pipeline.py +193 -231
  11. flowyml/core/project.py +34 -2
  12. flowyml/core/remote_orchestrator.py +109 -0
  13. flowyml/core/resources.py +22 -5
  14. flowyml/core/retry_policy.py +80 -0
  15. flowyml/core/step.py +18 -1
  16. flowyml/core/submission_result.py +53 -0
  17. flowyml/core/versioning.py +2 -2
  18. flowyml/integrations/keras.py +95 -22
  19. flowyml/monitoring/alerts.py +2 -2
  20. flowyml/stacks/__init__.py +15 -0
  21. flowyml/stacks/aws.py +599 -0
  22. flowyml/stacks/azure.py +295 -0
  23. flowyml/stacks/components.py +24 -2
  24. flowyml/stacks/gcp.py +158 -11
  25. flowyml/stacks/local.py +5 -0
  26. flowyml/storage/artifacts.py +15 -5
  27. flowyml/storage/materializers/__init__.py +2 -0
  28. flowyml/storage/materializers/cloudpickle.py +74 -0
  29. flowyml/storage/metadata.py +166 -5
  30. flowyml/ui/backend/main.py +41 -1
  31. flowyml/ui/backend/routers/assets.py +356 -15
  32. flowyml/ui/backend/routers/client.py +46 -0
  33. flowyml/ui/backend/routers/execution.py +13 -2
  34. flowyml/ui/backend/routers/experiments.py +48 -12
  35. flowyml/ui/backend/routers/metrics.py +213 -0
  36. flowyml/ui/backend/routers/pipelines.py +63 -7
  37. flowyml/ui/backend/routers/projects.py +33 -7
  38. flowyml/ui/backend/routers/runs.py +150 -8
  39. flowyml/ui/frontend/dist/assets/index-DcYwrn2j.css +1 -0
  40. flowyml/ui/frontend/dist/assets/index-Dlz_ygOL.js +592 -0
  41. flowyml/ui/frontend/dist/index.html +2 -2
  42. flowyml/ui/frontend/src/App.jsx +4 -1
  43. flowyml/ui/frontend/src/app/assets/page.jsx +260 -230
  44. flowyml/ui/frontend/src/app/dashboard/page.jsx +38 -7
  45. flowyml/ui/frontend/src/app/experiments/page.jsx +61 -314
  46. flowyml/ui/frontend/src/app/observability/page.jsx +277 -0
  47. flowyml/ui/frontend/src/app/pipelines/page.jsx +79 -402
  48. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectArtifactsList.jsx +151 -0
  49. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectExperimentsList.jsx +145 -0
  50. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectHeader.jsx +45 -0
  51. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectHierarchy.jsx +467 -0
  52. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectMetricsPanel.jsx +253 -0
  53. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectPipelinesList.jsx +105 -0
  54. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectRelations.jsx +189 -0
  55. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectRunsList.jsx +136 -0
  56. flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectTabs.jsx +95 -0
  57. flowyml/ui/frontend/src/app/projects/[projectId]/page.jsx +326 -0
  58. flowyml/ui/frontend/src/app/projects/page.jsx +13 -3
  59. flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +79 -10
  60. flowyml/ui/frontend/src/app/runs/page.jsx +82 -424
  61. flowyml/ui/frontend/src/app/settings/page.jsx +1 -0
  62. flowyml/ui/frontend/src/app/tokens/page.jsx +62 -16
  63. flowyml/ui/frontend/src/components/AssetDetailsPanel.jsx +373 -0
  64. flowyml/ui/frontend/src/components/AssetLineageGraph.jsx +291 -0
  65. flowyml/ui/frontend/src/components/AssetStatsDashboard.jsx +302 -0
  66. flowyml/ui/frontend/src/components/AssetTreeHierarchy.jsx +477 -0
  67. flowyml/ui/frontend/src/components/ExperimentDetailsPanel.jsx +227 -0
  68. flowyml/ui/frontend/src/components/NavigationTree.jsx +401 -0
  69. flowyml/ui/frontend/src/components/PipelineDetailsPanel.jsx +239 -0
  70. flowyml/ui/frontend/src/components/PipelineGraph.jsx +67 -3
  71. flowyml/ui/frontend/src/components/ProjectSelector.jsx +115 -0
  72. flowyml/ui/frontend/src/components/RunDetailsPanel.jsx +298 -0
  73. flowyml/ui/frontend/src/components/header/Header.jsx +48 -1
  74. flowyml/ui/frontend/src/components/plugins/ZenMLIntegration.jsx +106 -0
  75. flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +52 -26
  76. flowyml/ui/frontend/src/components/ui/DataView.jsx +35 -17
  77. flowyml/ui/frontend/src/components/ui/ErrorBoundary.jsx +118 -0
  78. flowyml/ui/frontend/src/contexts/ProjectContext.jsx +2 -2
  79. flowyml/ui/frontend/src/contexts/ToastContext.jsx +116 -0
  80. flowyml/ui/frontend/src/layouts/MainLayout.jsx +5 -1
  81. flowyml/ui/frontend/src/router/index.jsx +4 -0
  82. flowyml/ui/frontend/src/utils/date.js +10 -0
  83. flowyml/ui/frontend/src/utils/downloads.js +11 -0
  84. flowyml/utils/config.py +6 -0
  85. flowyml/utils/stack_config.py +45 -3
  86. {flowyml-1.1.0.dist-info → flowyml-1.3.0.dist-info}/METADATA +113 -12
  87. {flowyml-1.1.0.dist-info → flowyml-1.3.0.dist-info}/RECORD +90 -53
  88. {flowyml-1.1.0.dist-info → flowyml-1.3.0.dist-info}/licenses/LICENSE +1 -1
  89. flowyml/ui/frontend/dist/assets/index-DFNQnrUj.js +0 -448
  90. flowyml/ui/frontend/dist/assets/index-pWI271rZ.css +0 -1
  91. {flowyml-1.1.0.dist-info → flowyml-1.3.0.dist-info}/WHEEL +0 -0
  92. {flowyml-1.1.0.dist-info → flowyml-1.3.0.dist-info}/entry_points.txt +0 -0
flowyml/stacks/azure.py ADDED
@@ -0,0 +1,295 @@
+"""Azure Stack Components and Preset Stack."""
+
+from __future__ import annotations
+
+import subprocess
+from pathlib import Path
+from typing import Any
+
+from flowyml.stacks.base import Stack
+from flowyml.stacks.components import ArtifactStore, ContainerRegistry, DockerConfig, ResourceConfig
+from flowyml.core.remote_orchestrator import RemoteOrchestrator
+from flowyml.stacks.plugins import register_component
+from flowyml.storage.metadata import SQLiteMetadataStore
+from flowyml.core.submission_result import SubmissionResult
+
+
+@register_component(name="azure_blob")
+class AzureBlobArtifactStore(ArtifactStore):
+    """Artifact store backed by Azure Blob Storage."""
+
+    def __init__(
+        self,
+        name: str = "azure_blob",
+        account_url: str | None = None,
+        container_name: str | None = None,
+        credential: Any | None = None,
+    ):
+        super().__init__(name)
+        self.account_url = account_url
+        self.container_name = container_name
+        self.credential = credential
+
+    def _client(self):
+        from azure.storage.blob import BlobServiceClient
+
+        credential = self.credential
+        if credential is None:
+            from azure.identity import DefaultAzureCredential
+
+            credential = DefaultAzureCredential(exclude_shared_token_cache_credential=True)
+
+        return BlobServiceClient(account_url=self.account_url, credential=credential)
+
+    def validate(self) -> bool:
+        if not self.account_url or not self.container_name:
+            raise ValueError("account_url and container_name are required for AzureBlobArtifactStore")
+        try:
+            client = self._client()
+            container_client = client.get_container_client(self.container_name)
+            container_client.get_container_properties()
+        except Exception as exc:
+            raise ValueError(f"Unable to access container '{self.container_name}': {exc}") from exc
+        return True
+
+    def save(self, artifact: Any, path: str) -> str:
+        blob_name = path.lstrip("/")
+        client = self._client().get_container_client(self.container_name)
+        if isinstance(artifact, (str, Path)) and Path(artifact).exists():
+            with open(Path(artifact), "rb") as f:
+                client.upload_blob(name=blob_name, data=f, overwrite=True)
+        else:
+            data = artifact if isinstance(artifact, bytes) else str(artifact).encode()
+            client.upload_blob(name=blob_name, data=data, overwrite=True)
+        return f"{self.account_url}/{self.container_name}/{blob_name}"
+
+    def load(self, path: str) -> bytes:
+        blob_name = path.lstrip("/")
+        client = self._client().get_blob_client(self.container_name, blob_name)
+        downloader = client.download_blob()
+        return downloader.readall()
+
+    def exists(self, path: str) -> bool:
+        blob_name = path.lstrip("/")
+        client = self._client().get_blob_client(self.container_name, blob_name)
+        return client.exists()
+
+    def to_dict(self) -> dict[str, Any]:
+        return {
+            "name": self.name,
+            "type": "azure_blob",
+            "account_url": self.account_url,
+            "container_name": self.container_name,
+        }
+
+
+@register_component(name="acr")
+class ACRContainerRegistry(ContainerRegistry):
+    """Azure Container Registry integration."""
+
+    def __init__(
+        self,
+        name: str = "acr",
+        registry_name: str | None = None,
+        login_server: str | None = None,
+        username: str | None = None,
+        password: str | None = None,
+    ):
+        super().__init__(name)
+        self.registry_name = registry_name
+        self.login_server = login_server or (f"{registry_name}.azurecr.io" if registry_name else None)
+        self.username = username
+        self.password = password
+
+    def validate(self) -> bool:
+        if not self.registry_name and not self.login_server:
+            raise ValueError("registry_name or login_server is required for ACRContainerRegistry")
+        return True
+
+    def _login(self) -> None:
+        if self.username and self.password:
+            subprocess.run(
+                ["docker", "login", self.login_server, "-u", self.username, "-p", self.password],
+                check=True,
+            )
+        else:
+            subprocess.run(["az", "acr", "login", "--name", self.registry_name], check=True)
+
+    def push_image(self, image_name: str, tag: str = "latest") -> str:
+        full_uri = self.get_image_uri(image_name, tag)
+        self._login()
+        subprocess.run(["docker", "tag", f"{image_name}:{tag}", full_uri], check=True)
+        subprocess.run(["docker", "push", full_uri], check=True)
+        return full_uri
+
+    def pull_image(self, image_name: str, tag: str = "latest") -> None:
+        full_uri = self.get_image_uri(image_name, tag)
+        self._login()
+        subprocess.run(["docker", "pull", full_uri], check=True)
+
+    def get_image_uri(self, image_name: str, tag: str = "latest") -> str:
+        return f"{self.login_server}/{image_name}:{tag}"
+
+    def to_dict(self) -> dict[str, Any]:
+        return {
+            "name": self.name,
+            "type": "acr",
+            "login_server": self.login_server,
+            "registry_name": self.registry_name,
+        }
+
+
+@register_component(name="azure_ml")
+class AzureMLOrchestrator(RemoteOrchestrator):
+    """Submit pipeline runs to Azure ML managed compute."""
+
+    def __init__(
+        self,
+        name: str = "azure_ml",
+        subscription_id: str | None = None,
+        resource_group: str | None = None,
+        workspace_name: str | None = None,
+        compute: str | None = None,
+        experiment_name: str = "flowyml",
+        credential: Any | None = None,
+    ):
+        super().__init__(name)
+        self.subscription_id = subscription_id
+        self.resource_group = resource_group
+        self.workspace_name = workspace_name
+        self.compute = compute
+        self.experiment_name = experiment_name
+        self.credential = credential
+
+    def _client(self):
+        from azure.ai.ml import MLClient
+        from azure.identity import DefaultAzureCredential
+
+        credential = self.credential or DefaultAzureCredential(exclude_shared_token_cache_credential=True)
+        return MLClient(
+            credential,
+            subscription_id=self.subscription_id,
+            resource_group_name=self.resource_group,
+            workspace_name=self.workspace_name,
+        )
+
+    def validate(self) -> bool:
+        if not all([self.subscription_id, self.resource_group, self.workspace_name, self.compute]):
+            raise ValueError(
+                "subscription_id, resource_group, workspace_name, and compute are required for AzureMLOrchestrator",
+            )
+        return True
+
+    def run_pipeline(
+        self,
+        pipeline: Any,
+        run_id: str,
+        resources: ResourceConfig | None = None,
+        docker_config: DockerConfig | None = None,
+        inputs: dict[str, Any] | None = None,
+        context: dict[str, Any] | None = None,
+        **kwargs,
+    ) -> SubmissionResult:
+        """Submit pipeline to Azure ML.
+
+        Returns:
+            SubmissionResult with job name and optional wait function.
+        """
+        from azure.ai.ml import command
+
+        ml_client = self._client()
+        job_name = kwargs.get("job_name") or f"{pipeline.name}-{run_id[:8]}"
+
+        # Build command job
+        job = command(
+            code=".",  # Use current directory or specify path
+            command="python -m flowyml.cli.run",
+            environment=docker_config.image if docker_config else self.environment_name,
+            compute=self.compute,
+            display_name=job_name,
+            experiment_name=self.experiment_name,
+        )
+
+        submitted = ml_client.jobs.create_or_update(job)
+
+        def wait_for_completion():
+            ml_client.jobs.stream(submitted.name)
+
+        return SubmissionResult(
+            job_id=submitted.name,
+            wait_for_completion=wait_for_completion,
+            metadata={"studio_url": submitted.studio_url},
+        )
+
+    def get_run_status(self, run_id: str) -> str:
+        client = self._client()
+        job = client.jobs.get(run_id)
+        return job.status
+
+    def to_dict(self) -> dict[str, Any]:
+        return {
+            "name": self.name,
+            "type": "azure_ml",
+            "subscription_id": self.subscription_id,
+            "resource_group": self.resource_group,
+            "workspace_name": self.workspace_name,
+            "compute": self.compute,
+        }
+
+
+class AzureMLStack(Stack):
+    """Managed Azure ML stack built from the Azure ML orchestrator, Blob Storage, and ACR."""
+
+    def __init__(
+        self,
+        name: str = "azure",
+        subscription_id: str | None = None,
+        resource_group: str | None = None,
+        workspace_name: str | None = None,
+        compute: str | None = None,
+        account_url: str | None = None,
+        container_name: str | None = None,
+        registry_name: str | None = None,
+        login_server: str | None = None,
+        metadata_store: Any | None = None,
+    ):
+        orchestrator = AzureMLOrchestrator(
+            subscription_id=subscription_id,
+            resource_group=resource_group,
+            workspace_name=workspace_name,
+            compute=compute,
+        )
+        artifact_store = AzureBlobArtifactStore(account_url=account_url, container_name=container_name)
+        container_registry = ACRContainerRegistry(registry_name=registry_name, login_server=login_server)
+
+        if metadata_store is None:
+            metadata_store = SQLiteMetadataStore()
+
+        super().__init__(
+            name=name,
+            executor=None,
+            artifact_store=artifact_store,
+            metadata_store=metadata_store,
+            container_registry=container_registry,
+            orchestrator=orchestrator,
+        )
+
+        self.subscription_id = subscription_id
+        self.workspace_name = workspace_name
+
+    def validate(self) -> bool:
+        self.orchestrator.validate()
+        self.artifact_store.validate()
+        self.container_registry.validate()
+        return True
+
+    def to_dict(self) -> dict[str, Any]:
+        return {
+            "name": self.name,
+            "type": "azure",
+            "subscription_id": self.subscription_id,
+            "workspace_name": self.workspace_name,
+            "orchestrator": self.orchestrator.to_dict(),
+            "artifact_store": self.artifact_store.to_dict(),
+            "container_registry": self.container_registry.to_dict(),
+        }
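For orientation, a minimal usage sketch of the new preset stack. The keyword arguments mirror `AzureMLStack.__init__` above; every resource name below is a placeholder, not a value shipped with the package:

    # Hypothetical wiring of the Azure preset stack (all names are placeholders).
    from flowyml.stacks.azure import AzureMLStack

    stack = AzureMLStack(
        subscription_id="00000000-0000-0000-0000-000000000000",
        resource_group="ml-rg",
        workspace_name="ml-workspace",
        compute="cpu-cluster",
        account_url="https://mystorageacct.blob.core.windows.net",
        container_name="flowyml-artifacts",
        registry_name="myregistry",
    )
    stack.validate()  # each component raises ValueError if misconfigured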
flowyml/stacks/components.py CHANGED
@@ -99,8 +99,30 @@ class Orchestrator(StackComponent):
         return ComponentType.ORCHESTRATOR
 
     @abstractmethod
-    def run_pipeline(self, pipeline: Any, **kwargs) -> Any:
-        """Run a pipeline on this orchestrator."""
+    def run_pipeline(
+        self,
+        pipeline: Any,
+        run_id: str,
+        resources: "ResourceConfig | None" = None,
+        docker_config: "DockerConfig | None" = None,
+        inputs: dict[str, Any] | None = None,
+        context: dict[str, Any] | None = None,
+        **kwargs,
+    ) -> Any:
+        """Run a pipeline on this orchestrator.
+
+        Args:
+            pipeline: The pipeline to run.
+            run_id: The unique run identifier.
+            resources: Resource configuration.
+            docker_config: Docker configuration.
+            inputs: Input data.
+            context: Context variables.
+            **kwargs: Additional arguments.
+
+        Returns:
+            The run result or job ID.
+        """
         pass
 
     @abstractmethod
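Because the abstract signature widened, any third-party orchestrator must now accept `run_id` plus the optional resource, docker, input, and context parameters. A minimal conforming subclass might look like the following sketch (`EchoOrchestrator` is invented for illustration; the full set of remaining abstract methods is not visible in this diff):

    from typing import Any

    from flowyml.stacks.components import DockerConfig, Orchestrator, ResourceConfig

    class EchoOrchestrator(Orchestrator):
        """Toy orchestrator that satisfies the new run_pipeline contract."""

        def run_pipeline(
            self,
            pipeline: Any,
            run_id: str,
            resources: ResourceConfig | None = None,
            docker_config: DockerConfig | None = None,
            inputs: dict[str, Any] | None = None,
            context: dict[str, Any] | None = None,
            **kwargs,
        ) -> str:
            # A real orchestrator would schedule the pipeline here.
            print(f"would run {pipeline.name} as run {run_id}")
            return run_id

        def get_run_status(self, run_id: str) -> str:
            # Assumed second abstract method, by analogy with the cloud orchestrators.
            return "completed"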
flowyml/stacks/gcp.py CHANGED
@@ -6,17 +6,22 @@ on Google Cloud Platform using Vertex AI, Cloud Storage, and Container Registry.
 
 from typing import Any
 
+import subprocess
 from flowyml.stacks.base import Stack
 from flowyml.stacks.components import (
-    Orchestrator,
     ArtifactStore,
     ContainerRegistry,
     ResourceConfig,
     DockerConfig,
 )
+from flowyml.core.remote_orchestrator import RemoteOrchestrator
+from flowyml.stacks.plugins import register_component
+from flowyml.core.submission_result import SubmissionResult
+from flowyml.core.execution_status import ExecutionStatus
 
 
-class VertexAIOrchestrator(Orchestrator):
+@register_component(name="vertex_ai")
+class VertexAIOrchestrator(RemoteOrchestrator):
     """Vertex AI orchestrator for running pipelines on Google Cloud.
 
     This orchestrator submits pipeline jobs to Vertex AI Pipelines,
@@ -76,28 +81,35 @@ class VertexAIOrchestrator(Orchestrator):
     def run_pipeline(
         self,
         pipeline: Any,
+        run_id: str,
         resources: ResourceConfig | None = None,
         docker_config: DockerConfig | None = None,
+        inputs: dict[str, Any] | None = None,
+        context: dict[str, Any] | None = None,
         **kwargs,
-    ) -> str:
+    ) -> "SubmissionResult":
         """Run pipeline on Vertex AI.
 
         Args:
             pipeline: Pipeline to run
+            run_id: Run identifier
             resources: Resource configuration
             docker_config: Docker configuration
+            inputs: Input data
+            context: Context variables
             **kwargs: Additional arguments
 
         Returns:
-            Job ID
+            SubmissionResult with job resource name
         """
         from google.cloud import aiplatform
+        import time
 
         # Initialize Vertex AI
         aiplatform.init(project=self.project_id, location=self.region)
 
         # Create custom job
-        job_display_name = f"{pipeline.name}-{pipeline.run_id}"
+        job_display_name = f"{pipeline.name}-{run_id[:8]}"
 
         # Build worker pool specs
         worker_pool_specs = self._build_worker_pool_specs(
@@ -114,16 +126,66 @@ class VertexAIOrchestrator(Orchestrator):
             encryption_spec_key_name=self.encryption_key,
         )
 
-        job.run(sync=False)
-
-        return job.resource_name
+        # Submit job asynchronously
+        job.submit()
+
+        job_id = job.resource_name
+
+        # Create wait function
+        def wait_for_completion():
+            """Poll job status until completion."""
+            while True:
+                status = self.get_run_status(job_id)
+                if status.is_finished:
+                    if not status.is_successful:
+                        raise RuntimeError(f"Vertex AI job {job_id} failed with status: {status}")
+                    break
+                time.sleep(15)  # Poll every 15 seconds
+
+        return SubmissionResult(
+            job_id=job_id,
+            wait_for_completion=wait_for_completion,
+            metadata={
+                "platform": "vertex_ai",
+                "project": self.project_id,
+                "region": self.region,
+                "job_name": job_display_name,
+            },
+        )
 
-    def get_run_status(self, run_id: str) -> str:
+    def get_run_status(self, job_id: str) -> "ExecutionStatus":
         """Get status of a Vertex AI job."""
         from google.cloud import aiplatform
 
-        job = aiplatform.CustomJob(run_id)
-        return job.state.name
+        try:
+            job = aiplatform.CustomJob(job_id)
+            state = job.state.name
+
+            # Map Vertex AI states to ExecutionStatus
+            status_map = {
+                "JOB_STATE_QUEUED": ExecutionStatus.PROVISIONING,
+                "JOB_STATE_PENDING": ExecutionStatus.PROVISIONING,
+                "JOB_STATE_RUNNING": ExecutionStatus.RUNNING,
+                "JOB_STATE_SUCCEEDED": ExecutionStatus.COMPLETED,
+                "JOB_STATE_FAILED": ExecutionStatus.FAILED,
+                "JOB_STATE_CANCELLING": ExecutionStatus.STOPPING,
+                "JOB_STATE_CANCELLED": ExecutionStatus.CANCELLED,
+            }
+            return status_map.get(state, ExecutionStatus.RUNNING)
+        except Exception as e:
+            print(f"Error fetching job status: {e}")
+            return ExecutionStatus.FAILED
+
+    def stop_run(self, job_id: str, graceful: bool = True) -> None:
+        """Cancel a Vertex AI job."""
+        from google.cloud import aiplatform
+
+        try:
+            job = aiplatform.CustomJob(job_id)
+            job.cancel()
+        except Exception as e:
+            print(f"Error cancelling job {job_id}: {e}")
+            raise
 
     def _build_worker_pool_specs(
         self,
@@ -171,6 +233,7 @@ class VertexAIOrchestrator(Orchestrator):
         }
 
 
+@register_component(name="gcs")
 class GCSArtifactStore(ArtifactStore):
     """Google Cloud Storage artifact store.
 
@@ -280,6 +343,7 @@ class GCSArtifactStore(ArtifactStore):
         }
 
 
+@register_component(name="gcr")
 class GCRContainerRegistry(ContainerRegistry):
     """Google Container Registry integration.
 
@@ -478,6 +542,8 @@ class GCPStack(Stack):
 
         self.project_id = project_id
         self.region = region
+        self.vertex_endpoints = VertexEndpointManager(project_id=project_id, region=region)
+        self.cloud_run = CloudRunDeployer(project_id=project_id, region=region)
 
     def validate(self) -> bool:
         """Validate all GCP stack components."""
@@ -497,3 +563,84 @@
             "artifact_store": self.artifact_store.to_dict(),
             "container_registry": self.container_registry.to_dict(),
         }
+
+
+class VertexEndpointManager:
+    """Deploy trained models as Vertex AI endpoints."""
+
+    def __init__(self, project_id: str | None, region: str = "us-central1"):
+        self.project_id = project_id
+        self.region = region
+
+    def deploy_model(
+        self,
+        model_display_name: str,
+        artifact_uri: str,
+        serving_image: str,
+        endpoint_display_name: str | None = None,
+        machine_type: str = "n1-standard-4",
+    ) -> str:
+        from google.cloud import aiplatform
+
+        aiplatform.init(project=self.project_id, location=self.region)
+        model = aiplatform.Model.upload(
+            display_name=model_display_name,
+            artifact_uri=artifact_uri,
+            serving_container_image_uri=serving_image,
+        )
+        endpoint = model.deploy(
+            machine_type=machine_type,
+            endpoint=aiplatform.Endpoint.create(
+                display_name=endpoint_display_name or f"{model_display_name}-endpoint",
+            ),
+        )
+        return endpoint.resource_name
+
+
+class CloudRunDeployer:
+    """Deploy container images to Cloud Run."""
+
+    def __init__(self, project_id: str | None, region: str = "us-central1"):
+        self.project_id = project_id
+        self.region = region
+
+    def deploy_service(
+        self,
+        service_name: str,
+        image: str,
+        env: dict[str, str] | None = None,
+        allow_unauthenticated: bool = True,
+    ) -> str:
+        command = [
+            "gcloud",
+            "run",
+            "deploy",
+            service_name,
+            f"--image={image}",
+            f"--region={self.region}",
+            f"--project={self.project_id}",
+        ]
+        if allow_unauthenticated:
+            command.append("--allow-unauthenticated")
+
+        env = env or {}
+        for key, value in env.items():
+            command.append(f"--set-env-vars={key}={value}")
+
+        subprocess.run(command, check=True)
+        url_result = subprocess.run(
+            [
+                "gcloud",
+                "run",
+                "services",
+                "describe",
+                service_name,
+                f"--region={self.region}",
+                f"--project={self.project_id}",
+                "--format=value(status.url)",
+            ],
+            check=True,
+            capture_output=True,
+            text=True,
+        )
+        return url_result.stdout.strip()
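Taken together, the Vertex AI orchestrator now follows the same submit-then-wait contract as the Azure one. A rough usage sketch (the constructor keywords are inferred from the attributes referenced above; `pipeline` and the run id are placeholders):

    from flowyml.stacks.gcp import VertexAIOrchestrator

    orch = VertexAIOrchestrator(project_id="my-gcp-project", region="us-central1")

    result = orch.run_pipeline(pipeline, run_id="3f9c2e7a0b1d")  # returns a SubmissionResult
    print("submitted:", result.job_id)    # Vertex AI job resource name
    print(result.metadata["job_name"])    # f"{pipeline.name}-{run_id[:8]}"

    result.wait_for_completion()          # polls get_run_status() every 15 s;
                                          # raises RuntimeError if the job fails
    # orch.stop_run(result.job_id)        # would cancel the job instead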
flowyml/stacks/local.py CHANGED
@@ -46,12 +46,17 @@ class LocalStack(Stack):
         artifact_store = LocalArtifactStore(artifact_path)
         metadata_store = SQLiteMetadataStore(metadata_path)
 
+        from flowyml.core.orchestrator import LocalOrchestrator
+
+        orchestrator = LocalOrchestrator()
+
         # Initialize base stack
         super().__init__(
             name=name,
             executor=executor,
             artifact_store=artifact_store,
             metadata_store=metadata_store,
+            orchestrator=orchestrator,
         )
 
         # Ensure directories exist
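A default `LocalStack` therefore always carries an orchestrator now instead of leaving the slot empty. A quick check, assuming the constructor's defaults suffice:

    from flowyml.stacks.local import LocalStack

    stack = LocalStack()
    print(type(stack.orchestrator).__name__)  # "LocalOrchestrator", wired automatically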
flowyml/storage/artifacts.py CHANGED
@@ -215,7 +215,7 @@ class LocalArtifactStore(ArtifactStore):
         from datetime import datetime
         from flowyml.storage.materializers.base import get_materializer
         import shutil
-        import pickle
+        import cloudpickle
         import json
 
         date_str = datetime.now().strftime("%Y-%m-%d")
@@ -236,11 +236,21 @@ class LocalArtifactStore(ArtifactStore):
         if materializer:
             materializer.save(obj, full_path)
         else:
-            # Fallback to pickle
-            with open(full_path / "data.pkl", "wb") as f:
-                pickle.dump(obj, f)
+            # Fallback to cloudpickle (more robust than pickle)
+            fallback_file = full_path / "data.pkl"
+            with open(fallback_file, "wb") as f:
+                cloudpickle.dump(obj, f)
 
             # Save metadata
             with open(full_path / "metadata.json", "w") as f:
-                json.dump({"type": "pickle", "format": "pickle"}, f, indent=2)
+                json.dump(
+                    {
+                        "type": type(obj).__name__,
+                        "serializer": "cloudpickle",
+                        "format": "pickle",
+                        "file": fallback_file.name,
+                    },
+                    f,
+                    indent=2,
+                )
 
         return str(full_path)
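The switch matters because plain `pickle` serializes functions and classes by reference, so the lambdas, closures, and locally defined classes that commonly flow between pipeline steps fail to pickle, while `cloudpickle` serializes them by value. A quick self-contained illustration:

    import pickle

    import cloudpickle

    scale = lambda x: x * 2  # defined at runtime, not importable by name

    try:
        pickle.dumps(scale)
    except Exception as exc:
        print("pickle failed:", exc)   # pickle cannot serialize the lambda

    blob = cloudpickle.dumps(scale)    # cloudpickle serializes it by value
    restored = pickle.loads(blob)      # the output is ordinary pickle data
    print(restored(21))                # 42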
flowyml/storage/materializers/__init__.py CHANGED
@@ -7,6 +7,7 @@ from flowyml.storage.materializers.sklearn import SklearnMaterializer
 from flowyml.storage.materializers.pandas import PandasMaterializer
 from flowyml.storage.materializers.numpy import NumPyMaterializer
 from flowyml.storage.materializers.keras import KerasMaterializer
+from flowyml.storage.materializers.cloudpickle import CloudpickleMaterializer
 
 __all__ = [
     "BaseMaterializer",
@@ -17,4 +18,5 @@ __all__ = [
     "PandasMaterializer",
     "NumPyMaterializer",
     "KerasMaterializer",
+    "CloudpickleMaterializer",
 ]
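Given the `materializer.save(obj, full_path)` call in the artifact store above, the new materializer plausibly reduces to a thin cloudpickle wrapper. A sketch under that assumption (this is not the shipped flowyml/storage/materializers/cloudpickle.py, whose contents are outside this diff):

    from pathlib import Path
    from typing import Any

    import cloudpickle

    class CloudpickleMaterializerSketch:
        """Illustrative stand-in for the real CloudpickleMaterializer."""

        def save(self, obj: Any, path: Path) -> None:
            path.mkdir(parents=True, exist_ok=True)
            with open(path / "data.pkl", "wb") as f:
                cloudpickle.dump(obj, f)

        def load(self, path: Path) -> Any:
            with open(path / "data.pkl", "rb") as f:
                return cloudpickle.load(f)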