flowyml 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowyml/__init__.py +207 -0
- flowyml/assets/__init__.py +22 -0
- flowyml/assets/artifact.py +40 -0
- flowyml/assets/base.py +209 -0
- flowyml/assets/dataset.py +100 -0
- flowyml/assets/featureset.py +301 -0
- flowyml/assets/metrics.py +104 -0
- flowyml/assets/model.py +82 -0
- flowyml/assets/registry.py +157 -0
- flowyml/assets/report.py +315 -0
- flowyml/cli/__init__.py +5 -0
- flowyml/cli/experiment.py +232 -0
- flowyml/cli/init.py +256 -0
- flowyml/cli/main.py +327 -0
- flowyml/cli/run.py +75 -0
- flowyml/cli/stack_cli.py +532 -0
- flowyml/cli/ui.py +33 -0
- flowyml/core/__init__.py +68 -0
- flowyml/core/advanced_cache.py +274 -0
- flowyml/core/approval.py +64 -0
- flowyml/core/cache.py +203 -0
- flowyml/core/checkpoint.py +148 -0
- flowyml/core/conditional.py +373 -0
- flowyml/core/context.py +155 -0
- flowyml/core/error_handling.py +419 -0
- flowyml/core/executor.py +354 -0
- flowyml/core/graph.py +185 -0
- flowyml/core/parallel.py +452 -0
- flowyml/core/pipeline.py +764 -0
- flowyml/core/project.py +253 -0
- flowyml/core/resources.py +424 -0
- flowyml/core/scheduler.py +630 -0
- flowyml/core/scheduler_config.py +32 -0
- flowyml/core/step.py +201 -0
- flowyml/core/step_grouping.py +292 -0
- flowyml/core/templates.py +226 -0
- flowyml/core/versioning.py +217 -0
- flowyml/integrations/__init__.py +1 -0
- flowyml/integrations/keras.py +134 -0
- flowyml/monitoring/__init__.py +1 -0
- flowyml/monitoring/alerts.py +57 -0
- flowyml/monitoring/data.py +102 -0
- flowyml/monitoring/llm.py +160 -0
- flowyml/monitoring/monitor.py +57 -0
- flowyml/monitoring/notifications.py +246 -0
- flowyml/registry/__init__.py +5 -0
- flowyml/registry/model_registry.py +491 -0
- flowyml/registry/pipeline_registry.py +55 -0
- flowyml/stacks/__init__.py +27 -0
- flowyml/stacks/base.py +77 -0
- flowyml/stacks/bridge.py +288 -0
- flowyml/stacks/components.py +155 -0
- flowyml/stacks/gcp.py +499 -0
- flowyml/stacks/local.py +112 -0
- flowyml/stacks/migration.py +97 -0
- flowyml/stacks/plugin_config.py +78 -0
- flowyml/stacks/plugins.py +401 -0
- flowyml/stacks/registry.py +226 -0
- flowyml/storage/__init__.py +26 -0
- flowyml/storage/artifacts.py +246 -0
- flowyml/storage/materializers/__init__.py +20 -0
- flowyml/storage/materializers/base.py +133 -0
- flowyml/storage/materializers/keras.py +185 -0
- flowyml/storage/materializers/numpy.py +94 -0
- flowyml/storage/materializers/pandas.py +142 -0
- flowyml/storage/materializers/pytorch.py +135 -0
- flowyml/storage/materializers/sklearn.py +110 -0
- flowyml/storage/materializers/tensorflow.py +152 -0
- flowyml/storage/metadata.py +931 -0
- flowyml/tracking/__init__.py +1 -0
- flowyml/tracking/experiment.py +211 -0
- flowyml/tracking/leaderboard.py +191 -0
- flowyml/tracking/runs.py +145 -0
- flowyml/ui/__init__.py +15 -0
- flowyml/ui/backend/Dockerfile +31 -0
- flowyml/ui/backend/__init__.py +0 -0
- flowyml/ui/backend/auth.py +163 -0
- flowyml/ui/backend/main.py +187 -0
- flowyml/ui/backend/routers/__init__.py +0 -0
- flowyml/ui/backend/routers/assets.py +45 -0
- flowyml/ui/backend/routers/execution.py +179 -0
- flowyml/ui/backend/routers/experiments.py +49 -0
- flowyml/ui/backend/routers/leaderboard.py +118 -0
- flowyml/ui/backend/routers/notifications.py +72 -0
- flowyml/ui/backend/routers/pipelines.py +110 -0
- flowyml/ui/backend/routers/plugins.py +192 -0
- flowyml/ui/backend/routers/projects.py +85 -0
- flowyml/ui/backend/routers/runs.py +66 -0
- flowyml/ui/backend/routers/schedules.py +222 -0
- flowyml/ui/backend/routers/traces.py +84 -0
- flowyml/ui/frontend/Dockerfile +20 -0
- flowyml/ui/frontend/README.md +315 -0
- flowyml/ui/frontend/dist/assets/index-DFNQnrUj.js +448 -0
- flowyml/ui/frontend/dist/assets/index-pWI271rZ.css +1 -0
- flowyml/ui/frontend/dist/index.html +16 -0
- flowyml/ui/frontend/index.html +15 -0
- flowyml/ui/frontend/nginx.conf +26 -0
- flowyml/ui/frontend/package-lock.json +3545 -0
- flowyml/ui/frontend/package.json +33 -0
- flowyml/ui/frontend/postcss.config.js +6 -0
- flowyml/ui/frontend/src/App.jsx +21 -0
- flowyml/ui/frontend/src/app/assets/page.jsx +397 -0
- flowyml/ui/frontend/src/app/dashboard/page.jsx +295 -0
- flowyml/ui/frontend/src/app/experiments/[experimentId]/page.jsx +255 -0
- flowyml/ui/frontend/src/app/experiments/page.jsx +360 -0
- flowyml/ui/frontend/src/app/leaderboard/page.jsx +133 -0
- flowyml/ui/frontend/src/app/pipelines/page.jsx +454 -0
- flowyml/ui/frontend/src/app/plugins/page.jsx +48 -0
- flowyml/ui/frontend/src/app/projects/page.jsx +292 -0
- flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +682 -0
- flowyml/ui/frontend/src/app/runs/page.jsx +470 -0
- flowyml/ui/frontend/src/app/schedules/page.jsx +585 -0
- flowyml/ui/frontend/src/app/settings/page.jsx +314 -0
- flowyml/ui/frontend/src/app/tokens/page.jsx +456 -0
- flowyml/ui/frontend/src/app/traces/page.jsx +246 -0
- flowyml/ui/frontend/src/components/Layout.jsx +108 -0
- flowyml/ui/frontend/src/components/PipelineGraph.jsx +295 -0
- flowyml/ui/frontend/src/components/header/Header.jsx +72 -0
- flowyml/ui/frontend/src/components/plugins/AddPluginDialog.jsx +121 -0
- flowyml/ui/frontend/src/components/plugins/InstalledPlugins.jsx +124 -0
- flowyml/ui/frontend/src/components/plugins/PluginBrowser.jsx +167 -0
- flowyml/ui/frontend/src/components/plugins/PluginManager.jsx +60 -0
- flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +145 -0
- flowyml/ui/frontend/src/components/ui/Badge.jsx +26 -0
- flowyml/ui/frontend/src/components/ui/Button.jsx +34 -0
- flowyml/ui/frontend/src/components/ui/Card.jsx +44 -0
- flowyml/ui/frontend/src/components/ui/CodeSnippet.jsx +38 -0
- flowyml/ui/frontend/src/components/ui/CollapsibleCard.jsx +53 -0
- flowyml/ui/frontend/src/components/ui/DataView.jsx +175 -0
- flowyml/ui/frontend/src/components/ui/EmptyState.jsx +49 -0
- flowyml/ui/frontend/src/components/ui/ExecutionStatus.jsx +122 -0
- flowyml/ui/frontend/src/components/ui/KeyValue.jsx +25 -0
- flowyml/ui/frontend/src/components/ui/ProjectSelector.jsx +134 -0
- flowyml/ui/frontend/src/contexts/ProjectContext.jsx +79 -0
- flowyml/ui/frontend/src/contexts/ThemeContext.jsx +54 -0
- flowyml/ui/frontend/src/index.css +11 -0
- flowyml/ui/frontend/src/layouts/MainLayout.jsx +23 -0
- flowyml/ui/frontend/src/main.jsx +10 -0
- flowyml/ui/frontend/src/router/index.jsx +39 -0
- flowyml/ui/frontend/src/services/pluginService.js +90 -0
- flowyml/ui/frontend/src/utils/api.js +47 -0
- flowyml/ui/frontend/src/utils/cn.js +6 -0
- flowyml/ui/frontend/tailwind.config.js +31 -0
- flowyml/ui/frontend/vite.config.js +21 -0
- flowyml/ui/utils.py +77 -0
- flowyml/utils/__init__.py +67 -0
- flowyml/utils/config.py +308 -0
- flowyml/utils/debug.py +240 -0
- flowyml/utils/environment.py +346 -0
- flowyml/utils/git.py +319 -0
- flowyml/utils/logging.py +61 -0
- flowyml/utils/performance.py +314 -0
- flowyml/utils/stack_config.py +296 -0
- flowyml/utils/validation.py +270 -0
- flowyml-1.1.0.dist-info/METADATA +372 -0
- flowyml-1.1.0.dist-info/RECORD +159 -0
- flowyml-1.1.0.dist-info/WHEEL +4 -0
- flowyml-1.1.0.dist-info/entry_points.txt +3 -0
- flowyml-1.1.0.dist-info/licenses/LICENSE +17 -0
flowyml/stacks/gcp.py
ADDED
|
@@ -0,0 +1,499 @@
|
|
|
1
|
+
"""GCP Stack - Google Cloud Platform integration for flowyml.
|
|
2
|
+
|
|
3
|
+
This module provides GCP-specific implementations for running pipelines
|
|
4
|
+
on Google Cloud Platform using Vertex AI, Cloud Storage, and Container Registry.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from flowyml.stacks.base import Stack
|
|
10
|
+
from flowyml.stacks.components import (
|
|
11
|
+
Orchestrator,
|
|
12
|
+
ArtifactStore,
|
|
13
|
+
ContainerRegistry,
|
|
14
|
+
ResourceConfig,
|
|
15
|
+
DockerConfig,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class VertexAIOrchestrator(Orchestrator):
    """Vertex AI orchestrator for running pipelines on Google Cloud.

    This orchestrator submits pipeline jobs to Vertex AI Pipelines,
    allowing for scalable, managed execution in the cloud.

    Example:
        ```python
        from flowyml.stacks.gcp import VertexAIOrchestrator

        orchestrator = VertexAIOrchestrator(
            project_id="my-gcp-project", region="us-central1", service_account="my-sa@my-project.iam.gserviceaccount.com"
        )
        ```
    """

    def __init__(
        self,
        name: str = "vertex_ai",
        project_id: str | None = None,
        region: str = "us-central1",
        service_account: str | None = None,
        network: str | None = None,
        encryption_key: str | None = None,
    ):
        """Initialize Vertex AI orchestrator.

        Args:
            name: Name of the orchestrator
            project_id: GCP project ID
            region: GCP region for Vertex AI
            service_account: Service account email for job execution
            network: VPC network for jobs
            encryption_key: Customer-managed encryption key
        """
        super().__init__(name)
        self.project_id = project_id
        self.region = region
        self.service_account = service_account
        self.network = network
        self.encryption_key = encryption_key

    def validate(self) -> bool:
        """Validate Vertex AI configuration.

        Returns:
            True if the configuration is usable.

        Raises:
            ValueError: If project_id is not set.
            ImportError: If google-cloud-aiplatform is not installed.
        """
        if not self.project_id:
            raise ValueError("project_id is required for VertexAIOrchestrator")

        # find_spec checks availability without paying the import cost of
        # the (heavy) aiplatform package.
        import importlib.util

        if importlib.util.find_spec("google.cloud.aiplatform") is not None:
            return True
        raise ImportError(
            "google-cloud-aiplatform is required for VertexAIOrchestrator. "
            "Install with: pip install google-cloud-aiplatform",
        )

    def run_pipeline(
        self,
        pipeline: Any,
        resources: ResourceConfig | None = None,
        docker_config: DockerConfig | None = None,
        **kwargs,
    ) -> str:
        """Run pipeline on Vertex AI.

        Args:
            pipeline: Pipeline to run
            resources: Resource configuration
            docker_config: Docker configuration
            **kwargs: Additional arguments

        Returns:
            Job ID (the Vertex AI job resource name)
        """
        from google.cloud import aiplatform

        # Initialize Vertex AI
        aiplatform.init(project=self.project_id, location=self.region)

        # Create custom job
        job_display_name = f"{pipeline.name}-{pipeline.run_id}"

        # Build worker pool specs
        worker_pool_specs = self._build_worker_pool_specs(
            docker_config=docker_config,
            resources=resources,
        )

        # Create and run custom job
        job = aiplatform.CustomJob(
            display_name=job_display_name,
            worker_pool_specs=worker_pool_specs,
            service_account=self.service_account,
            network=self.network,
            encryption_spec_key_name=self.encryption_key,
        )

        # Fire-and-forget: the caller polls get_run_status() for progress.
        job.run(sync=False)

        return job.resource_name

    def get_run_status(self, run_id: str) -> str:
        """Get status of a Vertex AI job.

        Args:
            run_id: Job resource name as returned by run_pipeline().

        Returns:
            Name of the job state (e.g. "JOB_STATE_RUNNING").
        """
        from google.cloud import aiplatform

        # Fix: CustomJob(...) is the constructor for a *new*, unsubmitted job
        # (it requires display_name and worker_pool_specs); fetching an
        # existing job by resource name must go through CustomJob.get().
        job = aiplatform.CustomJob.get(run_id)
        return job.state.name

    def _build_worker_pool_specs(
        self,
        docker_config: DockerConfig | None,
        resources: ResourceConfig | None,
    ) -> list[dict]:
        """Build worker pool specifications for Vertex AI.

        Args:
            docker_config: Docker configuration (image, env vars); a default
                flowyml image is used when None.
            resources: Resource configuration; defaults are used when None.

        Returns:
            Single-replica worker pool spec list in the Vertex AI format.
        """
        # Default resources
        if resources is None:
            resources = ResourceConfig()

        machine_spec = {
            "machine_type": resources.machine_type or "n1-standard-4",
        }

        if resources.gpu:
            machine_spec["accelerator_type"] = resources.gpu
            machine_spec["accelerator_count"] = resources.gpu_count

        container_spec = {
            "image_uri": (docker_config.image if docker_config else "gcr.io/flowyml/flowyml:latest"),
            "command": ["python", "-m", "flowyml.cli.run"],
        }

        if docker_config and docker_config.env_vars:
            container_spec["env"] = [{"name": k, "value": v} for k, v in docker_config.env_vars.items()]

        return [
            {
                "replica_count": 1,
                "machine_spec": machine_spec,
                "container_spec": container_spec,
            },
        ]

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary."""
        return {
            "name": self.name,
            "type": "vertex_ai",
            "project_id": self.project_id,
            "region": self.region,
            "service_account": self.service_account,
            "network": self.network,
            # Consistency fix: encryption_key is part of the configuration
            # but was previously dropped during serialization.
            "encryption_key": self.encryption_key,
        }
173
|
+
|
|
174
|
+
class GCSArtifactStore(ArtifactStore):
    """Google Cloud Storage artifact store.

    Stores pipeline artifacts in Google Cloud Storage buckets.

    Example:
        ```python
        from flowyml.stacks.gcp import GCSArtifactStore

        artifact_store = GCSArtifactStore(bucket_name="my-flowyml-artifacts", project_id="my-gcp-project")
        ```
    """

    def __init__(
        self,
        name: str = "gcs",
        bucket_name: str | None = None,
        project_id: str | None = None,
        prefix: str = "flowyml",
    ):
        """Initialize GCS artifact store.

        Args:
            name: Name of the artifact store
            bucket_name: GCS bucket name
            project_id: GCP project ID
            prefix: Prefix for all artifacts in bucket
        """
        super().__init__(name)
        self.bucket_name = bucket_name
        self.project_id = project_id
        self.prefix = prefix

    def validate(self) -> bool:
        """Validate GCS configuration.

        Returns:
            True if the configuration is usable.

        Raises:
            ValueError: If bucket_name is not set.
            ImportError: If google-cloud-storage is not installed.
        """
        if not self.bucket_name:
            raise ValueError("bucket_name is required for GCSArtifactStore")

        # Check if google-cloud-storage is installed
        import importlib.util

        if importlib.util.find_spec("google.cloud.storage") is not None:
            return True
        raise ImportError(
            "google-cloud-storage is required for GCSArtifactStore. Install with: pip install google-cloud-storage",
        )

    def save(self, artifact: Any, path: str) -> str:
        """Save artifact to GCS.

        Args:
            artifact: Any picklable Python object.
            path: Blob path relative to the configured prefix.

        Returns:
            Full ``gs://bucket/prefix/path`` URI of the stored artifact.
        """
        from google.cloud import storage
        import pickle

        client = storage.Client(project=self.project_id)
        bucket = client.bucket(self.bucket_name)

        # Full path with prefix
        full_path = f"{self.prefix}/{path}"
        blob = bucket.blob(full_path)

        # Serialize and upload
        data = pickle.dumps(artifact)
        blob.upload_from_string(data)

        return f"gs://{self.bucket_name}/{full_path}"

    def load(self, path: str) -> Any:
        """Load artifact from GCS.

        Args:
            path: Either a full ``gs://bucket/blob`` URI (as returned by
                save()) or a path relative to the configured prefix.

        Returns:
            The unpickled artifact.
        """
        from google.cloud import storage
        import pickle

        client = storage.Client(project=self.project_id)

        # Handle both full gs:// URIs and relative paths
        if path.startswith("gs://"):
            # Fix: honour the bucket encoded in the URI instead of always
            # assuming the configured bucket — a gs:// URI is self-describing
            # and may point at a different bucket.
            parts = path.replace("gs://", "").split("/", 1)
            bucket = client.bucket(parts[0])
            blob_path = parts[1] if len(parts) > 1 else ""
        else:
            bucket = client.bucket(self.bucket_name)
            blob_path = f"{self.prefix}/{path}"

        blob = bucket.blob(blob_path)
        data = blob.download_as_bytes()

        # SECURITY NOTE: pickle.loads executes arbitrary code during
        # deserialization — only load artifacts from buckets you control.
        return pickle.loads(data)

    def exists(self, path: str) -> bool:
        """Check if artifact exists in GCS.

        Args:
            path: Blob path relative to the configured prefix.

        Returns:
            True if the blob exists in the bucket.
        """
        from google.cloud import storage

        client = storage.Client(project=self.project_id)
        bucket = client.bucket(self.bucket_name)

        full_path = f"{self.prefix}/{path}"
        blob = bucket.blob(full_path)

        return blob.exists()

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary."""
        return {
            "name": self.name,
            "type": "gcs",
            "bucket_name": self.bucket_name,
            "project_id": self.project_id,
            "prefix": self.prefix,
        }
|
|
283
|
+
class GCRContainerRegistry(ContainerRegistry):
    """Google Container Registry integration.

    Manages Docker images in Google Container Registry or Artifact Registry.

    Example:
        ```python
        from flowyml.stacks.gcp import GCRContainerRegistry

        registry = GCRContainerRegistry(project_id="my-gcp-project", registry_uri="gcr.io/my-gcp-project")
        ```
    """

    def __init__(
        self,
        name: str = "gcr",
        project_id: str | None = None,
        registry_uri: str | None = None,
        region: str | None = None,  # For Artifact Registry
    ):
        """Initialize GCR container registry.

        Args:
            name: Name of the registry
            project_id: GCP project ID
            registry_uri: Full registry URI (e.g., gcr.io/project-id)
            region: Region for Artifact Registry (e.g., us-central1)
        """
        super().__init__(name)
        self.project_id = project_id
        # Fall back to the classic gcr.io host when no explicit URI is given.
        self.registry_uri = registry_uri if registry_uri else f"gcr.io/{project_id}"
        self.region = region

    def validate(self) -> bool:
        """Validate registry configuration."""
        if not self.project_id:
            raise ValueError("project_id is required for GCRContainerRegistry")
        return True

    def push_image(self, image_name: str, tag: str = "latest") -> str:
        """Push Docker image to GCR.

        Args:
            image_name: Name of the image
            tag: Image tag

        Returns:
            Full image URI
        """
        import subprocess

        target = self.get_image_uri(image_name, tag)

        # Tag the local image for the registry, then push it.
        # check=True raises CalledProcessError if docker fails.
        commands = (
            ["docker", "tag", f"{image_name}:{tag}", target],
            ["docker", "push", target],
        )
        for command in commands:
            subprocess.run(command, check=True)

        return target

    def pull_image(self, image_name: str, tag: str = "latest") -> None:
        """Pull Docker image from GCR."""
        import subprocess

        subprocess.run(
            ["docker", "pull", self.get_image_uri(image_name, tag)],
            check=True,
        )

    def get_image_uri(self, image_name: str, tag: str = "latest") -> str:
        """Get full URI for an image."""
        return f"{self.registry_uri}/{image_name}:{tag}"

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary."""
        return {
            "name": self.name,
            "type": "gcr",
            "project_id": self.project_id,
            "registry_uri": self.registry_uri,
            "region": self.region,
        }
374
|
+
|
|
375
|
+
class GCPStack(Stack):
    """Complete GCP stack for running flowyml pipelines on Google Cloud Platform.

    This stack integrates:
    - Vertex AI for orchestration
    - Google Cloud Storage for artifact storage
    - Google Container Registry for Docker images
    - Cloud SQL or Firestore for metadata storage

    Example:
        ```python
        from flowyml.stacks.gcp import GCPStack
        from flowyml.stacks.components import ResourceConfig, DockerConfig
        from flowyml import Pipeline, step

        # Create GCP stack
        stack = GCPStack(
            name="production",
            project_id="my-gcp-project",
            region="us-central1",
            bucket_name="my-flowyml-artifacts",
            registry_uri="gcr.io/my-gcp-project",
        )

        # Define resource requirements
        resources = ResourceConfig(cpu="4", memory="16Gi", gpu="nvidia-tesla-t4", gpu_count=1, machine_type="n1-highmem-4")

        # Define Docker configuration
        docker_config = DockerConfig(
            image="gcr.io/my-gcp-project/ml-pipeline:v1",
            requirements=["tensorflow>=2.12.0", "scikit-learn>=1.0.0"],
            env_vars={"PYTHONUNBUFFERED": "1"},
        )


        # Run pipeline on GCP
        @step
        def train_model():
            # Your training code
            pass


        pipeline = Pipeline("training", stack=stack)
        pipeline.add_step(train_model)

        result = pipeline.run(resources=resources, docker_config=docker_config)
        ```
    """

    def __init__(
        self,
        name: str = "gcp",
        project_id: str | None = None,
        region: str = "us-central1",
        bucket_name: str | None = None,
        registry_uri: str | None = None,
        service_account: str | None = None,
        metadata_store: Any | None = None,
    ):
        """Initialize GCP stack.

        Args:
            name: Stack name
            project_id: GCP project ID
            region: GCP region
            bucket_name: GCS bucket for artifacts
            registry_uri: Container registry URI
            service_account: Service account for job execution
            metadata_store: Metadata store (optional, defaults to local SQLite)
        """
        # Default to a local SQLite metadata store when none is supplied.
        if metadata_store is None:
            from flowyml.storage.metadata import SQLiteMetadataStore

            metadata_store = SQLiteMetadataStore()

        # Wire up the GCP-specific components and hand them to the base
        # stack. There is no executor: Vertex AI handles execution remotely.
        super().__init__(
            name=name,
            executor=None,  # Vertex AI handles execution
            artifact_store=GCSArtifactStore(
                bucket_name=bucket_name,
                project_id=project_id,
            ),
            metadata_store=metadata_store,
            container_registry=GCRContainerRegistry(
                project_id=project_id,
                registry_uri=registry_uri,
                region=region,
            ),
            orchestrator=VertexAIOrchestrator(
                project_id=project_id,
                region=region,
                service_account=service_account,
            ),
        )

        self.project_id = project_id
        self.region = region

    def validate(self) -> bool:
        """Validate all GCP stack components.

        Each component raises on misconfiguration, so reaching the end
        means the whole stack is valid.
        """
        for component in (self.orchestrator, self.artifact_store, self.container_registry):
            component.validate()
        return True

    def to_dict(self) -> dict[str, Any]:
        """Convert stack configuration to dictionary."""
        return {
            "name": self.name,
            "type": "gcp",
            "project_id": self.project_id,
            "region": self.region,
            "orchestrator": self.orchestrator.to_dict(),
            "artifact_store": self.artifact_store.to_dict(),
            "container_registry": self.container_registry.to_dict(),
        }
flowyml/stacks/local.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""Local Stack - For local development and testing."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from flowyml.stacks.base import Stack
|
|
5
|
+
from flowyml.core.executor import LocalExecutor
|
|
6
|
+
from flowyml.storage.artifacts import LocalArtifactStore
|
|
7
|
+
from flowyml.storage.metadata import SQLiteMetadataStore
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class LocalStack(Stack):
    """Local stack for development and testing.

    This stack uses:
    - LocalExecutor for running steps locally
    - LocalArtifactStore for filesystem-based artifact storage
    - SQLiteMetadataStore for metadata storage

    Example:
        ```python
        from flowyml import LocalStack, Pipeline

        # Create local stack
        stack = LocalStack(name="local", artifact_path=".flowyml/artifacts", metadata_path=".flowyml/metadata.db")

        # Use with pipeline
        pipeline = Pipeline("my_pipeline", stack=stack)
        result = pipeline.run()
        ```
    """

    def __init__(
        self,
        name: str = "local",
        artifact_path: str = ".flowyml/artifacts",
        metadata_path: str = ".flowyml/metadata.db",
    ):
        """Initialize LocalStack.

        Args:
            name: Stack name
            artifact_path: Path for artifact storage
            metadata_path: Path for metadata database
        """
        # Create storage backends
        executor = LocalExecutor()
        artifact_store = LocalArtifactStore(artifact_path)
        metadata_store = SQLiteMetadataStore(metadata_path)

        # Initialize base stack
        super().__init__(
            name=name,
            executor=executor,
            artifact_store=artifact_store,
            metadata_store=metadata_store,
        )

        # Ensure directories exist so first-run writes don't fail.
        Path(artifact_path).mkdir(parents=True, exist_ok=True)
        Path(metadata_path).parent.mkdir(parents=True, exist_ok=True)

    def validate(self) -> bool:
        """Validate stack configuration.

        Missing directories are created rather than reported as errors,
        so this always succeeds on a writable filesystem.

        Returns:
            True if stack is valid
        """
        # Check artifact store path exists and is writable
        artifact_path = Path(self.artifact_store.base_path)
        if not artifact_path.exists():
            artifact_path.mkdir(parents=True, exist_ok=True)

        # Check metadata store path parent exists
        metadata_path = Path(self.metadata_store.db_path)
        if not metadata_path.parent.exists():
            metadata_path.parent.mkdir(parents=True, exist_ok=True)

        return True

    def cleanup(self) -> None:
        """Clean up stack resources."""
        # Could implement cache cleanup, temp file removal, etc.
        pass

    def get_stats(self) -> dict:
        """Get stack usage statistics.

        Returns:
            Dictionary with stack statistics
        """
        # NOTE: fixed a redundant local `from pathlib import Path` that
        # shadowed the module-level import.
        artifact_path = Path(self.artifact_store.base_path)
        metadata_path = Path(self.metadata_store.db_path)

        # Calculate artifact storage size
        artifact_size = sum(f.stat().st_size for f in artifact_path.rglob("*") if f.is_file())

        # Get metadata size
        metadata_size = metadata_path.stat().st_size if metadata_path.exists() else 0

        # Get metadata stats from store
        metadata_stats = self.metadata_store.get_statistics()

        return {
            "name": self.name,
            "artifact_storage_mb": artifact_size / (1024 * 1024),
            "metadata_storage_mb": metadata_size / (1024 * 1024),
            "total_runs": metadata_stats.get("total_runs", 0),
            "total_artifacts": metadata_stats.get("total_artifacts", 0),
            "total_metrics": metadata_stats.get("total_metrics", 0),
            "total_pipelines": metadata_stats.get("total_pipelines", 0),
        }
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"""Stack Migration Tools.
|
|
2
|
+
|
|
3
|
+
This module provides tools to migrate stacks from external systems (e.g., ZenML)
|
|
4
|
+
to flowyml configuration.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
import yaml
|
|
9
|
+
|
|
10
|
+
from flowyml.stacks.plugin_config import PluginConfig
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class StackMigrator:
    """Migrates stacks from external systems to flowyml."""

    def migrate_zenml_stack(self, stack_name: str) -> dict[str, Any]:
        """Migrate a ZenML stack to flowyml configuration.

        Args:
            stack_name: Name of the ZenML stack to read via the ZenML client.

        Returns:
            Dict with two keys: ``plugins`` (list of plugin config dicts)
            and ``stack`` (the flowyml stack configuration).

        Raises:
            ImportError: If ZenML is not installed.
            ValueError: If the named stack does not exist.
        """
        try:
            from zenml.client import Client

            client = Client()
            stack = client.get_stack(stack_name)
        except ImportError as err:
            # Chain the original cause (PEP 3134) so the underlying import
            # failure is preserved in the traceback.
            raise ImportError("ZenML is not installed. Please install it with: pip install zenml") from err
        except KeyError as err:
            raise ValueError(f"ZenML stack '{stack_name}' not found.") from err

        # Generate plugin configs for each component
        plugins = []
        stack_config = {
            "name": stack.name,
            "components": {},
        }

        for component_type, components in stack.components.items():
            # Handle list of components (ZenML specific); avoid rebinding
            # the loop variable so the original value stays inspectable.
            component_list = components if isinstance(components, list) else [components]

            for component in component_list:
                # Create plugin config
                plugin_name = f"zenml_{component.name}"

                # Determine source class path.
                # This relies on the ZenML component having a valid module
                # path; we inspect the object's class to build it.
                source_class = component.__class__.__module__ + "." + component.__class__.__name__

                plugin = PluginConfig(
                    name=plugin_name,
                    source=source_class,
                    component_type=self._map_zenml_type(component.type),
                    adaptation={
                        "method_mapping": {"run_pipeline": "run"} if component.type == "orchestrator" else {},
                    },
                )
                plugins.append(plugin)

                # Add to stack config.
                # We use the plugin name as the component type in the flowyml
                # stack config; this assumes the plugin is registered with
                # this name.
                stack_config["components"][component_type] = {
                    "type": plugin_name,
                    "config": component.configuration,
                }

        return {
            "plugins": [
                {
                    "name": p.name,
                    "source": p.source,
                    "type": p.component_type,  # Serialize as 'type' for compatibility
                    "adaptation": p.adaptation,
                }
                for p in plugins
            ],
            "stack": stack_config,
        }

    def _map_zenml_type(self, zenml_type: str) -> str:
        """Map ZenML component type to flowyml type.

        Unknown types map to "custom".
        """
        type_mapping = {
            "orchestrator": "orchestrator",
            "artifact_store": "artifact_store",
            "container_registry": "container_registry",
        }
        return type_mapping.get(zenml_type, "custom")

    def generate_yaml(self, migration_data: dict[str, Any]) -> str:
        """Generate YAML configuration from migration data.

        Args:
            migration_data: Output of migrate_zenml_stack().

        Returns:
            YAML document with top-level ``plugins`` and ``stacks`` sections.
        """
        output = {
            "plugins": migration_data["plugins"],
            "stacks": {
                migration_data["stack"]["name"]: migration_data["stack"]["components"],
            },
        }
        # sort_keys=False preserves the insertion order defined above.
        return yaml.dump(output, sort_keys=False)