flowyml 1.7.1__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowyml/assets/base.py +15 -0
- flowyml/assets/dataset.py +570 -17
- flowyml/assets/metrics.py +5 -0
- flowyml/assets/model.py +1052 -15
- flowyml/cli/main.py +709 -0
- flowyml/cli/stack_cli.py +138 -25
- flowyml/core/__init__.py +17 -0
- flowyml/core/executor.py +231 -37
- flowyml/core/image_builder.py +129 -0
- flowyml/core/log_streamer.py +227 -0
- flowyml/core/orchestrator.py +59 -4
- flowyml/core/pipeline.py +65 -13
- flowyml/core/routing.py +558 -0
- flowyml/core/scheduler.py +88 -5
- flowyml/core/step.py +9 -1
- flowyml/core/step_grouping.py +49 -35
- flowyml/core/types.py +407 -0
- flowyml/integrations/keras.py +247 -82
- flowyml/monitoring/alerts.py +10 -0
- flowyml/monitoring/notifications.py +104 -25
- flowyml/monitoring/slack_blocks.py +323 -0
- flowyml/plugins/__init__.py +251 -0
- flowyml/plugins/alerters/__init__.py +1 -0
- flowyml/plugins/alerters/slack.py +168 -0
- flowyml/plugins/base.py +752 -0
- flowyml/plugins/config.py +478 -0
- flowyml/plugins/deployers/__init__.py +22 -0
- flowyml/plugins/deployers/gcp_cloud_run.py +200 -0
- flowyml/plugins/deployers/sagemaker.py +306 -0
- flowyml/plugins/deployers/vertex.py +290 -0
- flowyml/plugins/integration.py +369 -0
- flowyml/plugins/manager.py +510 -0
- flowyml/plugins/model_registries/__init__.py +22 -0
- flowyml/plugins/model_registries/mlflow.py +159 -0
- flowyml/plugins/model_registries/sagemaker.py +489 -0
- flowyml/plugins/model_registries/vertex.py +386 -0
- flowyml/plugins/orchestrators/__init__.py +13 -0
- flowyml/plugins/orchestrators/sagemaker.py +443 -0
- flowyml/plugins/orchestrators/vertex_ai.py +461 -0
- flowyml/plugins/registries/__init__.py +13 -0
- flowyml/plugins/registries/ecr.py +321 -0
- flowyml/plugins/registries/gcr.py +313 -0
- flowyml/plugins/registry.py +454 -0
- flowyml/plugins/stack.py +494 -0
- flowyml/plugins/stack_config.py +537 -0
- flowyml/plugins/stores/__init__.py +13 -0
- flowyml/plugins/stores/gcs.py +460 -0
- flowyml/plugins/stores/s3.py +453 -0
- flowyml/plugins/trackers/__init__.py +11 -0
- flowyml/plugins/trackers/mlflow.py +316 -0
- flowyml/plugins/validators/__init__.py +3 -0
- flowyml/plugins/validators/deepchecks.py +119 -0
- flowyml/registry/__init__.py +2 -1
- flowyml/registry/model_environment.py +109 -0
- flowyml/registry/model_registry.py +241 -96
- flowyml/serving/__init__.py +17 -0
- flowyml/serving/model_server.py +628 -0
- flowyml/stacks/__init__.py +60 -0
- flowyml/stacks/aws.py +93 -0
- flowyml/stacks/base.py +62 -0
- flowyml/stacks/components.py +12 -0
- flowyml/stacks/gcp.py +44 -9
- flowyml/stacks/plugins.py +115 -0
- flowyml/stacks/registry.py +2 -1
- flowyml/storage/sql.py +401 -12
- flowyml/tracking/experiment.py +8 -5
- flowyml/ui/backend/Dockerfile +87 -16
- flowyml/ui/backend/auth.py +12 -2
- flowyml/ui/backend/main.py +149 -5
- flowyml/ui/backend/routers/ai_context.py +226 -0
- flowyml/ui/backend/routers/assets.py +23 -4
- flowyml/ui/backend/routers/auth.py +96 -0
- flowyml/ui/backend/routers/deployments.py +660 -0
- flowyml/ui/backend/routers/model_explorer.py +597 -0
- flowyml/ui/backend/routers/plugins.py +103 -51
- flowyml/ui/backend/routers/projects.py +91 -8
- flowyml/ui/backend/routers/runs.py +132 -1
- flowyml/ui/backend/routers/schedules.py +54 -29
- flowyml/ui/backend/routers/templates.py +319 -0
- flowyml/ui/backend/routers/websocket.py +2 -2
- flowyml/ui/frontend/Dockerfile +55 -6
- flowyml/ui/frontend/dist/assets/index-B5AsPTSz.css +1 -0
- flowyml/ui/frontend/dist/assets/index-dFbZ8wD8.js +753 -0
- flowyml/ui/frontend/dist/index.html +2 -2
- flowyml/ui/frontend/dist/logo.png +0 -0
- flowyml/ui/frontend/nginx.conf +65 -4
- flowyml/ui/frontend/package-lock.json +1415 -74
- flowyml/ui/frontend/package.json +4 -0
- flowyml/ui/frontend/public/logo.png +0 -0
- flowyml/ui/frontend/src/App.jsx +10 -7
- flowyml/ui/frontend/src/app/assets/page.jsx +890 -321
- flowyml/ui/frontend/src/app/auth/Login.jsx +90 -0
- flowyml/ui/frontend/src/app/dashboard/page.jsx +8 -8
- flowyml/ui/frontend/src/app/deployments/page.jsx +786 -0
- flowyml/ui/frontend/src/app/model-explorer/page.jsx +1031 -0
- flowyml/ui/frontend/src/app/pipelines/page.jsx +12 -2
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectExperimentsList.jsx +19 -6
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectMetricsPanel.jsx +1 -1
- flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +601 -101
- flowyml/ui/frontend/src/app/runs/page.jsx +8 -2
- flowyml/ui/frontend/src/app/settings/page.jsx +267 -253
- flowyml/ui/frontend/src/components/ArtifactViewer.jsx +62 -2
- flowyml/ui/frontend/src/components/AssetDetailsPanel.jsx +424 -29
- flowyml/ui/frontend/src/components/AssetTreeHierarchy.jsx +119 -11
- flowyml/ui/frontend/src/components/DatasetViewer.jsx +753 -0
- flowyml/ui/frontend/src/components/Layout.jsx +6 -0
- flowyml/ui/frontend/src/components/PipelineGraph.jsx +79 -29
- flowyml/ui/frontend/src/components/RunDetailsPanel.jsx +36 -6
- flowyml/ui/frontend/src/components/RunMetaPanel.jsx +113 -0
- flowyml/ui/frontend/src/components/TrainingHistoryChart.jsx +514 -0
- flowyml/ui/frontend/src/components/TrainingMetricsPanel.jsx +175 -0
- flowyml/ui/frontend/src/components/ai/AIAssistantButton.jsx +71 -0
- flowyml/ui/frontend/src/components/ai/AIAssistantPanel.jsx +420 -0
- flowyml/ui/frontend/src/components/header/Header.jsx +22 -0
- flowyml/ui/frontend/src/components/plugins/PluginManager.jsx +4 -4
- flowyml/ui/frontend/src/components/plugins/{ZenMLIntegration.jsx → StackImport.jsx} +38 -12
- flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +36 -13
- flowyml/ui/frontend/src/contexts/AIAssistantContext.jsx +245 -0
- flowyml/ui/frontend/src/contexts/AuthContext.jsx +108 -0
- flowyml/ui/frontend/src/hooks/useAIContext.js +156 -0
- flowyml/ui/frontend/src/hooks/useWebGPU.js +54 -0
- flowyml/ui/frontend/src/layouts/MainLayout.jsx +6 -0
- flowyml/ui/frontend/src/router/index.jsx +47 -20
- flowyml/ui/frontend/src/services/pluginService.js +3 -1
- flowyml/ui/server_manager.py +5 -5
- flowyml/ui/utils.py +157 -39
- flowyml/utils/config.py +37 -15
- flowyml/utils/model_introspection.py +123 -0
- flowyml/utils/observability.py +30 -0
- flowyml-1.8.0.dist-info/METADATA +174 -0
- {flowyml-1.7.1.dist-info → flowyml-1.8.0.dist-info}/RECORD +134 -73
- {flowyml-1.7.1.dist-info → flowyml-1.8.0.dist-info}/WHEEL +1 -1
- flowyml/ui/frontend/dist/assets/index-BqDQvp63.js +0 -630
- flowyml/ui/frontend/dist/assets/index-By4trVyv.css +0 -1
- flowyml-1.7.1.dist-info/METADATA +0 -477
- {flowyml-1.7.1.dist-info → flowyml-1.8.0.dist-info}/entry_points.txt +0 -0
- {flowyml-1.7.1.dist-info → flowyml-1.8.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,460 @@
|
|
|
1
|
+
"""GCS Artifact Store - Native FlowyML Plugin.
|
|
2
|
+
|
|
3
|
+
This is a native FlowyML implementation for Google Cloud Storage,
|
|
4
|
+
without requiring any external framework dependencies.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
from flowyml.plugins import get_plugin
|
|
8
|
+
|
|
9
|
+
store = get_plugin("gcs",
|
|
10
|
+
bucket="my-ml-artifacts",
|
|
11
|
+
prefix="experiments/"
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
# Save artifacts
|
|
15
|
+
store.save(my_model, "models/model.pkl")
|
|
16
|
+
|
|
17
|
+
# Load artifacts
|
|
18
|
+
model = store.load("models/model.pkl")
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
import logging
|
|
22
|
+
from typing import Any
|
|
23
|
+
import pickle
|
|
24
|
+
import json
|
|
25
|
+
|
|
26
|
+
from flowyml.plugins.base import ArtifactStorePlugin, PluginMetadata, PluginType
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class GCSArtifactStore(ArtifactStorePlugin):
    """Native Google Cloud Storage artifact store for FlowyML.

    This store integrates directly with GCS without any
    intermediate framework, providing full control over
    artifact storage.

    Args:
        bucket: GCS bucket name.
        prefix: Optional prefix/folder within the bucket.
        project: GCP project ID (uses default if not provided).
        credentials_path: Path to service account JSON file.

    Example:
        store = GCSArtifactStore(
            bucket="my-ml-artifacts",
            prefix="experiments/",
            project="my-gcp-project"
        )

        # Save a model
        store.save(trained_model, "models/classifier.pkl")

        # Load a model
        model = store.load("models/classifier.pkl")

        # Check if exists
        if store.exists("models/classifier.pkl"):
            print("Model found!")
    """

    METADATA = PluginMetadata(
        name="gcs",
        description="Google Cloud Storage artifact storage",
        plugin_type=PluginType.ARTIFACT_STORE,
        version="1.0.0",
        author="FlowyML",
        packages=["google-cloud-storage>=2.0", "gcsfs>=2023.0"],
        documentation_url="https://cloud.google.com/storage/docs",
        tags=["artifact-store", "gcp", "cloud", "popular"],
    )

    def __init__(
        self,
        bucket: str,
        prefix: str = "",
        project: "str | None" = None,
        credentials_path: "str | None" = None,
        **kwargs,
    ):
        """Initialize the GCS artifact store.

        Connection is lazy: no GCS client is created until the first
        operation calls ``initialize()``.
        """
        super().__init__(
            name=kwargs.pop("name", "gcs"),
            bucket=bucket,
            prefix=prefix,
            project=project,
            credentials_path=credentials_path,
            **kwargs,
        )

        # Handles populated lazily by initialize().
        self._client = None
        self._bucket_obj = None
        self._gcsfs = None
        self._bucket = bucket
        # Normalize so path joins never produce double slashes.
        self._prefix = prefix.strip("/")

    def initialize(self) -> None:
        """Initialize GCS connection.

        Raises:
            ImportError: If google-cloud-storage is not installed
                (chained from the original import failure).
        """
        # Keep the try narrow: only the optional-dependency import can
        # legitimately raise ImportError here.
        try:
            from google.cloud import storage
        except ImportError as err:
            raise ImportError(
                "google-cloud-storage is not installed. Run: flowyml plugin install gcs",
            ) from err

        # Build client kwargs from the plugin configuration.
        client_kwargs = {}

        if self._config.get("project"):
            client_kwargs["project"] = self._config["project"]

        if self._config.get("credentials_path"):
            # google-auth ships with google-cloud-storage, so this import
            # cannot fail once the block above has succeeded.
            from google.oauth2 import service_account

            credentials = service_account.Credentials.from_service_account_file(
                self._config["credentials_path"],
            )
            client_kwargs["credentials"] = credentials

        self._client = storage.Client(**client_kwargs)
        self._bucket_obj = self._client.bucket(self._bucket)

        # Optionally initialize gcsfs for filesystem-like operations
        try:
            import gcsfs

            fs_kwargs = {}
            if self._config.get("project"):
                fs_kwargs["project"] = self._config["project"]
            if self._config.get("credentials_path"):
                fs_kwargs["token"] = self._config["credentials_path"]
            self._gcsfs = gcsfs.GCSFileSystem(**fs_kwargs)
        except ImportError:
            # gcsfs is a soft dependency; fall back to the raw client.
            logger.debug("gcsfs not available, using google-cloud-storage only")

        self._is_initialized = True
        logger.info(f"GCS artifact store initialized: gs://{self._bucket}/{self._prefix}")

    def _ensure_initialized(self) -> None:
        """Ensure GCS is initialized (lazy, idempotent)."""
        if not self._is_initialized:
            self.initialize()

    def _get_full_path(self, path: str) -> str:
        """Get the full GCS blob name for a path (prefix-qualified)."""
        if self._prefix:
            return f"{self._prefix}/{path.lstrip('/')}"
        return path.lstrip("/")

    def _get_gcs_uri(self, path: str) -> str:
        """Get the full GCS URI (gs://bucket/blob) for a path."""
        blob_name = self._get_full_path(path)
        return f"gs://{self._bucket}/{blob_name}"

    def save(self, artifact: Any, path: str) -> str:
        """Save an artifact to GCS.

        Args:
            artifact: The artifact to save. Can be:
                - bytes: Saved directly
                - str: Saved as UTF-8 text
                - dict/list: Saved as JSON
                - Other objects: Pickled
            path: Path within the store.

        Returns:
            Full GCS URI of the saved artifact.
        """
        self._ensure_initialized()

        blob_name = self._get_full_path(path)
        blob = self._bucket_obj.blob(blob_name)

        # Determine how to serialize
        if isinstance(artifact, bytes):
            blob.upload_from_string(artifact)
        elif isinstance(artifact, str):
            blob.upload_from_string(artifact.encode("utf-8"))
        elif isinstance(artifact, (dict, list)):
            blob.upload_from_string(
                json.dumps(artifact).encode("utf-8"),
                content_type="application/json",
            )
        else:
            # Pickle the object
            blob.upload_from_string(pickle.dumps(artifact))

        uri = self._get_gcs_uri(path)
        logger.info(f"Saved artifact to {uri}")
        return uri

    def save_file(self, local_path: str, remote_path: str) -> str:
        """Upload a local file to GCS.

        Args:
            local_path: Path to local file.
            remote_path: Path in GCS.

        Returns:
            Full GCS URI.
        """
        self._ensure_initialized()

        blob_name = self._get_full_path(remote_path)
        blob = self._bucket_obj.blob(blob_name)
        blob.upload_from_filename(local_path)

        uri = self._get_gcs_uri(remote_path)
        logger.info(f"Uploaded {local_path} to {uri}")
        return uri

    def load(self, path: str, deserialize: bool = True) -> Any:
        """Load an artifact from GCS.

        Args:
            path: Path to the artifact.
            deserialize: If True, attempts to deserialize (JSON/pickle).
                If False, returns raw bytes.

        Returns:
            The loaded artifact.
        """
        self._ensure_initialized()

        blob_name = self._get_full_path(path)
        blob = self._bucket_obj.blob(blob_name)
        body = blob.download_as_bytes()

        if not deserialize:
            return body

        # Try to deserialize
        # First try JSON
        try:
            return json.loads(body.decode("utf-8"))
        except (json.JSONDecodeError, UnicodeDecodeError):
            pass

        # Try pickle.
        # SECURITY NOTE: pickle.loads executes arbitrary code from the
        # payload. Only load artifacts from buckets you control; pass
        # deserialize=False for untrusted data.
        try:
            return pickle.loads(body)
        except Exception:
            pass

        # Try UTF-8 string
        try:
            return body.decode("utf-8")
        except UnicodeDecodeError:
            pass

        # Return raw bytes
        return body

    def download_file(self, remote_path: str, local_path: str) -> str:
        """Download a file from GCS to local filesystem.

        Args:
            remote_path: Path in GCS.
            local_path: Local destination path.

        Returns:
            Local path.
        """
        self._ensure_initialized()

        blob_name = self._get_full_path(remote_path)
        blob = self._bucket_obj.blob(blob_name)

        # Ensure local directory exists
        from pathlib import Path

        Path(local_path).parent.mkdir(parents=True, exist_ok=True)

        blob.download_to_filename(local_path)
        logger.info(f"Downloaded {self._get_gcs_uri(remote_path)} to {local_path}")
        return local_path

    def exists(self, path: str) -> bool:
        """Check if an artifact exists in GCS.

        Args:
            path: Path to check.

        Returns:
            True if the artifact exists.
        """
        self._ensure_initialized()

        blob_name = self._get_full_path(path)
        blob = self._bucket_obj.blob(blob_name)
        return blob.exists()

    def delete(self, path: str) -> bool:
        """Delete an artifact from GCS.

        Best-effort: failures are logged and reported via the return
        value rather than raised.

        Args:
            path: Path to delete.

        Returns:
            True if deletion was successful.
        """
        self._ensure_initialized()

        blob_name = self._get_full_path(path)
        blob = self._bucket_obj.blob(blob_name)

        try:
            blob.delete()
            logger.info(f"Deleted {self._get_gcs_uri(path)}")
            return True
        except Exception as e:
            logger.error(f"Failed to delete {path}: {e}")
            return False

    def list(self, path: str = "") -> list[str]:  # noqa: A003
        """List artifacts in a GCS directory.

        Best-effort: listing failures are logged and yield an empty list.

        Args:
            path: Directory path to list.

        Returns:
            List of artifact paths (relative to prefix).
        """
        self._ensure_initialized()

        prefix = self._get_full_path(path)
        if prefix and not prefix.endswith("/"):
            prefix += "/"

        try:
            blobs = self._client.list_blobs(self._bucket, prefix=prefix)

            items = []
            for blob in blobs:
                # Remove the base prefix (and its trailing slash) to get
                # the path relative to this store's root.
                name = blob.name
                if self._prefix:
                    name = name[len(self._prefix) + 1 :]
                items.append(name)

            return items

        except Exception as e:
            logger.error(f"Failed to list {path}: {e}")
            return []

    @property
    def root_path(self) -> str:
        """Get the root GCS URI."""
        if self._prefix:
            return f"gs://{self._bucket}/{self._prefix}"
        return f"gs://{self._bucket}"

    def get_uri(self, path: str) -> str:
        """Get the full GCS URI for a path."""
        return self._get_gcs_uri(path)

    def get_signed_url(self, path: str, expiration_minutes: int = 60) -> str:
        """Generate a signed URL for temporary access.

        Args:
            path: Path to the artifact.
            expiration_minutes: URL expiration time in minutes.

        Returns:
            Signed URL string.
        """
        self._ensure_initialized()

        from datetime import timedelta

        blob_name = self._get_full_path(path)
        blob = self._bucket_obj.blob(blob_name)

        url = blob.generate_signed_url(
            version="v4",
            expiration=timedelta(minutes=expiration_minutes),
            method="GET",
        )
        return url

    def save_typed_artifact(
        self,
        artifact: Any,
        path: str,
        run_id: str = "",
        step_name: str = "",
    ) -> str:
        """Save a FlowyML typed artifact with proper handling.

        Handles Model, Dataset, Metrics, and Parameters types with
        appropriate serialization and metadata.

        Args:
            artifact: A FlowyML artifact type (Model, Dataset, etc.)
            path: Base path (will be formatted with run_id/step_name)
            run_id: Pipeline run ID for path templating
            step_name: Step name for path templating

        Returns:
            Full GCS URI of the saved artifact.
        """
        self._ensure_initialized()

        # Dispatch on the class name so this works without importing
        # the concrete artifact types.
        artifact_type = type(artifact).__name__

        # Format path with run info
        formatted_path = path.format(
            run_id=run_id,
            step_name=step_name,
            artifact_name=artifact_type.lower(),
        )

        # Handle different artifact types
        if artifact_type == "Model":
            # Save model data
            model_data = artifact.data if hasattr(artifact, "data") else artifact
            model_path = f"{formatted_path}/model.pkl"
            self.save(model_data, model_path)

            # Save metadata
            if hasattr(artifact, "metadata") and artifact.metadata:
                self.save(artifact.metadata, f"{formatted_path}/metadata.json")

            return self._get_gcs_uri(formatted_path)

        elif artifact_type == "Dataset":
            # Save dataset
            data = artifact.data if hasattr(artifact, "data") else artifact

            # Check format
            fmt = getattr(artifact, "format", "pickle")
            if fmt == "parquet":
                # Use parquet if available
                try:
                    import pandas as pd

                    if isinstance(data, pd.DataFrame):
                        with self._bucket_obj.blob(
                            self._get_full_path(f"{formatted_path}/data.parquet"),
                        ).open("wb") as f:
                            data.to_parquet(f)
                        return self._get_gcs_uri(formatted_path)
                except ImportError:
                    pass

            # Fallback to pickle
            self.save(data, f"{formatted_path}/data.pkl")
            return self._get_gcs_uri(formatted_path)

        elif artifact_type in ("Metrics", "Parameters"):
            # Save as JSON
            data = dict(artifact) if hasattr(artifact, "__iter__") else artifact
            return self.save(data, f"{formatted_path}.json")

        else:
            # Generic artifact
            return self.save(artifact, formatted_path)
|