flowyml 1.2.0__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowyml/__init__.py +3 -0
- flowyml/assets/base.py +10 -0
- flowyml/assets/metrics.py +6 -0
- flowyml/cli/main.py +108 -2
- flowyml/cli/run.py +9 -2
- flowyml/core/execution_status.py +52 -0
- flowyml/core/hooks.py +106 -0
- flowyml/core/observability.py +210 -0
- flowyml/core/orchestrator.py +274 -0
- flowyml/core/pipeline.py +193 -231
- flowyml/core/project.py +34 -2
- flowyml/core/remote_orchestrator.py +109 -0
- flowyml/core/resources.py +34 -17
- flowyml/core/retry_policy.py +80 -0
- flowyml/core/scheduler.py +9 -9
- flowyml/core/scheduler_config.py +2 -3
- flowyml/core/step.py +18 -1
- flowyml/core/submission_result.py +53 -0
- flowyml/integrations/keras.py +95 -22
- flowyml/monitoring/alerts.py +2 -2
- flowyml/stacks/__init__.py +15 -0
- flowyml/stacks/aws.py +599 -0
- flowyml/stacks/azure.py +295 -0
- flowyml/stacks/bridge.py +9 -9
- flowyml/stacks/components.py +24 -2
- flowyml/stacks/gcp.py +158 -11
- flowyml/stacks/local.py +5 -0
- flowyml/stacks/plugins.py +2 -2
- flowyml/stacks/registry.py +21 -0
- flowyml/storage/artifacts.py +15 -5
- flowyml/storage/materializers/__init__.py +2 -0
- flowyml/storage/materializers/base.py +33 -0
- flowyml/storage/materializers/cloudpickle.py +74 -0
- flowyml/storage/metadata.py +3 -881
- flowyml/storage/remote.py +590 -0
- flowyml/storage/sql.py +911 -0
- flowyml/ui/backend/dependencies.py +28 -0
- flowyml/ui/backend/main.py +43 -80
- flowyml/ui/backend/routers/assets.py +483 -17
- flowyml/ui/backend/routers/client.py +46 -0
- flowyml/ui/backend/routers/execution.py +13 -2
- flowyml/ui/backend/routers/experiments.py +97 -14
- flowyml/ui/backend/routers/metrics.py +168 -0
- flowyml/ui/backend/routers/pipelines.py +77 -12
- flowyml/ui/backend/routers/projects.py +33 -7
- flowyml/ui/backend/routers/runs.py +221 -12
- flowyml/ui/backend/routers/schedules.py +5 -21
- flowyml/ui/backend/routers/stats.py +14 -0
- flowyml/ui/backend/routers/traces.py +37 -53
- flowyml/ui/frontend/dist/assets/index-DcYwrn2j.css +1 -0
- flowyml/ui/frontend/dist/assets/index-Dlz_ygOL.js +592 -0
- flowyml/ui/frontend/dist/index.html +2 -2
- flowyml/ui/frontend/src/App.jsx +4 -1
- flowyml/ui/frontend/src/app/assets/page.jsx +260 -230
- flowyml/ui/frontend/src/app/dashboard/page.jsx +38 -7
- flowyml/ui/frontend/src/app/experiments/page.jsx +61 -314
- flowyml/ui/frontend/src/app/observability/page.jsx +277 -0
- flowyml/ui/frontend/src/app/pipelines/page.jsx +79 -402
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectArtifactsList.jsx +151 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectExperimentsList.jsx +145 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectHeader.jsx +45 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectHierarchy.jsx +467 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectMetricsPanel.jsx +253 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectPipelinesList.jsx +105 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectRelations.jsx +189 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectRunsList.jsx +136 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/_components/ProjectTabs.jsx +95 -0
- flowyml/ui/frontend/src/app/projects/[projectId]/page.jsx +326 -0
- flowyml/ui/frontend/src/app/projects/page.jsx +13 -3
- flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +79 -10
- flowyml/ui/frontend/src/app/runs/page.jsx +82 -424
- flowyml/ui/frontend/src/app/settings/page.jsx +1 -0
- flowyml/ui/frontend/src/app/tokens/page.jsx +62 -16
- flowyml/ui/frontend/src/components/AssetDetailsPanel.jsx +373 -0
- flowyml/ui/frontend/src/components/AssetLineageGraph.jsx +291 -0
- flowyml/ui/frontend/src/components/AssetStatsDashboard.jsx +302 -0
- flowyml/ui/frontend/src/components/AssetTreeHierarchy.jsx +477 -0
- flowyml/ui/frontend/src/components/ExperimentDetailsPanel.jsx +227 -0
- flowyml/ui/frontend/src/components/NavigationTree.jsx +401 -0
- flowyml/ui/frontend/src/components/PipelineDetailsPanel.jsx +239 -0
- flowyml/ui/frontend/src/components/PipelineGraph.jsx +67 -3
- flowyml/ui/frontend/src/components/ProjectSelector.jsx +115 -0
- flowyml/ui/frontend/src/components/RunDetailsPanel.jsx +298 -0
- flowyml/ui/frontend/src/components/header/Header.jsx +48 -1
- flowyml/ui/frontend/src/components/plugins/ZenMLIntegration.jsx +106 -0
- flowyml/ui/frontend/src/components/sidebar/Sidebar.jsx +52 -26
- flowyml/ui/frontend/src/components/ui/DataView.jsx +35 -17
- flowyml/ui/frontend/src/components/ui/ErrorBoundary.jsx +118 -0
- flowyml/ui/frontend/src/contexts/ProjectContext.jsx +2 -2
- flowyml/ui/frontend/src/contexts/ToastContext.jsx +116 -0
- flowyml/ui/frontend/src/layouts/MainLayout.jsx +5 -1
- flowyml/ui/frontend/src/router/index.jsx +4 -0
- flowyml/ui/frontend/src/utils/date.js +10 -0
- flowyml/ui/frontend/src/utils/downloads.js +11 -0
- flowyml/utils/config.py +6 -0
- flowyml/utils/stack_config.py +45 -3
- {flowyml-1.2.0.dist-info → flowyml-1.4.0.dist-info}/METADATA +44 -4
- flowyml-1.4.0.dist-info/RECORD +200 -0
- {flowyml-1.2.0.dist-info → flowyml-1.4.0.dist-info}/licenses/LICENSE +1 -1
- flowyml/ui/frontend/dist/assets/index-DFNQnrUj.js +0 -448
- flowyml/ui/frontend/dist/assets/index-pWI271rZ.css +0 -1
- flowyml-1.2.0.dist-info/RECORD +0 -159
- {flowyml-1.2.0.dist-info → flowyml-1.4.0.dist-info}/WHEEL +0 -0
- {flowyml-1.2.0.dist-info → flowyml-1.4.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,590 @@
|
|
|
1
|
+
"""Remote storage backends for flowyml."""
|
|
2
|
+
|
|
3
|
+
import shutil
|
|
4
|
+
import tempfile
|
|
5
|
+
import cloudpickle
|
|
6
|
+
from typing import Any
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
import requests
|
|
9
|
+
|
|
10
|
+
from flowyml.storage.metadata import MetadataStore
|
|
11
|
+
from flowyml.storage.artifacts import ArtifactStore
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class RemoteMetadataStore(MetadataStore):
    """Remote metadata storage using the FlowyML HTTP API.

    Each method maps onto one REST endpoint. Single-entity lookups
    (``load_run``, ``load_artifact``, ``get_experiment``) return ``None``
    on 404 or network failure; write methods raise ``requests.HTTPError``
    on a non-2xx response.
    """

    def __init__(self, api_url: str, api_token: str | None = None):
        """Initialize remote metadata store.

        Args:
            api_url: Base URL of the FlowyML API (e.g. http://localhost:8080/api)
            api_token: Optional API token for authentication
        """
        # Strip trailing slashes so _url() always joins with exactly one "/".
        self.api_url = api_url.rstrip("/")
        self._session = requests.Session()
        if api_token:
            self._session.headers.update({"Authorization": f"Bearer {api_token}"})

    def _url(self, path: str) -> str:
        """Join *path* onto the base API URL with a single slash."""
        return f"{self.api_url}/{path.lstrip('/')}"

    def save_run(self, run_id: str, metadata: dict) -> None:
        """Save run metadata to remote server.

        Args:
            run_id: Unique identifier of the run.
            metadata: Full run metadata; well-known fields are lifted to the
                top level of the payload and the whole dict is sent verbatim
                under "metadata".

        Raises:
            requests.HTTPError: If the server rejects the payload.
        """
        url = self._url("runs/")

        # Extract fields expected by API
        payload = {
            "run_id": run_id,
            "pipeline_name": metadata.get("pipeline_name", "unknown"),
            "status": metadata.get("status", "pending"),
            "start_time": metadata.get("start_time"),
            "end_time": metadata.get("end_time"),
            "duration": metadata.get("duration"),
            "project": metadata.get("project"),
            "metadata": metadata,  # Full metadata blob
            "metrics": metadata.get("metrics"),
            "parameters": metadata.get("parameters"),
        }

        response = self._session.post(url, json=payload)
        response.raise_for_status()

    def load_run(self, run_id: str) -> dict | None:
        """Load run metadata, or None if the run is missing or unreachable."""
        url = self._url(f"runs/{run_id}")
        try:
            response = self._session.get(url)
            if response.status_code == 404:
                return None
            response.raise_for_status()
            return response.json()
        except requests.RequestException:
            return None

    def list_runs(self, limit: int | None = None, **filters) -> list[dict]:
        """List runs, optionally capped at *limit* and filtered by query params."""
        url = self._url("runs/")
        params = filters.copy()
        # "is not None" so an explicit limit=0 is forwarded, not dropped.
        if limit is not None:
            params["limit"] = limit

        response = self._session.get(url, params=params)
        response.raise_for_status()
        return response.json().get("runs", [])

    def list_pipelines(self) -> list[str]:
        """List all unique pipeline names.

        Prefers the dedicated pipelines endpoint; falls back to scanning
        recent runs when that endpoint is unavailable. Returns [] when
        neither source is reachable.
        """
        # Try to use the pipelines endpoint
        try:
            url = self._url("pipelines/")
            response = self._session.get(url)
            if response.status_code == 200:
                pipelines = response.json().get("pipelines", [])
                return sorted([p["name"] for p in pipelines])
        except Exception:
            pass  # Best effort: fall through to the run-based fallback.

        # Fallback to querying runs
        try:
            runs = self.list_runs(limit=1000)
            pipelines = {r.get("pipeline_name") for r in runs if r.get("pipeline_name")}
            return sorted(pipelines)
        except Exception:
            return []

    def save_artifact(self, artifact_id: str, metadata: dict) -> None:
        """Save artifact metadata.

        Raises:
            requests.HTTPError: If the server rejects the payload.
        """
        url = self._url("assets/")

        payload = {
            "artifact_id": artifact_id,
            "name": metadata.get("name", "unknown"),
            "type": metadata.get("type", "unknown"),
            "run_id": metadata.get("run_id", "unknown"),
            "step": metadata.get("step", "unknown"),
            "project": metadata.get("project"),
            "metadata": metadata,
            "value": metadata.get("value"),
        }

        response = self._session.post(url, json=payload)
        response.raise_for_status()

    def load_artifact(self, artifact_id: str) -> dict | None:
        """Load artifact metadata, or None if missing or unreachable."""
        url = self._url(f"assets/{artifact_id}")
        try:
            response = self._session.get(url)
            if response.status_code == 404:
                return None
            response.raise_for_status()
            return response.json()
        except requests.RequestException:
            return None

    def list_assets(self, limit: int | None = None, **filters) -> list[dict]:
        """List assets with optional filters."""
        url = self._url("assets/")
        params = filters.copy()
        # "is not None" so an explicit limit=0 is forwarded, not dropped.
        if limit is not None:
            params["limit"] = limit

        response = self._session.get(url, params=params)
        response.raise_for_status()
        return response.json().get("assets", [])

    def query(self, **filters) -> list[dict]:
        """Query runs with filters (thin alias for :meth:`list_runs`)."""
        return self.list_runs(**filters)

    def save_metric(self, run_id: str, name: str, value: float, step: int = 0) -> None:
        """Save a single metric value for a run at a given step."""
        url = self._url("metrics/")
        payload = {
            "run_id": run_id,
            "name": name,
            "value": value,
            "step": step,
        }
        response = self._session.post(url, json=payload)
        response.raise_for_status()

    def get_metrics(self, run_id: str, name: str | None = None) -> list[dict]:
        """Get metrics for a run, optionally filtered to a single metric name."""
        url = self._url(f"runs/{run_id}/metrics")
        response = self._session.get(url)
        response.raise_for_status()
        metrics = response.json().get("metrics", [])
        # Server returns all metrics; name filtering is applied client-side.
        if name:
            return [m for m in metrics if m["name"] == name]
        return metrics

    def save_experiment(self, experiment_id: str, name: str, description: str = "", tags: dict | None = None) -> None:
        """Save experiment metadata.

        Args:
            experiment_id: Unique experiment identifier.
            name: Human-readable experiment name.
            description: Optional free-text description.
            tags: Optional tag dict; a "project" tag, if present, is also
                forwarded as the payload's project field.
        """
        url = self._url("experiments/")
        payload = {
            "experiment_id": experiment_id,
            "name": name,
            "description": description,
            "tags": tags or {},
            # The API supports an optional project field in the payload;
            # derive it from the tags when available.
            "project": tags.get("project") if tags else None,
        }
        response = self._session.post(url, json=payload)
        response.raise_for_status()

    def log_experiment_run(
        self,
        experiment_id: str,
        run_id: str,
        metrics: dict | None = None,
        parameters: dict | None = None,
    ) -> None:
        """Log a run (with optional metrics/parameters) to an experiment."""
        url = self._url(f"experiments/{experiment_id}/runs")
        payload = {
            "run_id": run_id,
            "metrics": metrics,
            "parameters": parameters,
        }
        response = self._session.post(url, json=payload)
        response.raise_for_status()

    def list_experiments(self) -> list[dict]:
        """List all experiments."""
        url = self._url("experiments/")
        response = self._session.get(url)
        response.raise_for_status()
        return response.json().get("experiments", [])

    def get_experiment(self, experiment_id: str) -> dict | None:
        """Get experiment details, or None if missing or unreachable."""
        url = self._url(f"experiments/{experiment_id}")
        try:
            response = self._session.get(url)
            if response.status_code == 404:
                return None
            response.raise_for_status()
            return response.json()
        except requests.RequestException:
            return None

    def update_experiment_project(self, experiment_name: str, project_name: str) -> None:
        """Update the project for an experiment."""
        url = self._url(f"experiments/{experiment_name}/project")
        payload = {"project_name": project_name}
        response = self._session.put(url, json=payload)
        response.raise_for_status()

    def log_model_metrics(
        self,
        project: str,
        model_name: str,
        metrics: dict[str, float],
        run_id: str | None = None,
        environment: str | None = None,
        tags: dict | None = None,
    ) -> None:
        """Log production model metrics.

        Args:
            project: Project the model belongs to.
            model_name: Name of the deployed model.
            metrics: Metric name -> value mapping.
            run_id: Optional run the metrics originate from.
            environment: Optional deployment environment label.
            tags: Optional extra tags.
        """
        url = self._url("metrics/log")
        payload = {
            "project": project,
            "model_name": model_name,
            "metrics": metrics,
            "run_id": run_id,
            "environment": environment,
            "tags": tags,
        }
        response = self._session.post(url, json=payload)
        response.raise_for_status()

    def list_model_metrics(
        self,
        project: str | None = None,
        model_name: str | None = None,
        limit: int = 100,
    ) -> list[dict]:
        """List logged model metrics, optionally scoped to a project/model."""
        url = self._url("metrics")
        params = {"limit": limit}
        if project:
            params["project"] = project
        if model_name:
            params["model_name"] = model_name

        response = self._session.get(url, params=params)
        response.raise_for_status()
        return response.json().get("metrics", [])

    def save_trace_event(self, event: dict) -> None:
        """Save a trace event (the dict is posted verbatim)."""
        url = self._url("traces/")
        response = self._session.post(url, json=event)
        response.raise_for_status()

    def save_pipeline_definition(self, pipeline_name: str, definition: dict) -> None:
        """Save a pipeline definition."""
        url = self._url("pipelines/")
        payload = {"pipeline_name": pipeline_name, "definition": definition}
        response = self._session.post(url, json=payload)
        response.raise_for_status()

    def update_pipeline_project(self, pipeline_name: str, project_name: str) -> None:
        """Update the project for a pipeline."""
        url = self._url(f"pipelines/{pipeline_name}/project")
        payload = {"project_name": project_name}
        response = self._session.put(url, json=payload)
        response.raise_for_status()

    def get_statistics(self) -> dict:
        """Get global statistics."""
        url = self._url("stats/")
        response = self._session.get(url)
        response.raise_for_status()
        return response.json()
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
class RemoteArtifactStore(ArtifactStore):
    """Remote artifact storage using the FlowyML API.

    Serializes artifacts locally, uploads them to the server, and caches
    downloads under *local_cache_dir*. Directory-shaped artifacts are
    zipped for upload and unpacked on download.
    """

    def __init__(self, api_url: str, local_cache_dir: str = ".flowyml/cache/artifacts", api_token: str | None = None):
        """Initialize remote artifact store.

        Args:
            api_url: Base URL of the FlowyML API
            local_cache_dir: Directory to cache downloaded artifacts
            api_token: Optional API token for authentication
        """
        # Strip trailing slashes so _url() always joins with exactly one "/".
        self.api_url = api_url.rstrip("/")
        self.local_cache = Path(local_cache_dir)
        self.local_cache.mkdir(parents=True, exist_ok=True)
        self._session = requests.Session()
        if api_token:
            self._session.headers.update({"Authorization": f"Bearer {api_token}"})

    def _url(self, path: str) -> str:
        """Join *path* onto the base API URL with a single slash."""
        return f"{self.api_url}/{path.lstrip('/')}"

    def _resolve_artifact_id(self, path: str) -> str | None:
        """Resolve *path* to an artifact ID known to the server.

        Tries the path itself first, then the second-to-last path component
        (remote layout is project/run_id/artifact_id/filename). Returns
        None when neither resolves.
        """
        if self.load_artifact_metadata(path):
            return path
        parts = Path(path).parts
        if len(parts) >= 2 and self.load_artifact_metadata(parts[-2]):
            return parts[-2]
        return None

    def save(self, artifact: Any, path: str, metadata: dict | None = None) -> str:
        """Save artifact to remote server.

        Handles serialization, metadata creation (the upload endpoint
        requires an existing metadata record), and file upload.

        Args:
            artifact: Object to save
            path: Path/identifier for the artifact
            metadata: Optional metadata dictionary

        Returns:
            The remote path reported by the server, or *path* as fallback.

        Raises:
            requests.HTTPError: If the upload is rejected.
        """
        import hashlib
        import pickle

        # 1. Determine artifact_id
        if metadata and metadata.get("artifact_id"):
            artifact_id = metadata["artifact_id"]
        else:
            # Deterministic ID derived from the path (not security-sensitive).
            artifact_id = hashlib.md5(path.encode()).hexdigest()

        # 2. Serialize locally to a temporary file (cleaned up in `finally`).
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            pickle.dump(artifact, tmp)
            tmp_path = Path(tmp.name)

        try:
            # 3. Ensure metadata exists on server. The upload endpoint
            # requires an existing metadata record, so create a placeholder
            # if none is found. Best-effort: a failure here surfaces as an
            # upload error below.
            if not self.load_artifact_metadata(artifact_id):
                create_url = self._url("assets/")
                payload = {
                    "artifact_id": artifact_id,
                    "name": metadata.get("name", "unknown") if metadata else "unknown",
                    "type": type(artifact).__name__,
                    "run_id": metadata.get("run_id", "unknown") if metadata else "unknown",
                    "step": metadata.get("step", "unknown") if metadata else "unknown",
                    "project": metadata.get("project") if metadata else None,
                    "metadata": metadata or {},
                }
                self._session.post(create_url, json=payload)

            # 4. Upload content
            upload_url = self._url(f"assets/{artifact_id}/upload")
            with open(tmp_path, "rb") as f:
                # Use a generic filename for pickle dump
                files = {"file": ("artifact.pkl", f)}
                response = self._session.post(upload_url, files=files)
                response.raise_for_status()

            remote_path = response.json().get("path")
            return remote_path or path

        finally:
            if tmp_path.exists():
                tmp_path.unlink()

    def materialize(self, obj: Any, name: str, run_id: str, step_name: str, project_name: str = "default") -> str:
        """Materialize artifact to remote storage.

        Uses registered materializers if available, otherwise falls back to
        cloudpickle. If the materializer produces a directory, it is zipped
        before upload.

        Returns:
            The remote path reported by the server.

        Raises:
            requests.HTTPError: If the upload is rejected.
        """
        from flowyml.storage.materializers.base import get_materializer

        artifact_id = f"{run_id}_{step_name}_{name}"

        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)
            file_path = temp_path / name

            # 1. Materialize locally
            materializer = get_materializer(obj)
            if materializer:
                materializer.save(obj, file_path)
                if file_path.is_dir():
                    shutil.make_archive(str(file_path), "zip", file_path)
                    # make_archive appends ".zip" to its base_name, so append
                    # here too; with_suffix(".zip") would mangle dotted names
                    # ("model.keras" -> "model.zip" instead of "model.keras.zip").
                    upload_path = Path(f"{file_path}.zip")
                    filename = f"{name}.zip"
                else:
                    upload_path = file_path
                    filename = name
            else:
                # Fallback to cloudpickle
                upload_path = temp_path / f"{name}.pkl"
                with open(upload_path, "wb") as f:
                    cloudpickle.dump(obj, f)
                filename = f"{name}.pkl"

            # 2. Create metadata
            create_url = self._url("assets/")
            payload = {
                "artifact_id": artifact_id,
                "name": name,
                "type": type(obj).__name__,  # Store simple type name for reference
                "run_id": run_id,
                "step": step_name,
                "project": project_name,
                # Store full type path in metadata for robust loading
                "metadata": {
                    "type_module": type(obj).__module__,
                    "type_name": type(obj).__name__,
                },
            }
            # Best-effort for idempotency: the API updates the record when it
            # already exists, so we do not fail the materialization here.
            self._session.post(create_url, json=payload)

            # 3. Upload content
            upload_url = self._url(f"assets/{artifact_id}/upload")
            with open(upload_path, "rb") as f:
                files = {"file": (filename, f)}
                response = self._session.post(upload_url, files=files)
                response.raise_for_status()

            return response.json().get("path")

    def load(self, path: str) -> Any:
        """Load artifact from remote storage.

        Args:
            path: The remote path or artifact ID.

        Returns:
            The deserialized object, or the local cache path if no
            deserialization strategy applies.

        Raises:
            requests.HTTPError: If the download fails.
        """
        import re
        import pickle
        from flowyml.storage.materializers.base import get_materializer_by_type_name

        # 1. Resolve artifact_id; as a last resort assume the path itself is
        # the ID (the metadata check may have failed transiently).
        artifact_id = self._resolve_artifact_id(path) or path

        # 2. Get metadata to help with deserialization
        meta = self.load_artifact_metadata(artifact_id)

        # 3. Download to local cache
        download_url = self._url(f"assets/{artifact_id}/download")
        response = self._session.get(download_url, stream=True)
        response.raise_for_status()

        local_path = self.local_cache / artifact_id
        local_path.mkdir(parents=True, exist_ok=True)

        filename = "content"
        if "content-disposition" in response.headers:
            fname = re.findall("filename=(.+)", response.headers["content-disposition"])
            if fname:
                filename = fname[0].strip('"')

        file_path = local_path / filename

        with open(file_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

        # 4. Handle decompression. materialize() zips directory artifacts with
        # root_dir set to the directory itself, so the archive holds the
        # directory's *contents*; unpack into a directory named after the
        # artifact so materializers receive the directory path they expect.
        load_path = file_path
        if filename.endswith(".zip"):
            unzipped_name = filename[:-4]
            target_dir = local_path / unzipped_name
            shutil.unpack_archive(file_path, target_dir)
            load_path = target_dir

        # 5. Deserialize
        # A. Pickle/cloudpickle payloads carry their own type information.
        if filename.endswith(".pkl"):
            with open(file_path, "rb") as f:
                return pickle.load(f)

        # B. Try a registered materializer using the stored type metadata.
        if meta:
            type_module = meta.get("metadata", {}).get("type_module")
            type_name = meta.get("metadata", {}).get("type_name")

            if type_module and type_name:
                full_type_name = f"{type_module}.{type_name}"
                materializer = get_materializer_by_type_name(full_type_name)
                if materializer:
                    return materializer.load(load_path)

            # Fallback: the simple name from the 'type' field; less reliable,
            # but get_materializer_by_type_name matches simple names too.
            simple_type = meta.get("type")
            if simple_type:
                materializer = get_materializer_by_type_name(simple_type)
                if materializer:
                    return materializer.load(load_path)

        # C. Fallback: return the local path so the caller can handle the
        # raw bytes manually.
        return load_path

    def load_artifact_metadata(self, artifact_id: str) -> dict | None:
        """Fetch artifact metadata, or None on 404 or any other failure."""
        url = self._url(f"assets/{artifact_id}")
        try:
            response = self._session.get(url)
            if response.status_code == 404:
                return None
            # Treat other error statuses as "unavailable" rather than
            # returning the server's error body as if it were metadata.
            response.raise_for_status()
            return response.json()
        except Exception:
            return None

    def exists(self, path: str) -> bool:
        """Check whether *path* resolves to a known artifact on the server."""
        return self._resolve_artifact_id(path) is not None

    def delete(self, path: str) -> None:
        """Delete an artifact (best-effort: server errors are not raised)."""
        artifact_id = self._resolve_artifact_id(path) or path
        url = self._url(f"assets/{artifact_id}")
        self._session.delete(url)

    def list_artifacts(self, prefix: str = "") -> list[str]:
        """List known artifact paths, optionally filtered by *prefix*."""
        url = self._url("assets/")
        response = self._session.get(url, params={"limit": 1000})
        if response.status_code != 200:
            return []

        assets = response.json().get("assets", [])
        paths = [a.get("path") for a in assets if a.get("path")]

        if prefix:
            return [p for p in paths if p.startswith(prefix)]
        return paths
|