flowyml 1.3.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowyml/core/execution_status.py +1 -0
- flowyml/core/executor.py +175 -3
- flowyml/core/observability.py +7 -7
- flowyml/core/resources.py +12 -12
- flowyml/core/retry_policy.py +2 -2
- flowyml/core/scheduler.py +9 -9
- flowyml/core/scheduler_config.py +2 -3
- flowyml/core/submission_result.py +4 -4
- flowyml/stacks/bridge.py +9 -9
- flowyml/stacks/plugins.py +2 -2
- flowyml/stacks/registry.py +21 -0
- flowyml/storage/materializers/base.py +33 -0
- flowyml/storage/metadata.py +3 -1042
- flowyml/storage/remote.py +590 -0
- flowyml/storage/sql.py +951 -0
- flowyml/ui/backend/dependencies.py +28 -0
- flowyml/ui/backend/main.py +4 -79
- flowyml/ui/backend/routers/assets.py +170 -9
- flowyml/ui/backend/routers/client.py +6 -6
- flowyml/ui/backend/routers/execution.py +2 -2
- flowyml/ui/backend/routers/experiments.py +53 -6
- flowyml/ui/backend/routers/metrics.py +23 -68
- flowyml/ui/backend/routers/pipelines.py +19 -10
- flowyml/ui/backend/routers/runs.py +287 -9
- flowyml/ui/backend/routers/schedules.py +5 -21
- flowyml/ui/backend/routers/stats.py +14 -0
- flowyml/ui/backend/routers/traces.py +37 -53
- flowyml/ui/backend/routers/websocket.py +121 -0
- flowyml/ui/frontend/dist/assets/index-CBUXOWze.css +1 -0
- flowyml/ui/frontend/dist/assets/index-DF8dJaFL.js +629 -0
- flowyml/ui/frontend/dist/index.html +2 -2
- flowyml/ui/frontend/package-lock.json +289 -0
- flowyml/ui/frontend/package.json +1 -0
- flowyml/ui/frontend/src/app/compare/page.jsx +213 -0
- flowyml/ui/frontend/src/app/experiments/compare/page.jsx +289 -0
- flowyml/ui/frontend/src/app/experiments/page.jsx +61 -1
- flowyml/ui/frontend/src/app/runs/[runId]/page.jsx +418 -203
- flowyml/ui/frontend/src/app/runs/page.jsx +64 -3
- flowyml/ui/frontend/src/app/settings/page.jsx +1 -1
- flowyml/ui/frontend/src/app/tokens/page.jsx +8 -6
- flowyml/ui/frontend/src/components/ArtifactViewer.jsx +159 -0
- flowyml/ui/frontend/src/components/NavigationTree.jsx +26 -9
- flowyml/ui/frontend/src/components/PipelineGraph.jsx +26 -24
- flowyml/ui/frontend/src/components/RunDetailsPanel.jsx +42 -14
- flowyml/ui/frontend/src/router/index.jsx +4 -0
- {flowyml-1.3.0.dist-info → flowyml-1.5.0.dist-info}/METADATA +3 -1
- {flowyml-1.3.0.dist-info → flowyml-1.5.0.dist-info}/RECORD +50 -42
- flowyml/ui/frontend/dist/assets/index-DcYwrn2j.css +0 -1
- flowyml/ui/frontend/dist/assets/index-Dlz_ygOL.js +0 -592
- {flowyml-1.3.0.dist-info → flowyml-1.5.0.dist-info}/WHEEL +0 -0
- {flowyml-1.3.0.dist-info → flowyml-1.5.0.dist-info}/entry_points.txt +0 -0
- {flowyml-1.3.0.dist-info → flowyml-1.5.0.dist-info}/licenses/LICENSE +0 -0
flowyml/storage/metadata.py
CHANGED
|
@@ -1,11 +1,6 @@
|
|
|
1
1
|
"""Metadata storage backends for flowyml."""
|
|
2
2
|
|
|
3
|
-
import json
|
|
4
|
-
import sqlite3
|
|
5
3
|
from abc import ABC, abstractmethod
|
|
6
|
-
from pathlib import Path
|
|
7
|
-
import contextlib
|
|
8
|
-
import builtins
|
|
9
4
|
|
|
10
5
|
|
|
11
6
|
class MetadataStore(ABC):
|
|
@@ -52,1041 +47,7 @@ class MetadataStore(ABC):
|
|
|
52
47
|
pass
|
|
53
48
|
|
|
54
49
|
|
|
55
|
-
|
|
56
|
-
"""SQLite-based metadata storage."""
|
|
50
|
+
from flowyml.storage.sql import SQLMetadataStore # noqa: E402
|
|
57
51
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
Args:
|
|
62
|
-
db_path: Path to SQLite database file
|
|
63
|
-
"""
|
|
64
|
-
self.db_path = Path(db_path)
|
|
65
|
-
self.db_path.parent.mkdir(parents=True, exist_ok=True)
|
|
66
|
-
self._init_db()
|
|
67
|
-
|
|
68
|
-
def _init_db(self) -> None:
|
|
69
|
-
"""Initialize database schema."""
|
|
70
|
-
conn = sqlite3.connect(self.db_path)
|
|
71
|
-
cursor = conn.cursor()
|
|
72
|
-
|
|
73
|
-
# Runs table
|
|
74
|
-
cursor.execute(
|
|
75
|
-
"""
|
|
76
|
-
CREATE TABLE IF NOT EXISTS runs (
|
|
77
|
-
run_id TEXT PRIMARY KEY,
|
|
78
|
-
pipeline_name TEXT,
|
|
79
|
-
status TEXT,
|
|
80
|
-
start_time TEXT,
|
|
81
|
-
end_time TEXT,
|
|
82
|
-
duration REAL,
|
|
83
|
-
metadata TEXT,
|
|
84
|
-
project TEXT,
|
|
85
|
-
created_at TEXT DEFAULT CURRENT_TIMESTAMP
|
|
86
|
-
)
|
|
87
|
-
""",
|
|
88
|
-
)
|
|
89
|
-
|
|
90
|
-
# Migration: Add project column if it doesn't exist
|
|
91
|
-
# Migration: Add project column if it doesn't exist
|
|
92
|
-
with contextlib.suppress(sqlite3.OperationalError):
|
|
93
|
-
cursor.execute("ALTER TABLE runs ADD COLUMN project TEXT")
|
|
94
|
-
|
|
95
|
-
# Artifacts table
|
|
96
|
-
cursor.execute(
|
|
97
|
-
"""
|
|
98
|
-
CREATE TABLE IF NOT EXISTS artifacts (
|
|
99
|
-
artifact_id TEXT PRIMARY KEY,
|
|
100
|
-
name TEXT,
|
|
101
|
-
type TEXT,
|
|
102
|
-
run_id TEXT,
|
|
103
|
-
path TEXT,
|
|
104
|
-
metadata TEXT,
|
|
105
|
-
project TEXT,
|
|
106
|
-
created_at TEXT DEFAULT CURRENT_TIMESTAMP,
|
|
107
|
-
FOREIGN KEY (run_id) REFERENCES runs(run_id)
|
|
108
|
-
)
|
|
109
|
-
""",
|
|
110
|
-
)
|
|
111
|
-
|
|
112
|
-
# Migration: Add project column to artifacts if it doesn't exist
|
|
113
|
-
# Migration: Add project column to artifacts if it doesn't exist
|
|
114
|
-
with contextlib.suppress(sqlite3.OperationalError):
|
|
115
|
-
cursor.execute("ALTER TABLE artifacts ADD COLUMN project TEXT")
|
|
116
|
-
|
|
117
|
-
# Metrics table
|
|
118
|
-
cursor.execute(
|
|
119
|
-
"""
|
|
120
|
-
CREATE TABLE IF NOT EXISTS metrics (
|
|
121
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
122
|
-
run_id TEXT,
|
|
123
|
-
name TEXT,
|
|
124
|
-
value REAL,
|
|
125
|
-
step INTEGER,
|
|
126
|
-
timestamp TEXT DEFAULT CURRENT_TIMESTAMP,
|
|
127
|
-
FOREIGN KEY (run_id) REFERENCES runs(run_id)
|
|
128
|
-
)
|
|
129
|
-
""",
|
|
130
|
-
)
|
|
131
|
-
|
|
132
|
-
# Model metrics table
|
|
133
|
-
cursor.execute(
|
|
134
|
-
"""
|
|
135
|
-
CREATE TABLE IF NOT EXISTS model_metrics (
|
|
136
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
137
|
-
project TEXT,
|
|
138
|
-
model_name TEXT,
|
|
139
|
-
run_id TEXT,
|
|
140
|
-
metric_name TEXT,
|
|
141
|
-
metric_value REAL,
|
|
142
|
-
environment TEXT,
|
|
143
|
-
tags TEXT,
|
|
144
|
-
created_at TEXT DEFAULT CURRENT_TIMESTAMP
|
|
145
|
-
)
|
|
146
|
-
""",
|
|
147
|
-
)
|
|
148
|
-
|
|
149
|
-
# Parameters table
|
|
150
|
-
cursor.execute(
|
|
151
|
-
"""
|
|
152
|
-
CREATE TABLE IF NOT EXISTS parameters (
|
|
153
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
154
|
-
run_id TEXT,
|
|
155
|
-
name TEXT,
|
|
156
|
-
value TEXT,
|
|
157
|
-
FOREIGN KEY (run_id) REFERENCES runs(run_id)
|
|
158
|
-
)
|
|
159
|
-
""",
|
|
160
|
-
)
|
|
161
|
-
|
|
162
|
-
# Experiments table
|
|
163
|
-
cursor.execute(
|
|
164
|
-
"""
|
|
165
|
-
CREATE TABLE IF NOT EXISTS experiments (
|
|
166
|
-
experiment_id TEXT PRIMARY KEY,
|
|
167
|
-
name TEXT,
|
|
168
|
-
description TEXT,
|
|
169
|
-
tags TEXT,
|
|
170
|
-
project TEXT,
|
|
171
|
-
created_at TEXT DEFAULT CURRENT_TIMESTAMP
|
|
172
|
-
)
|
|
173
|
-
""",
|
|
174
|
-
)
|
|
175
|
-
|
|
176
|
-
# Migration: Add project column to experiments if it doesn't exist
|
|
177
|
-
# Migration: Add project column to experiments if it doesn't exist
|
|
178
|
-
with contextlib.suppress(sqlite3.OperationalError):
|
|
179
|
-
cursor.execute("ALTER TABLE experiments ADD COLUMN project TEXT")
|
|
180
|
-
|
|
181
|
-
# Experiment Runs link table
|
|
182
|
-
cursor.execute(
|
|
183
|
-
"""
|
|
184
|
-
CREATE TABLE IF NOT EXISTS experiment_runs (
|
|
185
|
-
experiment_id TEXT,
|
|
186
|
-
run_id TEXT,
|
|
187
|
-
metrics TEXT,
|
|
188
|
-
parameters TEXT,
|
|
189
|
-
timestamp TEXT DEFAULT CURRENT_TIMESTAMP,
|
|
190
|
-
PRIMARY KEY (experiment_id, run_id),
|
|
191
|
-
FOREIGN KEY (experiment_id) REFERENCES experiments(experiment_id),
|
|
192
|
-
FOREIGN KEY (run_id) REFERENCES runs(run_id)
|
|
193
|
-
)
|
|
194
|
-
""",
|
|
195
|
-
)
|
|
196
|
-
|
|
197
|
-
# Traces table for GenAI monitoring
|
|
198
|
-
cursor.execute(
|
|
199
|
-
"""
|
|
200
|
-
CREATE TABLE IF NOT EXISTS traces (
|
|
201
|
-
event_id TEXT PRIMARY KEY,
|
|
202
|
-
trace_id TEXT,
|
|
203
|
-
parent_id TEXT,
|
|
204
|
-
event_type TEXT,
|
|
205
|
-
name TEXT,
|
|
206
|
-
inputs TEXT,
|
|
207
|
-
outputs TEXT,
|
|
208
|
-
start_time REAL,
|
|
209
|
-
end_time REAL,
|
|
210
|
-
duration REAL,
|
|
211
|
-
status TEXT,
|
|
212
|
-
error TEXT,
|
|
213
|
-
metadata TEXT,
|
|
214
|
-
prompt_tokens INTEGER,
|
|
215
|
-
completion_tokens INTEGER,
|
|
216
|
-
total_tokens INTEGER,
|
|
217
|
-
cost REAL,
|
|
218
|
-
model TEXT,
|
|
219
|
-
project TEXT,
|
|
220
|
-
created_at TEXT DEFAULT CURRENT_TIMESTAMP
|
|
221
|
-
)
|
|
222
|
-
""",
|
|
223
|
-
)
|
|
224
|
-
|
|
225
|
-
# Migration: Add project column to traces if it doesn't exist
|
|
226
|
-
# Migration: Add project column to traces if it doesn't exist
|
|
227
|
-
with contextlib.suppress(sqlite3.OperationalError):
|
|
228
|
-
cursor.execute("ALTER TABLE traces ADD COLUMN project TEXT")
|
|
229
|
-
|
|
230
|
-
# Create indexes for better query performance
|
|
231
|
-
cursor.execute("CREATE INDEX IF NOT EXISTS idx_runs_pipeline ON runs(pipeline_name)")
|
|
232
|
-
cursor.execute("CREATE INDEX IF NOT EXISTS idx_runs_status ON runs(status)")
|
|
233
|
-
cursor.execute("CREATE INDEX IF NOT EXISTS idx_runs_project ON runs(project)")
|
|
234
|
-
cursor.execute("CREATE INDEX IF NOT EXISTS idx_artifacts_run ON artifacts(run_id)")
|
|
235
|
-
cursor.execute("CREATE INDEX IF NOT EXISTS idx_artifacts_project ON artifacts(project)")
|
|
236
|
-
cursor.execute("CREATE INDEX IF NOT EXISTS idx_metrics_run ON metrics(run_id)")
|
|
237
|
-
cursor.execute("CREATE INDEX IF NOT EXISTS idx_parameters_run ON parameters(run_id)")
|
|
238
|
-
cursor.execute("CREATE INDEX IF NOT EXISTS idx_experiments_name ON experiments(name)")
|
|
239
|
-
cursor.execute("CREATE INDEX IF NOT EXISTS idx_experiments_project ON experiments(project)")
|
|
240
|
-
cursor.execute("CREATE INDEX IF NOT EXISTS idx_traces_trace_id ON traces(trace_id)")
|
|
241
|
-
cursor.execute("CREATE INDEX IF NOT EXISTS idx_traces_type ON traces(event_type)")
|
|
242
|
-
cursor.execute("CREATE INDEX IF NOT EXISTS idx_traces_project ON traces(project)")
|
|
243
|
-
|
|
244
|
-
# Pipeline definitions for scheduling
|
|
245
|
-
cursor.execute(
|
|
246
|
-
"""
|
|
247
|
-
CREATE TABLE IF NOT EXISTS pipeline_definitions (
|
|
248
|
-
pipeline_name TEXT PRIMARY KEY,
|
|
249
|
-
definition TEXT NOT NULL,
|
|
250
|
-
created_at TEXT NOT NULL,
|
|
251
|
-
updated_at TEXT NOT NULL
|
|
252
|
-
)
|
|
253
|
-
""",
|
|
254
|
-
)
|
|
255
|
-
|
|
256
|
-
conn.commit()
|
|
257
|
-
conn.close()
|
|
258
|
-
|
|
259
|
-
def save_run(self, run_id: str, metadata: dict) -> None:
|
|
260
|
-
"""Save run metadata to database.
|
|
261
|
-
|
|
262
|
-
Args:
|
|
263
|
-
run_id: Unique run identifier
|
|
264
|
-
metadata: Run metadata dictionary
|
|
265
|
-
"""
|
|
266
|
-
conn = sqlite3.connect(self.db_path)
|
|
267
|
-
cursor = conn.cursor()
|
|
268
|
-
|
|
269
|
-
cursor.execute(
|
|
270
|
-
"""
|
|
271
|
-
INSERT OR REPLACE INTO runs
|
|
272
|
-
(run_id, pipeline_name, status, start_time, end_time, duration, metadata, project)
|
|
273
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
274
|
-
""",
|
|
275
|
-
(
|
|
276
|
-
run_id,
|
|
277
|
-
metadata.get("pipeline_name"),
|
|
278
|
-
metadata.get("status"),
|
|
279
|
-
metadata.get("start_time"),
|
|
280
|
-
metadata.get("end_time"),
|
|
281
|
-
metadata.get("duration"),
|
|
282
|
-
json.dumps(metadata),
|
|
283
|
-
metadata.get("project"),
|
|
284
|
-
),
|
|
285
|
-
)
|
|
286
|
-
|
|
287
|
-
# Save parameters
|
|
288
|
-
if "parameters" in metadata:
|
|
289
|
-
cursor.execute("DELETE FROM parameters WHERE run_id = ?", (run_id,))
|
|
290
|
-
for name, value in metadata["parameters"].items():
|
|
291
|
-
cursor.execute(
|
|
292
|
-
"INSERT INTO parameters (run_id, name, value) VALUES (?, ?, ?)",
|
|
293
|
-
(run_id, name, json.dumps(value)),
|
|
294
|
-
)
|
|
295
|
-
|
|
296
|
-
# Save metrics
|
|
297
|
-
if "metrics" in metadata:
|
|
298
|
-
cursor.execute("DELETE FROM metrics WHERE run_id = ?", (run_id,))
|
|
299
|
-
for name, value in metadata["metrics"].items():
|
|
300
|
-
cursor.execute(
|
|
301
|
-
"INSERT INTO metrics (run_id, name, value, step) VALUES (?, ?, ?, ?)",
|
|
302
|
-
(run_id, name, value, 0),
|
|
303
|
-
)
|
|
304
|
-
|
|
305
|
-
conn.commit()
|
|
306
|
-
conn.close()
|
|
307
|
-
|
|
308
|
-
def load_run(self, run_id: str) -> dict | None:
|
|
309
|
-
"""Load run metadata from database.
|
|
310
|
-
|
|
311
|
-
Args:
|
|
312
|
-
run_id: Unique run identifier
|
|
313
|
-
|
|
314
|
-
Returns:
|
|
315
|
-
Run metadata dictionary or None if not found
|
|
316
|
-
"""
|
|
317
|
-
conn = sqlite3.connect(self.db_path)
|
|
318
|
-
cursor = conn.cursor()
|
|
319
|
-
|
|
320
|
-
cursor.execute("SELECT metadata FROM runs WHERE run_id = ?", (run_id,))
|
|
321
|
-
row = cursor.fetchone()
|
|
322
|
-
|
|
323
|
-
conn.close()
|
|
324
|
-
|
|
325
|
-
if row:
|
|
326
|
-
data = json.loads(row[0])
|
|
327
|
-
# Ensure project is in metadata if it's in the column but not the JSON blob
|
|
328
|
-
# (This might happen if we update the column directly)
|
|
329
|
-
# Actually, let's just return what's in the blob for now,
|
|
330
|
-
# but we should probably sync them.
|
|
331
|
-
return data
|
|
332
|
-
return None
|
|
333
|
-
|
|
334
|
-
def update_run_project(self, run_id: str, project_name: str) -> None:
|
|
335
|
-
"""Update the project for a run.
|
|
336
|
-
|
|
337
|
-
Args:
|
|
338
|
-
run_id: Run identifier
|
|
339
|
-
project_name: Name of the project
|
|
340
|
-
"""
|
|
341
|
-
conn = sqlite3.connect(self.db_path)
|
|
342
|
-
cursor = conn.cursor()
|
|
343
|
-
|
|
344
|
-
# 1. Update the column
|
|
345
|
-
cursor.execute("UPDATE runs SET project = ? WHERE run_id = ?", (project_name, run_id))
|
|
346
|
-
|
|
347
|
-
# 2. Update the JSON blob
|
|
348
|
-
cursor.execute("SELECT metadata FROM runs WHERE run_id = ?", (run_id,))
|
|
349
|
-
row = cursor.fetchone()
|
|
350
|
-
if row:
|
|
351
|
-
metadata = json.loads(row[0])
|
|
352
|
-
metadata["project"] = project_name
|
|
353
|
-
cursor.execute(
|
|
354
|
-
"UPDATE runs SET metadata = ? WHERE run_id = ?",
|
|
355
|
-
(json.dumps(metadata), run_id),
|
|
356
|
-
)
|
|
357
|
-
|
|
358
|
-
conn.commit()
|
|
359
|
-
conn.close()
|
|
360
|
-
|
|
361
|
-
def list_runs(self, limit: int | None = None) -> list[dict]:
|
|
362
|
-
"""List all runs from database.
|
|
363
|
-
|
|
364
|
-
Args:
|
|
365
|
-
limit: Optional limit on number of results
|
|
366
|
-
|
|
367
|
-
Returns:
|
|
368
|
-
List of run metadata dictionaries
|
|
369
|
-
"""
|
|
370
|
-
conn = sqlite3.connect(self.db_path)
|
|
371
|
-
cursor = conn.cursor()
|
|
372
|
-
|
|
373
|
-
query = "SELECT metadata FROM runs ORDER BY created_at DESC"
|
|
374
|
-
if limit:
|
|
375
|
-
query += f" LIMIT {limit}"
|
|
376
|
-
|
|
377
|
-
cursor.execute(query)
|
|
378
|
-
rows = cursor.fetchall()
|
|
379
|
-
|
|
380
|
-
conn.close()
|
|
381
|
-
|
|
382
|
-
return [json.loads(row[0]) for row in rows]
|
|
383
|
-
|
|
384
|
-
def list_pipelines(self, project: str = None) -> list[str]:
|
|
385
|
-
"""List all unique pipeline names.
|
|
386
|
-
|
|
387
|
-
Args:
|
|
388
|
-
project: Optional project name to filter by
|
|
389
|
-
|
|
390
|
-
Returns:
|
|
391
|
-
List of pipeline names
|
|
392
|
-
"""
|
|
393
|
-
conn = sqlite3.connect(self.db_path)
|
|
394
|
-
cursor = conn.cursor()
|
|
395
|
-
|
|
396
|
-
if project:
|
|
397
|
-
cursor.execute(
|
|
398
|
-
"SELECT DISTINCT pipeline_name FROM runs WHERE project = ? ORDER BY pipeline_name",
|
|
399
|
-
(project,),
|
|
400
|
-
)
|
|
401
|
-
else:
|
|
402
|
-
cursor.execute("SELECT DISTINCT pipeline_name FROM runs ORDER BY pipeline_name")
|
|
403
|
-
|
|
404
|
-
rows = cursor.fetchall()
|
|
405
|
-
|
|
406
|
-
conn.close()
|
|
407
|
-
|
|
408
|
-
return [row[0] for row in rows if row[0]]
|
|
409
|
-
|
|
410
|
-
def save_artifact(self, artifact_id: str, metadata: dict) -> None:
|
|
411
|
-
"""Save artifact metadata to database.
|
|
412
|
-
|
|
413
|
-
Args:
|
|
414
|
-
artifact_id: Unique artifact identifier
|
|
415
|
-
metadata: Artifact metadata dictionary
|
|
416
|
-
"""
|
|
417
|
-
conn = sqlite3.connect(self.db_path)
|
|
418
|
-
cursor = conn.cursor()
|
|
419
|
-
|
|
420
|
-
cursor.execute(
|
|
421
|
-
"""
|
|
422
|
-
INSERT OR REPLACE INTO artifacts
|
|
423
|
-
(artifact_id, name, type, run_id, path, metadata, project)
|
|
424
|
-
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
425
|
-
""",
|
|
426
|
-
(
|
|
427
|
-
artifact_id,
|
|
428
|
-
metadata.get("name"),
|
|
429
|
-
metadata.get("type"),
|
|
430
|
-
metadata.get("run_id"),
|
|
431
|
-
metadata.get("path"),
|
|
432
|
-
json.dumps(metadata),
|
|
433
|
-
metadata.get("project"),
|
|
434
|
-
),
|
|
435
|
-
)
|
|
436
|
-
|
|
437
|
-
conn.commit()
|
|
438
|
-
conn.close()
|
|
439
|
-
|
|
440
|
-
def load_artifact(self, artifact_id: str) -> dict | None:
|
|
441
|
-
"""Load artifact metadata from database.
|
|
442
|
-
|
|
443
|
-
Args:
|
|
444
|
-
artifact_id: Unique artifact identifier
|
|
445
|
-
|
|
446
|
-
Returns:
|
|
447
|
-
Artifact metadata dictionary or None if not found
|
|
448
|
-
"""
|
|
449
|
-
conn = sqlite3.connect(self.db_path)
|
|
450
|
-
cursor = conn.cursor()
|
|
451
|
-
|
|
452
|
-
cursor.execute("SELECT metadata FROM artifacts WHERE artifact_id = ?", (artifact_id,))
|
|
453
|
-
row = cursor.fetchone()
|
|
454
|
-
|
|
455
|
-
conn.close()
|
|
456
|
-
|
|
457
|
-
if row:
|
|
458
|
-
return json.loads(row[0])
|
|
459
|
-
return None
|
|
460
|
-
|
|
461
|
-
def list_assets(self, limit: int | None = None, **filters) -> list[dict]:
|
|
462
|
-
"""List assets from database with optional filters.
|
|
463
|
-
|
|
464
|
-
Args:
|
|
465
|
-
limit: Optional limit on number of results
|
|
466
|
-
**filters: Filter criteria (type, run_id, etc.)
|
|
467
|
-
|
|
468
|
-
Returns:
|
|
469
|
-
List of artifact metadata dictionaries
|
|
470
|
-
"""
|
|
471
|
-
conn = sqlite3.connect(self.db_path)
|
|
472
|
-
cursor = conn.cursor()
|
|
473
|
-
|
|
474
|
-
conditions = []
|
|
475
|
-
params = []
|
|
476
|
-
|
|
477
|
-
for key, value in filters.items():
|
|
478
|
-
if value is not None:
|
|
479
|
-
conditions.append(f"{key} = ?")
|
|
480
|
-
params.append(value)
|
|
481
|
-
|
|
482
|
-
query = "SELECT metadata FROM artifacts"
|
|
483
|
-
if conditions:
|
|
484
|
-
query += " WHERE " + " AND ".join(conditions)
|
|
485
|
-
|
|
486
|
-
query += " ORDER BY created_at DESC"
|
|
487
|
-
|
|
488
|
-
if limit:
|
|
489
|
-
query += f" LIMIT {limit}"
|
|
490
|
-
|
|
491
|
-
cursor.execute(query, params)
|
|
492
|
-
rows = cursor.fetchall()
|
|
493
|
-
|
|
494
|
-
conn.close()
|
|
495
|
-
|
|
496
|
-
return [json.loads(row[0]) for row in rows]
|
|
497
|
-
|
|
498
|
-
def query(self, **filters) -> list[dict]:
|
|
499
|
-
"""Query runs with filters.
|
|
500
|
-
|
|
501
|
-
Args:
|
|
502
|
-
**filters: Filter criteria (pipeline_name, status, etc.)
|
|
503
|
-
|
|
504
|
-
Returns:
|
|
505
|
-
List of matching run metadata dictionaries
|
|
506
|
-
"""
|
|
507
|
-
conn = sqlite3.connect(self.db_path)
|
|
508
|
-
cursor = conn.cursor()
|
|
509
|
-
|
|
510
|
-
where_clauses = []
|
|
511
|
-
params = []
|
|
512
|
-
|
|
513
|
-
for key, value in filters.items():
|
|
514
|
-
where_clauses.append(f"{key} = ?")
|
|
515
|
-
params.append(value)
|
|
516
|
-
|
|
517
|
-
query = "SELECT metadata FROM runs"
|
|
518
|
-
if where_clauses:
|
|
519
|
-
query += " WHERE " + " AND ".join(where_clauses)
|
|
520
|
-
query += " ORDER BY created_at DESC"
|
|
521
|
-
|
|
522
|
-
cursor.execute(query, params)
|
|
523
|
-
rows = cursor.fetchall()
|
|
524
|
-
|
|
525
|
-
conn.close()
|
|
526
|
-
|
|
527
|
-
return [json.loads(row[0]) for row in rows]
|
|
528
|
-
|
|
529
|
-
def save_metric(self, run_id: str, name: str, value: float, step: int = 0) -> None:
|
|
530
|
-
"""Save a single metric value.
|
|
531
|
-
|
|
532
|
-
Args:
|
|
533
|
-
run_id: Run identifier
|
|
534
|
-
name: Metric name
|
|
535
|
-
value: Metric value
|
|
536
|
-
step: Training step/iteration
|
|
537
|
-
"""
|
|
538
|
-
conn = sqlite3.connect(self.db_path)
|
|
539
|
-
cursor = conn.cursor()
|
|
540
|
-
|
|
541
|
-
cursor.execute(
|
|
542
|
-
"INSERT INTO metrics (run_id, name, value, step) VALUES (?, ?, ?, ?)",
|
|
543
|
-
(run_id, name, value, step),
|
|
544
|
-
)
|
|
545
|
-
|
|
546
|
-
conn.commit()
|
|
547
|
-
conn.close()
|
|
548
|
-
|
|
549
|
-
def get_metrics(self, run_id: str, name: str | None = None) -> list[dict]:
|
|
550
|
-
"""Get metrics for a run.
|
|
551
|
-
|
|
552
|
-
Args:
|
|
553
|
-
run_id: Run identifier
|
|
554
|
-
name: Optional metric name filter
|
|
555
|
-
|
|
556
|
-
Returns:
|
|
557
|
-
List of metric dictionaries
|
|
558
|
-
"""
|
|
559
|
-
conn = sqlite3.connect(self.db_path)
|
|
560
|
-
cursor = conn.cursor()
|
|
561
|
-
|
|
562
|
-
if name:
|
|
563
|
-
cursor.execute(
|
|
564
|
-
"SELECT name, value, step, timestamp FROM metrics WHERE run_id = ? AND name = ? ORDER BY step",
|
|
565
|
-
(run_id, name),
|
|
566
|
-
)
|
|
567
|
-
else:
|
|
568
|
-
cursor.execute(
|
|
569
|
-
"SELECT name, value, step, timestamp FROM metrics WHERE run_id = ? ORDER BY step",
|
|
570
|
-
(run_id,),
|
|
571
|
-
)
|
|
572
|
-
|
|
573
|
-
rows = cursor.fetchall()
|
|
574
|
-
conn.close()
|
|
575
|
-
|
|
576
|
-
return [{"name": row[0], "value": row[1], "step": row[2], "timestamp": row[3]} for row in rows]
|
|
577
|
-
|
|
578
|
-
def log_model_metrics(
|
|
579
|
-
self,
|
|
580
|
-
project: str,
|
|
581
|
-
model_name: str,
|
|
582
|
-
metrics: dict[str, float],
|
|
583
|
-
run_id: str | None = None,
|
|
584
|
-
environment: str | None = None,
|
|
585
|
-
tags: dict | None = None,
|
|
586
|
-
) -> None:
|
|
587
|
-
"""Log production model metrics independent of pipeline runs."""
|
|
588
|
-
if not metrics:
|
|
589
|
-
return
|
|
590
|
-
|
|
591
|
-
conn = sqlite3.connect(self.db_path)
|
|
592
|
-
cursor = conn.cursor()
|
|
593
|
-
tags_json = json.dumps(tags or {})
|
|
594
|
-
|
|
595
|
-
for metric_name, value in metrics.items():
|
|
596
|
-
try:
|
|
597
|
-
metric_value = float(value)
|
|
598
|
-
except (TypeError, ValueError):
|
|
599
|
-
continue
|
|
600
|
-
|
|
601
|
-
cursor.execute(
|
|
602
|
-
"""
|
|
603
|
-
INSERT INTO model_metrics
|
|
604
|
-
(project, model_name, run_id, metric_name, metric_value, environment, tags)
|
|
605
|
-
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
606
|
-
""",
|
|
607
|
-
(project, model_name, run_id, metric_name, metric_value, environment, tags_json),
|
|
608
|
-
)
|
|
609
|
-
|
|
610
|
-
conn.commit()
|
|
611
|
-
conn.close()
|
|
612
|
-
|
|
613
|
-
def list_model_metrics(
|
|
614
|
-
self,
|
|
615
|
-
project: str | None = None,
|
|
616
|
-
model_name: str | None = None,
|
|
617
|
-
limit: int = 100,
|
|
618
|
-
) -> list[dict]:
|
|
619
|
-
"""List logged model metrics."""
|
|
620
|
-
conn = sqlite3.connect(self.db_path)
|
|
621
|
-
cursor = conn.cursor()
|
|
622
|
-
|
|
623
|
-
query = """
|
|
624
|
-
SELECT project, model_name, run_id, metric_name, metric_value, environment, tags, created_at
|
|
625
|
-
FROM model_metrics
|
|
626
|
-
"""
|
|
627
|
-
params: list = []
|
|
628
|
-
clauses = []
|
|
629
|
-
|
|
630
|
-
if project:
|
|
631
|
-
clauses.append("project = ?")
|
|
632
|
-
params.append(project)
|
|
633
|
-
if model_name:
|
|
634
|
-
clauses.append("model_name = ?")
|
|
635
|
-
params.append(model_name)
|
|
636
|
-
|
|
637
|
-
if clauses:
|
|
638
|
-
query += " WHERE " + " AND ".join(clauses)
|
|
639
|
-
|
|
640
|
-
query += " ORDER BY created_at DESC LIMIT ?"
|
|
641
|
-
params.append(limit)
|
|
642
|
-
|
|
643
|
-
cursor.execute(query, params)
|
|
644
|
-
rows = cursor.fetchall()
|
|
645
|
-
conn.close()
|
|
646
|
-
|
|
647
|
-
results = []
|
|
648
|
-
for row in rows:
|
|
649
|
-
results.append(
|
|
650
|
-
{
|
|
651
|
-
"project": row[0],
|
|
652
|
-
"model_name": row[1],
|
|
653
|
-
"run_id": row[2],
|
|
654
|
-
"metric_name": row[3],
|
|
655
|
-
"metric_value": row[4],
|
|
656
|
-
"environment": row[5],
|
|
657
|
-
"tags": json.loads(row[6]) if row[6] else {},
|
|
658
|
-
"created_at": row[7],
|
|
659
|
-
},
|
|
660
|
-
)
|
|
661
|
-
return results
|
|
662
|
-
|
|
663
|
-
def save_experiment(self, experiment_id: str, name: str, description: str = "", tags: dict = None) -> None:
|
|
664
|
-
"""Save experiment metadata.
|
|
665
|
-
|
|
666
|
-
Args:
|
|
667
|
-
experiment_id: Unique experiment identifier
|
|
668
|
-
name: Experiment name
|
|
669
|
-
description: Experiment description
|
|
670
|
-
tags: Experiment tags
|
|
671
|
-
"""
|
|
672
|
-
conn = sqlite3.connect(self.db_path)
|
|
673
|
-
cursor = conn.cursor()
|
|
674
|
-
|
|
675
|
-
cursor.execute(
|
|
676
|
-
"""
|
|
677
|
-
INSERT OR REPLACE INTO experiments
|
|
678
|
-
(experiment_id, name, description, tags)
|
|
679
|
-
VALUES (?, ?, ?, ?)
|
|
680
|
-
""",
|
|
681
|
-
(
|
|
682
|
-
experiment_id,
|
|
683
|
-
name,
|
|
684
|
-
description,
|
|
685
|
-
json.dumps(tags or {}),
|
|
686
|
-
),
|
|
687
|
-
)
|
|
688
|
-
|
|
689
|
-
conn.commit()
|
|
690
|
-
conn.close()
|
|
691
|
-
|
|
692
|
-
def log_experiment_run(
|
|
693
|
-
self,
|
|
694
|
-
experiment_id: str,
|
|
695
|
-
run_id: str,
|
|
696
|
-
metrics: dict = None,
|
|
697
|
-
parameters: dict = None,
|
|
698
|
-
) -> None:
|
|
699
|
-
"""Log a run to an experiment.
|
|
700
|
-
|
|
701
|
-
Args:
|
|
702
|
-
experiment_id: Experiment identifier
|
|
703
|
-
run_id: Run identifier
|
|
704
|
-
metrics: Metrics from the run
|
|
705
|
-
parameters: Parameters used in the run
|
|
706
|
-
"""
|
|
707
|
-
conn = sqlite3.connect(self.db_path)
|
|
708
|
-
cursor = conn.cursor()
|
|
709
|
-
|
|
710
|
-
cursor.execute(
|
|
711
|
-
"""
|
|
712
|
-
INSERT OR REPLACE INTO experiment_runs
|
|
713
|
-
(experiment_id, run_id, metrics, parameters)
|
|
714
|
-
VALUES (?, ?, ?, ?)
|
|
715
|
-
""",
|
|
716
|
-
(
|
|
717
|
-
experiment_id,
|
|
718
|
-
run_id,
|
|
719
|
-
json.dumps(metrics or {}),
|
|
720
|
-
json.dumps(parameters or {}),
|
|
721
|
-
),
|
|
722
|
-
)
|
|
723
|
-
|
|
724
|
-
conn.commit()
|
|
725
|
-
conn.close()
|
|
726
|
-
|
|
727
|
-
def list_experiments(self) -> list[dict]:
|
|
728
|
-
"""List all experiments.
|
|
729
|
-
|
|
730
|
-
Returns:
|
|
731
|
-
List of experiment dictionaries
|
|
732
|
-
"""
|
|
733
|
-
conn = sqlite3.connect(self.db_path)
|
|
734
|
-
cursor = conn.cursor()
|
|
735
|
-
|
|
736
|
-
cursor.execute(
|
|
737
|
-
"SELECT experiment_id, name, description, tags, created_at, project FROM experiments ORDER BY created_at DESC",
|
|
738
|
-
)
|
|
739
|
-
rows = cursor.fetchall()
|
|
740
|
-
|
|
741
|
-
experiments = []
|
|
742
|
-
for row in rows:
|
|
743
|
-
# Count runs for each experiment
|
|
744
|
-
cursor.execute("SELECT COUNT(*) FROM experiment_runs WHERE experiment_id = ?", (row[0],))
|
|
745
|
-
run_count = cursor.fetchone()[0]
|
|
746
|
-
|
|
747
|
-
experiments.append(
|
|
748
|
-
{
|
|
749
|
-
"experiment_id": row[0],
|
|
750
|
-
"name": row[1],
|
|
751
|
-
"description": row[2],
|
|
752
|
-
"tags": json.loads(row[3]),
|
|
753
|
-
"created_at": row[4],
|
|
754
|
-
"project": row[5],
|
|
755
|
-
"run_count": run_count,
|
|
756
|
-
},
|
|
757
|
-
)
|
|
758
|
-
conn.close()
|
|
759
|
-
return experiments
|
|
760
|
-
|
|
761
|
-
def update_experiment_project(self, experiment_name: str, project_name: str) -> None:
|
|
762
|
-
"""Update the project for an experiment.
|
|
763
|
-
|
|
764
|
-
Args:
|
|
765
|
-
experiment_name: Name of the experiment
|
|
766
|
-
project_name: New project name
|
|
767
|
-
"""
|
|
768
|
-
conn = sqlite3.connect(self.db_path)
|
|
769
|
-
cursor = conn.cursor()
|
|
770
|
-
|
|
771
|
-
try:
|
|
772
|
-
cursor.execute(
|
|
773
|
-
"UPDATE experiments SET project = ? WHERE name = ?",
|
|
774
|
-
(project_name, experiment_name),
|
|
775
|
-
)
|
|
776
|
-
conn.commit()
|
|
777
|
-
finally:
|
|
778
|
-
conn.close()
|
|
779
|
-
|
|
780
|
-
def get_experiment(self, experiment_id: str) -> dict | None:
|
|
781
|
-
"""Get experiment details.
|
|
782
|
-
|
|
783
|
-
Args:
|
|
784
|
-
experiment_id: Experiment identifier
|
|
785
|
-
|
|
786
|
-
Returns:
|
|
787
|
-
Experiment dictionary or None
|
|
788
|
-
"""
|
|
789
|
-
conn = sqlite3.connect(self.db_path)
|
|
790
|
-
cursor = conn.cursor()
|
|
791
|
-
|
|
792
|
-
cursor.execute(
|
|
793
|
-
"SELECT experiment_id, name, description, tags, created_at FROM experiments WHERE experiment_id = ?",
|
|
794
|
-
(experiment_id,),
|
|
795
|
-
)
|
|
796
|
-
row = cursor.fetchone()
|
|
797
|
-
|
|
798
|
-
if not row:
|
|
799
|
-
conn.close()
|
|
800
|
-
return None
|
|
801
|
-
|
|
802
|
-
experiment = {
|
|
803
|
-
"experiment_id": row[0],
|
|
804
|
-
"name": row[1],
|
|
805
|
-
"description": row[2],
|
|
806
|
-
"tags": json.loads(row[3]),
|
|
807
|
-
"created_at": row[4],
|
|
808
|
-
}
|
|
809
|
-
|
|
810
|
-
# Get runs
|
|
811
|
-
cursor.execute(
|
|
812
|
-
"""
|
|
813
|
-
SELECT er.run_id, er.metrics, er.parameters, er.timestamp, r.status, r.duration
|
|
814
|
-
FROM experiment_runs er
|
|
815
|
-
LEFT JOIN runs r ON er.run_id = r.run_id
|
|
816
|
-
WHERE er.experiment_id = ?
|
|
817
|
-
ORDER BY er.timestamp DESC
|
|
818
|
-
""",
|
|
819
|
-
(experiment_id,),
|
|
820
|
-
)
|
|
821
|
-
|
|
822
|
-
runs = []
|
|
823
|
-
for r in cursor.fetchall():
|
|
824
|
-
runs.append(
|
|
825
|
-
{
|
|
826
|
-
"run_id": r[0],
|
|
827
|
-
"metrics": json.loads(r[1]),
|
|
828
|
-
"parameters": json.loads(r[2]),
|
|
829
|
-
"timestamp": r[3],
|
|
830
|
-
"status": r[4],
|
|
831
|
-
"duration": r[5],
|
|
832
|
-
},
|
|
833
|
-
)
|
|
834
|
-
|
|
835
|
-
experiment["runs"] = runs
|
|
836
|
-
|
|
837
|
-
conn.close()
|
|
838
|
-
return experiment
|
|
839
|
-
|
|
840
|
-
def get_statistics(self) -> dict:
|
|
841
|
-
"""Get database statistics.
|
|
842
|
-
|
|
843
|
-
Returns:
|
|
844
|
-
Dictionary with statistics
|
|
845
|
-
"""
|
|
846
|
-
conn = sqlite3.connect(self.db_path)
|
|
847
|
-
cursor = conn.cursor()
|
|
848
|
-
|
|
849
|
-
stats = {}
|
|
850
|
-
|
|
851
|
-
cursor.execute("SELECT COUNT(*) FROM runs")
|
|
852
|
-
stats["total_runs"] = cursor.fetchone()[0]
|
|
853
|
-
|
|
854
|
-
cursor.execute("SELECT COUNT(*) FROM artifacts")
|
|
855
|
-
stats["total_artifacts"] = cursor.fetchone()[0]
|
|
856
|
-
|
|
857
|
-
cursor.execute("SELECT COUNT(*) FROM metrics")
|
|
858
|
-
stats["total_metrics"] = cursor.fetchone()[0]
|
|
859
|
-
|
|
860
|
-
cursor.execute("SELECT COUNT(DISTINCT pipeline_name) FROM runs")
|
|
861
|
-
stats["total_pipelines"] = cursor.fetchone()[0]
|
|
862
|
-
|
|
863
|
-
cursor.execute("SELECT COUNT(*) FROM experiments")
|
|
864
|
-
stats["total_experiments"] = cursor.fetchone()[0]
|
|
865
|
-
|
|
866
|
-
conn.close()
|
|
867
|
-
|
|
868
|
-
return stats
|
|
869
|
-
|
|
870
|
-
def save_trace_event(self, event: dict) -> None:
|
|
871
|
-
"""Save a trace event.
|
|
872
|
-
|
|
873
|
-
Args:
|
|
874
|
-
event: Trace event dictionary
|
|
875
|
-
"""
|
|
876
|
-
conn = sqlite3.connect(self.db_path)
|
|
877
|
-
cursor = conn.cursor()
|
|
878
|
-
|
|
879
|
-
cursor.execute(
|
|
880
|
-
"""
|
|
881
|
-
INSERT OR REPLACE INTO traces
|
|
882
|
-
(event_id, trace_id, parent_id, event_type, name, inputs, outputs,
|
|
883
|
-
start_time, end_time, duration, status, error, metadata,
|
|
884
|
-
prompt_tokens, completion_tokens, total_tokens, cost, model)
|
|
885
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
886
|
-
""",
|
|
887
|
-
(
|
|
888
|
-
event["event_id"],
|
|
889
|
-
event["trace_id"],
|
|
890
|
-
event["parent_id"],
|
|
891
|
-
event["event_type"],
|
|
892
|
-
event["name"],
|
|
893
|
-
json.dumps(event.get("inputs", {})),
|
|
894
|
-
json.dumps(event.get("outputs", {})),
|
|
895
|
-
event.get("start_time"),
|
|
896
|
-
event.get("end_time"),
|
|
897
|
-
event.get("duration"),
|
|
898
|
-
event.get("status"),
|
|
899
|
-
event.get("error"),
|
|
900
|
-
json.dumps(event.get("metadata", {})),
|
|
901
|
-
event.get("prompt_tokens", 0),
|
|
902
|
-
event.get("completion_tokens", 0),
|
|
903
|
-
event.get("total_tokens", 0),
|
|
904
|
-
event.get("cost", 0.0),
|
|
905
|
-
event.get("model"),
|
|
906
|
-
),
|
|
907
|
-
)
|
|
908
|
-
|
|
909
|
-
conn.commit()
|
|
910
|
-
conn.close()
|
|
911
|
-
|
|
912
|
-
def get_trace(self, trace_id: str) -> list[dict]:
|
|
913
|
-
"""Get all events for a trace.
|
|
914
|
-
|
|
915
|
-
Args:
|
|
916
|
-
trace_id: Trace identifier
|
|
917
|
-
|
|
918
|
-
Returns:
|
|
919
|
-
List of event dictionaries
|
|
920
|
-
"""
|
|
921
|
-
conn = sqlite3.connect(self.db_path)
|
|
922
|
-
cursor = conn.cursor()
|
|
923
|
-
|
|
924
|
-
cursor.execute(
|
|
925
|
-
"""
|
|
926
|
-
SELECT * FROM traces WHERE trace_id = ? ORDER BY start_time
|
|
927
|
-
""",
|
|
928
|
-
(trace_id,),
|
|
929
|
-
)
|
|
930
|
-
|
|
931
|
-
columns = [description[0] for description in cursor.description]
|
|
932
|
-
rows = cursor.fetchall()
|
|
933
|
-
|
|
934
|
-
events = []
|
|
935
|
-
for row in rows:
|
|
936
|
-
event = dict(zip(columns, row, strict=False))
|
|
937
|
-
# Parse JSON fields
|
|
938
|
-
for field in ["inputs", "outputs", "metadata"]:
|
|
939
|
-
if event[field]:
|
|
940
|
-
with contextlib.suppress(builtins.BaseException):
|
|
941
|
-
event[field] = json.loads(event[field])
|
|
942
|
-
events.append(event)
|
|
943
|
-
|
|
944
|
-
conn.close()
|
|
945
|
-
return events
|
|
946
|
-
|
|
947
|
-
def save_pipeline_definition(self, pipeline_name: str, definition: dict) -> None:
    """Insert or update the stored definition of a pipeline.

    An existing row keeps its original ``created_at`` timestamp and
    only has ``definition`` and ``updated_at`` refreshed.

    Args:
        pipeline_name: Unique name of the pipeline.
        definition: JSON-serializable pipeline definition.
    """
    from datetime import datetime

    conn = sqlite3.connect(self.db_path)
    try:
        cursor = conn.cursor()
        now = datetime.now().isoformat()

        # Check if definition already exists
        cursor.execute(
            "SELECT pipeline_name FROM pipeline_definitions WHERE pipeline_name = ?",
            (pipeline_name,),
        )
        if cursor.fetchone():
            # Update existing row, preserving created_at.
            cursor.execute(
                """
                UPDATE pipeline_definitions
                SET definition = ?, updated_at = ?
                WHERE pipeline_name = ?
                """,
                (json.dumps(definition), now, pipeline_name),
            )
        else:
            # Insert new row.
            cursor.execute(
                """
                INSERT INTO pipeline_definitions (pipeline_name, definition, created_at, updated_at)
                VALUES (?, ?, ?, ?)
                """,
                (pipeline_name, json.dumps(definition), now, now),
            )

        conn.commit()
    finally:
        # Always release the connection, even when a statement raises;
        # the original leaked the handle on failure.
        conn.close()
def update_pipeline_project(self, pipeline_name: str, project_name: str) -> None:
    """Move all runs of a pipeline (and related records) to a project.

    Updates the ``project`` column on runs, each run's JSON metadata
    blob, and -- when the column exists in the schema -- the
    ``project`` column on artifacts and model_metrics rows linked to
    those runs. Traces and experiments are intentionally untouched:
    traces carry no run_id column (the link lives inside a JSON blob,
    which is expensive to match in SQLite), and experiments may group
    runs from several pipelines.

    Args:
        pipeline_name: Name of the pipeline whose runs are moved.
        project_name: New project name.
    """
    conn = sqlite3.connect(self.db_path)
    cursor = conn.cursor()

    try:
        # 1. Update the project column for all runs
        cursor.execute(
            "UPDATE runs SET project = ? WHERE pipeline_name = ?",
            (project_name, pipeline_name),
        )

        # 2. Keep each run's JSON metadata blob in sync
        cursor.execute(
            "SELECT run_id, metadata FROM runs WHERE pipeline_name = ?",
            (pipeline_name,),
        )
        for run_id, metadata_json in cursor.fetchall():
            # A NULL/empty metadata blob must not abort the whole
            # migration (json.loads(None) raises TypeError).
            metadata = json.loads(metadata_json) if metadata_json else {}
            metadata["project"] = project_name
            cursor.execute(
                "UPDATE runs SET metadata = ? WHERE run_id = ?",
                (json.dumps(metadata), run_id),
            )

        # 3. Update artifacts table (only if the schema has a project column)
        cursor.execute("PRAGMA table_info(artifacts)")
        if "project" in [info[1] for info in cursor.fetchall()]:
            cursor.execute(
                """
                UPDATE artifacts
                SET project = ?
                WHERE run_id IN (SELECT run_id FROM runs WHERE pipeline_name = ?)
                """,
                (project_name, pipeline_name),
            )

        # 4. Traces are skipped: they cannot be linked to runs without a
        #    run_id column, and matching the JSON blob would be costly.

        # 5. Update model_metrics table (only if the schema has a project column)
        cursor.execute("PRAGMA table_info(model_metrics)")
        if "project" in [info[1] for info in cursor.fetchall()]:
            cursor.execute(
                """
                UPDATE model_metrics
                SET project = ?
                WHERE run_id IN (SELECT run_id FROM runs WHERE pipeline_name = ?)
                """,
                (project_name, pipeline_name),
            )

        # 6. Experiments are a higher-level grouping that may contain
        #    runs from multiple pipelines, so they are not moved; their
        #    member runs were already updated in step 1.

        conn.commit()
    finally:
        conn.close()
def get_pipeline_definition(self, pipeline_name: str) -> dict | None:
|
|
1080
|
-
"""Retrieve pipeline definition."""
|
|
1081
|
-
conn = sqlite3.connect(self.db_path)
|
|
1082
|
-
cursor = conn.cursor()
|
|
1083
|
-
cursor.execute(
|
|
1084
|
-
"SELECT definition FROM pipeline_definitions WHERE pipeline_name = ?",
|
|
1085
|
-
(pipeline_name,),
|
|
1086
|
-
)
|
|
1087
|
-
row = cursor.fetchone()
|
|
1088
|
-
conn.close()
|
|
1089
|
-
|
|
1090
|
-
if row:
|
|
1091
|
-
return json.loads(row[0])
|
|
1092
|
-
return None
|
|
52
|
+
# Alias for backward compatibility: callers that still import the older
# SQLite-specific class name get the current SQLMetadataStore implementation.
SQLiteMetadataStore = SQLMetadataStore