flowyml 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,6 @@
1
1
  """Metadata storage backends for flowyml."""
2
2
 
3
- import json
4
- import sqlite3
5
3
  from abc import ABC, abstractmethod
6
- from pathlib import Path
7
- import contextlib
8
- import builtins
9
4
 
10
5
 
11
6
  class MetadataStore(ABC):
@@ -52,1041 +47,7 @@ class MetadataStore(ABC):
52
47
  pass
53
48
 
54
49
 
55
- class SQLiteMetadataStore(MetadataStore):
56
- """SQLite-based metadata storage."""
50
+ from flowyml.storage.sql import SQLMetadataStore # noqa: E402
57
51
 
58
- def __init__(self, db_path: str = ".flowyml/metadata.db"):
59
- """Initialize SQLite metadata store.
60
-
61
- Args:
62
- db_path: Path to SQLite database file
63
- """
64
- self.db_path = Path(db_path)
65
- self.db_path.parent.mkdir(parents=True, exist_ok=True)
66
- self._init_db()
67
-
68
- def _init_db(self) -> None:
69
- """Initialize database schema."""
70
- conn = sqlite3.connect(self.db_path)
71
- cursor = conn.cursor()
72
-
73
- # Runs table
74
- cursor.execute(
75
- """
76
- CREATE TABLE IF NOT EXISTS runs (
77
- run_id TEXT PRIMARY KEY,
78
- pipeline_name TEXT,
79
- status TEXT,
80
- start_time TEXT,
81
- end_time TEXT,
82
- duration REAL,
83
- metadata TEXT,
84
- project TEXT,
85
- created_at TEXT DEFAULT CURRENT_TIMESTAMP
86
- )
87
- """,
88
- )
89
-
90
- # Migration: Add project column if it doesn't exist
91
- # Migration: Add project column if it doesn't exist
92
- with contextlib.suppress(sqlite3.OperationalError):
93
- cursor.execute("ALTER TABLE runs ADD COLUMN project TEXT")
94
-
95
- # Artifacts table
96
- cursor.execute(
97
- """
98
- CREATE TABLE IF NOT EXISTS artifacts (
99
- artifact_id TEXT PRIMARY KEY,
100
- name TEXT,
101
- type TEXT,
102
- run_id TEXT,
103
- path TEXT,
104
- metadata TEXT,
105
- project TEXT,
106
- created_at TEXT DEFAULT CURRENT_TIMESTAMP,
107
- FOREIGN KEY (run_id) REFERENCES runs(run_id)
108
- )
109
- """,
110
- )
111
-
112
- # Migration: Add project column to artifacts if it doesn't exist
113
- # Migration: Add project column to artifacts if it doesn't exist
114
- with contextlib.suppress(sqlite3.OperationalError):
115
- cursor.execute("ALTER TABLE artifacts ADD COLUMN project TEXT")
116
-
117
- # Metrics table
118
- cursor.execute(
119
- """
120
- CREATE TABLE IF NOT EXISTS metrics (
121
- id INTEGER PRIMARY KEY AUTOINCREMENT,
122
- run_id TEXT,
123
- name TEXT,
124
- value REAL,
125
- step INTEGER,
126
- timestamp TEXT DEFAULT CURRENT_TIMESTAMP,
127
- FOREIGN KEY (run_id) REFERENCES runs(run_id)
128
- )
129
- """,
130
- )
131
-
132
- # Model metrics table
133
- cursor.execute(
134
- """
135
- CREATE TABLE IF NOT EXISTS model_metrics (
136
- id INTEGER PRIMARY KEY AUTOINCREMENT,
137
- project TEXT,
138
- model_name TEXT,
139
- run_id TEXT,
140
- metric_name TEXT,
141
- metric_value REAL,
142
- environment TEXT,
143
- tags TEXT,
144
- created_at TEXT DEFAULT CURRENT_TIMESTAMP
145
- )
146
- """,
147
- )
148
-
149
- # Parameters table
150
- cursor.execute(
151
- """
152
- CREATE TABLE IF NOT EXISTS parameters (
153
- id INTEGER PRIMARY KEY AUTOINCREMENT,
154
- run_id TEXT,
155
- name TEXT,
156
- value TEXT,
157
- FOREIGN KEY (run_id) REFERENCES runs(run_id)
158
- )
159
- """,
160
- )
161
-
162
- # Experiments table
163
- cursor.execute(
164
- """
165
- CREATE TABLE IF NOT EXISTS experiments (
166
- experiment_id TEXT PRIMARY KEY,
167
- name TEXT,
168
- description TEXT,
169
- tags TEXT,
170
- project TEXT,
171
- created_at TEXT DEFAULT CURRENT_TIMESTAMP
172
- )
173
- """,
174
- )
175
-
176
- # Migration: Add project column to experiments if it doesn't exist
177
- # Migration: Add project column to experiments if it doesn't exist
178
- with contextlib.suppress(sqlite3.OperationalError):
179
- cursor.execute("ALTER TABLE experiments ADD COLUMN project TEXT")
180
-
181
- # Experiment Runs link table
182
- cursor.execute(
183
- """
184
- CREATE TABLE IF NOT EXISTS experiment_runs (
185
- experiment_id TEXT,
186
- run_id TEXT,
187
- metrics TEXT,
188
- parameters TEXT,
189
- timestamp TEXT DEFAULT CURRENT_TIMESTAMP,
190
- PRIMARY KEY (experiment_id, run_id),
191
- FOREIGN KEY (experiment_id) REFERENCES experiments(experiment_id),
192
- FOREIGN KEY (run_id) REFERENCES runs(run_id)
193
- )
194
- """,
195
- )
196
-
197
- # Traces table for GenAI monitoring
198
- cursor.execute(
199
- """
200
- CREATE TABLE IF NOT EXISTS traces (
201
- event_id TEXT PRIMARY KEY,
202
- trace_id TEXT,
203
- parent_id TEXT,
204
- event_type TEXT,
205
- name TEXT,
206
- inputs TEXT,
207
- outputs TEXT,
208
- start_time REAL,
209
- end_time REAL,
210
- duration REAL,
211
- status TEXT,
212
- error TEXT,
213
- metadata TEXT,
214
- prompt_tokens INTEGER,
215
- completion_tokens INTEGER,
216
- total_tokens INTEGER,
217
- cost REAL,
218
- model TEXT,
219
- project TEXT,
220
- created_at TEXT DEFAULT CURRENT_TIMESTAMP
221
- )
222
- """,
223
- )
224
-
225
- # Migration: Add project column to traces if it doesn't exist
226
- # Migration: Add project column to traces if it doesn't exist
227
- with contextlib.suppress(sqlite3.OperationalError):
228
- cursor.execute("ALTER TABLE traces ADD COLUMN project TEXT")
229
-
230
- # Create indexes for better query performance
231
- cursor.execute("CREATE INDEX IF NOT EXISTS idx_runs_pipeline ON runs(pipeline_name)")
232
- cursor.execute("CREATE INDEX IF NOT EXISTS idx_runs_status ON runs(status)")
233
- cursor.execute("CREATE INDEX IF NOT EXISTS idx_runs_project ON runs(project)")
234
- cursor.execute("CREATE INDEX IF NOT EXISTS idx_artifacts_run ON artifacts(run_id)")
235
- cursor.execute("CREATE INDEX IF NOT EXISTS idx_artifacts_project ON artifacts(project)")
236
- cursor.execute("CREATE INDEX IF NOT EXISTS idx_metrics_run ON metrics(run_id)")
237
- cursor.execute("CREATE INDEX IF NOT EXISTS idx_parameters_run ON parameters(run_id)")
238
- cursor.execute("CREATE INDEX IF NOT EXISTS idx_experiments_name ON experiments(name)")
239
- cursor.execute("CREATE INDEX IF NOT EXISTS idx_experiments_project ON experiments(project)")
240
- cursor.execute("CREATE INDEX IF NOT EXISTS idx_traces_trace_id ON traces(trace_id)")
241
- cursor.execute("CREATE INDEX IF NOT EXISTS idx_traces_type ON traces(event_type)")
242
- cursor.execute("CREATE INDEX IF NOT EXISTS idx_traces_project ON traces(project)")
243
-
244
- # Pipeline definitions for scheduling
245
- cursor.execute(
246
- """
247
- CREATE TABLE IF NOT EXISTS pipeline_definitions (
248
- pipeline_name TEXT PRIMARY KEY,
249
- definition TEXT NOT NULL,
250
- created_at TEXT NOT NULL,
251
- updated_at TEXT NOT NULL
252
- )
253
- """,
254
- )
255
-
256
- conn.commit()
257
- conn.close()
258
-
259
- def save_run(self, run_id: str, metadata: dict) -> None:
260
- """Save run metadata to database.
261
-
262
- Args:
263
- run_id: Unique run identifier
264
- metadata: Run metadata dictionary
265
- """
266
- conn = sqlite3.connect(self.db_path)
267
- cursor = conn.cursor()
268
-
269
- cursor.execute(
270
- """
271
- INSERT OR REPLACE INTO runs
272
- (run_id, pipeline_name, status, start_time, end_time, duration, metadata, project)
273
- VALUES (?, ?, ?, ?, ?, ?, ?, ?)
274
- """,
275
- (
276
- run_id,
277
- metadata.get("pipeline_name"),
278
- metadata.get("status"),
279
- metadata.get("start_time"),
280
- metadata.get("end_time"),
281
- metadata.get("duration"),
282
- json.dumps(metadata),
283
- metadata.get("project"),
284
- ),
285
- )
286
-
287
- # Save parameters
288
- if "parameters" in metadata:
289
- cursor.execute("DELETE FROM parameters WHERE run_id = ?", (run_id,))
290
- for name, value in metadata["parameters"].items():
291
- cursor.execute(
292
- "INSERT INTO parameters (run_id, name, value) VALUES (?, ?, ?)",
293
- (run_id, name, json.dumps(value)),
294
- )
295
-
296
- # Save metrics
297
- if "metrics" in metadata:
298
- cursor.execute("DELETE FROM metrics WHERE run_id = ?", (run_id,))
299
- for name, value in metadata["metrics"].items():
300
- cursor.execute(
301
- "INSERT INTO metrics (run_id, name, value, step) VALUES (?, ?, ?, ?)",
302
- (run_id, name, value, 0),
303
- )
304
-
305
- conn.commit()
306
- conn.close()
307
-
308
- def load_run(self, run_id: str) -> dict | None:
309
- """Load run metadata from database.
310
-
311
- Args:
312
- run_id: Unique run identifier
313
-
314
- Returns:
315
- Run metadata dictionary or None if not found
316
- """
317
- conn = sqlite3.connect(self.db_path)
318
- cursor = conn.cursor()
319
-
320
- cursor.execute("SELECT metadata FROM runs WHERE run_id = ?", (run_id,))
321
- row = cursor.fetchone()
322
-
323
- conn.close()
324
-
325
- if row:
326
- data = json.loads(row[0])
327
- # Ensure project is in metadata if it's in the column but not the JSON blob
328
- # (This might happen if we update the column directly)
329
- # Actually, let's just return what's in the blob for now,
330
- # but we should probably sync them.
331
- return data
332
- return None
333
-
334
- def update_run_project(self, run_id: str, project_name: str) -> None:
335
- """Update the project for a run.
336
-
337
- Args:
338
- run_id: Run identifier
339
- project_name: Name of the project
340
- """
341
- conn = sqlite3.connect(self.db_path)
342
- cursor = conn.cursor()
343
-
344
- # 1. Update the column
345
- cursor.execute("UPDATE runs SET project = ? WHERE run_id = ?", (project_name, run_id))
346
-
347
- # 2. Update the JSON blob
348
- cursor.execute("SELECT metadata FROM runs WHERE run_id = ?", (run_id,))
349
- row = cursor.fetchone()
350
- if row:
351
- metadata = json.loads(row[0])
352
- metadata["project"] = project_name
353
- cursor.execute(
354
- "UPDATE runs SET metadata = ? WHERE run_id = ?",
355
- (json.dumps(metadata), run_id),
356
- )
357
-
358
- conn.commit()
359
- conn.close()
360
-
361
- def list_runs(self, limit: int | None = None) -> list[dict]:
362
- """List all runs from database.
363
-
364
- Args:
365
- limit: Optional limit on number of results
366
-
367
- Returns:
368
- List of run metadata dictionaries
369
- """
370
- conn = sqlite3.connect(self.db_path)
371
- cursor = conn.cursor()
372
-
373
- query = "SELECT metadata FROM runs ORDER BY created_at DESC"
374
- if limit:
375
- query += f" LIMIT {limit}"
376
-
377
- cursor.execute(query)
378
- rows = cursor.fetchall()
379
-
380
- conn.close()
381
-
382
- return [json.loads(row[0]) for row in rows]
383
-
384
- def list_pipelines(self, project: str = None) -> list[str]:
385
- """List all unique pipeline names.
386
-
387
- Args:
388
- project: Optional project name to filter by
389
-
390
- Returns:
391
- List of pipeline names
392
- """
393
- conn = sqlite3.connect(self.db_path)
394
- cursor = conn.cursor()
395
-
396
- if project:
397
- cursor.execute(
398
- "SELECT DISTINCT pipeline_name FROM runs WHERE project = ? ORDER BY pipeline_name",
399
- (project,),
400
- )
401
- else:
402
- cursor.execute("SELECT DISTINCT pipeline_name FROM runs ORDER BY pipeline_name")
403
-
404
- rows = cursor.fetchall()
405
-
406
- conn.close()
407
-
408
- return [row[0] for row in rows if row[0]]
409
-
410
- def save_artifact(self, artifact_id: str, metadata: dict) -> None:
411
- """Save artifact metadata to database.
412
-
413
- Args:
414
- artifact_id: Unique artifact identifier
415
- metadata: Artifact metadata dictionary
416
- """
417
- conn = sqlite3.connect(self.db_path)
418
- cursor = conn.cursor()
419
-
420
- cursor.execute(
421
- """
422
- INSERT OR REPLACE INTO artifacts
423
- (artifact_id, name, type, run_id, path, metadata, project)
424
- VALUES (?, ?, ?, ?, ?, ?, ?)
425
- """,
426
- (
427
- artifact_id,
428
- metadata.get("name"),
429
- metadata.get("type"),
430
- metadata.get("run_id"),
431
- metadata.get("path"),
432
- json.dumps(metadata),
433
- metadata.get("project"),
434
- ),
435
- )
436
-
437
- conn.commit()
438
- conn.close()
439
-
440
- def load_artifact(self, artifact_id: str) -> dict | None:
441
- """Load artifact metadata from database.
442
-
443
- Args:
444
- artifact_id: Unique artifact identifier
445
-
446
- Returns:
447
- Artifact metadata dictionary or None if not found
448
- """
449
- conn = sqlite3.connect(self.db_path)
450
- cursor = conn.cursor()
451
-
452
- cursor.execute("SELECT metadata FROM artifacts WHERE artifact_id = ?", (artifact_id,))
453
- row = cursor.fetchone()
454
-
455
- conn.close()
456
-
457
- if row:
458
- return json.loads(row[0])
459
- return None
460
-
461
- def list_assets(self, limit: int | None = None, **filters) -> list[dict]:
462
- """List assets from database with optional filters.
463
-
464
- Args:
465
- limit: Optional limit on number of results
466
- **filters: Filter criteria (type, run_id, etc.)
467
-
468
- Returns:
469
- List of artifact metadata dictionaries
470
- """
471
- conn = sqlite3.connect(self.db_path)
472
- cursor = conn.cursor()
473
-
474
- conditions = []
475
- params = []
476
-
477
- for key, value in filters.items():
478
- if value is not None:
479
- conditions.append(f"{key} = ?")
480
- params.append(value)
481
-
482
- query = "SELECT metadata FROM artifacts"
483
- if conditions:
484
- query += " WHERE " + " AND ".join(conditions)
485
-
486
- query += " ORDER BY created_at DESC"
487
-
488
- if limit:
489
- query += f" LIMIT {limit}"
490
-
491
- cursor.execute(query, params)
492
- rows = cursor.fetchall()
493
-
494
- conn.close()
495
-
496
- return [json.loads(row[0]) for row in rows]
497
-
498
- def query(self, **filters) -> list[dict]:
499
- """Query runs with filters.
500
-
501
- Args:
502
- **filters: Filter criteria (pipeline_name, status, etc.)
503
-
504
- Returns:
505
- List of matching run metadata dictionaries
506
- """
507
- conn = sqlite3.connect(self.db_path)
508
- cursor = conn.cursor()
509
-
510
- where_clauses = []
511
- params = []
512
-
513
- for key, value in filters.items():
514
- where_clauses.append(f"{key} = ?")
515
- params.append(value)
516
-
517
- query = "SELECT metadata FROM runs"
518
- if where_clauses:
519
- query += " WHERE " + " AND ".join(where_clauses)
520
- query += " ORDER BY created_at DESC"
521
-
522
- cursor.execute(query, params)
523
- rows = cursor.fetchall()
524
-
525
- conn.close()
526
-
527
- return [json.loads(row[0]) for row in rows]
528
-
529
- def save_metric(self, run_id: str, name: str, value: float, step: int = 0) -> None:
530
- """Save a single metric value.
531
-
532
- Args:
533
- run_id: Run identifier
534
- name: Metric name
535
- value: Metric value
536
- step: Training step/iteration
537
- """
538
- conn = sqlite3.connect(self.db_path)
539
- cursor = conn.cursor()
540
-
541
- cursor.execute(
542
- "INSERT INTO metrics (run_id, name, value, step) VALUES (?, ?, ?, ?)",
543
- (run_id, name, value, step),
544
- )
545
-
546
- conn.commit()
547
- conn.close()
548
-
549
- def get_metrics(self, run_id: str, name: str | None = None) -> list[dict]:
550
- """Get metrics for a run.
551
-
552
- Args:
553
- run_id: Run identifier
554
- name: Optional metric name filter
555
-
556
- Returns:
557
- List of metric dictionaries
558
- """
559
- conn = sqlite3.connect(self.db_path)
560
- cursor = conn.cursor()
561
-
562
- if name:
563
- cursor.execute(
564
- "SELECT name, value, step, timestamp FROM metrics WHERE run_id = ? AND name = ? ORDER BY step",
565
- (run_id, name),
566
- )
567
- else:
568
- cursor.execute(
569
- "SELECT name, value, step, timestamp FROM metrics WHERE run_id = ? ORDER BY step",
570
- (run_id,),
571
- )
572
-
573
- rows = cursor.fetchall()
574
- conn.close()
575
-
576
- return [{"name": row[0], "value": row[1], "step": row[2], "timestamp": row[3]} for row in rows]
577
-
578
- def log_model_metrics(
579
- self,
580
- project: str,
581
- model_name: str,
582
- metrics: dict[str, float],
583
- run_id: str | None = None,
584
- environment: str | None = None,
585
- tags: dict | None = None,
586
- ) -> None:
587
- """Log production model metrics independent of pipeline runs."""
588
- if not metrics:
589
- return
590
-
591
- conn = sqlite3.connect(self.db_path)
592
- cursor = conn.cursor()
593
- tags_json = json.dumps(tags or {})
594
-
595
- for metric_name, value in metrics.items():
596
- try:
597
- metric_value = float(value)
598
- except (TypeError, ValueError):
599
- continue
600
-
601
- cursor.execute(
602
- """
603
- INSERT INTO model_metrics
604
- (project, model_name, run_id, metric_name, metric_value, environment, tags)
605
- VALUES (?, ?, ?, ?, ?, ?, ?)
606
- """,
607
- (project, model_name, run_id, metric_name, metric_value, environment, tags_json),
608
- )
609
-
610
- conn.commit()
611
- conn.close()
612
-
613
- def list_model_metrics(
614
- self,
615
- project: str | None = None,
616
- model_name: str | None = None,
617
- limit: int = 100,
618
- ) -> list[dict]:
619
- """List logged model metrics."""
620
- conn = sqlite3.connect(self.db_path)
621
- cursor = conn.cursor()
622
-
623
- query = """
624
- SELECT project, model_name, run_id, metric_name, metric_value, environment, tags, created_at
625
- FROM model_metrics
626
- """
627
- params: list = []
628
- clauses = []
629
-
630
- if project:
631
- clauses.append("project = ?")
632
- params.append(project)
633
- if model_name:
634
- clauses.append("model_name = ?")
635
- params.append(model_name)
636
-
637
- if clauses:
638
- query += " WHERE " + " AND ".join(clauses)
639
-
640
- query += " ORDER BY created_at DESC LIMIT ?"
641
- params.append(limit)
642
-
643
- cursor.execute(query, params)
644
- rows = cursor.fetchall()
645
- conn.close()
646
-
647
- results = []
648
- for row in rows:
649
- results.append(
650
- {
651
- "project": row[0],
652
- "model_name": row[1],
653
- "run_id": row[2],
654
- "metric_name": row[3],
655
- "metric_value": row[4],
656
- "environment": row[5],
657
- "tags": json.loads(row[6]) if row[6] else {},
658
- "created_at": row[7],
659
- },
660
- )
661
- return results
662
-
663
- def save_experiment(self, experiment_id: str, name: str, description: str = "", tags: dict = None) -> None:
664
- """Save experiment metadata.
665
-
666
- Args:
667
- experiment_id: Unique experiment identifier
668
- name: Experiment name
669
- description: Experiment description
670
- tags: Experiment tags
671
- """
672
- conn = sqlite3.connect(self.db_path)
673
- cursor = conn.cursor()
674
-
675
- cursor.execute(
676
- """
677
- INSERT OR REPLACE INTO experiments
678
- (experiment_id, name, description, tags)
679
- VALUES (?, ?, ?, ?)
680
- """,
681
- (
682
- experiment_id,
683
- name,
684
- description,
685
- json.dumps(tags or {}),
686
- ),
687
- )
688
-
689
- conn.commit()
690
- conn.close()
691
-
692
- def log_experiment_run(
693
- self,
694
- experiment_id: str,
695
- run_id: str,
696
- metrics: dict = None,
697
- parameters: dict = None,
698
- ) -> None:
699
- """Log a run to an experiment.
700
-
701
- Args:
702
- experiment_id: Experiment identifier
703
- run_id: Run identifier
704
- metrics: Metrics from the run
705
- parameters: Parameters used in the run
706
- """
707
- conn = sqlite3.connect(self.db_path)
708
- cursor = conn.cursor()
709
-
710
- cursor.execute(
711
- """
712
- INSERT OR REPLACE INTO experiment_runs
713
- (experiment_id, run_id, metrics, parameters)
714
- VALUES (?, ?, ?, ?)
715
- """,
716
- (
717
- experiment_id,
718
- run_id,
719
- json.dumps(metrics or {}),
720
- json.dumps(parameters or {}),
721
- ),
722
- )
723
-
724
- conn.commit()
725
- conn.close()
726
-
727
- def list_experiments(self) -> list[dict]:
728
- """List all experiments.
729
-
730
- Returns:
731
- List of experiment dictionaries
732
- """
733
- conn = sqlite3.connect(self.db_path)
734
- cursor = conn.cursor()
735
-
736
- cursor.execute(
737
- "SELECT experiment_id, name, description, tags, created_at, project FROM experiments ORDER BY created_at DESC",
738
- )
739
- rows = cursor.fetchall()
740
-
741
- experiments = []
742
- for row in rows:
743
- # Count runs for each experiment
744
- cursor.execute("SELECT COUNT(*) FROM experiment_runs WHERE experiment_id = ?", (row[0],))
745
- run_count = cursor.fetchone()[0]
746
-
747
- experiments.append(
748
- {
749
- "experiment_id": row[0],
750
- "name": row[1],
751
- "description": row[2],
752
- "tags": json.loads(row[3]),
753
- "created_at": row[4],
754
- "project": row[5],
755
- "run_count": run_count,
756
- },
757
- )
758
- conn.close()
759
- return experiments
760
-
761
- def update_experiment_project(self, experiment_name: str, project_name: str) -> None:
762
- """Update the project for an experiment.
763
-
764
- Args:
765
- experiment_name: Name of the experiment
766
- project_name: New project name
767
- """
768
- conn = sqlite3.connect(self.db_path)
769
- cursor = conn.cursor()
770
-
771
- try:
772
- cursor.execute(
773
- "UPDATE experiments SET project = ? WHERE name = ?",
774
- (project_name, experiment_name),
775
- )
776
- conn.commit()
777
- finally:
778
- conn.close()
779
-
780
- def get_experiment(self, experiment_id: str) -> dict | None:
781
- """Get experiment details.
782
-
783
- Args:
784
- experiment_id: Experiment identifier
785
-
786
- Returns:
787
- Experiment dictionary or None
788
- """
789
- conn = sqlite3.connect(self.db_path)
790
- cursor = conn.cursor()
791
-
792
- cursor.execute(
793
- "SELECT experiment_id, name, description, tags, created_at FROM experiments WHERE experiment_id = ?",
794
- (experiment_id,),
795
- )
796
- row = cursor.fetchone()
797
-
798
- if not row:
799
- conn.close()
800
- return None
801
-
802
- experiment = {
803
- "experiment_id": row[0],
804
- "name": row[1],
805
- "description": row[2],
806
- "tags": json.loads(row[3]),
807
- "created_at": row[4],
808
- }
809
-
810
- # Get runs
811
- cursor.execute(
812
- """
813
- SELECT er.run_id, er.metrics, er.parameters, er.timestamp, r.status, r.duration
814
- FROM experiment_runs er
815
- LEFT JOIN runs r ON er.run_id = r.run_id
816
- WHERE er.experiment_id = ?
817
- ORDER BY er.timestamp DESC
818
- """,
819
- (experiment_id,),
820
- )
821
-
822
- runs = []
823
- for r in cursor.fetchall():
824
- runs.append(
825
- {
826
- "run_id": r[0],
827
- "metrics": json.loads(r[1]),
828
- "parameters": json.loads(r[2]),
829
- "timestamp": r[3],
830
- "status": r[4],
831
- "duration": r[5],
832
- },
833
- )
834
-
835
- experiment["runs"] = runs
836
-
837
- conn.close()
838
- return experiment
839
-
840
- def get_statistics(self) -> dict:
841
- """Get database statistics.
842
-
843
- Returns:
844
- Dictionary with statistics
845
- """
846
- conn = sqlite3.connect(self.db_path)
847
- cursor = conn.cursor()
848
-
849
- stats = {}
850
-
851
- cursor.execute("SELECT COUNT(*) FROM runs")
852
- stats["total_runs"] = cursor.fetchone()[0]
853
-
854
- cursor.execute("SELECT COUNT(*) FROM artifacts")
855
- stats["total_artifacts"] = cursor.fetchone()[0]
856
-
857
- cursor.execute("SELECT COUNT(*) FROM metrics")
858
- stats["total_metrics"] = cursor.fetchone()[0]
859
-
860
- cursor.execute("SELECT COUNT(DISTINCT pipeline_name) FROM runs")
861
- stats["total_pipelines"] = cursor.fetchone()[0]
862
-
863
- cursor.execute("SELECT COUNT(*) FROM experiments")
864
- stats["total_experiments"] = cursor.fetchone()[0]
865
-
866
- conn.close()
867
-
868
- return stats
869
-
870
- def save_trace_event(self, event: dict) -> None:
871
- """Save a trace event.
872
-
873
- Args:
874
- event: Trace event dictionary
875
- """
876
- conn = sqlite3.connect(self.db_path)
877
- cursor = conn.cursor()
878
-
879
- cursor.execute(
880
- """
881
- INSERT OR REPLACE INTO traces
882
- (event_id, trace_id, parent_id, event_type, name, inputs, outputs,
883
- start_time, end_time, duration, status, error, metadata,
884
- prompt_tokens, completion_tokens, total_tokens, cost, model)
885
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
886
- """,
887
- (
888
- event["event_id"],
889
- event["trace_id"],
890
- event["parent_id"],
891
- event["event_type"],
892
- event["name"],
893
- json.dumps(event.get("inputs", {})),
894
- json.dumps(event.get("outputs", {})),
895
- event.get("start_time"),
896
- event.get("end_time"),
897
- event.get("duration"),
898
- event.get("status"),
899
- event.get("error"),
900
- json.dumps(event.get("metadata", {})),
901
- event.get("prompt_tokens", 0),
902
- event.get("completion_tokens", 0),
903
- event.get("total_tokens", 0),
904
- event.get("cost", 0.0),
905
- event.get("model"),
906
- ),
907
- )
908
-
909
- conn.commit()
910
- conn.close()
911
-
912
- def get_trace(self, trace_id: str) -> list[dict]:
913
- """Get all events for a trace.
914
-
915
- Args:
916
- trace_id: Trace identifier
917
-
918
- Returns:
919
- List of event dictionaries
920
- """
921
- conn = sqlite3.connect(self.db_path)
922
- cursor = conn.cursor()
923
-
924
- cursor.execute(
925
- """
926
- SELECT * FROM traces WHERE trace_id = ? ORDER BY start_time
927
- """,
928
- (trace_id,),
929
- )
930
-
931
- columns = [description[0] for description in cursor.description]
932
- rows = cursor.fetchall()
933
-
934
- events = []
935
- for row in rows:
936
- event = dict(zip(columns, row, strict=False))
937
- # Parse JSON fields
938
- for field in ["inputs", "outputs", "metadata"]:
939
- if event[field]:
940
- with contextlib.suppress(builtins.BaseException):
941
- event[field] = json.loads(event[field])
942
- events.append(event)
943
-
944
- conn.close()
945
- return events
946
-
947
- def save_pipeline_definition(self, pipeline_name: str, definition: dict) -> None:
948
- """Save pipeline definition for scheduling."""
949
- from datetime import datetime
950
-
951
- conn = sqlite3.connect(self.db_path)
952
- cursor = conn.cursor()
953
- now = datetime.now().isoformat()
954
-
955
- # Check if definition already exists
956
- cursor.execute(
957
- "SELECT pipeline_name FROM pipeline_definitions WHERE pipeline_name = ?",
958
- (pipeline_name,),
959
- )
960
- exists = cursor.fetchone()
961
-
962
- if exists:
963
- # Update existing
964
- cursor.execute(
965
- """
966
- UPDATE pipeline_definitions
967
- SET definition = ?, updated_at = ?
968
- WHERE pipeline_name = ?
969
- """,
970
- (json.dumps(definition), now, pipeline_name),
971
- )
972
- else:
973
- # Insert new
974
- cursor.execute(
975
- """
976
- INSERT INTO pipeline_definitions (pipeline_name, definition, created_at, updated_at)
977
- VALUES (?, ?, ?, ?)
978
- """,
979
- (pipeline_name, json.dumps(definition), now, now),
980
- )
981
-
982
- conn.commit()
983
- conn.close()
984
-
985
- def update_pipeline_project(self, pipeline_name: str, project_name: str) -> None:
986
- """Update the project for all runs of a pipeline.
987
-
988
- Args:
989
- pipeline_name: Name of the pipeline
990
- project_name: New project name
991
- """
992
- conn = sqlite3.connect(self.db_path)
993
- cursor = conn.cursor()
994
-
995
- try:
996
- # 1. Update the project column for all runs
997
- cursor.execute(
998
- "UPDATE runs SET project = ? WHERE pipeline_name = ?",
999
- (project_name, pipeline_name),
1000
- )
1001
-
1002
- # 2. Update the JSON metadata blob for each run
1003
- cursor.execute(
1004
- "SELECT run_id, metadata FROM runs WHERE pipeline_name = ?",
1005
- (pipeline_name,),
1006
- )
1007
- rows = cursor.fetchall()
1008
- for run_id, metadata_json in rows:
1009
- metadata = json.loads(metadata_json)
1010
- metadata["project"] = project_name
1011
- cursor.execute(
1012
- "UPDATE runs SET metadata = ? WHERE run_id = ?",
1013
- (json.dumps(metadata), run_id),
1014
- )
1015
-
1016
- # 3. Update artifacts table
1017
- cursor.execute("PRAGMA table_info(artifacts)")
1018
- columns = [info[1] for info in cursor.fetchall()]
1019
- if "project" in columns:
1020
- cursor.execute(
1021
- """
1022
- UPDATE artifacts
1023
- SET project = ?
1024
- WHERE run_id IN (SELECT run_id FROM runs WHERE pipeline_name = ?)
1025
- """,
1026
- (project_name, pipeline_name),
1027
- )
1028
-
1029
- # 4. Update traces table
1030
- cursor.execute("PRAGMA table_info(traces)")
1031
- columns = [info[1] for info in cursor.fetchall()]
1032
- if "project" in columns:
1033
- # Update traces linked to runs of this pipeline
1034
- # Note: This assumes we can link traces to runs via metadata or some other way
1035
- # For now, let's assume traces might have run_id in metadata or we just update by project if we had it
1036
- # But here we are moving a pipeline to a project.
1037
- # If traces have a 'project' column, we should update it for traces belonging to these runs.
1038
- # Since traces don't explicitly have run_id column in schema (it's in metadata),
1039
- # we might need a more complex query or just skip if not easily linkable.
1040
- # However, if we assume traces are logged with project context, we might not need to update them
1041
- # if they were already correct. But if we are MOVING, we need to.
1042
- # Let's try to update traces that have run_id in their metadata matching these runs.
1043
- # This is expensive in SQLite with JSON.
1044
- # Alternative: If traces are associated with the pipeline name directly?
1045
- # For now, let's skip complex JSON matching for traces to avoid performance issues
1046
- # unless we add a run_id column to traces.
1047
- pass
1048
-
1049
- # 5. Update model_metrics table
1050
- cursor.execute("PRAGMA table_info(model_metrics)")
1051
- columns = [info[1] for info in cursor.fetchall()]
1052
- if "project" in columns:
1053
- cursor.execute(
1054
- """
1055
- UPDATE model_metrics
1056
- SET project = ?
1057
- WHERE run_id IN (SELECT run_id FROM runs WHERE pipeline_name = ?)
1058
- """,
1059
- (project_name, pipeline_name),
1060
- )
1061
-
1062
- # 6. Update experiments table
1063
- # If an experiment contains runs from this pipeline, should the experiment be moved?
1064
- # Maybe not automatically, as an experiment might contain runs from multiple pipelines.
1065
- # But if the user wants "recursive", let's at least update the experiment_runs link
1066
- # (which doesn't have project) - wait, experiments have project.
1067
- # Let's find experiments that ONLY contain runs from this pipeline and move them?
1068
- # Or just leave experiments as is?
1069
- # The user said "same for all related objects, experiments etc".
1070
- # Let's be safe and NOT move experiments automatically as they are higher level grouping.
1071
- # BUT, we should ensure that the runs inside the experiment are consistent.
1072
- # The runs are already updated in step 1.
1073
- # So, we are good on experiments.
1074
-
1075
- conn.commit()
1076
- finally:
1077
- conn.close()
1078
-
1079
- def get_pipeline_definition(self, pipeline_name: str) -> dict | None:
1080
- """Retrieve pipeline definition."""
1081
- conn = sqlite3.connect(self.db_path)
1082
- cursor = conn.cursor()
1083
- cursor.execute(
1084
- "SELECT definition FROM pipeline_definitions WHERE pipeline_name = ?",
1085
- (pipeline_name,),
1086
- )
1087
- row = cursor.fetchone()
1088
- conn.close()
1089
-
1090
- if row:
1091
- return json.loads(row[0])
1092
- return None
52
+ # Alias for backward compatibility
53
+ SQLiteMetadataStore = SQLMetadataStore