podstack 1.3.13__tar.gz → 1.3.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. {podstack-1.3.13 → podstack-1.3.15}/PKG-INFO +161 -8
  2. {podstack-1.3.13 → podstack-1.3.15}/README.md +160 -7
  3. {podstack-1.3.13 → podstack-1.3.15}/podstack/registry/__init__.py +255 -1
  4. {podstack-1.3.13 → podstack-1.3.15}/podstack/registry/client.py +482 -4
  5. {podstack-1.3.13 → podstack-1.3.15}/podstack/registry/experiment.py +90 -1
  6. {podstack-1.3.13 → podstack-1.3.15}/podstack.egg-info/PKG-INFO +161 -8
  7. {podstack-1.3.13 → podstack-1.3.15}/pyproject.toml +1 -1
  8. {podstack-1.3.13 → podstack-1.3.15}/LICENSE +0 -0
  9. {podstack-1.3.13 → podstack-1.3.15}/podstack/__init__.py +0 -0
  10. {podstack-1.3.13 → podstack-1.3.15}/podstack/annotations.py +0 -0
  11. {podstack-1.3.13 → podstack-1.3.15}/podstack/client.py +0 -0
  12. {podstack-1.3.13 → podstack-1.3.15}/podstack/exceptions.py +0 -0
  13. {podstack-1.3.13 → podstack-1.3.15}/podstack/execution.py +0 -0
  14. {podstack-1.3.13 → podstack-1.3.15}/podstack/gpu_runner.py +0 -0
  15. {podstack-1.3.13 → podstack-1.3.15}/podstack/models.py +0 -0
  16. {podstack-1.3.13 → podstack-1.3.15}/podstack/notebook.py +0 -0
  17. {podstack-1.3.13 → podstack-1.3.15}/podstack/registry/autolog.py +0 -0
  18. {podstack-1.3.13 → podstack-1.3.15}/podstack/registry/exceptions.py +0 -0
  19. {podstack-1.3.13 → podstack-1.3.15}/podstack/registry/model.py +0 -0
  20. {podstack-1.3.13 → podstack-1.3.15}/podstack/registry/model_utils.py +0 -0
  21. {podstack-1.3.13 → podstack-1.3.15}/podstack.egg-info/SOURCES.txt +0 -0
  22. {podstack-1.3.13 → podstack-1.3.15}/podstack.egg-info/dependency_links.txt +0 -0
  23. {podstack-1.3.13 → podstack-1.3.15}/podstack.egg-info/requires.txt +0 -0
  24. {podstack-1.3.13 → podstack-1.3.15}/podstack.egg-info/top_level.txt +0 -0
  25. {podstack-1.3.13 → podstack-1.3.15}/podstack_gpu/__init__.py +0 -0
  26. {podstack-1.3.13 → podstack-1.3.15}/podstack_gpu/app.py +0 -0
  27. {podstack-1.3.13 → podstack-1.3.15}/podstack_gpu/exceptions.py +0 -0
  28. {podstack-1.3.13 → podstack-1.3.15}/podstack_gpu/image.py +0 -0
  29. {podstack-1.3.13 → podstack-1.3.15}/podstack_gpu/runner.py +0 -0
  30. {podstack-1.3.13 → podstack-1.3.15}/podstack_gpu/secret.py +0 -0
  31. {podstack-1.3.13 → podstack-1.3.15}/podstack_gpu/utils.py +0 -0
  32. {podstack-1.3.13 → podstack-1.3.15}/podstack_gpu/volume.py +0 -0
  33. {podstack-1.3.13 → podstack-1.3.15}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: podstack
3
- Version: 1.3.13
3
+ Version: 1.3.15
4
4
  Summary: Official Python SDK for Podstack GPU Notebook Platform
5
5
  Author-email: Podstack <support@podstack.ai>
6
6
  License-Expression: MIT
@@ -298,9 +298,9 @@ with registry.start_run(name="training-v1") as run:
298
298
  # Set tags
299
299
  registry.set_tag("framework", "pytorch")
300
300
 
301
- # Log artifacts
302
- registry.log_artifact("model.pt", "model")
303
- registry.log_artifact("training_curves.png", "plots")
301
+ # Upload artifacts to cloud artifact store
302
+ registry.log_artifact("model.pt")
303
+ registry.log_artifact("training_curves.png", artifact_path="plots/curves.png")
304
304
 
305
305
  # Log dataset provenance (first-class resource, deduped by content hash)
306
306
  registry.log_dataset("imdb-reviews", path="data/imdb.csv", context="training")
@@ -316,7 +316,7 @@ with registry.start_run(name="training-v1") as run:
316
316
  ```python
317
317
  from podstack import registry
318
318
 
319
- # Log a model object (auto-detects framework)
319
+ # Serialize and upload the model to the artifact store (auto-detects framework)
320
320
  registry.log_model(model, artifact_path="model", framework="pytorch")
321
321
 
322
322
  # Register in model registry
@@ -332,7 +332,7 @@ registry.set_model_stage("my-classifier", version=1, stage="production")
332
332
  # Set aliases
333
333
  registry.set_model_alias("my-classifier", alias="champion", version=1)
334
334
 
335
- # Load model from registry
335
+ # Load model from any machine — files are downloaded automatically if missing locally
336
336
  model = registry.load_model("my-classifier", stage="production")
337
337
  ```
338
338
 
@@ -659,6 +659,155 @@ model = registry.get_model("sentiment-bert")
659
659
  lineage = registry.get_model_lineage(model.id)
660
660
  ```
661
661
 
662
+ ### Artifact Storage
663
+
664
+ Podstack stores every artifact you log — model files, plots, CSV exports, anything — in the project's cloud artifact store. Artifacts are keyed by run ID, so the same file can be retrieved from any machine, by any project member, at any time.
665
+
666
+ #### `log_artifact()` — upload a file for the active run
667
+
668
+ ```python
669
+ # Upload a single file (uses the filename as the artifact path)
670
+ registry.log_artifact("model.pt")
671
+
672
+ # Upload with an explicit path inside the artifact store
673
+ registry.log_artifact("training_curves.png", artifact_path="plots/curves.png")
674
+ registry.log_artifact("feature_importance.csv", artifact_path="analysis/features.csv")
675
+ ```
676
+
677
+ **Parameters:**
678
+
679
+ | Parameter | Type | Default | Description |
680
+ |-----------|------|---------|-------------|
681
+ | `local_path` | `str` | required | Path to the local file to upload |
682
+ | `artifact_path` | `str` | filename | Relative path inside the artifact store. Defaults to `os.path.basename(local_path)` |
683
+
684
+ If the artifact store is temporarily unreachable, the SDK saves the file to a local fallback cache (`~/.podstack/artifacts/<run_id>/`) so your run is never interrupted.
685
+
686
+ **Via the `Run` object** — equivalent to calling `registry.log_artifact()`:
687
+
688
+ ```python
689
+ with registry.start_run("training-v1") as run:
690
+ run.log_artifact("confusion_matrix.png", artifact_path="plots/confusion_matrix.png")
691
+ run.log_artifact("model.pkl")
692
+ ```
693
+
694
+ #### `list_artifacts()` — list all artifacts for a run
695
+
696
+ ```python
697
+ artifacts = registry.list_artifacts(run_id)
698
+ for a in artifacts:
699
+ print(f"{a['path']:40s} {a['size'] / 1e6:.1f} MB {a['last_modified']}")
700
+ ```
701
+
702
+ **Parameters:**
703
+
704
+ | Parameter | Type | Description |
705
+ |-----------|------|-------------|
706
+ | `run_id` | `str` | ID of the run to query |
707
+
708
+ **Returns:** `list[dict]` — one entry per artifact:
709
+
710
+ | Key | Type | Description |
711
+ |-----|------|-------------|
712
+ | `path` | `str` | Relative artifact path (e.g. `"plots/curves.png"`) |
713
+ | `size` | `int` | File size in bytes |
714
+ | `etag` | `str` | Content hash for integrity verification |
715
+ | `last_modified` | `str` | ISO 8601 upload timestamp |
716
+
717
+ #### `download_artifact()` — retrieve an artifact
718
+
719
+ Downloads a specific artifact from the cloud store into a local directory. Falls back to the local cache when the store is unreachable.
720
+
721
+ ```python
722
+ # Download a single file
723
+ dest = registry.download_artifact("run-id", "model/model.pkl", "./downloads/")
724
+ print(f"Saved to: {dest}")
725
+
726
+ # Download a whole model directory
727
+ dest = registry.download_artifact("run-id", "model", "./local_models/")
728
+ ```
729
+
730
+ **Parameters:**
731
+
732
+ | Parameter | Type | Description |
733
+ |-----------|------|-------------|
734
+ | `run_id` | `str` | ID of the run that logged the artifact |
735
+ | `artifact_path` | `str` | Relative artifact path as logged (e.g. `"model/model.pkl"`) |
736
+ | `local_path` | `str` | Destination directory |
737
+
738
+ **Returns:** `str` — absolute path to the downloaded file or directory.
739
+
740
+ **Raises:** `ArtifactNotFoundError` if the artifact cannot be found in the store or the local cache.
741
+
742
+ #### Models as artifacts: `log_model()` and `load_model()`
743
+
744
+ `log_model()` serializes your model to disk and uploads every resulting file to the artifact store in one call. `load_model()` resolves the registered model version, downloads any missing files from the store, then deserializes the model — so it works correctly from any machine regardless of where training happened.
745
+
746
+ ```python
747
+ # ── Training machine ──────────────────────────────────────────────────────────
748
+ with registry.start_run("bert-finetune-v3") as run:
749
+ # train...
750
+ registry.log_model(model, artifact_path="model", framework="pytorch")
751
+
752
+ registry.register_model("sentiment-bert", run_id=run.id)
753
+ registry.set_model_stage("sentiment-bert", version=3, stage="production")
754
+
755
+ # ── Any machine (CI, inference server, colleague's laptop) ───────────────────
756
+ # Model files are downloaded automatically from the artifact store if not cached
757
+ model = registry.load_model("sentiment-bert", stage="production")
758
+ ```
759
+
760
+ **`log_model()` parameters:**
761
+
762
+ | Parameter | Type | Default | Description |
763
+ |-----------|------|---------|-------------|
764
+ | `model` | any | required | Model object (PyTorch, TensorFlow, sklearn, HuggingFace, or any picklable object) |
765
+ | `artifact_path` | `str` | `"model"` | Sub-path inside the artifact store |
766
+ | `framework` | `str` | auto-detected | `"pytorch"`, `"tensorflow"`, `"sklearn"`, `"huggingface"`, or `"pickle"` |
767
+ | `metadata` | `dict` | `None` | Arbitrary key-value metadata stored as run params |
768
+
769
+ **`load_model()` parameters:**
770
+
771
+ | Parameter | Type | Default | Description |
772
+ |-----------|------|---------|-------------|
773
+ | `model_name` | `str` | required | Registered model name |
774
+ | `version` | `int` | `None` | Specific version to load. Mutually exclusive with `stage` |
775
+ | `stage` | `str` | `None` | Stage to load from: `"development"`, `"staging"`, `"production"`, `"archived"` |
776
+ | `framework` | `str` | from run params | Override framework for deserialization |
777
+
778
+ #### Viewing artifacts in the dashboard
779
+
780
+ Every artifact logged with `log_artifact()` or `log_model()` appears automatically in the **Artifacts tab** of the run's detail page in the Podstack dashboard. No extra steps are needed — the tab populates from the same store the SDK writes to.
781
+
782
+ The Artifacts tab shows:
783
+
784
+ | Column | Description |
785
+ |--------|-------------|
786
+ | **Path** | The relative artifact path as logged (e.g. `model/model.pkl`, `plots/curves.png`) |
787
+ | **Type badge** | File extension, color-coded by category — model weights, data files, images, configs, etc. |
788
+ | **Size** | Formatted file size (B / KB / MB) |
789
+ | **Uploaded** | Timestamp of when the file was stored |
790
+ | **Download** | One-click download button — opens a short-lived direct download link in the browser |
791
+
792
+ A footer below the list shows the combined size of all artifacts for the run.
793
+
794
+ ```python
795
+ # Everything logged here shows up in the dashboard Artifacts tab
796
+ with registry.start_run("bert-finetune-v3") as run:
797
+ registry.log_params({"lr": 2e-5, "epochs": 3})
798
+ registry.log_metrics({"accuracy": 0.93})
799
+
800
+ # These all appear as separate rows in the Artifacts tab
801
+ registry.log_artifact("confusion_matrix.png", artifact_path="plots/confusion_matrix.png")
802
+ registry.log_artifact("feature_importance.csv", artifact_path="analysis/features.csv")
803
+ registry.log_model(model, artifact_path="model", framework="pytorch")
804
+ # ↳ each model file (model.pkl, config.json, etc.) appears as its own row
805
+ ```
806
+
807
+ #### Access control
808
+
809
+ Artifact upload and download URLs are issued by the registry API and require a valid API key and project membership. The URLs are short-lived, ensuring that access always reflects the current state of your project — a revoked key can no longer generate new URLs. Any member of a project can upload and download artifacts for runs within that project.
810
+
662
811
  ### List and Browse
663
812
 
664
813
  ```python
@@ -670,8 +819,12 @@ experiments = registry.list_experiments()
670
819
  # List models
671
820
  models = registry.list_models()
672
821
 
673
- # Download artifacts
674
- registry.download_artifact("run-id", "model/model.pt", "./downloads/")
822
+ # List artifacts for a specific run
823
+ artifacts = registry.list_artifacts(run_id)
824
+
825
+ # Download a specific artifact to a local directory
826
+ dest = registry.download_artifact("run-id", "model/model.pt", "./downloads/")
827
+ print(f"Saved to: {dest}")
675
828
  ```
676
829
 
677
830
  ## GPU Runner - Direct Code Execution
@@ -246,9 +246,9 @@ with registry.start_run(name="training-v1") as run:
246
246
  # Set tags
247
247
  registry.set_tag("framework", "pytorch")
248
248
 
249
- # Log artifacts
250
- registry.log_artifact("model.pt", "model")
251
- registry.log_artifact("training_curves.png", "plots")
249
+ # Upload artifacts to cloud artifact store
250
+ registry.log_artifact("model.pt")
251
+ registry.log_artifact("training_curves.png", artifact_path="plots/curves.png")
252
252
 
253
253
  # Log dataset provenance (first-class resource, deduped by content hash)
254
254
  registry.log_dataset("imdb-reviews", path="data/imdb.csv", context="training")
@@ -264,7 +264,7 @@ with registry.start_run(name="training-v1") as run:
264
264
  ```python
265
265
  from podstack import registry
266
266
 
267
- # Log a model object (auto-detects framework)
267
+ # Serialize and upload the model to the artifact store (auto-detects framework)
268
268
  registry.log_model(model, artifact_path="model", framework="pytorch")
269
269
 
270
270
  # Register in model registry
@@ -280,7 +280,7 @@ registry.set_model_stage("my-classifier", version=1, stage="production")
280
280
  # Set aliases
281
281
  registry.set_model_alias("my-classifier", alias="champion", version=1)
282
282
 
283
- # Load model from registry
283
+ # Load model from any machine — files are downloaded automatically if missing locally
284
284
  model = registry.load_model("my-classifier", stage="production")
285
285
  ```
286
286
 
@@ -607,6 +607,155 @@ model = registry.get_model("sentiment-bert")
607
607
  lineage = registry.get_model_lineage(model.id)
608
608
  ```
609
609
 
610
+ ### Artifact Storage
611
+
612
+ Podstack stores every artifact you log — model files, plots, CSV exports, anything — in the project's cloud artifact store. Artifacts are keyed by run ID, so the same file can be retrieved from any machine, by any project member, at any time.
613
+
614
+ #### `log_artifact()` — upload a file for the active run
615
+
616
+ ```python
617
+ # Upload a single file (uses the filename as the artifact path)
618
+ registry.log_artifact("model.pt")
619
+
620
+ # Upload with an explicit path inside the artifact store
621
+ registry.log_artifact("training_curves.png", artifact_path="plots/curves.png")
622
+ registry.log_artifact("feature_importance.csv", artifact_path="analysis/features.csv")
623
+ ```
624
+
625
+ **Parameters:**
626
+
627
+ | Parameter | Type | Default | Description |
628
+ |-----------|------|---------|-------------|
629
+ | `local_path` | `str` | required | Path to the local file to upload |
630
+ | `artifact_path` | `str` | filename | Relative path inside the artifact store. Defaults to `os.path.basename(local_path)` |
631
+
632
+ If the artifact store is temporarily unreachable, the SDK saves the file to a local fallback cache (`~/.podstack/artifacts/<run_id>/`) so your run is never interrupted.
633
+
634
+ **Via the `Run` object** — equivalent to calling `registry.log_artifact()`:
635
+
636
+ ```python
637
+ with registry.start_run("training-v1") as run:
638
+ run.log_artifact("confusion_matrix.png", artifact_path="plots/confusion_matrix.png")
639
+ run.log_artifact("model.pkl")
640
+ ```
641
+
642
+ #### `list_artifacts()` — list all artifacts for a run
643
+
644
+ ```python
645
+ artifacts = registry.list_artifacts(run_id)
646
+ for a in artifacts:
647
+ print(f"{a['path']:40s} {a['size'] / 1e6:.1f} MB {a['last_modified']}")
648
+ ```
649
+
650
+ **Parameters:**
651
+
652
+ | Parameter | Type | Description |
653
+ |-----------|------|-------------|
654
+ | `run_id` | `str` | ID of the run to query |
655
+
656
+ **Returns:** `list[dict]` — one entry per artifact:
657
+
658
+ | Key | Type | Description |
659
+ |-----|------|-------------|
660
+ | `path` | `str` | Relative artifact path (e.g. `"plots/curves.png"`) |
661
+ | `size` | `int` | File size in bytes |
662
+ | `etag` | `str` | Content hash for integrity verification |
663
+ | `last_modified` | `str` | ISO 8601 upload timestamp |
664
+
665
+ #### `download_artifact()` — retrieve an artifact
666
+
667
+ Downloads a specific artifact from the cloud store into a local directory. Falls back to the local cache when the store is unreachable.
668
+
669
+ ```python
670
+ # Download a single file
671
+ dest = registry.download_artifact("run-id", "model/model.pkl", "./downloads/")
672
+ print(f"Saved to: {dest}")
673
+
674
+ # Download a whole model directory
675
+ dest = registry.download_artifact("run-id", "model", "./local_models/")
676
+ ```
677
+
678
+ **Parameters:**
679
+
680
+ | Parameter | Type | Description |
681
+ |-----------|------|-------------|
682
+ | `run_id` | `str` | ID of the run that logged the artifact |
683
+ | `artifact_path` | `str` | Relative artifact path as logged (e.g. `"model/model.pkl"`) |
684
+ | `local_path` | `str` | Destination directory |
685
+
686
+ **Returns:** `str` — absolute path to the downloaded file or directory.
687
+
688
+ **Raises:** `ArtifactNotFoundError` if the artifact cannot be found in the store or the local cache.
689
+
690
+ #### Models as artifacts: `log_model()` and `load_model()`
691
+
692
+ `log_model()` serializes your model to disk and uploads every resulting file to the artifact store in one call. `load_model()` resolves the registered model version, downloads any missing files from the store, then deserializes the model — so it works correctly from any machine regardless of where training happened.
693
+
694
+ ```python
695
+ # ── Training machine ──────────────────────────────────────────────────────────
696
+ with registry.start_run("bert-finetune-v3") as run:
697
+ # train...
698
+ registry.log_model(model, artifact_path="model", framework="pytorch")
699
+
700
+ registry.register_model("sentiment-bert", run_id=run.id)
701
+ registry.set_model_stage("sentiment-bert", version=3, stage="production")
702
+
703
+ # ── Any machine (CI, inference server, colleague's laptop) ───────────────────
704
+ # Model files are downloaded automatically from the artifact store if not cached
705
+ model = registry.load_model("sentiment-bert", stage="production")
706
+ ```
707
+
708
+ **`log_model()` parameters:**
709
+
710
+ | Parameter | Type | Default | Description |
711
+ |-----------|------|---------|-------------|
712
+ | `model` | any | required | Model object (PyTorch, TensorFlow, sklearn, HuggingFace, or any picklable object) |
713
+ | `artifact_path` | `str` | `"model"` | Sub-path inside the artifact store |
714
+ | `framework` | `str` | auto-detected | `"pytorch"`, `"tensorflow"`, `"sklearn"`, `"huggingface"`, or `"pickle"` |
715
+ | `metadata` | `dict` | `None` | Arbitrary key-value metadata stored as run params |
716
+
717
+ **`load_model()` parameters:**
718
+
719
+ | Parameter | Type | Default | Description |
720
+ |-----------|------|---------|-------------|
721
+ | `model_name` | `str` | required | Registered model name |
722
+ | `version` | `int` | `None` | Specific version to load. Mutually exclusive with `stage` |
723
+ | `stage` | `str` | `None` | Stage to load from: `"development"`, `"staging"`, `"production"`, `"archived"` |
724
+ | `framework` | `str` | from run params | Override framework for deserialization |
725
+
726
+ #### Viewing artifacts in the dashboard
727
+
728
+ Every artifact logged with `log_artifact()` or `log_model()` appears automatically in the **Artifacts tab** of the run's detail page in the Podstack dashboard. No extra steps are needed — the tab populates from the same store the SDK writes to.
729
+
730
+ The Artifacts tab shows:
731
+
732
+ | Column | Description |
733
+ |--------|-------------|
734
+ | **Path** | The relative artifact path as logged (e.g. `model/model.pkl`, `plots/curves.png`) |
735
+ | **Type badge** | File extension, color-coded by category — model weights, data files, images, configs, etc. |
736
+ | **Size** | Formatted file size (B / KB / MB) |
737
+ | **Uploaded** | Timestamp of when the file was stored |
738
+ | **Download** | One-click download button — opens a short-lived direct download link in the browser |
739
+
740
+ A footer below the list shows the combined size of all artifacts for the run.
741
+
742
+ ```python
743
+ # Everything logged here shows up in the dashboard Artifacts tab
744
+ with registry.start_run("bert-finetune-v3") as run:
745
+ registry.log_params({"lr": 2e-5, "epochs": 3})
746
+ registry.log_metrics({"accuracy": 0.93})
747
+
748
+ # These all appear as separate rows in the Artifacts tab
749
+ registry.log_artifact("confusion_matrix.png", artifact_path="plots/confusion_matrix.png")
750
+ registry.log_artifact("feature_importance.csv", artifact_path="analysis/features.csv")
751
+ registry.log_model(model, artifact_path="model", framework="pytorch")
752
+ # ↳ each model file (model.pkl, config.json, etc.) appears as its own row
753
+ ```
754
+
755
+ #### Access control
756
+
757
+ Artifact upload and download URLs are issued by the registry API and require a valid API key and project membership. The URLs are short-lived, ensuring that access always reflects the current state of your project — a revoked key can no longer generate new URLs. Any member of a project can upload and download artifacts for runs within that project.
758
+
610
759
  ### List and Browse
611
760
 
612
761
  ```python
@@ -618,8 +767,12 @@ experiments = registry.list_experiments()
618
767
  # List models
619
768
  models = registry.list_models()
620
769
 
621
- # Download artifacts
622
- registry.download_artifact("run-id", "model/model.pt", "./downloads/")
770
+ # List artifacts for a specific run
771
+ artifacts = registry.list_artifacts(run_id)
772
+
773
+ # Download a specific artifact to a local directory
774
+ dest = registry.download_artifact("run-id", "model/model.pt", "./downloads/")
775
+ print(f"Saved to: {dest}")
623
776
  ```
624
777
 
625
778
  ## GPU Runner - Direct Code Execution
@@ -47,18 +47,22 @@ __all__ = [
47
47
  "set_experiment",
48
48
  "get_experiment",
49
49
  "list_experiments",
50
+ "archive_experiment",
50
51
  "start_run",
51
52
  "end_run",
53
+ "get_run",
54
+ "list_runs",
52
55
  "log_params",
53
56
  "log_metrics",
54
57
  "log_artifact",
55
58
  "set_tag",
59
+ "update_run_notes",
56
60
  "register_model",
57
61
  "get_model",
58
62
  "list_models",
59
63
  "set_model_stage",
60
64
  "set_model_alias",
61
- # New methods
65
+ # MLOps helpers
62
66
  "log_model",
63
67
  "load_model",
64
68
  "log_dataset",
@@ -66,6 +70,32 @@ __all__ = [
66
70
  "get_metric_history",
67
71
  "download_artifact",
68
72
  "search_runs",
73
+ "get_run_datasets",
74
+ "get_model_lineage",
75
+ "autolog",
76
+ # HPO Sweeps
77
+ "create_sweep",
78
+ "get_sweep",
79
+ "list_sweeps",
80
+ "suggest_trial_params",
81
+ "create_trial",
82
+ "complete_trial",
83
+ "list_trials",
84
+ "stop_sweep",
85
+ # Alerts
86
+ "create_alert",
87
+ "list_alerts",
88
+ "delete_alert",
89
+ # Approvals
90
+ "list_pending_approvals",
91
+ "approve_promotion",
92
+ "reject_promotion",
93
+ # Schedules
94
+ "create_schedule",
95
+ "get_schedule",
96
+ "update_schedule",
97
+ "delete_schedule",
98
+ "list_schedules",
69
99
  # Classes
70
100
  "Experiment",
71
101
  "Run",
@@ -400,3 +430,227 @@ def search_runs(
400
430
  List of matching Run objects.
401
431
  """
402
432
  return _get_client().search_runs(experiment_id, status, max_results, offset)
433
+
434
+
435
+ def get_run(run_id: str):
436
+ """Get a run by ID."""
437
+ return _get_client().get_run(run_id)
438
+
439
+
440
+ def list_runs(
441
+ experiment_id: str = None,
442
+ status: str = None,
443
+ limit: int = 20,
444
+ offset: int = 0
445
+ ) -> list:
446
+ """List runs, optionally filtered by experiment or status."""
447
+ return _get_client().list_runs(experiment_id, status, limit, offset)
448
+
449
+
450
+ def archive_experiment(experiment_id: str):
451
+ """Archive an experiment."""
452
+ return _get_client().archive_experiment(experiment_id)
453
+
454
+
455
+ def update_run_notes(run_id: str, notes: str):
456
+ """Update the free-form notes for a run."""
457
+ _get_client().update_run_notes(run_id, notes)
458
+
459
+
460
+ def get_run_datasets(run_id: str) -> list:
461
+ """List datasets logged for a run."""
462
+ return _get_client().get_run_datasets(run_id)
463
+
464
+
465
+ def get_model_lineage(model_id: str) -> dict:
466
+ """Get full lineage for a model (versions → runs → datasets)."""
467
+ return _get_client().get_model_lineage(model_id)
468
+
469
+
470
+ def autolog(
471
+ framework: str = None,
472
+ log_every_n_steps: int = 1,
473
+ log_system_metrics: bool = True,
474
+ system_metrics_interval: float = 10.0,
475
+ ):
476
+ """
477
+ Enable automatic logging for ML training frameworks.
478
+
479
+ Supports pytorch_lightning, huggingface, and sklearn.
480
+ Auto-detects available frameworks when framework is None.
481
+ """
482
+ _get_client().autolog(framework, log_every_n_steps, log_system_metrics, system_metrics_interval)
483
+
484
+
485
+ # ==================== HPO Sweeps ====================
486
+
487
+ def create_sweep(
488
+ experiment_id: str,
489
+ name: str,
490
+ search_space: dict,
491
+ strategy: str = "random",
492
+ max_trials: int = 20,
493
+ metric=None,
494
+ direction: str = "minimize",
495
+ ) -> dict:
496
+ """
497
+ Create a hyperparameter optimization sweep.
498
+
499
+ Args:
500
+ experiment_id: Experiment to run trials in.
501
+ name: Sweep name.
502
+ search_space: Dict mapping param names to spec dicts.
503
+ strategy: "random" (default) or "grid".
504
+ max_trials: Maximum number of trials.
505
+ metric: Metric key to optimize (str), or dict with "name" and "direction" keys.
506
+ direction: "minimize" (default) or "maximize". Ignored if metric is a dict.
507
+
508
+ Returns:
509
+ Sweep dict with id, status, etc.
510
+ """
511
+ if isinstance(metric, dict):
512
+ direction = metric.get("direction", direction)
513
+ metric = metric.get("name", None)
514
+ return _get_client().create_sweep(
515
+ experiment_id, name, search_space, strategy, max_trials, metric, direction
516
+ )
517
+
518
+
519
+ def get_sweep(sweep_id: str) -> dict:
520
+ """Get a sweep by ID."""
521
+ return _get_client().get_sweep(sweep_id)
522
+
523
+
524
+ def list_sweeps(experiment_id: str) -> list:
525
+ """List all sweeps for an experiment."""
526
+ return _get_client().list_sweeps(experiment_id)
527
+
528
+
529
+ def suggest_trial_params(sweep_id: str) -> dict:
530
+ """Get suggested hyperparameter values for the next trial."""
531
+ return _get_client().suggest_trial_params(sweep_id)
532
+
533
+
534
+ def create_trial(sweep_id: str, run_id: str, params: dict) -> dict:
535
+ """Record a new trial linked to a sweep and run."""
536
+ return _get_client().create_trial(sweep_id, run_id, params)
537
+
538
+
539
+ def complete_trial(sweep_id: str, trial_id: str, value: float) -> None:
540
+ """Mark a trial as completed with its objective metric value."""
541
+ _get_client().complete_trial(sweep_id, trial_id, value)
542
+
543
+
544
+ def list_trials(sweep_id: str) -> list:
545
+ """List all trials for a sweep."""
546
+ return _get_client().list_trials(sweep_id)
547
+
548
+
549
+ def stop_sweep(sweep_id: str) -> None:
550
+ """Stop a running sweep."""
551
+ _get_client().stop_sweep(sweep_id)
552
+
553
+
554
+ # ==================== Alerts ====================
555
+
556
+ def create_alert(
557
+ run_id: str,
558
+ metric_key: str,
559
+ condition: str,
560
+ threshold: float,
561
+ notify_email: str = None,
562
+ notify_slack: str = None,
563
+ ) -> dict:
564
+ """
565
+ Create a metric threshold alert for a run.
566
+
567
+ Args:
568
+ run_id: Run to monitor.
569
+ metric_key: Metric name to watch.
570
+ condition: One of gt, lt, gte, lte, eq.
571
+ threshold: Trigger threshold value.
572
+ notify_email: Email address to notify.
573
+ notify_slack: Slack webhook URL to notify.
574
+
575
+ Returns:
576
+ Alert dict with id.
577
+ """
578
+ return _get_client().create_alert(run_id, metric_key, condition, threshold, notify_email, notify_slack)
579
+
580
+
581
+ def list_alerts(run_id: str) -> list:
582
+ """List all alerts for a run."""
583
+ return _get_client().list_alerts(run_id)
584
+
585
+
586
+ def delete_alert(alert_id: str) -> None:
587
+ """Delete an alert by ID."""
588
+ _get_client().delete_alert(alert_id)
589
+
590
+
591
+ # ==================== Approvals ====================
592
+
593
+ def list_pending_approvals() -> list:
594
+ """List all pending model promotion approval requests in the project."""
595
+ return _get_client().list_pending_approvals()
596
+
597
+
598
+ def approve_promotion(request_id: str, comment: str = None) -> dict:
599
+ """Approve a pending model stage promotion request."""
600
+ return _get_client().approve_promotion(request_id, comment)
601
+
602
+
603
+ def reject_promotion(request_id: str, comment: str = None) -> dict:
604
+ """Reject a pending model stage promotion request."""
605
+ return _get_client().reject_promotion(request_id, comment)
606
+
607
+
608
+ # ==================== Schedules ====================
609
+
610
+ def create_schedule(
611
+ name: str,
612
+ experiment_id: str,
613
+ cron_expr: str,
614
+ run_name: str = None,
615
+ run_config: dict = None,
616
+ webhook_url: str = None,
617
+ ) -> dict:
618
+ """
619
+ Create a recurring training schedule using a cron expression.
620
+
621
+ Args:
622
+ name: Schedule name.
623
+ experiment_id: Experiment to create runs in.
624
+ cron_expr: 5-field cron expression (e.g. "0 2 * * 1").
625
+ run_name: Base name for created runs.
626
+ run_config: Optional params to log on each scheduled run.
627
+ webhook_url: Optional URL to POST after each run fires.
628
+
629
+ Returns:
630
+ Schedule dict with id, next_fire_at, etc.
631
+ """
632
+ return _get_client().create_schedule(name, experiment_id, cron_expr, run_name, run_config, webhook_url)
633
+
634
+
635
+ def list_schedules() -> list:
636
+ """List all training schedules in the project."""
637
+ return _get_client().list_schedules()
638
+
639
+
640
+ def get_schedule(schedule_id: str) -> dict:
641
+ """Get a schedule by ID."""
642
+ return _get_client().get_schedule(schedule_id)
643
+
644
+
645
+ def update_schedule(
646
+ schedule_id: str,
647
+ enabled: bool = None,
648
+ cron_expr: str = None,
649
+ ) -> dict:
650
+ """Update a schedule's enabled state or cron expression."""
651
+ return _get_client().update_schedule(schedule_id, enabled, cron_expr)
652
+
653
+
654
+ def delete_schedule(schedule_id: str) -> None:
655
+ """Delete a schedule."""
656
+ _get_client().delete_schedule(schedule_id)