podstack 1.3.14__tar.gz → 1.3.15__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {podstack-1.3.14 → podstack-1.3.15}/PKG-INFO +1 -1
- {podstack-1.3.14 → podstack-1.3.15}/podstack/registry/__init__.py +255 -1
- {podstack-1.3.14 → podstack-1.3.15}/podstack.egg-info/PKG-INFO +1 -1
- {podstack-1.3.14 → podstack-1.3.15}/pyproject.toml +1 -1
- {podstack-1.3.14 → podstack-1.3.15}/LICENSE +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/README.md +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/podstack/__init__.py +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/podstack/annotations.py +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/podstack/client.py +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/podstack/exceptions.py +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/podstack/execution.py +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/podstack/gpu_runner.py +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/podstack/models.py +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/podstack/notebook.py +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/podstack/registry/autolog.py +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/podstack/registry/client.py +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/podstack/registry/exceptions.py +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/podstack/registry/experiment.py +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/podstack/registry/model.py +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/podstack/registry/model_utils.py +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/podstack.egg-info/SOURCES.txt +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/podstack.egg-info/dependency_links.txt +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/podstack.egg-info/requires.txt +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/podstack.egg-info/top_level.txt +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/podstack_gpu/__init__.py +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/podstack_gpu/app.py +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/podstack_gpu/exceptions.py +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/podstack_gpu/image.py +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/podstack_gpu/runner.py +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/podstack_gpu/secret.py +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/podstack_gpu/utils.py +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/podstack_gpu/volume.py +0 -0
- {podstack-1.3.14 → podstack-1.3.15}/setup.cfg +0 -0
|
@@ -47,18 +47,22 @@ __all__ = [
|
|
|
47
47
|
"set_experiment",
|
|
48
48
|
"get_experiment",
|
|
49
49
|
"list_experiments",
|
|
50
|
+
"archive_experiment",
|
|
50
51
|
"start_run",
|
|
51
52
|
"end_run",
|
|
53
|
+
"get_run",
|
|
54
|
+
"list_runs",
|
|
52
55
|
"log_params",
|
|
53
56
|
"log_metrics",
|
|
54
57
|
"log_artifact",
|
|
55
58
|
"set_tag",
|
|
59
|
+
"update_run_notes",
|
|
56
60
|
"register_model",
|
|
57
61
|
"get_model",
|
|
58
62
|
"list_models",
|
|
59
63
|
"set_model_stage",
|
|
60
64
|
"set_model_alias",
|
|
61
|
-
#
|
|
65
|
+
# MLOps helpers
|
|
62
66
|
"log_model",
|
|
63
67
|
"load_model",
|
|
64
68
|
"log_dataset",
|
|
@@ -66,6 +70,32 @@ __all__ = [
|
|
|
66
70
|
"get_metric_history",
|
|
67
71
|
"download_artifact",
|
|
68
72
|
"search_runs",
|
|
73
|
+
"get_run_datasets",
|
|
74
|
+
"get_model_lineage",
|
|
75
|
+
"autolog",
|
|
76
|
+
# HPO Sweeps
|
|
77
|
+
"create_sweep",
|
|
78
|
+
"get_sweep",
|
|
79
|
+
"list_sweeps",
|
|
80
|
+
"suggest_trial_params",
|
|
81
|
+
"create_trial",
|
|
82
|
+
"complete_trial",
|
|
83
|
+
"list_trials",
|
|
84
|
+
"stop_sweep",
|
|
85
|
+
# Alerts
|
|
86
|
+
"create_alert",
|
|
87
|
+
"list_alerts",
|
|
88
|
+
"delete_alert",
|
|
89
|
+
# Approvals
|
|
90
|
+
"list_pending_approvals",
|
|
91
|
+
"approve_promotion",
|
|
92
|
+
"reject_promotion",
|
|
93
|
+
# Schedules
|
|
94
|
+
"create_schedule",
|
|
95
|
+
"get_schedule",
|
|
96
|
+
"update_schedule",
|
|
97
|
+
"delete_schedule",
|
|
98
|
+
"list_schedules",
|
|
69
99
|
# Classes
|
|
70
100
|
"Experiment",
|
|
71
101
|
"Run",
|
|
@@ -400,3 +430,227 @@ def search_runs(
|
|
|
400
430
|
List of matching Run objects.
|
|
401
431
|
"""
|
|
402
432
|
return _get_client().search_runs(experiment_id, status, max_results, offset)
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
def get_run(run_id: str):
    """Fetch a single run by its identifier.

    Args:
        run_id: ID of the run to look up.

    Returns:
        Whatever the client's ``get_run`` returns for ``run_id``.
    """
    client = _get_client()
    return client.get_run(run_id)
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
def list_runs(
    experiment_id: str = None,
    status: str = None,
    limit: int = 20,
    offset: int = 0
) -> list:
    """Return runs, optionally narrowed by experiment or status.

    All four arguments are forwarded positionally, in order, to the
    client's ``list_runs``.

    Args:
        experiment_id: Optional experiment filter (default ``None``).
        status: Optional status filter (default ``None``).
        limit: Limit value handed to the client (default 20).
        offset: Offset value handed to the client (default 0).

    Returns:
        The result of the client's ``list_runs`` call.
    """
    client = _get_client()
    runs = client.list_runs(experiment_id, status, limit, offset)
    return runs
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
def archive_experiment(experiment_id: str):
    """Archive the experiment identified by ``experiment_id``.

    Args:
        experiment_id: ID of the experiment to archive.

    Returns:
        Whatever the client's ``archive_experiment`` returns.
    """
    client = _get_client()
    return client.archive_experiment(experiment_id)
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
def update_run_notes(run_id: str, notes: str):
    """Replace the free-form notes attached to a run.

    Args:
        run_id: ID of the run whose notes are updated.
        notes: New notes text.

    The client's result is intentionally not returned; this function
    always returns ``None``.
    """
    client = _get_client()
    client.update_run_notes(run_id, notes)
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
def get_run_datasets(run_id: str) -> list:
    """Return the datasets logged for a run.

    Args:
        run_id: ID of the run to inspect.

    Returns:
        The result of the client's ``get_run_datasets`` call.
    """
    client = _get_client()
    datasets = client.get_run_datasets(run_id)
    return datasets
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
def get_model_lineage(model_id: str) -> dict:
    """Return the full lineage of a model (versions → runs → datasets).

    Args:
        model_id: ID of the model to trace.

    Returns:
        The result of the client's ``get_model_lineage`` call.
    """
    client = _get_client()
    lineage = client.get_model_lineage(model_id)
    return lineage
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
def autolog(
    framework: str = None,
    log_every_n_steps: int = 1,
    log_system_metrics: bool = True,
    system_metrics_interval: float = 10.0,
):
    """
    Turn on automatic logging for ML training frameworks.

    Supported frameworks are pytorch_lightning, huggingface, and sklearn.
    When ``framework`` is None, available frameworks are auto-detected.

    Args:
        framework: Framework name, or None for auto-detection.
        log_every_n_steps: Forwarded to the client (default 1).
        log_system_metrics: Forwarded to the client (default True).
        system_metrics_interval: Forwarded to the client (default 10.0).

    The arguments are passed positionally, in the order above, to the
    client's ``autolog``; nothing is returned.
    """
    client = _get_client()
    client.autolog(
        framework,
        log_every_n_steps,
        log_system_metrics,
        system_metrics_interval,
    )
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
# ==================== HPO Sweeps ====================
|
|
486
|
+
|
|
487
|
+
def create_sweep(
    experiment_id: str,
    name: str,
    search_space: dict,
    strategy: str = "random",
    max_trials: int = 20,
    metric=None,
    direction: str = "minimize",
) -> dict:
    """
    Create a hyperparameter optimization sweep.

    Args:
        experiment_id: Experiment to run trials in.
        name: Sweep name.
        search_space: Dict mapping param names to spec dicts.
        strategy: "random" (default) or "grid".
        max_trials: Maximum number of trials.
        metric: Metric key to optimize (str), or a dict carrying "name"
            and "direction" keys.
        direction: "minimize" (default) or "maximize". When ``metric`` is
            a dict, its "direction" entry (if present) takes precedence.

    Returns:
        Sweep dict with id, status, etc.
    """
    metric_key = metric
    if isinstance(metric, dict):
        # Dict form: unpack it into the flat (metric, direction) pair the
        # client call takes; the explicit ``direction`` argument is only a
        # fallback when the dict has no "direction" entry.
        direction = metric.get("direction", direction)
        metric_key = metric.get("name", None)

    client = _get_client()
    return client.create_sweep(
        experiment_id,
        name,
        search_space,
        strategy,
        max_trials,
        metric_key,
        direction,
    )
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
def get_sweep(sweep_id: str) -> dict:
    """Fetch a sweep by its identifier.

    Args:
        sweep_id: ID of the sweep to look up.

    Returns:
        The result of the client's ``get_sweep`` call.
    """
    client = _get_client()
    sweep = client.get_sweep(sweep_id)
    return sweep
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
def list_sweeps(experiment_id: str) -> list:
    """Return every sweep belonging to an experiment.

    Args:
        experiment_id: ID of the experiment whose sweeps are listed.

    Returns:
        The result of the client's ``list_sweeps`` call.
    """
    client = _get_client()
    sweeps = client.list_sweeps(experiment_id)
    return sweeps
|
|
527
|
+
|
|
528
|
+
|
|
529
|
+
def suggest_trial_params(sweep_id: str) -> dict:
    """Ask for suggested hyperparameter values for the next trial.

    Args:
        sweep_id: ID of the sweep to draw a suggestion from.

    Returns:
        The result of the client's ``suggest_trial_params`` call.
    """
    client = _get_client()
    suggestion = client.suggest_trial_params(sweep_id)
    return suggestion
|
|
532
|
+
|
|
533
|
+
|
|
534
|
+
def create_trial(sweep_id: str, run_id: str, params: dict) -> dict:
    """Record a new trial that links a sweep to a run.

    Args:
        sweep_id: ID of the sweep the trial belongs to.
        run_id: ID of the run associated with the trial.
        params: Parameter dict for the trial.

    Returns:
        The result of the client's ``create_trial`` call.
    """
    client = _get_client()
    trial = client.create_trial(sweep_id, run_id, params)
    return trial
|
|
537
|
+
|
|
538
|
+
|
|
539
|
+
def complete_trial(sweep_id: str, trial_id: str, value: float) -> None:
    """Mark a trial as completed, recording its objective metric value.

    Args:
        sweep_id: ID of the sweep the trial belongs to.
        trial_id: ID of the trial to complete.
        value: Objective metric value for the trial.
    """
    client = _get_client()
    client.complete_trial(sweep_id, trial_id, value)
|
|
542
|
+
|
|
543
|
+
|
|
544
|
+
def list_trials(sweep_id: str) -> list:
    """Return every trial recorded for a sweep.

    Args:
        sweep_id: ID of the sweep whose trials are listed.

    Returns:
        The result of the client's ``list_trials`` call.
    """
    client = _get_client()
    trials = client.list_trials(sweep_id)
    return trials
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
def stop_sweep(sweep_id: str) -> None:
    """Stop a running sweep.

    Args:
        sweep_id: ID of the sweep to stop.
    """
    client = _get_client()
    client.stop_sweep(sweep_id)
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
# ==================== Alerts ====================
|
|
555
|
+
|
|
556
|
+
def create_alert(
    run_id: str,
    metric_key: str,
    condition: str,
    threshold: float,
    notify_email: str = None,
    notify_slack: str = None,
) -> dict:
    """
    Create a metric threshold alert for a run.

    The arguments are forwarded positionally, in the order listed, to the
    client's ``create_alert``.

    Args:
        run_id: Run to monitor.
        metric_key: Metric name to watch.
        condition: One of gt, lt, gte, lte, eq.
        threshold: Trigger threshold value.
        notify_email: Email address to notify.
        notify_slack: Slack webhook URL to notify.

    Returns:
        Alert dict with id.
    """
    client = _get_client()
    return client.create_alert(
        run_id,
        metric_key,
        condition,
        threshold,
        notify_email,
        notify_slack,
    )
|
|
579
|
+
|
|
580
|
+
|
|
581
|
+
def list_alerts(run_id: str) -> list:
    """Return every alert configured for a run.

    Args:
        run_id: ID of the run whose alerts are listed.

    Returns:
        The result of the client's ``list_alerts`` call.
    """
    client = _get_client()
    alerts = client.list_alerts(run_id)
    return alerts
|
|
584
|
+
|
|
585
|
+
|
|
586
|
+
def delete_alert(alert_id: str) -> None:
    """Delete the alert identified by ``alert_id``.

    Args:
        alert_id: ID of the alert to delete.
    """
    client = _get_client()
    client.delete_alert(alert_id)
|
|
589
|
+
|
|
590
|
+
|
|
591
|
+
# ==================== Approvals ====================
|
|
592
|
+
|
|
593
|
+
def list_pending_approvals() -> list:
    """Return all pending model promotion approval requests in the project.

    Returns:
        The result of the client's ``list_pending_approvals`` call.
    """
    client = _get_client()
    pending = client.list_pending_approvals()
    return pending
|
|
596
|
+
|
|
597
|
+
|
|
598
|
+
def approve_promotion(request_id: str, comment: str = None) -> dict:
    """Approve a pending model stage promotion request.

    Args:
        request_id: ID of the promotion request to approve.
        comment: Optional comment forwarded with the approval.

    Returns:
        The result of the client's ``approve_promotion`` call.
    """
    client = _get_client()
    outcome = client.approve_promotion(request_id, comment)
    return outcome
|
|
601
|
+
|
|
602
|
+
|
|
603
|
+
def reject_promotion(request_id: str, comment: str = None) -> dict:
    """Reject a pending model stage promotion request.

    Args:
        request_id: ID of the promotion request to reject.
        comment: Optional comment forwarded with the rejection.

    Returns:
        The result of the client's ``reject_promotion`` call.
    """
    client = _get_client()
    outcome = client.reject_promotion(request_id, comment)
    return outcome
|
|
606
|
+
|
|
607
|
+
|
|
608
|
+
# ==================== Schedules ====================
|
|
609
|
+
|
|
610
|
+
def create_schedule(
    name: str,
    experiment_id: str,
    cron_expr: str,
    run_name: str = None,
    run_config: dict = None,
    webhook_url: str = None,
) -> dict:
    """
    Create a recurring training schedule driven by a cron expression.

    The arguments are forwarded positionally, in the order listed, to the
    client's ``create_schedule``.

    Args:
        name: Schedule name.
        experiment_id: Experiment to create runs in.
        cron_expr: 5-field cron expression (e.g. "0 2 * * 1").
        run_name: Base name for created runs.
        run_config: Optional params to log on each scheduled run.
        webhook_url: Optional URL to POST after each run fires.

    Returns:
        Schedule dict with id, next_fire_at, etc.
    """
    client = _get_client()
    return client.create_schedule(
        name,
        experiment_id,
        cron_expr,
        run_name,
        run_config,
        webhook_url,
    )
|
|
633
|
+
|
|
634
|
+
|
|
635
|
+
def list_schedules() -> list:
    """Return all training schedules in the project.

    Returns:
        The result of the client's ``list_schedules`` call.
    """
    client = _get_client()
    schedules = client.list_schedules()
    return schedules
|
|
638
|
+
|
|
639
|
+
|
|
640
|
+
def get_schedule(schedule_id: str) -> dict:
    """Fetch a schedule by its identifier.

    Args:
        schedule_id: ID of the schedule to look up.

    Returns:
        The result of the client's ``get_schedule`` call.
    """
    client = _get_client()
    schedule = client.get_schedule(schedule_id)
    return schedule
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
def update_schedule(
    schedule_id: str,
    enabled: bool = None,
    cron_expr: str = None,
) -> dict:
    """Change a schedule's enabled state or cron expression.

    Both optional values are forwarded to the client as given, including
    ``None`` when left at their defaults.

    Args:
        schedule_id: ID of the schedule to update.
        enabled: New enabled state (default ``None``).
        cron_expr: New cron expression (default ``None``).

    Returns:
        The result of the client's ``update_schedule`` call.
    """
    client = _get_client()
    updated = client.update_schedule(schedule_id, enabled, cron_expr)
    return updated
|
|
652
|
+
|
|
653
|
+
|
|
654
|
+
def delete_schedule(schedule_id: str) -> None:
    """Delete the schedule identified by ``schedule_id``.

    Args:
        schedule_id: ID of the schedule to delete.
    """
    client = _get_client()
    client.delete_schedule(schedule_id)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|