podstack 1.3.14__tar.gz → 1.3.16__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {podstack-1.3.14 → podstack-1.3.16}/PKG-INFO +1 -1
- {podstack-1.3.14 → podstack-1.3.16}/podstack/registry/__init__.py +288 -12
- {podstack-1.3.14 → podstack-1.3.16}/podstack/registry/client.py +9 -0
- {podstack-1.3.14 → podstack-1.3.16}/podstack.egg-info/PKG-INFO +1 -1
- {podstack-1.3.14 → podstack-1.3.16}/pyproject.toml +1 -1
- {podstack-1.3.14 → podstack-1.3.16}/LICENSE +0 -0
- {podstack-1.3.14 → podstack-1.3.16}/README.md +0 -0
- {podstack-1.3.14 → podstack-1.3.16}/podstack/__init__.py +0 -0
- {podstack-1.3.14 → podstack-1.3.16}/podstack/annotations.py +0 -0
- {podstack-1.3.14 → podstack-1.3.16}/podstack/client.py +0 -0
- {podstack-1.3.14 → podstack-1.3.16}/podstack/exceptions.py +0 -0
- {podstack-1.3.14 → podstack-1.3.16}/podstack/execution.py +0 -0
- {podstack-1.3.14 → podstack-1.3.16}/podstack/gpu_runner.py +0 -0
- {podstack-1.3.14 → podstack-1.3.16}/podstack/models.py +0 -0
- {podstack-1.3.14 → podstack-1.3.16}/podstack/notebook.py +0 -0
- {podstack-1.3.14 → podstack-1.3.16}/podstack/registry/autolog.py +0 -0
- {podstack-1.3.14 → podstack-1.3.16}/podstack/registry/exceptions.py +0 -0
- {podstack-1.3.14 → podstack-1.3.16}/podstack/registry/experiment.py +0 -0
- {podstack-1.3.14 → podstack-1.3.16}/podstack/registry/model.py +0 -0
- {podstack-1.3.14 → podstack-1.3.16}/podstack/registry/model_utils.py +0 -0
- {podstack-1.3.14 → podstack-1.3.16}/podstack.egg-info/SOURCES.txt +0 -0
- {podstack-1.3.14 → podstack-1.3.16}/podstack.egg-info/dependency_links.txt +0 -0
- {podstack-1.3.14 → podstack-1.3.16}/podstack.egg-info/requires.txt +0 -0
- {podstack-1.3.14 → podstack-1.3.16}/podstack.egg-info/top_level.txt +0 -0
- {podstack-1.3.14 → podstack-1.3.16}/podstack_gpu/__init__.py +0 -0
- {podstack-1.3.14 → podstack-1.3.16}/podstack_gpu/app.py +0 -0
- {podstack-1.3.14 → podstack-1.3.16}/podstack_gpu/exceptions.py +0 -0
- {podstack-1.3.14 → podstack-1.3.16}/podstack_gpu/image.py +0 -0
- {podstack-1.3.14 → podstack-1.3.16}/podstack_gpu/runner.py +0 -0
- {podstack-1.3.14 → podstack-1.3.16}/podstack_gpu/secret.py +0 -0
- {podstack-1.3.14 → podstack-1.3.16}/podstack_gpu/utils.py +0 -0
- {podstack-1.3.14 → podstack-1.3.16}/podstack_gpu/volume.py +0 -0
- {podstack-1.3.14 → podstack-1.3.16}/setup.cfg +0 -0
|
@@ -47,18 +47,22 @@ __all__ = [
|
|
|
47
47
|
"set_experiment",
|
|
48
48
|
"get_experiment",
|
|
49
49
|
"list_experiments",
|
|
50
|
+
"archive_experiment",
|
|
50
51
|
"start_run",
|
|
51
52
|
"end_run",
|
|
53
|
+
"get_run",
|
|
54
|
+
"list_runs",
|
|
52
55
|
"log_params",
|
|
53
56
|
"log_metrics",
|
|
54
57
|
"log_artifact",
|
|
55
58
|
"set_tag",
|
|
59
|
+
"update_run_notes",
|
|
56
60
|
"register_model",
|
|
57
61
|
"get_model",
|
|
58
62
|
"list_models",
|
|
59
63
|
"set_model_stage",
|
|
60
64
|
"set_model_alias",
|
|
61
|
-
#
|
|
65
|
+
# MLOps helpers
|
|
62
66
|
"log_model",
|
|
63
67
|
"load_model",
|
|
64
68
|
"log_dataset",
|
|
@@ -66,6 +70,32 @@ __all__ = [
|
|
|
66
70
|
"get_metric_history",
|
|
67
71
|
"download_artifact",
|
|
68
72
|
"search_runs",
|
|
73
|
+
"get_run_datasets",
|
|
74
|
+
"get_model_lineage",
|
|
75
|
+
"autolog",
|
|
76
|
+
# HPO Sweeps
|
|
77
|
+
"create_sweep",
|
|
78
|
+
"get_sweep",
|
|
79
|
+
"list_sweeps",
|
|
80
|
+
"suggest_trial_params",
|
|
81
|
+
"create_trial",
|
|
82
|
+
"complete_trial",
|
|
83
|
+
"list_trials",
|
|
84
|
+
"stop_sweep",
|
|
85
|
+
# Alerts
|
|
86
|
+
"create_alert",
|
|
87
|
+
"list_alerts",
|
|
88
|
+
"delete_alert",
|
|
89
|
+
# Approvals
|
|
90
|
+
"list_pending_approvals",
|
|
91
|
+
"approve_promotion",
|
|
92
|
+
"reject_promotion",
|
|
93
|
+
# Schedules
|
|
94
|
+
"create_schedule",
|
|
95
|
+
"get_schedule",
|
|
96
|
+
"update_schedule",
|
|
97
|
+
"delete_schedule",
|
|
98
|
+
"list_schedules",
|
|
69
99
|
# Classes
|
|
70
100
|
"Experiment",
|
|
71
101
|
"Run",
|
|
@@ -317,25 +347,47 @@ def load_model(model_name: str, version: int = None, stage: str = None, framewor
|
|
|
317
347
|
def log_dataset(
    name: str,
    path: str = None,
    source_path: str = None,
    df=None,
    context: str = "training",
    split: str = None,
    digest: str = None,
    source_type: str = "local",
    tags: dict = None,
    version: str = None,
    description: str = None,
    num_rows: int = None,
    num_features: int = None,
):
    """
    Record a dataset against the currently active run.

    Every argument is handed to the registry client's ``log_dataset`` by
    keyword. Prefer keyword arguments for everything after ``path``: the
    positional order of the later parameters differs from release 1.3.14.

    Args:
        name: Dataset name.
        path: Local file path or URI.
        source_path: Alias for ``path``.
        df: Optional pandas DataFrame for auto schema/profile computation.
        context: One of "training", "validation", "test".
        split: Alias for ``context`` ("train", "val", "test").
        digest: SHA-256 hex digest (auto-computed from path if not provided).
        source_type: One of "local", "s3", "gcs", "url".
        tags: Optional dict of string tags.
        version: Optional dataset version, forwarded unchanged.
        description: Optional description, forwarded unchanged.
        num_rows: Optional row count, forwarded unchanged.
        num_features: Optional feature count, forwarded unchanged.

    Returns:
        Whatever the client's ``log_dataset`` returns.
    """
    # Collect the arguments once so the delegation itself stays a one-liner.
    forwarded = {
        "name": name,
        "path": path,
        "source_path": source_path,
        "df": df,
        "context": context,
        "split": split,
        "digest": digest,
        "source_type": source_type,
        "tags": tags,
        "version": version,
        "description": description,
        "num_rows": num_rows,
        "num_features": num_features,
    }
    client = _get_client()
    return client.log_dataset(**forwarded)
|
|
339
391
|
|
|
340
392
|
|
|
341
393
|
def compare_runs(run_ids: list, metric_keys: list = None) -> dict:
|
|
@@ -400,3 +452,227 @@ def search_runs(
|
|
|
400
452
|
List of matching Run objects.
|
|
401
453
|
"""
|
|
402
454
|
return _get_client().search_runs(experiment_id, status, max_results, offset)
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
def get_run(run_id: str):
    """Fetch a single run by its ID via the registry client."""
    client = _get_client()
    return client.get_run(run_id)
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
def list_runs(
    experiment_id: str = None,
    status: str = None,
    limit: int = 20,
    offset: int = 0
) -> list:
    """
    Return runs, optionally narrowed by experiment and/or status.

    Args:
        experiment_id: Optional experiment filter.
        status: Optional status filter.
        limit: Page size passed to the client (default 20).
        offset: Page offset passed to the client (default 0).
    """
    # The client takes these positionally, in this exact order.
    positional = (experiment_id, status, limit, offset)
    client = _get_client()
    return client.list_runs(*positional)
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
def archive_experiment(experiment_id: str):
    """Archive the experiment identified by ``experiment_id``."""
    client = _get_client()
    return client.archive_experiment(experiment_id)
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
def update_run_notes(run_id: str, notes: str):
    """
    Replace the free-form notes attached to a run.

    The client's result is discarded; this function returns None.
    """
    client = _get_client()
    client.update_run_notes(run_id, notes)
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
def get_run_datasets(run_id: str) -> list:
    """Return the datasets that were logged for the given run."""
    client = _get_client()
    return client.get_run_datasets(run_id)
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
def get_model_lineage(model_id: str) -> dict:
    """Return the full lineage of a model (versions → runs → datasets)."""
    client = _get_client()
    return client.get_model_lineage(model_id)
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
def autolog(
    framework: str = None,
    log_every_n_steps: int = 1,
    log_system_metrics: bool = True,
    system_metrics_interval: float = 10.0,
):
    """
    Turn on automatic logging for ML training frameworks.

    Supported frameworks are pytorch_lightning, huggingface, and sklearn.
    When ``framework`` is None, available frameworks are auto-detected.

    The remaining options are passed through to the client unchanged.
    This function returns None.
    """
    # Positional order must match the client's autolog signature.
    options = (
        framework,
        log_every_n_steps,
        log_system_metrics,
        system_metrics_interval,
    )
    client = _get_client()
    client.autolog(*options)
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
# ==================== HPO Sweeps ====================
|
|
508
|
+
|
|
509
|
+
def create_sweep(
    experiment_id: str,
    name: str,
    search_space: dict,
    strategy: str = "random",
    max_trials: int = 20,
    metric=None,
    direction: str = "minimize",
) -> dict:
    """
    Create a hyperparameter optimization sweep.

    Args:
        experiment_id: Experiment to run trials in.
        name: Sweep name.
        search_space: Dict mapping param names to spec dicts.
        strategy: "random" (default) or "grid".
        max_trials: Maximum number of trials.
        metric: Metric key to optimize (str), or a dict with "name" and
            "direction" keys.
        direction: "minimize" (default) or "maximize". When ``metric`` is a
            dict that carries its own "direction", that value wins.

    Returns:
        Sweep dict with id, status, etc.
    """
    metric_name = metric
    objective = direction
    if isinstance(metric, dict):
        # Dict form: unpack it, falling back to the explicit ``direction``
        # argument when the dict does not specify one.
        objective = metric.get("direction", direction)
        metric_name = metric.get("name")

    client = _get_client()
    return client.create_sweep(
        experiment_id,
        name,
        search_space,
        strategy,
        max_trials,
        metric_name,
        objective,
    )
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
def get_sweep(sweep_id: str) -> dict:
    """Fetch a single sweep by its ID."""
    client = _get_client()
    return client.get_sweep(sweep_id)
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
def list_sweeps(experiment_id: str) -> list:
    """Return every sweep that belongs to the given experiment."""
    client = _get_client()
    return client.list_sweeps(experiment_id)
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
def suggest_trial_params(sweep_id: str) -> dict:
    """Ask the sweep for the hyperparameter values of its next trial."""
    client = _get_client()
    return client.suggest_trial_params(sweep_id)
|
|
554
|
+
|
|
555
|
+
|
|
556
|
+
def create_trial(sweep_id: str, run_id: str, params: dict) -> dict:
    """Record a new trial that ties ``run_id`` and ``params`` to a sweep."""
    client = _get_client()
    return client.create_trial(sweep_id, run_id, params)
|
|
559
|
+
|
|
560
|
+
|
|
561
|
+
def complete_trial(sweep_id: str, trial_id: str, value: float) -> None:
    """
    Mark a trial as completed.

    Args:
        sweep_id: Sweep the trial belongs to.
        trial_id: Trial to complete.
        value: Objective metric value achieved by the trial.
    """
    client = _get_client()
    client.complete_trial(sweep_id, trial_id, value)
|
|
564
|
+
|
|
565
|
+
|
|
566
|
+
def list_trials(sweep_id: str) -> list:
    """Return every trial recorded for the given sweep."""
    client = _get_client()
    return client.list_trials(sweep_id)
|
|
569
|
+
|
|
570
|
+
|
|
571
|
+
def stop_sweep(sweep_id: str) -> None:
    """Stop a running sweep; nothing is returned."""
    client = _get_client()
    client.stop_sweep(sweep_id)
|
|
574
|
+
|
|
575
|
+
|
|
576
|
+
# ==================== Alerts ====================
|
|
577
|
+
|
|
578
|
+
def create_alert(
    run_id: str,
    metric_key: str,
    condition: str,
    threshold: float,
    notify_email: str = None,
    notify_slack: str = None,
) -> dict:
    """
    Create a metric threshold alert for a run.

    Args:
        run_id: Run to monitor.
        metric_key: Metric name to watch.
        condition: One of gt, lt, gte, lte, eq.
        threshold: Trigger threshold value.
        notify_email: Email address to notify.
        notify_slack: Slack webhook URL to notify.

    Returns:
        Alert dict with id.
    """
    # What to watch, followed by where to send notifications.
    rule = (run_id, metric_key, condition, threshold)
    targets = (notify_email, notify_slack)
    client = _get_client()
    return client.create_alert(*rule, *targets)
|
|
601
|
+
|
|
602
|
+
|
|
603
|
+
def list_alerts(run_id: str) -> list:
    """Return every alert configured for the given run."""
    client = _get_client()
    return client.list_alerts(run_id)
|
|
606
|
+
|
|
607
|
+
|
|
608
|
+
def delete_alert(alert_id: str) -> None:
    """Delete the alert with the given ID; nothing is returned."""
    client = _get_client()
    client.delete_alert(alert_id)
|
|
611
|
+
|
|
612
|
+
|
|
613
|
+
# ==================== Approvals ====================
|
|
614
|
+
|
|
615
|
+
def list_pending_approvals() -> list:
    """Return all pending model promotion approval requests in the project."""
    client = _get_client()
    return client.list_pending_approvals()
|
|
618
|
+
|
|
619
|
+
|
|
620
|
+
def approve_promotion(request_id: str, comment: str = None) -> dict:
    """
    Approve a pending model stage promotion request.

    Args:
        request_id: Approval request to approve.
        comment: Optional comment forwarded with the decision.
    """
    client = _get_client()
    return client.approve_promotion(request_id, comment)
|
|
623
|
+
|
|
624
|
+
|
|
625
|
+
def reject_promotion(request_id: str, comment: str = None) -> dict:
    """
    Reject a pending model stage promotion request.

    Args:
        request_id: Approval request to reject.
        comment: Optional comment forwarded with the decision.
    """
    client = _get_client()
    return client.reject_promotion(request_id, comment)
|
|
628
|
+
|
|
629
|
+
|
|
630
|
+
# ==================== Schedules ====================
|
|
631
|
+
|
|
632
|
+
def create_schedule(
    name: str,
    experiment_id: str,
    cron_expr: str,
    run_name: str = None,
    run_config: dict = None,
    webhook_url: str = None,
) -> dict:
    """
    Create a recurring training schedule driven by a cron expression.

    Args:
        name: Schedule name.
        experiment_id: Experiment to create runs in.
        cron_expr: 5-field cron expression (e.g. "0 2 * * 1").
        run_name: Base name for created runs.
        run_config: Optional params to log on each scheduled run.
        webhook_url: Optional URL to POST after each run fires.

    Returns:
        Schedule dict with id, next_fire_at, etc.
    """
    # Required identification first, then the optional per-run settings.
    required = (name, experiment_id, cron_expr)
    optional = (run_name, run_config, webhook_url)
    client = _get_client()
    return client.create_schedule(*required, *optional)
|
|
655
|
+
|
|
656
|
+
|
|
657
|
+
def list_schedules() -> list:
    """Return all training schedules in the project."""
    client = _get_client()
    return client.list_schedules()
|
|
660
|
+
|
|
661
|
+
|
|
662
|
+
def get_schedule(schedule_id: str) -> dict:
    """Fetch a single schedule by its ID."""
    client = _get_client()
    return client.get_schedule(schedule_id)
|
|
665
|
+
|
|
666
|
+
|
|
667
|
+
def update_schedule(
    schedule_id: str,
    enabled: bool = None,
    cron_expr: str = None,
) -> dict:
    """
    Update a schedule's enabled state and/or cron expression.

    Args:
        schedule_id: Schedule to modify.
        enabled: New enabled state; passed to the client as given
            (None by default).
        cron_expr: New cron expression; passed to the client as given
            (None by default).
    """
    client = _get_client()
    return client.update_schedule(schedule_id, enabled, cron_expr)
|
|
674
|
+
|
|
675
|
+
|
|
676
|
+
def delete_schedule(schedule_id: str) -> None:
    """Delete the schedule with the given ID; nothing is returned."""
    client = _get_client()
    client.delete_schedule(schedule_id)
|
|
@@ -977,8 +977,10 @@ class RegistryClient:
|
|
|
977
977
|
self,
|
|
978
978
|
name: str,
|
|
979
979
|
path: str = None,
|
|
980
|
+
source_path: str = None,
|
|
980
981
|
df=None,
|
|
981
982
|
context: str = "training",
|
|
983
|
+
split: str = None,
|
|
982
984
|
digest: str = None,
|
|
983
985
|
source_type: str = "local",
|
|
984
986
|
tags: dict = None,
|
|
@@ -1021,6 +1023,13 @@ class RegistryClient:
|
|
|
1021
1023
|
if not self._active_run:
|
|
1022
1024
|
raise NoActiveRunError()
|
|
1023
1025
|
|
|
1026
|
+
# Aliases
|
|
1027
|
+
if source_path and not path:
|
|
1028
|
+
path = source_path
|
|
1029
|
+
if split:
|
|
1030
|
+
_split_map = {"train": "training", "val": "validation", "valid": "validation", "test": "test"}
|
|
1031
|
+
context = _split_map.get(split.lower(), split)
|
|
1032
|
+
|
|
1024
1033
|
schema: Dict[str, str] = {}
|
|
1025
1034
|
profile: Dict[str, Any] = {}
|
|
1026
1035
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|