ml-dash 0.5.8__py3-none-any.whl → 0.6.0__py3-none-any.whl
- ml_dash/__init__.py +35 -9
- ml_dash/auth/__init__.py +51 -0
- ml_dash/auth/constants.py +10 -0
- ml_dash/auth/device_flow.py +237 -0
- ml_dash/auth/device_secret.py +49 -0
- ml_dash/auth/exceptions.py +31 -0
- ml_dash/auth/token_storage.py +262 -0
- ml_dash/auto_start.py +37 -14
- ml_dash/cli.py +14 -2
- ml_dash/cli_commands/download.py +10 -38
- ml_dash/cli_commands/list.py +10 -34
- ml_dash/cli_commands/login.py +225 -0
- ml_dash/cli_commands/logout.py +54 -0
- ml_dash/cli_commands/upload.py +3 -53
- ml_dash/client.py +67 -34
- ml_dash/config.py +15 -1
- ml_dash/experiment.py +151 -55
- ml_dash/files.py +97 -0
- ml_dash/metric.py +192 -3
- ml_dash/params.py +92 -3
- ml_dash/remote_auto_start.py +55 -0
- ml_dash/storage.py +366 -235
- {ml_dash-0.5.8.dist-info → ml_dash-0.6.0.dist-info}/METADATA +5 -1
- ml_dash-0.6.0.dist-info/RECORD +29 -0
- ml_dash-0.5.8.dist-info/RECORD +0 -20
- {ml_dash-0.5.8.dist-info → ml_dash-0.6.0.dist-info}/WHEEL +0 -0
- {ml_dash-0.5.8.dist-info → ml_dash-0.6.0.dist-info}/entry_points.txt +0 -0
ml_dash/experiment.py
CHANGED
@@ -67,6 +67,83 @@ class RunManager:
         """Mark experiment as cancelled (status: CANCELLED)."""
         self._experiment._close(status="CANCELLED")

+    @property
+    def folder(self) -> Optional[str]:
+        """
+        Get the current folder for this experiment.
+
+        Returns:
+            Current folder path or None
+
+        Example:
+            current_folder = exp.run.folder
+        """
+        return self._experiment.folder
+
+    @folder.setter
+    def folder(self, value: Optional[str]) -> None:
+        """
+        Set the folder for this experiment before initialization.
+
+        This can ONLY be set before the experiment is started (initialized).
+        Once the experiment is opened, the folder cannot be changed.
+
+        Supports template variables:
+        - {RUN.name} - Experiment name
+        - {RUN.project} - Project name
+
+        Args:
+            value: Folder path with optional template variables
+                (e.g., "experiments/{RUN.name}" or None)
+
+        Raises:
+            RuntimeError: If experiment is already initialized/open
+
+        Examples:
+            from ml_dash import dxp
+
+            # Static folder
+            dxp.run.folder = "experiments/vision/resnet"
+
+            # Template with experiment name
+            dxp.run.folder = "/iclr_2024/{RUN.name}"
+
+            # Template with multiple variables
+            dxp.run.folder = "{RUN.project}/experiments/{RUN.name}"
+
+            # Now start the experiment
+            with dxp.run:
+                dxp.params.set(lr=0.001)
+        """
+        if self._experiment._is_open:
+            raise RuntimeError(
+                "Cannot change folder after experiment is initialized. "
+                "Set folder before calling start() or entering 'with' block."
+            )
+
+        # Process template variables if present
+        if value and '{RUN.' in value:
+            # Generate unique run ID (timestamp-based)
+            from datetime import datetime
+            run_timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
+
+            # Simple string replacement for template variables
+            # Supports: {RUN.name}, {RUN.project}, {RUN.id}, {RUN.timestamp}
+            replacements = {
+                '{RUN.name}': f"{self._experiment.name}_{run_timestamp}",  # Unique name with timestamp
+                '{RUN.project}': self._experiment.project,
+                '{RUN.id}': run_timestamp,  # Just the timestamp
+                '{RUN.timestamp}': run_timestamp,  # Alias for id
+            }
+
+            # Replace all template variables
+            for template, replacement in replacements.items():
+                if template in value:
+                    value = value.replace(template, replacement)
+
+        # Update the folder on the experiment
+        self._experiment.folder = value
+
     def __enter__(self) -> "Experiment":
         """Context manager entry - starts the experiment."""
         return self.start()
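Taken together, the new property/setter pair makes run organization a pre-start concern. A minimal usage sketch assembled from the docstring above (the dxp singleton, template names, and timestamp format appear in the diff; the expanded path in the final comment is illustrative):

    from ml_dash import dxp

    # Must happen before the run starts; the setter raises RuntimeError afterwards
    dxp.run.folder = "{RUN.project}/experiments/{RUN.name}"

    with dxp.run:
        dxp.params.set(lr=0.001)

    # {RUN.name} expands to "<experiment name>_<UTC timestamp>", so the folder
    # resolves to something like "my-project/experiments/my-experiment_20240101_120000"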
@@ -105,7 +182,7 @@ class Experiment:
         experiment = Experiment(
             name="my-experiment",
             project="my-project",
-            remote="
+            remote="https://api.dash.ml",
             api_key="your-jwt-token"
         )

@@ -139,7 +216,6 @@ class Experiment:
         # Mode configuration
         remote: Optional[str] = None,
         api_key: Optional[str] = None,
-        user_name: Optional[str] = None,
         local_path: Optional[str] = None,
         # Internal parameters
         _write_protected: bool = False,

@@ -155,9 +231,8 @@ class Experiment:
             bindrs: Optional list of bindrs
             folder: Optional folder path (e.g., "/experiments/baseline")
             metadata: Optional metadata dict
-            remote: Remote API URL (e.g., "
-            api_key: JWT token for authentication (if not provided
-            user_name: Username for authentication (generates API key if api_key not provided)
+            remote: Remote API URL (e.g., "https://api.dash.ml")
+            api_key: JWT token for authentication (auto-loaded from storage if not provided)
             local_path: Local storage root path (for local mode)
             _write_protected: Internal parameter - if True, experiment becomes immutable after creation
         """

@@ -170,10 +245,6 @@ class Experiment:
         self._write_protected = _write_protected
         self.metadata = metadata

-        # Generate API key from username if not provided
-        if remote and not api_key and user_name:
-            api_key = self._generate_api_key_from_username(user_name)
-
         # Determine operation mode
         if remote and local_path:
             self.mode = OperationMode.HYBRID

@@ -183,7 +254,7 @@ class Experiment:
             self.mode = OperationMode.LOCAL
         else:
             raise ValueError(
-                "Must specify either 'remote' (with api_key
+                "Must specify either 'remote' (with api_key) or 'local_path'"
             )

         # Initialize backend

@@ -192,10 +263,10 @@ class Experiment:
         self._experiment_id: Optional[str] = None
         self._experiment_data: Optional[Dict[str, Any]] = None
         self._is_open = False
+        self._metrics_manager: Optional['MetricsManager'] = None  # Cached metrics manager

         if self.mode in (OperationMode.REMOTE, OperationMode.HYBRID):
-
-            raise ValueError("Either api_key or user_name is required for remote mode")
+            # api_key can be None - RemoteClient will auto-load from storage
             self._client = RemoteClient(base_url=remote, api_key=api_key)

         if self.mode in (OperationMode.LOCAL, OperationMode.HYBRID):

@@ -203,43 +274,6 @@ class Experiment:
             raise ValueError("local_path is required for local mode")
         self._storage = LocalStorage(root_path=Path(local_path))

-    @staticmethod
-    def _generate_api_key_from_username(user_name: str) -> str:
-        """
-        Generate a deterministic API key (JWT) from username.
-
-        This is a temporary solution until proper user authentication is implemented.
-        Generates a unique user ID from the username and creates a JWT token.
-
-        Args:
-            user_name: Username to generate API key from
-
-        Returns:
-            JWT token string
-        """
-        import hashlib
-        import time
-        import jwt
-
-        # Generate deterministic user ID from username (first 10 digits of SHA256 hash)
-        user_id = str(int(hashlib.sha256(user_name.encode()).hexdigest()[:16], 16))[:10]
-
-        # JWT payload
-        payload = {
-            "userId": user_id,
-            "userName": user_name,
-            "iat": int(time.time()),
-            "exp": int(time.time()) + (30 * 24 * 60 * 60)  # 30 days expiration
-        }
-
-        # Secret key for signing (should match server's JWT_SECRET)
-        secret = "your-secret-key-change-this-in-production"
-
-        # Generate JWT
-        token = jwt.encode(payload, secret, algorithm="HS256")
-
-        return token
-
     def _open(self) -> "Experiment":
         """
         Internal method to open the experiment (create or update on server/filesystem).

@@ -350,7 +384,12 @@ class Experiment:
             RuntimeError: If experiment is not open
         """
         if not self._is_open:
-            raise RuntimeError(
+            raise RuntimeError(
+                "Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
+                "Example:\n"
+                " with dxp.run:\n"
+                " dxp.params.set(lr=0.001)"
+            )

         return ParametersBuilder(self)

@@ -395,7 +434,12 @@ class Experiment:
             ValueError: If log level is invalid
         """
         if not self._is_open:
-            raise RuntimeError(
+            raise RuntimeError(
+                "Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
+                "Example:\n"
+                " with dxp.run:\n"
+                " dxp.log().info('Training started')"
+            )

         # Fluent mode: return LogBuilder
         if message is None:

@@ -424,7 +468,7 @@ class Experiment:
     ) -> None:
         """
         Internal method to write a log entry immediately.
-        No buffering - writes directly to storage/remote.
+        No buffering - writes directly to storage/remote AND stdout/stderr.

         Args:
             message: Log message

@@ -441,6 +485,9 @@ class Experiment:
         if metadata:
             log_entry["metadata"] = metadata

+        # Mirror to stdout/stderr before writing to storage
+        self._print_log(message, level, metadata)
+
         # Write immediately (no buffering)
         if self._client:
             # Remote mode: send to API (wrapped in array for batch API)

@@ -454,12 +501,50 @@ class Experiment:
             self._storage.write_log(
                 project=self.project,
                 experiment=self.name,
+                folder=self.folder,
                 message=log_entry["message"],
                 level=log_entry["level"],
                 metadata=log_entry.get("metadata"),
                 timestamp=log_entry["timestamp"]
             )

+    def _print_log(
+        self,
+        message: str,
+        level: str,
+        metadata: Optional[Dict[str, Any]]
+    ) -> None:
+        """
+        Print log to stdout or stderr based on level.
+
+        ERROR and FATAL go to stderr, all others go to stdout.
+
+        Args:
+            message: Log message
+            level: Log level
+            metadata: Optional metadata dict
+        """
+        import sys
+
+        # Format the log message
+        level_upper = level.upper()
+
+        # Build metadata string if present
+        metadata_str = ""
+        if metadata:
+            # Format metadata as key=value pairs
+            pairs = [f"{k}={v}" for k, v in metadata.items()]
+            metadata_str = f" [{', '.join(pairs)}]"
+
+        # Format: [LEVEL] message [key=value, ...]
+        formatted_message = f"[{level_upper}] {message}{metadata_str}"
+
+        # Route to stdout or stderr based on level
+        if level in ("error", "fatal"):
+            print(formatted_message, file=sys.stderr)
+        else:
+            print(formatted_message, file=sys.stdout)
+
     def files(self, **kwargs) -> FileBuilder:
         """
         Get a FileBuilder for fluent file operations.
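The upshot of the logging changes: every write now reaches the console as well as storage, with error/fatal routed to stderr. A sketch of the observable behavior, assuming an open run (the [LEVEL] message [key=value, ...] format follows _print_log above; .info() appears in the diff, .error() is an assumed sibling method on the fluent LogBuilder):

    with dxp.run:
        dxp.log().info("Training started")
        # stdout: [INFO] Training started

        dxp.log().error("loss diverged")   # .error() assumed
        # stderr: [ERROR] loss diverged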
@@ -485,7 +570,12 @@ class Experiment:
             experiment.files(file_id="123").delete()
         """
         if not self._is_open:
-            raise RuntimeError(
+            raise RuntimeError(
+                "Experiment not started. Use 'with experiment.run:' or call experiment.run.start() first.\n"
+                "Example:\n"
+                " with dxp.run:\n"
+                " dxp.files().save()"
+            )

         return FileBuilder(self, **kwargs)

@@ -540,6 +630,7 @@ class Experiment:
         result = self._storage.write_file(
             project=self.project,
             experiment=self.name,
+            folder=self.folder,
             file_path=file_path,
             prefix=prefix,
             filename=filename,

@@ -716,6 +807,7 @@ class Experiment:
         self._storage.write_parameters(
             project=self.project,
             experiment=self.name,
+            folder=self.folder,
             data=flattened_params
         )

@@ -787,7 +879,10 @@ class Experiment:
                 "Use 'with Experiment(...).run as experiment:' or call experiment.run.start() first."
             )

-
+        # Cache the MetricsManager instance to preserve MetricBuilder cache across calls
+        if self._metrics_manager is None:
+            self._metrics_manager = MetricsManager(self)
+        return self._metrics_manager

     def _append_to_metric(
         self,

@@ -828,6 +923,7 @@ class Experiment:
         result = self._storage.append_to_metric(
             project=self.project,
             experiment=self.name,
+            folder=self.folder,
             metric_name=name,
             data=data,
             description=description,

@@ -999,7 +1095,7 @@ def ml_dash_experiment(
         @ml_dash_experiment(
             name="my-experiment",
             project="my-project",
-            remote="
+            remote="https://api.dash.ml",
             api_key="your-token"
         )
         def train_model():
ml_dash/files.py
CHANGED
@@ -607,6 +607,103 @@ class FileBuilder:
         except Exception:
             pass

+    def duplicate(self, source: Union[str, Dict[str, Any]], to: str) -> Dict[str, Any]:
+        """
+        Duplicate an existing file to a new path within the same experiment.
+
+        Useful for checkpoint rotation patterns where you save versioned checkpoints
+        and maintain a "latest" or "best" pointer.
+
+        Args:
+            source: Source file - either file ID (str) or metadata dict with 'id' key
+            to: Target path like "models/latest.pt" or "/checkpoints/best.pt"
+
+        Returns:
+            File metadata dict for the duplicated file with id, path, filename, checksum, etc.
+
+        Raises:
+            RuntimeError: If experiment is not open or write-protected
+            ValueError: If source file not found or target path invalid
+
+        Examples:
+            # Using file ID
+            dxp.files().duplicate("file-id-123", to="models/latest.pt")
+
+            # Using metadata dict from save_torch
+            snapshot = dxp.files(prefix="/models").save_torch(model, f"model_{epoch:05d}.pt")
+            dxp.files().duplicate(snapshot, to="models/latest.pt")
+
+            # Checkpoint rotation pattern
+            snap = dxp.files(prefix="/checkpoints").save_torch(model, f"model_{epoch:05d}.pt")
+            dxp.files().duplicate(snap, to="checkpoints/best.pt")
+        """
+        import tempfile
+        import os
+
+        if not self._experiment._is_open:
+            raise RuntimeError("Experiment not open. Use experiment.run.start() or context manager.")
+
+        if self._experiment._write_protected:
+            raise RuntimeError("Experiment is write-protected and cannot be modified.")
+
+        # Extract source file ID
+        if isinstance(source, str):
+            source_id = source
+        elif isinstance(source, dict) and 'id' in source:
+            source_id = source['id']
+        else:
+            raise ValueError("source must be a file ID (str) or metadata dict with 'id' key")
+
+        if not source_id:
+            raise ValueError("Invalid source: file ID is empty")
+
+        # Parse target path into prefix and filename
+        to = to.lstrip('/')
+        if '/' in to:
+            target_prefix, target_filename = to.rsplit('/', 1)
+            target_prefix = '/' + target_prefix
+        else:
+            target_prefix = '/'
+            target_filename = to
+
+        if not target_filename:
+            raise ValueError(f"Invalid target path '{to}': must include filename")
+
+        # Download source file to temp location
+        temp_dir = tempfile.mkdtemp()
+        temp_path = os.path.join(temp_dir, target_filename)
+
+        try:
+            # Download the source file
+            downloaded_path = self._experiment._download_file(
+                file_id=source_id,
+                dest_path=temp_path
+            )
+
+            # Save to new location using existing save() method
+            original_file_path = self._file_path
+            original_prefix = self._prefix
+
+            self._file_path = downloaded_path
+            self._prefix = target_prefix
+
+            # Upload and get result
+            result = self.save()
+
+            # Restore original values
+            self._file_path = original_file_path
+            self._prefix = original_prefix
+
+            return result
+        finally:
+            # Clean up temp file and directory
+            try:
+                if os.path.exists(temp_path):
+                    os.unlink(temp_path)
+                os.rmdir(temp_dir)
+            except Exception:
+                pass
+

 def compute_sha256(file_path: str) -> str:
     """
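A note on the design: duplicate() is a client-side copy - download to a temp directory, then re-upload through save() - rather than a server-side copy endpoint, which lets it behave identically in local, remote, and hybrid modes at the cost of an extra round trip. Putting the docstring examples into a loop (save_torch and the dxp singleton appear in the diff; the loop, model, and train_one_epoch are illustrative):

    with dxp.run:
        for epoch in range(num_epochs):
            train_one_epoch(model)
            # Versioned checkpoint plus a stable "latest" pointer
            snap = dxp.files(prefix="/checkpoints").save_torch(model, f"model_{epoch:05d}.pt")
            dxp.files().duplicate(snap, to="checkpoints/latest.pt")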
ml_dash/metric.py
CHANGED
@@ -6,11 +6,165 @@ validation losses, system measurements, etc.
 """

 from typing import Dict, Any, List, Optional, TYPE_CHECKING
+from collections import defaultdict
+import statistics

 if TYPE_CHECKING:
     from .experiment import Experiment


+class SummaryCache:
+    """
+    Buffer for collecting metric values and computing statistics periodically.
+
+    Inspired by ml-logger's SummaryCache design:
+    - Lazy computation: Store raw values, compute stats on demand
+    - Hierarchical naming: Stats get suffixes (loss.mean, loss.std)
+    - Robust handling: Converts None → NaN, filters before stats
+    """
+
+    def __init__(self, metric_builder: 'MetricBuilder'):
+        """
+        Initialize SummaryCache.
+
+        Args:
+            metric_builder: Parent MetricBuilder instance
+        """
+        self._metric_builder = metric_builder
+        self._buffer: Dict[str, List[float]] = defaultdict(list)
+        self._metadata: Dict[str, Any] = {}  # For set() metadata
+
+    def store(self, **kwargs) -> None:
+        """
+        Store values in buffer without immediate logging (deferred computation).
+
+        Args:
+            **kwargs: Metric values to buffer (e.g., loss=0.5, accuracy=0.9)
+
+        Example:
+            cache.store(loss=0.5, accuracy=0.9)
+            cache.store(loss=0.48)  # Accumulates
+        """
+        for key, value in kwargs.items():
+            # Handle None values gracefully
+            if value is None:
+                value = float('nan')
+            try:
+                self._buffer[key].append(float(value))
+            except (TypeError, ValueError):
+                # Skip non-numeric values silently
+                continue
+
+    def set(self, **kwargs) -> None:
+        """
+        Set metadata values without aggregation (replaces previous values).
+
+        Used for contextual metadata like learning rate, epoch number, etc.
+        These values are included in the final data point when summarize() is called.
+
+        Args:
+            **kwargs: Metadata to set (e.g., lr=0.001, epoch=5)
+
+        Example:
+            cache.set(lr=0.001, epoch=5)
+            cache.set(lr=0.0005)  # Replaces lr, keeps epoch
+        """
+        self._metadata.update(kwargs)
+
+    def _compute_stats(self) -> Dict[str, float]:
+        """
+        Compute statistics from buffered values (idempotent, read-only).
+
+        Returns:
+            Dict with hierarchical metric names (key.mean, key.std, etc.)
+
+        Note: This is idempotent - can be called multiple times without side effects.
+        """
+        stats_data = {}
+
+        for key, values in self._buffer.items():
+            if not values:
+                continue
+
+            # Filter out NaN values (ml-logger pattern)
+            clean_values = [v for v in values if not (isinstance(v, float) and v != v)]
+
+            if not clean_values:
+                continue
+
+            # Compute statistics with hierarchical naming
+            stats_data[f"{key}.mean"] = statistics.mean(clean_values)
+            stats_data[f"{key}.min"] = min(clean_values)
+            stats_data[f"{key}.max"] = max(clean_values)
+            stats_data[f"{key}.count"] = len(clean_values)
+
+            # Std dev requires at least 2 values
+            if len(clean_values) >= 2:
+                stats_data[f"{key}.std"] = statistics.stdev(clean_values)
+            else:
+                stats_data[f"{key}.std"] = 0.0
+
+        return stats_data
+
+    def summarize(self, clear: bool = True) -> None:
+        """
+        Compute statistics from buffered values and log them (non-idempotent).
+
+        Args:
+            clear: If True (default), clear buffer after computing statistics.
+                This creates a "rolling window" behavior matching ml-logger's "tiled" mode.
+
+        Example:
+            # After storing 10 loss values and setting lr=0.001:
+            cache.store(loss=0.5)
+            cache.set(lr=0.001, epoch=5)
+            cache.summarize()
+            # Logs: {lr: 0.001, epoch: 5, loss.mean: 0.5, loss.std: 0.0, ...}
+
+        Note: This is non-idempotent - calling it multiple times has side effects.
+        """
+        if not self._buffer and not self._metadata:
+            return

+        # Compute statistics (delegated to idempotent method)
+        stats_data = self._compute_stats()
+
+        # Merge metadata with statistics
+        output_data = {**self._metadata, **stats_data}
+
+        if not output_data:
+            return
+
+        # Append combined data as a single metric data point
+        self._metric_builder.append(**output_data)
+
+        # Clear buffer if requested (default behavior for "tiled" mode)
+        if clear:
+            self._buffer.clear()
+            self._metadata.clear()  # Also clear metadata
+
+    def peek(self, *keys: str, limit: int = 5) -> Dict[str, List[float]]:
+        """
+        Non-destructive inspection of buffered values (idempotent, read-only).
+
+        Args:
+            *keys: Optional specific keys to peek at. If empty, shows all.
+            limit: Number of most recent values to show (default 5)
+
+        Returns:
+            Dict of buffered values (truncated to last `limit` items)
+
+        Example:
+            cache.peek('loss', limit=3)  # {'loss': [0.5, 0.48, 0.52]}
+        """
+        keys_to_show = keys if keys else self._buffer.keys()
+        return {
+            k: self._buffer[k][-limit:] if limit else self._buffer[k]
+            for k in keys_to_show
+            if k in self._buffer and self._buffer[k]
+        }
+
+
 class MetricsManager:
     """
     Manager for metric operations that supports both named and unnamed usage.
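A worked example of the cache semantics (values illustrative, results rounded; the stat suffixes and NaN filtering follow _compute_stats above, and the summary_cache accessor is the property added further down in this file):

    cache = experiment.metrics("train").summary_cache

    cache.store(loss=0.50)
    cache.store(loss=0.48)
    cache.store(loss=0.52)
    cache.store(loss=None)     # kept as NaN, filtered out before stats
    cache.set(lr=0.001)

    cache._compute_stats()
    # {'loss.mean': 0.5, 'loss.min': 0.48, 'loss.max': 0.52,
    #  'loss.count': 3, 'loss.std': 0.02}

    cache.summarize()          # appends {'lr': 0.001, 'loss.mean': 0.5, ...}
                               # as one data point, then clears the buffer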
@@ -39,11 +193,12 @@ class MetricsManager:
             experiment: Parent Experiment instance
         """
         self._experiment = experiment
+        self._metric_builders: Dict[str, 'MetricBuilder'] = {}  # Cache for MetricBuilder instances

     def __call__(self, name: str, description: Optional[str] = None,
                  tags: Optional[List[str]] = None, metadata: Optional[Dict[str, Any]] = None) -> 'MetricBuilder':
         """
-        Get a MetricBuilder for a specific metric name.
+        Get a MetricBuilder for a specific metric name (cached for reuse).

         Args:
             name: Metric name (unique within experiment)

@@ -52,12 +207,20 @@ class MetricsManager:
             metadata: Optional structured metadata

         Returns:
-            MetricBuilder instance for the named metric
+            MetricBuilder instance for the named metric (same instance on repeated calls)

         Examples:
             experiment.metrics("loss").append(value=0.5, step=1)
+
+        Note:
+            MetricBuilder instances are cached by name, so repeated calls with the
+            same name return the same instance. This ensures summary_cache works
+            correctly when called multiple times within a loop.
         """
-
+        # Cache key includes name only (description/tags/metadata are set once on first call)
+        if name not in self._metric_builders:
+            self._metric_builders[name] = MetricBuilder(self._experiment, name, description, tags, metadata)
+        return self._metric_builders[name]

     def append(self, name: Optional[str] = None, data: Optional[Dict[str, Any]] = None, **kwargs) -> Dict[str, Any]:
         """

@@ -157,6 +320,7 @@ class MetricBuilder:
         self._description = description
         self._tags = tags
         self._metadata = metadata
+        self._summary_cache = None  # Lazy initialization

     def append(self, **kwargs) -> 'MetricBuilder':
         """

@@ -290,3 +454,28 @@ class MetricBuilder:
             print(f"{metric['name']}: {metric['totalDataPoints']} points")
         """
         return self._experiment._list_metrics()
+
+    @property
+    def summary_cache(self) -> SummaryCache:
+        """
+        Get summary cache for this metric (lazy initialization).
+
+        The summary cache allows buffering values and computing statistics
+        periodically, which is much more efficient than logging every value.
+
+        Returns:
+            SummaryCache instance for this metric
+
+        Example:
+            metric = experiment.metrics("train")
+            # Store values every batch
+            metric.summary_cache.store(loss=0.5)
+            metric.summary_cache.store(loss=0.48)
+            # Set metadata
+            metric.summary_cache.set(lr=0.001, epoch=1)
+            # Compute stats and log periodically
+            metric.summary_cache.summarize()
+        """
+        if self._summary_cache is None:
+            self._summary_cache = SummaryCache(self)
+        return self._summary_cache
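End to end, the pattern these metric.py changes enable, combined with the MetricsManager caching in experiment.py (accessor names come from the docstrings above; the training loop, loader, and train_step are illustrative):

    with dxp.run as experiment:
        for epoch in range(num_epochs):
            for batch in loader:
                loss = train_step(batch)
                # metrics("train") returns the cached MetricBuilder, so the
                # summary cache keeps accumulating across iterations
                experiment.metrics("train").summary_cache.store(loss=loss)
            experiment.metrics("train").summary_cache.set(epoch=epoch)
            # One logged data point per epoch instead of one per batch
            experiment.metrics("train").summary_cache.summarize()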