ml-dash 0.5.6__py3-none-any.whl → 0.5.8__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
- ml_dash/cli.py +67 -0
- ml_dash/cli_commands/__init__.py +1 -0
- ml_dash/cli_commands/download.py +797 -0
- ml_dash/cli_commands/list.py +343 -0
- ml_dash/cli_commands/upload.py +1298 -0
- ml_dash/client.py +360 -0
- ml_dash/config.py +119 -0
- ml_dash/files.py +4 -4
- ml_dash/storage.py +64 -13
- {ml_dash-0.5.6.dist-info → ml_dash-0.5.8.dist-info}/METADATA +2 -1
- ml_dash-0.5.8.dist-info/RECORD +20 -0
- {ml_dash-0.5.6.dist-info → ml_dash-0.5.8.dist-info}/WHEEL +1 -1
- ml_dash-0.5.8.dist-info/entry_points.txt +3 -0
- ml_dash-0.5.6.dist-info/RECORD +0 -13
ml_dash/client.py
CHANGED
@@ -581,6 +581,366 @@ class RemoteClient:
         response.raise_for_status()
         return response.json()["metrics"]
 
+    def graphql_query(self, query: str, variables: Optional[Dict] = None) -> Dict[str, Any]:
+        """
+        Execute a GraphQL query.
+
+        Args:
+            query: GraphQL query string
+            variables: Optional variables for the query
+
+        Returns:
+            Query result data
+
+        Raises:
+            httpx.HTTPStatusError: If request fails
+            Exception: If GraphQL returns errors
+        """
+        response = self._client.post(
+            "/graphql",
+            json={"query": query, "variables": variables or {}}
+        )
+        response.raise_for_status()
+        result = response.json()
+
+        if "errors" in result:
+            raise Exception(f"GraphQL errors: {result['errors']}")
+
+        return result.get("data", {})
+
+    def list_projects_graphql(self, namespace_slug: str) -> List[Dict[str, Any]]:
+        """
+        List all projects in a namespace via GraphQL.
+
+        Args:
+            namespace_slug: Namespace slug
+
+        Returns:
+            List of project dicts with experimentCount
+
+        Raises:
+            httpx.HTTPStatusError: If request fails
+        """
+        query = """
+            query Projects($namespaceSlug: String!) {
+                projects(namespaceSlug: $namespaceSlug) {
+                    id
+                    name
+                    slug
+                    description
+                    tags
+                }
+            }
+        """
+        result = self.graphql_query(query, {"namespaceSlug": namespace_slug})
+        projects = result.get("projects", [])
+
+        # For each project, count experiments
+        for project in projects:
+            exp_query = """
+                query ExperimentsCount($namespaceSlug: String!, $projectSlug: String!) {
+                    experiments(namespaceSlug: $namespaceSlug, projectSlug: $projectSlug) {
+                        id
+                    }
+                }
+            """
+            exp_result = self.graphql_query(exp_query, {
+                "namespaceSlug": namespace_slug,
+                "projectSlug": project['slug']
+            })
+            experiments = exp_result.get("experiments", [])
+            project['experimentCount'] = len(experiments)
+
+        return projects
+
+    def list_experiments_graphql(
+        self, namespace_slug: str, project_slug: str, status: Optional[str] = None
+    ) -> List[Dict[str, Any]]:
+        """
+        List experiments in a project via GraphQL.
+
+        Args:
+            namespace_slug: Namespace slug
+            project_slug: Project slug
+            status: Optional experiment status filter (RUNNING, COMPLETED, FAILED, CANCELLED)
+
+        Returns:
+            List of experiment dicts with metadata
+
+        Raises:
+            httpx.HTTPStatusError: If request fails
+        """
+        query = """
+            query Experiments($namespaceSlug: String!, $projectSlug: String!, $status: ExperimentStatus) {
+                experiments(namespaceSlug: $namespaceSlug, projectSlug: $projectSlug, status: $status) {
+                    id
+                    name
+                    description
+                    tags
+                    status
+                    startedAt
+                    endedAt
+                    metadata
+                    project {
+                        slug
+                    }
+                    logMetadata {
+                        totalLogs
+                    }
+                    metrics {
+                        name
+                        metricMetadata {
+                            totalDataPoints
+                        }
+                    }
+                    files {
+                        id
+                        filename
+                        path
+                        contentType
+                        sizeBytes
+                        checksum
+                        description
+                        tags
+                        metadata
+                    }
+                    parameters {
+                        id
+                        data
+                    }
+                }
+            }
+        """
+        result = self.graphql_query(query, {
+            "namespaceSlug": namespace_slug,
+            "projectSlug": project_slug,
+            "status": status
+        })
+        return result.get("experiments", [])
+
+    def get_experiment_graphql(
+        self, namespace_slug: str, project_slug: str, experiment_name: str
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Get a single experiment via GraphQL.
+
+        Args:
+            namespace_slug: Namespace slug
+            project_slug: Project slug
+            experiment_name: Experiment name
+
+        Returns:
+            Experiment dict with metadata, or None if not found
+
+        Raises:
+            httpx.HTTPStatusError: If request fails
+        """
+        query = """
+            query Experiment($namespaceSlug: String!, $projectSlug: String!, $experimentName: String!) {
+                experiment(namespaceSlug: $namespaceSlug, projectSlug: $projectSlug, experimentName: $experimentName) {
+                    id
+                    name
+                    description
+                    tags
+                    status
+                    metadata
+                    project {
+                        slug
+                    }
+                    logMetadata {
+                        totalLogs
+                    }
+                    metrics {
+                        name
+                        metricMetadata {
+                            totalDataPoints
+                        }
+                    }
+                    files {
+                        id
+                        filename
+                        path
+                        contentType
+                        sizeBytes
+                        checksum
+                        description
+                        tags
+                        metadata
+                    }
+                    parameters {
+                        id
+                        data
+                    }
+                }
+            }
+        """
+        result = self.graphql_query(query, {
+            "namespaceSlug": namespace_slug,
+            "projectSlug": project_slug,
+            "experimentName": experiment_name
+        })
+        return result.get("experiment")
+
+    def download_file_streaming(
+        self, experiment_id: str, file_id: str, dest_path: str
+    ) -> str:
+        """
+        Download a file with streaming for large files.
+
+        Args:
+            experiment_id: Experiment ID (Snowflake ID)
+            file_id: File ID (Snowflake ID)
+            dest_path: Destination path to save file
+
+        Returns:
+            Path to downloaded file
+
+        Raises:
+            httpx.HTTPStatusError: If request fails
+            ValueError: If checksum verification fails
+        """
+        # Get metadata first for checksum
+        file_metadata = self.get_file(experiment_id, file_id)
+        expected_checksum = file_metadata["checksum"]
+
+        # Stream download
+        with self._client.stream("GET", f"/experiments/{experiment_id}/files/{file_id}/download") as response:
+            response.raise_for_status()
+
+            with open(dest_path, "wb") as f:
+                for chunk in response.iter_bytes(chunk_size=8192):
+                    f.write(chunk)
+
+        # Verify checksum
+        from .files import verify_checksum
+        if not verify_checksum(dest_path, expected_checksum):
+            import os
+            os.remove(dest_path)
+            raise ValueError(f"Checksum verification failed for file {file_id}")
+
+        return dest_path
+
+    def query_logs(
+        self,
+        experiment_id: str,
+        limit: Optional[int] = None,
+        offset: Optional[int] = None,
+        order_by: Optional[str] = None,
+        order: Optional[str] = None,
+        level: Optional[List[str]] = None,
+        start_time: Optional[str] = None,
+        end_time: Optional[str] = None,
+        search: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """
+        Query logs for an experiment.
+
+        Args:
+            experiment_id: Experiment ID
+            limit: Maximum number of logs to return
+            offset: Number of logs to skip
+            order_by: Field to order by (timestamp or sequenceNumber)
+            order: Sort order (asc or desc)
+            level: List of log levels to filter by
+            start_time: Filter logs after this timestamp
+            end_time: Filter logs before this timestamp
+            search: Search query for log messages
+
+        Returns:
+            Dict with logs array and pagination info
+
+        Raises:
+            httpx.HTTPStatusError: If request fails
+        """
+        params: Dict[str, str] = {}
+
+        if limit is not None:
+            params["limit"] = str(limit)
+        if offset is not None:
+            params["offset"] = str(offset)
+        if order_by is not None:
+            params["orderBy"] = order_by
+        if order is not None:
+            params["order"] = order
+        if level is not None:
+            params["level"] = ",".join(level)
+        if start_time is not None:
+            params["startTime"] = start_time
+        if end_time is not None:
+            params["endTime"] = end_time
+        if search is not None:
+            params["search"] = search
+
+        response = self._client.get(f"/experiments/{experiment_id}/logs", params=params)
+        response.raise_for_status()
+        return response.json()
+
+    def get_metric_data(
+        self,
+        experiment_id: str,
+        metric_name: str,
+        start_index: Optional[int] = None,
+        limit: Optional[int] = None,
+        buffer_only: bool = False,
+    ) -> Dict[str, Any]:
+        """
+        Get data points for a metric.
+
+        Args:
+            experiment_id: Experiment ID
+            metric_name: Name of the metric
+            start_index: Starting index for pagination
+            limit: Maximum number of data points to return
+            buffer_only: If True, only fetch buffer data (skip chunks)
+
+        Returns:
+            Dict with dataPoints array and pagination info
+
+        Raises:
+            httpx.HTTPStatusError: If request fails
+        """
+        params: Dict[str, str] = {}
+
+        if start_index is not None:
+            params["startIndex"] = str(start_index)
+        if limit is not None:
+            params["limit"] = str(limit)
+        if buffer_only:
+            params["bufferOnly"] = "true"
+
+        response = self._client.get(
+            f"/experiments/{experiment_id}/metrics/{metric_name}/data",
+            params=params
+        )
+        response.raise_for_status()
+        return response.json()
+
+    def download_metric_chunk(
+        self,
+        experiment_id: str,
+        metric_name: str,
+        chunk_number: int,
+    ) -> Dict[str, Any]:
+        """
+        Download a specific chunk by chunk number.
+
+        Args:
+            experiment_id: Experiment ID
+            metric_name: Name of the metric
+            chunk_number: Chunk number to download
+
+        Returns:
+            Dict with chunk data including chunkNumber, startIndex, endIndex, dataCount, and data array
+
+        Raises:
+            httpx.HTTPStatusError: If request fails
+        """
+        response = self._client.get(
+            f"/experiments/{experiment_id}/metrics/{metric_name}/chunks/{chunk_number}"
+        )
+        response.raise_for_status()
+        return response.json()
+
     def close(self):
         """Close the HTTP client."""
         self._client.close()
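The methods above compose into a simple read path. A minimal usage sketch, assuming an already-constructed RemoteClient instance named client (its constructor is outside this hunk) and placeholder slugs:

# Hedged sketch; "my-team" and "my-project" are placeholders, not part of this diff.
projects = client.list_projects_graphql("my-team")
for p in projects:
    print(p["slug"], p["experimentCount"])  # count is computed client-side

exps = client.list_experiments_graphql("my-team", "my-project", status="COMPLETED")
for exp in exps:
    # Page through logs, then metric data, using the new REST helpers.
    logs = client.query_logs(exp["id"], limit=100, order_by="timestamp", order="asc")
    for m in exp["metrics"]:
        points = client.get_metric_data(exp["id"], m["name"], limit=1000)
    # Stream-download attached files; a checksum mismatch removes the
    # partial file and raises ValueError.
    for f in exp["files"]:
        client.download_file_streaming(exp["id"], f["id"], f["filename"])

Note that list_projects_graphql issues one ExperimentsCount query per project, so listing N projects costs N+1 GraphQL round-trips.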
ml_dash/config.py
ADDED
@@ -0,0 +1,119 @@
+"""Configuration file management for ML-Dash CLI."""
+
+from pathlib import Path
+import json
+from typing import Optional, Dict, Any
+
+
+class Config:
+    """
+    Manages ML-Dash CLI configuration file.
+
+    Configuration is stored in ~/.ml-dash/config.json with structure:
+    {
+        "remote_url": "https://api.dash.ml",
+        "api_key": "token",
+        "default_batch_size": 100
+    }
+    """
+
+    DEFAULT_CONFIG_DIR = Path.home() / ".ml-dash"
+    CONFIG_FILE = "config.json"
+
+    def __init__(self, config_dir: Optional[Path] = None):
+        """
+        Initialize config manager.
+
+        Args:
+            config_dir: Config directory path (defaults to ~/.ml-dash)
+        """
+        self.config_dir = config_dir or self.DEFAULT_CONFIG_DIR
+        self.config_path = self.config_dir / self.CONFIG_FILE
+        self._data = self._load()
+
+    def _load(self) -> Dict[str, Any]:
+        """Load config from file."""
+        if self.config_path.exists():
+            try:
+                with open(self.config_path, "r") as f:
+                    return json.load(f)
+            except (json.JSONDecodeError, IOError):
+                # If config is corrupted, return empty dict
+                return {}
+        return {}
+
+    def save(self):
+        """Save config to file."""
+        self.config_dir.mkdir(parents=True, exist_ok=True)
+        with open(self.config_path, "w") as f:
+            json.dump(self._data, f, indent=2)
+
+    def get(self, key: str, default: Any = None) -> Any:
+        """
+        Get config value.
+
+        Args:
+            key: Config key
+            default: Default value if key not found
+
+        Returns:
+            Config value or default
+        """
+        return self._data.get(key, default)
+
+    def set(self, key: str, value: Any):
+        """
+        Set config value and save.
+
+        Args:
+            key: Config key
+            value: Config value
+        """
+        self._data[key] = value
+        self.save()
+
+    def delete(self, key: str):
+        """
+        Delete config key and save.
+
+        Args:
+            key: Config key to delete
+        """
+        if key in self._data:
+            del self._data[key]
+            self.save()
+
+    def clear(self):
+        """Clear all config and save."""
+        self._data = {}
+        self.save()
+
+    @property
+    def remote_url(self) -> Optional[str]:
+        """Get default remote URL."""
+        return self.get("remote_url")
+
+    @remote_url.setter
+    def remote_url(self, url: str):
+        """Set default remote URL."""
+        self.set("remote_url", url)
+
+    @property
+    def api_key(self) -> Optional[str]:
+        """Get default API key."""
+        return self.get("api_key")
+
+    @api_key.setter
+    def api_key(self, key: str):
+        """Set default API key."""
+        self.set("api_key", key)
+
+    @property
+    def batch_size(self) -> int:
+        """Get default batch size for uploads."""
+        return self.get("default_batch_size", 100)
+
+    @batch_size.setter
+    def batch_size(self, size: int):
+        """Set default batch size."""
+        self.set("default_batch_size", size)
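Typical use of the new Config class; a short sketch with illustrative values (the URL and token below are placeholders):

from ml_dash.config import Config

cfg = Config()                           # loads ~/.ml-dash/config.json if present
cfg.remote_url = "https://api.dash.ml"   # property setters persist immediately via save()
cfg.api_key = "example-token"
print(cfg.batch_size)                    # 100 unless default_batch_size was set
cfg.delete("api_key")                    # removes the key and rewrites the file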
ml_dash/files.py
CHANGED
@@ -69,7 +69,7 @@ class FileBuilder:
         Raises:
             RuntimeError: If experiment is not open or write-protected
             ValueError: If file_path not provided or file doesn't exist
-            ValueError: If file size exceeds
+            ValueError: If file size exceeds 100GB limit
 
         Examples:
             result = experiment.files(file_path="./model.pt", prefix="/models").save()
@@ -91,11 +91,11 @@ class FileBuilder:
         if not file_path.is_file():
             raise ValueError(f"Path is not a file: {self._file_path}")
 
-        # Check file size (max
+        # Check file size (max 100GB)
         file_size = file_path.stat().st_size
-        MAX_FILE_SIZE =
+        MAX_FILE_SIZE = 100 * 1024 * 1024 * 1024  # 100GB in bytes
         if file_size > MAX_FILE_SIZE:
-            raise ValueError(f"File size ({file_size} bytes) exceeds
+            raise ValueError(f"File size ({file_size} bytes) exceeds 100GB limit")
 
         # Compute checksum
         checksum = compute_sha256(str(file_path))
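The new limit works out to 100 * 1024 * 1024 * 1024 = 107,374,182,400 bytes. A hedged sketch of a pre-flight check that mirrors FileBuilder's validation before calling .save(); check_upload_size is an illustrative helper, not part of the package:

from pathlib import Path

MAX_FILE_SIZE = 100 * 1024 * 1024 * 1024  # same 100GB constant as FileBuilder

def check_upload_size(path: str) -> int:
    """Illustrative helper: reproduce FileBuilder's size validation up front."""
    p = Path(path)
    if not p.is_file():
        raise ValueError(f"Path is not a file: {path}")
    size = p.stat().st_size
    if size > MAX_FILE_SIZE:
        raise ValueError(f"File size ({size} bytes) exceeds 100GB limit")
    return size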
ml_dash/storage.py
CHANGED
@@ -62,8 +62,16 @@ class LocalStorage:
         Returns:
             Path to experiment directory
         """
+        # Determine base path - include folder in hierarchy if specified
+        if folder is not None:
+            # Strip leading / to make it relative, then use as base path
+            folder_path = folder.lstrip('/')
+            base_path = self.root_path / folder_path
+        else:
+            base_path = self.root_path
+
         # Create project directory
-        project_dir =
+        project_dir = base_path / project
         project_dir.mkdir(parents=True, exist_ok=True)
 
         # Create experiment directory
@@ -138,7 +146,7 @@ class LocalStorage:
         timestamp: ISO timestamp string
         metadata: Optional metadata
         """
-        experiment_dir = self.
+        experiment_dir = self._get_experiment_dir(project, experiment)
         logs_dir = experiment_dir / "logs"
         logs_file = logs_dir / "logs.jsonl"
         seq_file = logs_dir / ".log_sequence"
@@ -184,7 +192,7 @@ class LocalStorage:
         metric_name: Metric name
         data: Data point
         """
-        experiment_dir = self.
+        experiment_dir = self._get_experiment_dir(project, experiment)
         metric_file = experiment_dir / "metrics" / f"{metric_name}.jsonl"
 
         data_point = {
@@ -216,7 +224,7 @@ class LocalStorage:
         experiment: Experiment name
         data: Flattened parameter dict with dot notation (already flattened)
         """
-        experiment_dir = self.
+        experiment_dir = self._get_experiment_dir(project, experiment)
         params_file = experiment_dir / "parameters.json"
 
         # Read existing if present
@@ -263,7 +271,7 @@ class LocalStorage:
         Returns:
             Flattened parameter dict, or None if file doesn't exist
         """
-        experiment_dir = self.
+        experiment_dir = self._get_experiment_dir(project, experiment)
         params_file = experiment_dir / "parameters.json"
 
         if not params_file.exists():
@@ -315,7 +323,7 @@ class LocalStorage:
         import shutil
         from .files import generate_snowflake_id
 
-        experiment_dir = self.
+        experiment_dir = self._get_experiment_dir(project, experiment)
         files_dir = experiment_dir / "files"
         metadata_file = files_dir / ".files_metadata.json"
 
@@ -411,7 +419,7 @@ class LocalStorage:
         Returns:
             List of file metadata dicts (only non-deleted files)
         """
-        experiment_dir = self.
+        experiment_dir = self._get_experiment_dir(project, experiment)
         metadata_file = experiment_dir / "files" / ".files_metadata.json"
 
         if not metadata_file.exists():
@@ -464,7 +472,7 @@ class LocalStorage:
         import shutil
         from .files import verify_checksum
 
-        experiment_dir = self.
+        experiment_dir = self._get_experiment_dir(project, experiment)
         files_dir = experiment_dir / "files"
         metadata_file = files_dir / ".files_metadata.json"
 
@@ -529,7 +537,7 @@ class LocalStorage:
         Raises:
             FileNotFoundError: If file not found
         """
-        experiment_dir = self.
+        experiment_dir = self._get_experiment_dir(project, experiment)
         metadata_file = experiment_dir / "files" / ".files_metadata.json"
 
         if not metadata_file.exists():
@@ -588,7 +596,7 @@ class LocalStorage:
         Raises:
             FileNotFoundError: If file not found
         """
-        experiment_dir = self.
+        experiment_dir = self._get_experiment_dir(project, experiment)
         metadata_file = experiment_dir / "files" / ".files_metadata.json"
 
         if not metadata_file.exists():
@@ -628,9 +636,52 @@ class LocalStorage:
 
         return updated_file
 
-    def _get_experiment_dir(self, project: str, experiment: str) -> Path:
-        """
-
+    def _get_experiment_dir(self, project: str, experiment: str, folder: Optional[str] = None) -> Path:
+        """
+        Get experiment directory path.
+
+        If folder is not provided, tries to read it from experiment.json metadata.
+        Falls back to root_path/project/experiment if not found.
+        """
+        # If folder explicitly provided, use it
+        if folder is not None:
+            folder_path = folder.lstrip('/')
+            return self.root_path / folder_path / project / experiment
+
+        # Try to read folder from experiment metadata
+        # Check common locations where experiment might exist
+        possible_paths = []
+
+        # First, try without folder (most common case)
+        default_path = self.root_path / project / experiment
+        possible_paths.append(default_path)
+
+        # Then scan for experiment.json in subdirectories (for folder-based experiments)
+        try:
+            for item in self.root_path.rglob(f"*/{project}/{experiment}/experiment.json"):
+                exp_dir = item.parent
+                if exp_dir not in [p for p in possible_paths]:
+                    possible_paths.insert(0, exp_dir)  # Prioritize found paths
+        except:
+            pass
+
+        # Check each possible path for experiment.json with folder metadata
+        for path in possible_paths:
+            exp_json = path / "experiment.json"
+            if exp_json.exists():
+                try:
+                    with open(exp_json, 'r') as f:
+                        metadata = json.load(f)
+                    if metadata.get('folder'):
+                        folder_path = metadata['folder'].lstrip('/')
+                        return self.root_path / folder_path / project / experiment
+                except:
+                    pass
+                # Found experiment.json, use this path even if no folder metadata
+                return path
+
+        # Fallback to default path
+        return default_path
 
     def append_to_metric(
         self,
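The path resolution these hunks introduce can be summarized in a small sketch; the root directory and slugs below are placeholders, and the LocalStorage constructor is not shown in this diff:

from pathlib import Path

root = Path("./ml-dash-data")  # assumed LocalStorage.root_path
folder, project, experiment = "/research/vision", "my-project", "exp-1"

# With an explicit folder, the leading slash is stripped and the folder
# nests above the project:
explicit = root / folder.lstrip("/") / project / experiment
# -> ml-dash-data/research/vision/my-project/exp-1

# Without a folder, _get_experiment_dir probes in order:
#   1. root/my-project/exp-1 (the default location)
#   2. any */my-project/exp-1/experiment.json found under root via rglob,
#      honoring a "folder" key stored in that experiment.json
default = root / project / experiment

The rglob scan can touch the whole tree in the worst case, which appears to be the trade-off for not threading folder through every call site.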
{ml_dash-0.5.6.dist-info → ml_dash-0.5.8.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: ml-dash
-Version: 0.5.6
+Version: 0.5.8
 Summary: ML experiment tracking and data storage
 Keywords: machine-learning,experiment-tracking,mlops,data-storage
 Author: Ge Yang, Tom Tao
@@ -41,6 +41,7 @@ Requires-Dist: pyjwt>=2.8.0
 Requires-Dist: imageio>=2.31.0
 Requires-Dist: imageio-ffmpeg>=0.4.9
 Requires-Dist: scikit-image>=0.21.0
+Requires-Dist: rich>=13.0.0
 Requires-Dist: pytest>=8.0.0 ; extra == 'dev'
 Requires-Dist: pytest-asyncio>=0.23.0 ; extra == 'dev'
 Requires-Dist: sphinx>=7.2.0 ; extra == 'dev'
ml_dash-0.5.8.dist-info/RECORD
ADDED
@@ -0,0 +1,20 @@
+ml_dash/__init__.py,sha256=o_LrWVJBY_VkUGhSBs5wdb_NqEsHD1AK9HGsjZGxHxQ,1414
+ml_dash/auto_start.py,sha256=c3XcXFpZdvjtWauEoK5043Gw9k0L_5IDq4fdiB2ha88,959
+ml_dash/cli.py,sha256=lyWVVhmsflSXQt2UCDb8IqC-mSRQwwlB2l1qEIYBUb8,1743
+ml_dash/cli_commands/__init__.py,sha256=bjAmV7MsW-bhtW_4SnLJ0Cfkt9h82vMDC8ebW1Ke8KE,38
+ml_dash/cli_commands/download.py,sha256=TomyUFwelqfQHfh60K7rCyCwEZVp1CkMToogprgC64Q,29614
+ml_dash/cli_commands/list.py,sha256=Cx9yWsTV5HPaevYpQ6BugCEr5z_4bhxQ0T51OXExuTU,10900
+ml_dash/cli_commands/upload.py,sha256=jo6FVdbuokTz64rjvOEWWhLBzlh2gM0Ru4TRNv9hX60,47943
+ml_dash/client.py,sha256=31C2Kb3KULwhrb3UlpCFY7HDA3-kvj3XVmWUvXEvQHY,27993
+ml_dash/config.py,sha256=iQbHCu4lM_Sg8YadyEXSJ6Ht9yKIJHN26L7L-rMH4gE,3112
+ml_dash/experiment.py,sha256=K36HkHJb_O2-vdaPPOCq74_2nZtfiLaS0o7qhTntD8Q,30646
+ml_dash/files.py,sha256=JptjoxGJiXJ-nkj6C7vDhw-cgJRCB0cHt_SIUJG665o,23024
+ml_dash/log.py,sha256=0yXaNnFwYeBI3tRLHX3kkqWRpg0MbSGwmgjnOfsElCk,5350
+ml_dash/metric.py,sha256=c0Zl0wEufmQuVfwIMvrORLwqe92Iaf0PfKRgmlgQWzQ,10343
+ml_dash/params.py,sha256=xaByDSVar4D1pZqxTANkMPeZTL5-V7ewJe5TXfPLhMQ,5980
+ml_dash/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ml_dash/storage.py,sha256=eldh_gMivwTlVhSosu-KYrHrG9mmEMspc7MfY5L7Wzc,32991
+ml_dash-0.5.8.dist-info/WHEEL,sha256=z-mOpxbJHqy3cq6SvUThBZdaLGFZzdZPtgWLcP2NKjQ,79
+ml_dash-0.5.8.dist-info/entry_points.txt,sha256=dYs2EHX1uRNO7AQGNnVaJJpgiy0Z9q7tiy4fHSyaf3Q,46
+ml_dash-0.5.8.dist-info/METADATA,sha256=HywjX8kVHUXB5OD3bFeCTDUDwCM_FsCULTp_WJ_Z0eI,6175
+ml_dash-0.5.8.dist-info/RECORD,,