PyPI - ml-dash - Versions diffs - 0.6.5__py3-none-any.whl → 0.6.7__py3-none-any.whl - Mend

ml-dash 0.6.5__py3-none-any.whl → 0.6.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

ml_dash/__init__.py +1 -2
ml_dash/auto_start.py +1 -4
ml_dash/buffer.py +735 -0
ml_dash/cli.py +7 -1
ml_dash/cli_commands/create.py +145 -0
ml_dash/cli_commands/download.py +177 -0
ml_dash/cli_commands/list.py +146 -0
ml_dash/cli_commands/upload.py +148 -4
ml_dash/client.py +328 -25
ml_dash/experiment.py +491 -457
ml_dash/files.py +228 -70
ml_dash/run.py +92 -3
ml_dash/storage.py +403 -2
ml_dash/track.py +263 -0
{ml_dash-0.6.5.dist-info → ml_dash-0.6.7.dist-info}/METADATA +1 -1
{ml_dash-0.6.5.dist-info → ml_dash-0.6.7.dist-info}/RECORD +18 -15
{ml_dash-0.6.5.dist-info → ml_dash-0.6.7.dist-info}/WHEEL +0 -0
{ml_dash-0.6.5.dist-info → ml_dash-0.6.7.dist-info}/entry_points.txt +0 -0

ml_dash/cli_commands/upload.py CHANGED Viewed

@@ -148,6 +148,18 @@ def add_parser(subparsers) -> argparse.ArgumentParser:
     help="JWT token for authentication (optional - auto-loads from 'ml-dash login' if not provided)",
   )
+  # Track upload mode
+  parser.add_argument(
+    "--tracks",
+    type=str,
+    help="Upload track data file (e.g., robot_position.jsonl). Requires --remote-path.",
+  )
+  parser.add_argument(
+    "--remote-path",
+    type=str,
+    help="Remote path for track (e.g., 'namespace/project/exp/robot/position')",
+  )
   """
   cd .dash/geyang
@@ -265,7 +277,11 @@ def discover_experiments(
   Args:
       local_path: Root path of local storage
-      project_filter: Glob pattern to filter experiments by prefix (e.g., "tom/*/exp*")
+      project_filter: Either a simple project name (e.g., "proj1") or a glob
+                     pattern for the full path (e.g., "tom/*/exp*"). If the
+                     filter contains '/', '*', or '?', it's treated as a glob
+                     pattern matched against the full relative path. Otherwise,
+                     it's matched exactly against the project name.
       experiment_filter: Only discover this experiment (requires project_filter)
   Returns:
@@ -319,9 +335,18 @@ def discover_experiments(
       # Apply filters with glob pattern support
       if project_filter:
-        # Support glob pattern matching on the full relative path
-        if not fnmatch.fnmatch(full_relative_path, project_filter):
-          continue
+        # Check if project_filter is a glob pattern or simple project name
+        is_glob_pattern = any(c in project_filter for c in ['*', '?', '/'])
+        if is_glob_pattern:
+          # Treat as glob pattern - match against full relative path
+          if not fnmatch.fnmatch(full_relative_path, project_filter):
+            continue
+        else:
+          # Treat as simple project name - match against parsed project
+          if project_name != project_filter:
+            continue
       if experiment_filter and exp_name != experiment_filter:
         continue
@@ -1066,6 +1091,121 @@ class ExperimentUploader:
     return total_uploaded
+def cmd_upload_track(args: argparse.Namespace) -> int:
+  """Upload track data file to remote server."""
+  from datetime import datetime
+  # Load config
+  config = Config()
+  remote_url = args.dash_url or config.remote_url
+  api_key = args.api_key or config.api_key
+  if not remote_url:
+    console.print("[red]Error:[/red] --dash-url is required (or set in config)")
+    return 1
+  if not args.tracks or not args.remote_path:
+    console.print("[red]Error:[/red] Both --tracks and --remote-path are required for track upload")
+    console.print("Usage: ml-dash upload --tracks <local-file> --remote-path namespace/project/exp/topic")
+    return 1
+  # Parse local file path
+  local_file = Path(args.tracks)
+  if not local_file.exists():
+    console.print(f"[red]Error:[/red] File not found: {local_file}")
+    return 1
+  # Parse remote path: namespace/project/.../experiment/topic
+  remote_path = args.remote_path.strip("/")
+  parts = remote_path.split("/")
+  if len(parts) < 4:
+    console.print(
+      "[red]Error:[/red] Remote path must be: 'namespace/project/experiment/topic'"
+    )
+    console.print("Example: geyang/project/exp1/robot/position")
+    return 1
+  namespace = parts[0]
+  project = parts[1]
+  experiment_name = parts[-2]  # Second to last is experiment
+  topic = "/".join(parts[-1:])  # Last part is topic (could be multi-level)
+  console.print(f"[bold]Uploading track data...[/bold]")
+  console.print(f"  Local file: {local_file}")
+  console.print(f"  Namespace: {namespace}")
+  console.print(f"  Project: {project}")
+  console.print(f"  Experiment: {experiment_name}")
+  console.print(f"  Topic: {topic}")
+  try:
+    # Initialize remote client
+    remote_client = RemoteClient(base_url=remote_url, namespace=namespace, api_key=api_key)
+    # Get experiment ID
+    exp_data = remote_client.get_experiment_graphql(project, experiment_name)
+    if not exp_data:
+      console.print(
+        f"[red]Error:[/red] Experiment '{experiment_name}' not found in project '{project}'"
+      )
+      return 1
+    experiment_id = exp_data["id"]
+    # Read local file (assume JSONL format)
+    console.print(f"\n[cyan]Reading local file...[/cyan]")
+    entries = []
+    with open(local_file, 'r') as f:
+      for line_num, line in enumerate(f, 1):
+        if line.strip():
+          try:
+            entry = json.loads(line)
+            if "timestamp" not in entry:
+              console.print(f"[yellow]Warning:[/yellow] Line {line_num} missing timestamp, skipping")
+              continue
+            entries.append(entry)
+          except json.JSONDecodeError as e:
+            console.print(f"[yellow]Warning:[/yellow] Line {line_num} invalid JSON: {e}")
+            continue
+    if not entries:
+      console.print("[red]Error:[/red] No valid entries found in file")
+      return 1
+    console.print(f"  Found {len(entries)} entries")
+    # Upload in batches
+    console.print(f"\n[cyan]Uploading to server...[/cyan]")
+    batch_size = 1000
+    total_uploaded = 0
+    for i in range(0, len(entries), batch_size):
+      batch = entries[i:i + batch_size]
+      remote_client.append_batch_to_track(
+        experiment_id=experiment_id,
+        topic=topic,
+        entries=batch
+      )
+      total_uploaded += len(batch)
+      console.print(f"  Uploaded {total_uploaded}/{len(entries)} entries")
+    # Success
+    console.print(f"\n[green]✓ Track data uploaded successfully[/green]")
+    console.print(f"  Total entries: {total_uploaded}")
+    console.print(f"  Topic: {topic}")
+    console.print(f"  Experiment: {namespace}/{project}/{experiment_name}")
+    return 0
+  except Exception as e:
+    console.print(f"[red]Error uploading track data:[/red] {e}")
+    if args.verbose:
+      import traceback
+      console.print(traceback.format_exc())
+    return 1
 def cmd_upload(args: argparse.Namespace) -> int:
   """
   Execute upload command.
@@ -1076,6 +1216,10 @@ def cmd_upload(args: argparse.Namespace) -> int:
   Returns:
       Exit code (0 for success, 1 for error)
   """
+  # Handle track upload if --tracks is specified
+  if args.tracks:
+    return cmd_upload_track(args)
   # Load config
   config = Config()

ml_dash/client.py CHANGED Viewed

@@ -6,21 +6,61 @@ from typing import Optional, Dict, Any, List
 import httpx
+def _serialize_value(value: Any) -> Any:
+    """
+    Convert value to JSON-serializable format.
+    Handles numpy arrays, nested dicts/lists, etc.
+    Args:
+        value: Value to serialize
+    Returns:
+        JSON-serializable value
+    """
+    # Check for numpy array
+    if hasattr(value, '__array__') or (hasattr(value, 'tolist') and hasattr(value, 'dtype')):
+        # It's a numpy array
+        try:
+            return value.tolist()
+        except AttributeError:
+            pass
+    # Check for numpy scalar types
+    if hasattr(value, 'item'):
+        try:
+            return value.item()
+        except (AttributeError, ValueError):
+            pass
+    # Recursively handle dicts
+    if isinstance(value, dict):
+        return {k: _serialize_value(v) for k, v in value.items()}
+    # Recursively handle lists
+    if isinstance(value, (list, tuple)):
+        return [_serialize_value(v) for v in value]
+    # Return as-is for other types (int, float, str, bool, None)
+    return value
 class RemoteClient:
     """Client for communicating with ML-Dash server."""
-    def __init__(self, base_url: str, namespace: str, api_key: Optional[str] = None):
+    def __init__(self, base_url: str, namespace: Optional[str] = None, api_key: Optional[str] = None):
         """
         Initialize remote client.
         Args:
             base_url: Base URL of ML-Dash server (e.g., "http://localhost:3000")
-            namespace: Namespace slug (e.g., "my-namespace")
+            namespace: Namespace slug (e.g., "my-namespace"). If not provided, will be queried from server.
             api_key: JWT token for authentication (optional - auto-loads from storage if not provided)
         Note:
             If no api_key is provided, token will be loaded from storage on first API call.
             If still not found, AuthenticationError will be raised at that time.
+            If no namespace is provided, it will be fetched from the server on first API call.
         """
         # Store original base URL for GraphQL (no /api prefix)
         self.graphql_base_url = base_url.rstrip("/")
@@ -28,9 +68,6 @@ class RemoteClient:
         # Add /api prefix to base URL for REST API calls
         self.base_url = base_url.rstrip("/") + "/api"
-        # Store namespace
-        self.namespace = namespace
         # If no api_key provided, try to load from storage
         if not api_key:
             from .auth.token_storage import get_token_storage
@@ -39,10 +76,70 @@ class RemoteClient:
             api_key = storage.load("ml-dash-token")
         self.api_key = api_key
+        # Store namespace (can be None, will be fetched on first API call if needed)
+        self._namespace = namespace
+        self._namespace_fetched = False
         self._rest_client = None
         self._gql_client = None
         self._id_cache: Dict[str, str] = {}  # Cache for slug -> ID mappings
+    @property
+    def namespace(self) -> str:
+        """
+        Get namespace, fetching from server if not already set.
+        Returns:
+            Namespace slug
+        Raises:
+            AuthenticationError: If not authenticated
+            ValueError: If namespace cannot be determined
+        """
+        if self._namespace:
+            return self._namespace
+        if not self._namespace_fetched:
+            # Fetch namespace from server
+            self._namespace = self._fetch_namespace_from_server()
+            self._namespace_fetched = True
+        if not self._namespace:
+            raise ValueError("Could not determine namespace. Please provide --namespace explicitly.")
+        return self._namespace
+    @namespace.setter
+    def namespace(self, value: str):
+        """Set namespace."""
+        self._namespace = value
+        self._namespace_fetched = True
+    def _fetch_namespace_from_server(self) -> Optional[str]:
+        """
+        Fetch current user's namespace from server.
+        Returns:
+            Namespace slug or None if cannot be determined
+        """
+        try:
+            self._ensure_authenticated()
+            # Query server for current user's namespace
+            query = """
+            query GetMyNamespace {
+              me {
+                username
+              }
+            }
+            """
+            result = self.graphql_query(query)
+            username = result.get("me", {}).get("username")
+            return username
+        except Exception:
+            return None
     def _ensure_authenticated(self):
         """Check if authenticated, raise error if not."""
         if not self.api_key:
@@ -1224,14 +1321,18 @@ class RemoteClient:
             }
             files {
               id
-              filename
-              path
-              contentType
-              sizeBytes
-              checksum
+              name
+              pPath
               description
               tags
               metadata
+              physicalFile {
+                filename
+                contentType
+                sizeBytes
+                checksum
+                s3Url
+              }
             }
             parameters {
               id
@@ -1248,16 +1349,15 @@ class RemoteClient:
         return result.get("experiments", [])
     def get_experiment_graphql(
-        self, project_slug: str, experiment_name: str
+        self, project_slug: str, experiment_name: str, namespace_slug: Optional[str] = None
     ) -> Optional[Dict[str, Any]]:
         """
         Get a single experiment via GraphQL.
-        Namespace is automatically inferred from JWT token on the server.
         Args:
             project_slug: Project slug
             experiment_name: Experiment name
+            namespace_slug: Namespace slug (optional - defaults to client's namespace)
         Returns:
             Experiment dict with metadata, or None if not found
@@ -1266,8 +1366,8 @@ class RemoteClient:
             httpx.HTTPStatusError: If request fails
         """
         query = """
-        query Experiment($projectSlug: String!, $experimentName: String!) {
-          experiment(projectSlug: $projectSlug, experimentName: $experimentName) {
+        query Experiment($namespaceSlug: String, $projectSlug: String!, $experimentName: String!) {
+          experiment(namespaceSlug: $namespaceSlug, projectSlug: $projectSlug, experimentName: $experimentName) {
             id
             name
             description
@@ -1291,14 +1391,18 @@ class RemoteClient:
             }
             files {
               id
-              filename
-              path
-              contentType
-              sizeBytes
-              checksum
+              name
+              pPath
               description
               tags
               metadata
+              physicalFile {
+                filename
+                contentType
+                sizeBytes
+                checksum
+                s3Url
+              }
             }
             parameters {
               id
@@ -1307,7 +1411,11 @@ class RemoteClient:
           }
         }
         """
+        # Use provided namespace or fall back to client's namespace
+        ns = namespace_slug or self.namespace
         variables = {
+            "namespaceSlug": ns,
             "projectSlug": project_slug,
             "experimentName": experiment_name
         }
@@ -1366,14 +1474,18 @@ class RemoteClient:
             }
             files {
               id
-              filename
-              path
-              contentType
-              sizeBytes
-              checksum
+              name
+              pPath
               description
               tags
               metadata
+              physicalFile {
+                filename
+                contentType
+                sizeBytes
+                checksum
+                s3Url
+              }
             }
           }
         }
@@ -1543,6 +1655,197 @@ class RemoteClient:
         response.raise_for_status()
         return response.json()
+    # =============================================================================
+    # Track Methods
+    # =============================================================================
+    def create_track(
+        self,
+        experiment_id: str,
+        topic: str,
+        description: Optional[str] = None,
+        tags: Optional[List[str]] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> Dict[str, Any]:
+        """
+        Create a new track for timestamped multi-modal data.
+        Args:
+            experiment_id: Experiment ID (Snowflake ID)
+            topic: Track topic (e.g., "robot/position", "camera/rgb")
+            description: Optional track description
+            tags: Optional tags
+            metadata: Optional metadata (e.g., fps, units)
+        Returns:
+            Dict with track ID and metadata
+        Raises:
+            httpx.HTTPStatusError: If request fails
+        """
+        payload = {
+            "topic": topic,
+        }
+        if description:
+            payload["description"] = description
+        if tags:
+            payload["tags"] = tags
+        if metadata:
+            payload["metadata"] = metadata
+        response = self._client.post(
+            f"/experiments/{experiment_id}/tracks",
+            json=payload,
+        )
+        response.raise_for_status()
+        return response.json()
+    def append_to_track(
+        self,
+        experiment_id: str,
+        topic: str,
+        timestamp: float,
+        data: Dict[str, Any],
+    ) -> Dict[str, Any]:
+        """
+        Append a single entry to a track.
+        Args:
+            experiment_id: Experiment ID (Snowflake ID)
+            topic: Track topic (e.g., "robot/position")
+            timestamp: Numeric timestamp
+            data: Data fields as dict (will be flattened with dot-notation)
+        Returns:
+            Dict with timestamp and flattened data
+        Raises:
+            httpx.HTTPStatusError: If request fails
+        """
+        import urllib.parse
+        topic_encoded = urllib.parse.quote(topic, safe='')
+        response = self._client.post(
+            f"/experiments/{experiment_id}/tracks/{topic_encoded}/append",
+            json={"timestamp": timestamp, "data": data},
+        )
+        response.raise_for_status()
+        return response.json()
+    def append_batch_to_track(
+        self,
+        experiment_id: str,
+        topic: str,
+        entries: List[Dict[str, Any]],
+    ) -> Dict[str, Any]:
+        """
+        Append multiple entries to a track in batch.
+        Args:
+            experiment_id: Experiment ID (Snowflake ID)
+            topic: Track topic (e.g., "robot/position")
+            entries: List of entries, each with 'timestamp' and other data fields
+        Returns:
+            Dict with count of entries added
+        Raises:
+            httpx.HTTPStatusError: If request fails
+        """
+        import urllib.parse
+        topic_encoded = urllib.parse.quote(topic, safe='')
+        # Serialize entries to handle numpy arrays
+        serialized_entries = [_serialize_value(entry) for entry in entries]
+        response = self._client.post(
+            f"/experiments/{experiment_id}/tracks/{topic_encoded}/append_batch",
+            json={"entries": serialized_entries},
+        )
+        response.raise_for_status()
+        return response.json()
+    def get_track_data(
+        self,
+        experiment_id: str,
+        topic: str,
+        start_timestamp: Optional[float] = None,
+        end_timestamp: Optional[float] = None,
+        columns: Optional[List[str]] = None,
+        format: str = "json",
+    ) -> Any:
+        """
+        Get track data with optional filtering.
+        Args:
+            experiment_id: Experiment ID
+            topic: Track topic
+            start_timestamp: Optional start timestamp filter
+            end_timestamp: Optional end timestamp filter
+            columns: Optional list of columns to retrieve
+            format: Export format ('json', 'jsonl', 'parquet', 'mcap')
+        Returns:
+            Track data in requested format (dict for json, bytes for jsonl/parquet/mcap)
+        Raises:
+            httpx.HTTPStatusError: If request fails
+        """
+        import urllib.parse
+        topic_encoded = urllib.parse.quote(topic, safe='')
+        params: Dict[str, str] = {"format": format}
+        if start_timestamp is not None:
+            params["start_timestamp"] = str(start_timestamp)
+        if end_timestamp is not None:
+            params["end_timestamp"] = str(end_timestamp)
+        if columns:
+            params["columns"] = ",".join(columns)
+        response = self._client.get(
+            f"/experiments/{experiment_id}/tracks/{topic_encoded}/data",
+            params=params,
+        )
+        response.raise_for_status()
+        # Return bytes for binary formats, dict for JSON
+        if format in ("jsonl", "parquet", "mcap"):
+            return response.content
+        return response.json()
+    def list_tracks(
+        self,
+        experiment_id: str,
+        topic_filter: Optional[str] = None,
+    ) -> List[Dict[str, Any]]:
+        """
+        List all tracks in an experiment.
+        Args:
+            experiment_id: Experiment ID
+            topic_filter: Optional topic filter (e.g., "robot/*" for prefix match)
+        Returns:
+            List of track metadata dicts
+        Raises:
+            httpx.HTTPStatusError: If request fails
+        """
+        params: Dict[str, str] = {}
+        if topic_filter:
+            params["topic"] = topic_filter
+        response = self._client.get(
+            f"/experiments/{experiment_id}/tracks",
+            params=params,
+        )
+        response.raise_for_status()
+        result = response.json()
+        return result.get("tracks", [])
     def close(self):
         """Close the HTTP clients."""
         self._client.close()

ml-dash 0.6.5__py3-none-any.whl → 0.6.7__py3-none-any.whl

ml-dash 0.6.5__py3-none-any.whl → 0.6.7__py3-none-any.whl