ml-dash 0.6.6__py3-none-any.whl → 0.6.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ml_dash/storage.py CHANGED
@@ -1095,3 +1095,406 @@ class LocalStorage:
         )
 
         return metrics
+
+    # ============================================================================
+    # Track Storage Methods
+    # ============================================================================
+
+    def _serialize_value(self, value: Any) -> Any:
+        """
+        Convert value to JSON-serializable format.
+
+        Handles numpy arrays, nested dicts/lists, etc.
+
+        Args:
+            value: Value to serialize
+
+        Returns:
+            JSON-serializable value
+        """
+        # Check for numpy array
+        if hasattr(value, '__array__') or (hasattr(value, 'tolist') and hasattr(value, 'dtype')):
+            # It's a numpy array
+            try:
+                return value.tolist()
+            except AttributeError:
+                pass
+
+        # Check for numpy scalar types
+        if hasattr(value, 'item'):
+            try:
+                return value.item()
+            except (AttributeError, ValueError):
+                pass
+
+        # Recursively handle dicts
+        if isinstance(value, dict):
+            return {k: self._serialize_value(v) for k, v in value.items()}
+
+        # Recursively handle lists
+        if isinstance(value, (list, tuple)):
+            return [self._serialize_value(v) for v in value]
+
+        # Return as-is for other types (int, float, str, bool, None)
+        return value
+
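In practice, `_serialize_value` makes mixed numpy/builtin payloads safe for `json.dumps`. A minimal sketch (hypothetical values; assumes numpy is installed and `storage` is a `LocalStorage` instance):

    import json
    import numpy as np

    payload = {"pose": np.array([0.1, 0.2]), "step": np.int64(3), "tag": "demo"}
    safe = storage._serialize_value(payload)
    # -> {'pose': [0.1, 0.2], 'step': 3, 'tag': 'demo'}
    json.dumps(safe)  # no TypeError
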
+    def _flatten_dict(self, obj: Any, prefix: str = '') -> Dict[str, Any]:
+        """
+        Flatten nested dict with dot notation (e.g., camera.pos).
+
+        Args:
+            obj: Object to flatten
+            prefix: Current key prefix
+
+        Returns:
+            Flattened dict
+        """
+        result = {}
+
+        if not isinstance(obj, dict):
+            # Serialize the value before returning
+            serialized = self._serialize_value(obj)
+            return {prefix: serialized} if prefix else serialized
+
+        for key, value in obj.items():
+            new_key = f"{prefix}.{key}" if prefix else key
+
+            if isinstance(value, dict):
+                result.update(self._flatten_dict(value, new_key))
+            else:
+                # Serialize the value
+                result[new_key] = self._serialize_value(value)
+
+        return result
+
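Nested entries are flattened into dot-separated column names before they are written. A quick sketch (same hypothetical `storage` instance):

    storage._flatten_dict({"camera": {"pos": [1.0, 2.0], "fov": 60}})
    # -> {'camera.pos': [1.0, 2.0], 'camera.fov': 60}
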
+    def append_batch_to_track(
+        self,
+        owner: str,
+        project: str,
+        prefix: str,
+        topic: str,
+        entries: List[Dict[str, Any]],
+    ) -> Dict[str, Any]:
+        """
+        Append batch of timestamped entries to a track in local storage.
+
+        Storage format:
+            .dash/{owner}/{project}/{prefix}/tracks/{topic_safe}/
+                data.jsonl       # Timestamped entries (one JSON object per line)
+                metadata.json    # Track metadata (topic, columns, stats)
+
+        Args:
+            owner: Owner/user
+            project: Project name
+            prefix: Experiment prefix
+            topic: Track topic (e.g., "robot/position")
+            entries: List of entries with timestamp and data fields
+
+        Returns:
+            Dict with trackId, count
+        """
+        experiment_dir = self._get_experiment_dir(owner, project, prefix)
+        tracks_dir = experiment_dir / "tracks"
+        tracks_dir.mkdir(parents=True, exist_ok=True)
+
+        # Sanitize topic for directory name (replace / with _)
+        topic_safe = topic.replace("/", "_")
+        track_dir = tracks_dir / topic_safe
+        track_dir.mkdir(exist_ok=True)
+
+        data_file = track_dir / "data.jsonl"
+        metadata_file = track_dir / "metadata.json"
+
+        # File-based lock for concurrent writes
+        lock_file = track_dir / ".metadata.lock"
+        with self._file_lock(lock_file):
+            # Load or initialize metadata
+            if metadata_file.exists():
+                try:
+                    with open(metadata_file, "r") as f:
+                        track_meta = json.load(f)
+                except (json.JSONDecodeError, IOError):
+                    # Corrupted metadata, reinitialize
+                    track_meta = {
+                        "trackId": f"local-track-{topic_safe}",
+                        "topic": topic,
+                        "columns": [],
+                        "totalEntries": 0,
+                        "firstTimestamp": None,
+                        "lastTimestamp": None,
+                        "createdAt": datetime.utcnow().isoformat() + "Z",
+                    }
+            else:
+                track_meta = {
+                    "trackId": f"local-track-{topic_safe}",
+                    "topic": topic,
+                    "columns": [],
+                    "totalEntries": 0,
+                    "firstTimestamp": None,
+                    "lastTimestamp": None,
+                    "createdAt": datetime.utcnow().isoformat() + "Z",
+                }
+
+            # Process entries and update metadata
+            all_columns = set(track_meta["columns"])
+            min_ts = track_meta["firstTimestamp"]
+            max_ts = track_meta["lastTimestamp"]
+
+            processed_entries = []
+            for entry in entries:
+                timestamp = entry.get("timestamp")
+                if timestamp is None:
+                    continue
+
+                # Extract data fields (everything except timestamp)
+                data_fields = {k: v for k, v in entry.items() if k != "timestamp"}
+
+                # Flatten nested structures
+                flattened = self._flatten_dict(data_fields)
+
+                # Update column set
+                all_columns.update(flattened.keys())
+
+                # Update timestamp range
+                if min_ts is None or timestamp < min_ts:
+                    min_ts = timestamp
+                if max_ts is None or timestamp > max_ts:
+                    max_ts = timestamp
+
+                processed_entries.append({
+                    "timestamp": timestamp,
+                    **flattened
+                })
+
+            # Append entries to JSONL file (sorted by timestamp for consistency)
+            processed_entries.sort(key=lambda x: x["timestamp"])
+            with open(data_file, "a") as f:
+                for entry in processed_entries:
+                    f.write(json.dumps(entry) + "\n")
+
+            # Update metadata
+            track_meta["columns"] = sorted(list(all_columns))
+            track_meta["totalEntries"] += len(processed_entries)
+            track_meta["firstTimestamp"] = min_ts
+            track_meta["lastTimestamp"] = max_ts
+
+            # Write metadata
+            with open(metadata_file, "w") as f:
+                json.dump(track_meta, f, indent=2)
+
+        return {
+            "trackId": track_meta["trackId"],
+            "count": len(processed_entries),
+        }
+
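Under these semantics a batch append writes one JSONL line per entry and keeps the metadata file in sync. A sketch with hypothetical owner/project/prefix values:

    result = storage.append_batch_to_track(
        owner="alice", project="demo", prefix="exp-01",
        topic="robot/position",
        entries=[
            {"timestamp": 0.0, "pos": {"x": 1.0, "y": 2.0}},
            {"timestamp": 0.5, "pos": {"x": 1.1, "y": 2.1}},
        ],
    )
    # result -> {'trackId': 'local-track-robot_position', 'count': 2}
    # data.jsonl gains lines like {"timestamp": 0.0, "pos.x": 1.0, "pos.y": 2.0}
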
+    def read_track_data(
+        self,
+        owner: str,
+        project: str,
+        prefix: str,
+        topic: str,
+        start_timestamp: Optional[float] = None,
+        end_timestamp: Optional[float] = None,
+        columns: Optional[List[str]] = None,
+        format: str = "json",
+    ) -> Any:
+        """
+        Read track data from local storage with optional filtering.
+
+        Args:
+            owner: Owner/user
+            project: Project name
+            prefix: Experiment prefix
+            topic: Track topic
+            start_timestamp: Optional start timestamp filter
+            end_timestamp: Optional end timestamp filter
+            columns: Optional list of columns to retrieve
+            format: Export format ('json', 'jsonl', 'parquet', 'mocap')
+
+        Returns:
+            Track data in requested format
+        """
+        experiment_dir = self._get_experiment_dir(owner, project, prefix)
+        topic_safe = topic.replace("/", "_")
+        track_dir = experiment_dir / "tracks" / topic_safe
+        data_file = track_dir / "data.jsonl"
+
+        if not data_file.exists():
+            if format == "json":
+                return {"entries": [], "count": 0}
+            elif format == "jsonl":
+                return b""
+            elif format == "parquet":
+                # Return empty parquet file
+                import pyarrow as pa
+                import pyarrow.parquet as pq
+                import io
+                table = pa.table({"timestamp": []})
+                buf = io.BytesIO()
+                pq.write_table(table, buf)
+                return buf.getvalue()
+            elif format == "mocap":
+                return {
+                    "version": "1.0",
+                    "metadata": {"topic": topic, "frameCount": 0, "duration": 0},
+                    "channels": [],
+                    "frames": []
+                }
+
+        # Read all entries from JSONL file
+        entries = []
+        with open(data_file, "r") as f:
+            for line in f:
+                if line.strip():
+                    entry = json.loads(line)
+
+                    # Filter by timestamp range
+                    timestamp = entry.get("timestamp")
+                    if start_timestamp is not None and timestamp < start_timestamp:
+                        continue
+                    if end_timestamp is not None and timestamp > end_timestamp:
+                        continue
+
+                    # Filter by columns
+                    if columns:
+                        filtered_entry = {"timestamp": timestamp}
+                        for col in columns:
+                            if col in entry:
+                                filtered_entry[col] = entry[col]
+                        entries.append(filtered_entry)
+                    else:
+                        entries.append(entry)
+
+        # Return in requested format
+        if format == "json":
+            return {"entries": entries, "count": len(entries)}
+
+        elif format == "jsonl":
+            lines = [json.dumps(entry) for entry in entries]
+            return "\n".join(lines).encode('utf-8')
+
+        elif format == "parquet":
+            # Convert to Apache Parquet
+            import pyarrow as pa
+            import pyarrow.parquet as pq
+            import io
+
+            if not entries:
+                table = pa.table({"timestamp": []})
+            else:
+                # Build schema from entries
+                table = pa.Table.from_pylist(entries)
+
+            buf = io.BytesIO()
+            pq.write_table(table, buf, compression='zstd')
+            return buf.getvalue()
+
+        elif format == "mocap":
+            # Read metadata
+            metadata_file = track_dir / "metadata.json"
+            track_meta = {}
+            if metadata_file.exists():
+                with open(metadata_file, "r") as f:
+                    track_meta = json.load(f)
+
+            # Build mocap format
+            if not entries:
+                return {
+                    "version": "1.0",
+                    "metadata": {
+                        "topic": topic,
+                        "frameCount": 0,
+                        "duration": 0,
+                        "startTime": 0,
+                        "endTime": 0,
+                    },
+                    "channels": [],
+                    "frames": []
+                }
+
+            first_ts = entries[0]["timestamp"]
+            last_ts = entries[-1]["timestamp"]
+            duration = last_ts - first_ts
+            fps = track_meta.get("metadata", {}).get("fps", 30) if isinstance(track_meta.get("metadata"), dict) else 30
+
+            # Get all channels (columns)
+            all_channels = set()
+            for entry in entries:
+                all_channels.update(k for k in entry.keys() if k != "timestamp")
+
+            return {
+                "version": "1.0",
+                "metadata": {
+                    "topic": topic,
+                    "description": track_meta.get("description"),
+                    "tags": track_meta.get("tags", []),
+                    "fps": fps,
+                    "duration": duration,
+                    "frameCount": len(entries),
+                    "startTime": first_ts,
+                    "endTime": last_ts,
+                },
+                "channels": sorted(list(all_channels)),
+                "frames": [{"time": e["timestamp"], **{k: v for k, v in e.items() if k != "timestamp"}} for e in entries]
+            }
+
+        else:
+            raise ValueError(f"Unsupported format: {format}")
+
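Reads support range and column filters plus the four export formats. A sketch against the same hypothetical `storage` instance:

    data = storage.read_track_data(
        owner="alice", project="demo", prefix="exp-01",
        topic="robot/position",
        start_timestamp=0.0, end_timestamp=10.0,
        columns=["pos.x"],
    )
    # -> {'entries': [{'timestamp': 0.0, 'pos.x': 1.0}, ...], 'count': ...}

    raw = storage.read_track_data(
        owner="alice", project="demo", prefix="exp-01",
        topic="robot/position", format="jsonl",
    )  # bytes, one JSON object per line; 'parquet' requires pyarrow
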
+    def list_tracks(
+        self,
+        owner: str,
+        project: str,
+        prefix: str,
+        topic_filter: Optional[str] = None,
+    ) -> List[Dict[str, Any]]:
+        """
+        List all tracks in an experiment.
+
+        Args:
+            owner: Owner/user
+            project: Project name
+            prefix: Experiment prefix
+            topic_filter: Optional topic filter (e.g., "robot/*")
+
+        Returns:
+            List of track summaries
+        """
+        experiment_dir = self._get_experiment_dir(owner, project, prefix)
+        tracks_dir = experiment_dir / "tracks"
+
+        if not tracks_dir.exists():
+            return []
+
+        tracks = []
+        for track_dir in tracks_dir.iterdir():
+            if track_dir.is_dir():
+                metadata_file = track_dir / "metadata.json"
+                if metadata_file.exists():
+                    with open(metadata_file, "r") as f:
+                        track_meta = json.load(f)
+
+                    topic = track_meta["topic"]
+
+                    # Apply topic filter
+                    if topic_filter:
+                        if topic_filter.endswith("/*"):
+                            # Prefix match
+                            prefix_match = topic_filter[:-2]
+                            if not topic.startswith(prefix_match):
+                                continue
+                        elif topic != topic_filter:
+                            # Exact match
+                            continue
+
+                    tracks.append({
+                        "id": track_meta["trackId"],
+                        "topic": topic,
+                        "totalEntries": track_meta["totalEntries"],
+                        "firstTimestamp": track_meta.get("firstTimestamp"),
+                        "lastTimestamp": track_meta.get("lastTimestamp"),
+                        "columns": track_meta.get("columns", []),
+                        "createdAt": track_meta.get("createdAt"),
+                    })
+
+        return tracks
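The "/*" suffix gives simple prefix matching over topics, e.g. (sketch):

    storage.list_tracks(owner="alice", project="demo", prefix="exp-01",
                        topic_filter="robot/*")
    # matches "robot/position" and "robot/velocity", but not "camera/rgb"
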
ml_dash/track.py ADDED
@@ -0,0 +1,263 @@
+"""
+Track API - Timestamped multi-modal data logging for ML experiments.
+
+Tracks are used for storing sparse timestamped data like robot trajectories,
+camera poses, sensor readings, etc. Each track has a topic (e.g., "robot/position")
+and stores entries with timestamps and arbitrary data fields.
+"""
+
+from typing import Dict, Any, List, Optional, TYPE_CHECKING
+from collections import defaultdict
+
+if TYPE_CHECKING:
+    from .experiment import Experiment
+
+
+class TracksManager:
+    """
+    Manager for track operations with support for global and per-topic flush.
+
+    Usage:
+        # Append to specific topic
+        experiment.tracks("robot/position").append(q=[0.1, 0.2], _ts=1.0)
+
+        # Flush all topics
+        experiment.tracks.flush()
+
+        # Flush specific topic
+        experiment.tracks("robot/position").flush()
+    """
+
+    def __init__(self, experiment: 'Experiment'):
+        """
+        Initialize TracksManager.
+
+        Args:
+            experiment: Parent Experiment instance
+        """
+        self._experiment = experiment
+        self._track_builders: Dict[str, 'TrackBuilder'] = {}  # Cache for TrackBuilder instances
+
+    def __call__(self, topic: str) -> 'TrackBuilder':
+        """
+        Get TrackBuilder for a specific topic.
+
+        Args:
+            topic: Track topic (e.g., "robot/position", "camera/rgb")
+
+        Returns:
+            TrackBuilder instance for the topic
+
+        Example:
+            experiment.tracks("robot/position").append(x=1.0, y=2.0, _ts=0.5)
+        """
+        if topic not in self._track_builders:
+            self._track_builders[topic] = TrackBuilder(self._experiment, topic, tracks_manager=self)
+
+        return self._track_builders[topic]
+
+    def flush(self) -> None:
+        """
+        Flush all topics to storage (remote or local).
+
+        This will write all buffered track entries to the server/filesystem.
+
+        Example:
+            experiment.tracks.flush()
+        """
+        # Flush all topics via background buffer manager
+        if self._experiment._buffer_manager:
+            self._experiment._buffer_manager.flush_tracks()
+
+
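Because builders are cached per topic, repeated calls with the same topic return the same object (sketch, assuming an opened `experiment`):

    b1 = experiment.tracks("robot/position")
    b2 = experiment.tracks("robot/position")
    assert b1 is b2  # same cached TrackBuilder
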
+class TrackBuilder:
+    """
+    Builder for track operations.
+
+    Provides fluent API for appending timestamped data to tracks.
+
+    Usage:
+        # Append single entry
+        experiment.tracks("robot/position").append(q=[0.1, 0.2], e=[0.5, 0.6], _ts=1.0)
+
+        # Flush specific topic
+        experiment.tracks("robot/position").flush()
+    """
+
+    def __init__(
+        self,
+        experiment: 'Experiment',
+        topic: str,
+        description: Optional[str] = None,
+        tags: Optional[List[str]] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+        tracks_manager: Optional['TracksManager'] = None
+    ):
+        """
+        Initialize TrackBuilder.
+
+        Args:
+            experiment: Parent Experiment instance
+            topic: Track topic (e.g., "robot/position")
+            description: Optional track description
+            tags: Optional tags for categorization
+            metadata: Optional structured metadata (fps, units, etc.)
+            tracks_manager: Parent TracksManager (for global flush)
+        """
+        self._experiment = experiment
+        self._topic = topic
+        self._description = description
+        self._tags = tags
+        self._metadata = metadata
+        self._tracks_manager = tracks_manager
+
+    def append(self, **kwargs) -> 'TrackBuilder':
+        """
+        Append a single timestamped entry to the track.
+
+        The _ts parameter is required for the timestamp. All other kwargs are data fields.
+
+        Entries with the same _ts will be merged when flushed.
+
+        Args:
+            _ts: Timestamp (required)
+            **kwargs: Data fields (e.g., q=[0.1, 0.2], e=[0.5, 0.6])
+
+        Returns:
+            Self for method chaining
+
+        Raises:
+            ValueError: If _ts is not provided
+
+        Example:
+            experiment.tracks("robot/position").append(
+                q=[0.1, -0.22, 0.45],
+                e=[0.5, 0.0, 0.6],
+                a=[1.0, 0.0],
+                v=[0.01, 0.02],
+                _ts=2.0
+            )
+        """
+        # Extract timestamp
+        if '_ts' not in kwargs:
+            raise ValueError("Timestamp '_ts' is required for track.append()")
+
+        timestamp = kwargs.pop('_ts')
+
+        # Validate timestamp
+        try:
+            timestamp = float(timestamp)
+        except (TypeError, ValueError):
+            raise ValueError(f"Timestamp '_ts' must be numeric, got: {type(timestamp)}")
+
+        # Remaining kwargs are data fields
+        data = kwargs
+
+        # Write to experiment (will be buffered)
+        self._experiment._write_track(self._topic, timestamp, data)
+
+        return self
+
+    def flush(self) -> 'TrackBuilder':
+        """
+        Flush this topic's buffered entries to storage.
+
+        Example:
+            experiment.tracks("robot/position").flush()
+
+        Returns:
+            Self for method chaining
+        """
+        if self._experiment._buffer_manager:
+            self._experiment._buffer_manager.flush_track(self._topic)
+
+        return self
+
+    def read(
+        self,
+        start_timestamp: Optional[float] = None,
+        end_timestamp: Optional[float] = None,
+        columns: Optional[List[str]] = None,
+        format: str = "json"
+    ) -> Any:
+        """
+        Read track data with optional filtering.
+
+        Args:
+            start_timestamp: Optional start timestamp filter
+            end_timestamp: Optional end timestamp filter
+            columns: Optional list of columns to retrieve
+            format: Export format ('json', 'jsonl', 'parquet', 'mocap')
+
+        Returns:
+            Track data in requested format
+
+        Raises:
+            ValueError: If experiment not opened or no client configured
+
+        Example:
+            # Get all data as JSON
+            data = experiment.tracks("robot/position").read()
+
+            # Get data in time range
+            data = experiment.tracks("robot/position").read(
+                start_timestamp=0.0,
+                end_timestamp=10.0
+            )
+
+            # Export as JSONL
+            jsonl_bytes = experiment.tracks("robot/position").read(format="jsonl")
+
+            # Export as Parquet
+            parquet_bytes = experiment.tracks("robot/position").read(format="parquet")
+
+            # Export as Mocap JSON
+            mocap_data = experiment.tracks("robot/position").read(format="mocap")
+        """
+        # Remote mode
+        if self._experiment.run._client:
+            # Need experiment ID for remote mode
+            if not self._experiment._experiment_id:
+                raise ValueError("Experiment must be opened before reading tracks. Use 'with experiment.run:'")
+
+            return self._experiment.run._client.get_track_data(
+                experiment_id=self._experiment._experiment_id,
+                topic=self._topic,
+                start_timestamp=start_timestamp,
+                end_timestamp=end_timestamp,
+                columns=columns,
+                format=format
+            )
+
+        # Local mode
+        if self._experiment.run._storage:
+            return self._experiment.run._storage.read_track_data(
+                owner=self._experiment.run.owner,
+                project=self._experiment.run.project,
+                prefix=self._experiment.run._folder_path,
+                topic=self._topic,
+                start_timestamp=start_timestamp,
+                end_timestamp=end_timestamp,
+                columns=columns,
+                format=format
+            )
+
+        raise ValueError("No client or storage configured for experiment")
+
+    def list_entries(self) -> List[Dict[str, Any]]:
+        """
+        List all entries in this track (for remote mode).
+
+        Returns:
+            List of entry dicts
+
+        Example:
+            entries = experiment.tracks("robot/position").list_entries()
+        """
+        # Just read with default JSON format
+        result = self.read(format="json")
+
+        if isinstance(result, dict) and "entries" in result:
+            return result["entries"]
+
+        return []
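Putting the new API together, a typical logging session might look like this (a sketch; the Experiment setup is assumed, as its constructor is not part of this diff):

    # Hypothetical setup: an ml_dash Experiment bound to local storage or a remote client
    experiment = ...

    track = experiment.tracks("robot/position")
    for i in range(100):
        track.append(q=[0.1 * i, 0.2 * i], _ts=i / 30.0)  # _ts is required

    experiment.tracks.flush()  # flush every topic
    data = track.read(start_timestamp=0.0, end_timestamp=1.0)
    print(data["count"], "entries in the first second")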