PyPI - robocandywrapper - Versions diffs - 0.2.2__tar.gz → 0.2.3__tar.gz - Mend

robocandywrapper 0.2.2.tar.gz → 0.2.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

{robocandywrapper-0.2.2/robocandywrapper.egg-info → robocandywrapper-0.2.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: robocandywrapper
-Version: 0.2.2
+Version: 0.2.3
 Summary: Sweet wrappers for extending and remixing LeRobot Datasets
 Author: RoboCandyWrapper Contributors
 License: MIT License

{robocandywrapper-0.2.2 → robocandywrapper-0.2.3}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "robocandywrapper"
-version = "0.2.2"
+version = "0.2.3"
 description = "Sweet wrappers for extending and remixing LeRobot Datasets"
 readme = "README.md"
 requires-python = ">=3.10"

{robocandywrapper-0.2.2 → robocandywrapper-0.2.3}/robocandywrapper/__init__.py RENAMED Viewed

@@ -17,7 +17,7 @@ from robocandywrapper.constants import (
     EPISODE_OUTCOME_PLUGIN_NAME,
 )
-__version__ = "0.2.1"
+__version__ = "0.2.3"
 __all__ = [
     "DatasetPlugin",

{robocandywrapper-0.2.2 → robocandywrapper-0.2.3}/robocandywrapper/dataformats/lerobot_21/dataset.py RENAMED Viewed

@@ -93,6 +93,7 @@ class LeRobot21DatasetMetadata:
         try:
             if force_cache_sync:
                 raise FileNotFoundError
+            self.pull_from_repo()
             self.load_metadata()
         except (FileNotFoundError, NotADirectoryError):
             if is_valid_version(self.revision):
@@ -728,7 +729,11 @@ class LeRobot21Dataset(torch.utils.data.Dataset):
         item = {}
         for vid_key, query_ts in query_timestamps.items():
             video_path = self.root / self.meta.get_video_file_path(ep_idx, vid_key)
-            frames = decode_video_frames(video_path, query_ts, self.tolerance_s, self.video_backend)
+            try:
+                frames = decode_video_frames(video_path, query_ts, self.tolerance_s, self.video_backend)
+            except Exception as e:
+                # fall back to trying to decode with pyav
+                frames = decode_video_frames(video_path, query_ts, self.tolerance_s, "pyav")
             item[vid_key] = frames.squeeze(0)
         return item
@@ -768,6 +773,11 @@ class LeRobot21Dataset(torch.utils.data.Dataset):
         task_idx = item["task_index"].item()
         item["task"] = self.meta.tasks[task_idx]
+        # Hack - add gripper position to end
+        # only applies to a specific dataset
+        # if "observation.eef_6d_pose" in item and item["observation.eef_6d_pose"].shape[0] == 6:
+        #     item["observation.eef_6d_pose"] = torch.cat([item["observation.eef_6d_pose"], item["observation.state"][-1:]], dim=0)
         return item
     def __repr__(self):

{robocandywrapper-0.2.2 → robocandywrapper-0.2.3}/robocandywrapper/factory.py RENAMED Viewed

@@ -76,6 +76,7 @@ def _create_datasets(
     observation_delta_indices: Optional[List] = None,
     reward_delta_indices: Optional[List] = None,
     use_imagenet_stats: bool = True,
+    load_videos: bool = True,
 ) -> List[LeRobotDataset | LeRobot21Dataset]:
     """Private helper to create dataset instances from a list of repo IDs.
@@ -92,6 +93,8 @@ def _create_datasets(
         observation_delta_indices: Frame indices for observations.
         reward_delta_indices: Frame indices for rewards.
         use_imagenet_stats: Whether to apply ImageNet normalization stats.
+        load_videos: Whether to download and load video files (default: True).
+            Set to False to skip video downloads when not needed.
     Returns:
         List of dataset instances.
@@ -170,6 +173,7 @@ def _create_datasets(
             image_transforms=None,  # Will be applied by WrappedRobotDataset
             revision=revision,
             video_backend=video_backend,
+            download_videos=load_videos,
         )
         # Apply ImageNet stats if needed
@@ -189,12 +193,18 @@ def _create_datasets(
 def make_dataset(
     cfg: TrainPipelineConfig,
     plugins: Optional[list[DatasetPlugin]] = None,
+    key_rename_map: Optional[dict[str, str]] = None,
+    load_videos: bool = True,
 ) -> WrappedRobotDataset:
     """Handles the logic of setting up delta timestamps and image transforms before creating a dataset.
     Args:
         cfg (TrainPipelineConfig): A TrainPipelineConfig config which contains a DatasetConfig and a PreTrainedConfig.
         plugins (Optional[list[DatasetPlugin]]): Optional list of plugins to attach to the dataset(s).
+        key_rename_map (Optional[dict[str, str]]): Optional mapping from source keys to target keys
+            for unifying datasets with different naming conventions. Example: {"action.pos": "action"}
+        load_videos (bool): Whether to download and load video files (default: True).
+            Set to False to skip video downloads when not needed.
     Returns:
         WrappedRobotDataset: A wrapped dataset with plugin support.
@@ -221,6 +231,7 @@ def make_dataset(
         observation_delta_indices=cfg.policy.observation_delta_indices,
         reward_delta_indices=cfg.policy.reward_delta_indices,
         use_imagenet_stats=cfg.dataset.use_imagenet_stats,
+        load_videos=load_videos,
     )
     # Wrap in WrappedRobotDataset with plugins
@@ -228,6 +239,7 @@ def make_dataset(
         datasets=datasets,
         plugins=plugins,
         image_transforms=image_transforms,
+        key_rename_map=key_rename_map,
     )
     return wrapped_dataset
@@ -243,6 +255,8 @@ def make_dataset_without_config(
     revision: str | None = None,
     use_imagenet_stats: bool = True,
     plugins: Optional[list[DatasetPlugin]] = None,
+    key_rename_map: Optional[dict[str, str]] = None,
+    load_videos: bool = True,
 ) -> WrappedRobotDataset:
     """Handles the logic of setting up delta timestamps and image transforms before creating a dataset.
@@ -259,6 +273,10 @@ def make_dataset_without_config(
         revision (str, optional): Dataset revision
         use_imagenet_stats (bool): Whether to use ImageNet normalization stats (default: True)
         plugins (Optional[list[DatasetPlugin]]): Optional list of plugins to attach to the dataset(s)
+        key_rename_map (Optional[dict[str, str]]): Optional mapping from source keys to target keys
+            for unifying datasets with different naming conventions. Example: {"action.pos": "action"}
+        load_videos (bool): Whether to download and load video files (default: True).
+            Set to False to skip video downloads when not needed.
     Returns:
         WrappedRobotDataset: A wrapped dataset with plugin support.
@@ -283,12 +301,14 @@ def make_dataset_without_config(
         action_delta_indices=action_delta_indices,
         observation_delta_indices=observation_delta_indices,
         use_imagenet_stats=use_imagenet_stats,
+        load_videos=load_videos,
     )
     # Wrap in WrappedRobotDataset with plugins
     wrapped_dataset = WrappedRobotDataset(
         datasets=datasets,
         plugins=plugins,
+        key_rename_map=key_rename_map,
     )
     return wrapped_dataset

{robocandywrapper-0.2.2 → robocandywrapper-0.2.3}/robocandywrapper/wrapper.py RENAMED Viewed

@@ -23,6 +23,7 @@ class WrappedRobotDataset(torch.utils.data.Dataset):
         warn_on_key_conflicts: bool = True,
         error_on_key_conflicts: bool = True,
         dataset_weights: Optional[dict[str, float]] = None,
+        key_rename_map: Optional[dict[str, str]] = None,
         **kwargs
     ):
         """
@@ -35,6 +36,14 @@ class WrappedRobotDataset(torch.utils.data.Dataset):
             warn_on_key_conflicts: Warn when plugins have overlapping keys (if not raising errors)
             error_on_key_conflicts: Raise error on key conflicts (default: True)
             dataset_weights: Optional weights for computing weighted stats (e.g., {"dataset_id": 2.0})
+            key_rename_map: Optional mapping from source keys to target keys for unifying
+                datasets with different naming conventions. Keys are renamed before the
+                intersection logic runs, allowing datasets with different key names to be
+                combined. Example: {"action.pos": "action", "trajectory": "action"}
+                Note: When a key is renamed, any corresponding "_is_pad" key (added by
+                LeRobot when using delta_timestamps) is automatically renamed as well.
+                E.g., "action.pos" -> "action" also renames "action.pos_is_pad" -> "action_is_pad".
         """
         super().__init__()
@@ -64,6 +73,10 @@ class WrappedRobotDataset(torch.utils.data.Dataset):
             self._cumulative_lengths.append(self._cumulative_lengths[-1] + length)
         self._total_length = self._cumulative_lengths[-1]
+        # Key rename mapping: unify differently-named keys across datasets
+        self.key_rename_map = key_rename_map or {}
+        self._dataset_renames = self._compute_dataset_renames()
         # Plugin management: one plugin class, many instances (one per dataset)
         self._plugins: list[DatasetPlugin] = plugins or []
         self._plugin_instances: list[list[PluginInstance]] = []
@@ -91,41 +104,51 @@ class WrappedRobotDataset(torch.utils.data.Dataset):
         # Disable any data keys that are not common across all of the datasets. Note: we may relax this
         # restriction in future iterations of this class. For now, this is necessary at least for being able
         # to use PyTorch's default DataLoader collate function.
+        #
+        # Key rename mapping is applied first (conceptually), so intersection is computed on
+        # "effective" features (post-rename). This allows datasets with different key names to be
+        # unified before the intersection check.
         self.disabled_features = set()
-        intersection_features = set(self._datasets[0].features)
-        for ds in self._datasets:
-            intersection_features.intersection_update(ds.features)
+        intersection_features = self._get_effective_features(0)
+        for i in range(len(self._datasets)):
+            intersection_features.intersection_update(self._get_effective_features(i))
         if len(intersection_features) == 0:
             raise RuntimeError(
                 "Multiple datasets were provided but they had no keys common to all of them. "
                 "The multi-dataset functionality currently only keeps common keys."
             )
-        for repo_id, ds in zip(self.repo_ids, self._datasets, strict=True):
-            extra_keys = set(ds.features).difference(intersection_features)
+        for i, repo_id in enumerate(self.repo_ids):
+            effective_keys = self._get_effective_features(i)
+            extra_keys = effective_keys.difference(intersection_features)
             if extra_keys:
                 logging.warning(
-                f"keys {extra_keys} of {repo_id} were disabled as they are not contained in all the "
-                "other datasets."
+                    f"keys {extra_keys} of {repo_id} were disabled as they are not contained in all the "
+                    "other datasets."
                 )
             self.disabled_features.update(extra_keys)
         # Validate that common features have compatible shapes
+        # Note: We need to look up the original key name for renamed keys
         for key in intersection_features:
             shapes = []
-            for ds in self._datasets:
-                if key in ds.meta.features:
-                    feature_shape = ds.meta.features[key].get('shape', [])
+            shape_details = []
+            for i, ds in enumerate(self._datasets):
+                # Find the original key (may be renamed)
+                renames = self._dataset_renames[i]
+                reverse_renames = {v: k for k, v in renames.items()}
+                original_key = reverse_renames.get(key, key)
+                if original_key in ds.meta.features:
+                    feature_shape = ds.meta.features[original_key].get('shape', [])
                     shapes.append(tuple(feature_shape))
+                    if original_key != key:
+                        shape_details.append(f"{ds.repo_id}: {feature_shape} (from '{original_key}')")
+                    else:
+                        shape_details.append(f"{ds.repo_id}: {feature_shape}")
             # Check if all shapes are the same
             unique_shapes = set(shapes)
             if len(unique_shapes) > 1:
-                shape_details = []
-                for ds in self._datasets:
-                    if key in ds.meta.features:
-                        shape = ds.meta.features[key].get('shape', [])
-                        shape_details.append(f"{ds.repo_id}: {shape}")
                 raise ValueError(
                     f"Incompatible shapes for feature '{key}' across datasets:\n" +
                     "\n".join(f"  - {detail}" for detail in shape_details) +
@@ -296,6 +319,77 @@ class WrappedRobotDataset(torch.utils.data.Dataset):
         # Also update the cached stats property
         self.stats = self._meta.stats
+    def _compute_dataset_renames(self) -> list[dict[str, str]]:
+        """
+        Pre-compute which key renames apply to each dataset.
+        For each dataset, determines which source keys from key_rename_map exist
+        and can be renamed (i.e., target key doesn't already exist).
+        Also automatically handles derived _is_pad keys that LeRobot adds when
+        delta_timestamps are used. For example, if renaming "action.pos" -> "action",
+        this will also rename "action.pos_is_pad" -> "action_is_pad".
+        Returns:
+            List of dicts mapping source_key -> target_key for each dataset
+        """
+        dataset_renames = []
+        for dataset in self._datasets:
+            ds_renames = {}
+            ds_keys = set(dataset.features)
+            for source, target in self.key_rename_map.items():
+                if source in ds_keys:
+                    if target in ds_keys:
+                        # Target already exists in this dataset - skip rename to avoid conflict
+                        logging.warning(
+                            f"Skipping rename '{source}' -> '{target}' for {dataset.repo_id}: "
+                            f"target key already exists in dataset"
+                        )
+                    else:
+                        ds_renames[source] = target
+                        # Also handle the _is_pad suffix that LeRobot adds for delta_timestamps
+                        # These keys are dynamically added during __getitem__ and may not be in
+                        # dataset.features, but we still want to rename them consistently
+                        is_pad_source = f"{source}_is_pad"
+                        is_pad_target = f"{target}_is_pad"
+                        # Check for conflicts on the _is_pad key as well
+                        if is_pad_target in ds_keys:
+                            logging.warning(
+                                f"Skipping derived rename '{is_pad_source}' -> '{is_pad_target}' "
+                                f"for {dataset.repo_id}: target key already exists in dataset"
+                            )
+                        else:
+                            ds_renames[is_pad_source] = is_pad_target
+            dataset_renames.append(ds_renames)
+        return dataset_renames
+    def _get_effective_features(self, dataset_idx: int) -> set[str]:
+        """
+        Get the effective feature keys for a dataset after applying renames.
+        Args:
+            dataset_idx: Index of the dataset
+        Returns:
+            Set of feature keys that would exist after renaming
+        """
+        ds = self._datasets[dataset_idx]
+        renames = self._dataset_renames[dataset_idx]
+        effective = set()
+        for key in ds.features:
+            if key in renames:
+                effective.add(renames[key])
+            else:
+                effective.add(key)
+        return effective
     def _validate_plugin_keys(self):
         """
         Check for key conflicts between plugins.
@@ -483,7 +577,14 @@ class WrappedRobotDataset(torch.utils.data.Dataset):
         # Add dataset index
         item["dataset_index"] = torch.tensor(dataset_idx)
-        # Remove disabled features
+        # Apply key renaming for this dataset (before filtering disabled features)
+        # This unifies differently-named keys across datasets
+        renames = self._dataset_renames[dataset_idx]
+        for source, target in renames.items():
+            if source in item:
+                item[target] = item.pop(source)
+        # Remove disabled features (now operates on effective/renamed key names)
         for data_key in self.disabled_features:
             if data_key in item:
                 del item[data_key]

{robocandywrapper-0.2.2 → robocandywrapper-0.2.3/robocandywrapper.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: robocandywrapper
-Version: 0.2.2
+Version: 0.2.3
 Summary: Sweet wrappers for extending and remixing LeRobot Datasets
 Author: RoboCandyWrapper Contributors
 License: MIT License

{robocandywrapper-0.2.2 → robocandywrapper-0.2.3}/setup.py RENAMED Viewed

@@ -9,7 +9,7 @@ long_description = readme_file.read_text(encoding="utf-8") if readme_file.exists
 setup(
     name="robocandywrapper",
-    version="0.2.1",
+    version="0.2.3",
     description="Sweet wrappers for extending and remixing LeRobot Datasets",
     long_description=long_description,
     long_description_content_type="text/markdown",