PyPI - opentau - Versions diffs - 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

opentau 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

opentau/configs/default.py +16 -0
opentau/configs/deployment.py +85 -0
opentau/configs/train.py +5 -0
opentau/datasets/factory.py +43 -10
opentau/datasets/lerobot_dataset.py +19 -19
opentau/datasets/video_utils.py +11 -6
opentau/policies/pi05/configuration_pi05.py +9 -6
opentau/policies/pi05/modeling_pi05.py +296 -30
opentau/policies/pi05/paligemma_with_expert.py +20 -20
opentau/scripts/grpc/__init__.py +19 -0
opentau/scripts/grpc/client.py +601 -0
opentau/scripts/grpc/robot_inference_pb2.py +61 -0
opentau/scripts/grpc/robot_inference_pb2_grpc.py +210 -0
opentau/scripts/grpc/server.py +313 -0
opentau/scripts/launch.py +12 -4
opentau/scripts/train.py +94 -17
opentau/scripts/visualize_dataset.py +141 -38
opentau/utils/transformers_patch.py +251 -20
{opentau-0.1.1.dist-info → opentau-0.2.0.dist-info}/METADATA +37 -17
{opentau-0.1.1.dist-info → opentau-0.2.0.dist-info}/RECORD +24 -21
{opentau-0.1.1.dist-info → opentau-0.2.0.dist-info}/WHEEL +1 -1
{opentau-0.1.1.dist-info → opentau-0.2.0.dist-info}/entry_points.txt +1 -0
opentau/scripts/libero_simulation_parallel.py +0 -356
opentau/scripts/libero_simulation_sequential.py +0 -122
opentau/scripts/visualize_dataset_html.py +0 -507
{opentau-0.1.1.dist-info → opentau-0.2.0.dist-info}/licenses/LICENSE +0 -0
{opentau-0.1.1.dist-info → opentau-0.2.0.dist-info}/top_level.txt +0 -0

opentau/configs/default.py CHANGED Viewed

@@ -96,6 +96,11 @@ class DatasetConfig:
     data_features_name_mapping: dict[str, str] | None = None
     loss_type_mapping: str | None = None
+    # Ratio of the dataset to be used for validation. Please specify a value.
+    # If `val_freq` is set to 0, a validation dataset will not be created and this value will be ignored.
+    # Defaults to 0.05.
+    val_split_ratio: float = 0.05
     def __post_init__(self):
         """Validate dataset configuration and register custom mappings if provided."""
         if (self.repo_id is None) == (self.grounding is None):
@@ -148,6 +153,11 @@ class DatasetMixtureConfig:
     image_resample_strategy: str = "nearest"
     # Resample strategy for non-image features, such as action or state
     vector_resample_strategy: str = "nearest"
+    # Ratio of the dataset to be used for validation. Please specify a value.
+    # If `val_freq` is set to 0, a validation dataset will not be created and this value will be ignored.
+    # This value is applied to all datasets in the mixture.
+    # Defaults to 0.05.
+    val_split_ratio: float = 0.05
     def __post_init__(self):
         """Validate dataset mixture configuration."""
@@ -163,6 +173,12 @@ class DatasetMixtureConfig:
             raise ValueError(
                 f"`vector_resample_strategy` must be one of ['linear', 'nearest'], got {self.vector_resample_strategy}."
             )
+        if self.val_split_ratio < 0 or self.val_split_ratio > 1:
+            raise ValueError(f"`val_split_ratio` must be between 0 and 1, got {self.val_split_ratio}.")
+        # set the val_split_ratio for all datasets in the mixture
+        for dataset_cfg in self.datasets:
+            dataset_cfg.val_split_ratio = self.val_split_ratio
 @dataclass

opentau/configs/deployment.py ADDED Viewed

@@ -0,0 +1,85 @@
+# Copyright 2026 Tensor Auto Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Deployment configuration classes for inference servers.
+This module provides configuration classes for deploying trained models
+as inference servers, including gRPC server settings.
+"""
+from dataclasses import dataclass
+@dataclass
+class ServerConfig:
+    """Configuration for the gRPC inference server.
+    This class contains all configuration parameters needed to run a gRPC
+    inference server for robot policy models.
+    Args:
+        port: Port number to serve on. Must be between 1 and 65535.
+            Defaults to 50051.
+        max_workers: Maximum number of gRPC worker threads for handling
+            concurrent requests. Defaults to 4.
+        max_send_message_length_mb: Maximum size of outgoing messages in
+            megabytes. Defaults to 100.
+        max_receive_message_length_mb: Maximum size of incoming messages in
+            megabytes. Defaults to 100.
+    Raises:
+        ValueError: If port is not in valid range or max_workers is less than 1.
+    Example:
+        >>> config = ServerConfig(port=50051, max_workers=8)
+        >>> config.port
+        50051
+    """
+    port: int = 50051
+    max_workers: int = 4
+    max_send_message_length_mb: int = 100
+    max_receive_message_length_mb: int = 100
+    def __post_init__(self):
+        """Validate server configuration parameters."""
+        if not 1 <= self.port <= 65535:
+            raise ValueError(f"`port` must be between 1 and 65535, got {self.port}.")
+        if self.max_workers < 1:
+            raise ValueError(f"`max_workers` must be at least 1, got {self.max_workers}.")
+        if self.max_send_message_length_mb < 1:
+            raise ValueError(
+                f"`max_send_message_length_mb` must be at least 1, got {self.max_send_message_length_mb}."
+            )
+        if self.max_receive_message_length_mb < 1:
+            raise ValueError(
+                f"`max_receive_message_length_mb` must be at least 1, got {self.max_receive_message_length_mb}."
+            )
+    @property
+    def max_send_message_length(self) -> int:
+        """Get maximum send message length in bytes.
+        Returns:
+            Maximum send message length in bytes.
+        """
+        return self.max_send_message_length_mb * 1024 * 1024
+    @property
+    def max_receive_message_length(self) -> int:
+        """Get maximum receive message length in bytes.
+        Returns:
+            Maximum receive message length in bytes.
+        """
+        return self.max_receive_message_length_mb * 1024 * 1024

opentau/configs/train.py CHANGED Viewed

@@ -32,6 +32,7 @@ from huggingface_hub.errors import HfHubHTTPError
 from opentau.configs import parser
 from opentau.configs.default import DatasetMixtureConfig, EvalConfig, WandBConfig
+from opentau.configs.deployment import ServerConfig
 from opentau.configs.policies import PreTrainedConfig
 from opentau.envs.configs import EnvConfig
 from opentau.optim import OptimizerConfig
@@ -116,6 +117,7 @@ class TrainPipelineConfig(HubMixin):
             is disabled. Defaults to 0.
         last_checkpoint_only: If True, only evaluate the last checkpoint.
             Defaults to True.
+        server: Configuration for the gRPC inference server. Defaults to ServerConfig().
     """
     dataset_mixture: DatasetMixtureConfig
@@ -163,7 +165,10 @@ class TrainPipelineConfig(HubMixin):
     env: EnvConfig | None = None
     eval: EvalConfig | None = field(default_factory=EvalConfig)
     eval_freq: int = 0  # evaluate every eval_freq steps
+    val_freq: int = 0  # validate every val_freq steps, if 0, then a validation split is not created
     last_checkpoint_only: bool = True
+    # gRPC inference server configuration
+    server: ServerConfig = field(default_factory=ServerConfig)
     def __post_init__(self):
         """Initialize post-creation attributes and validate batch size configuration."""

opentau/datasets/factory.py CHANGED Viewed

@@ -61,7 +61,11 @@ Example:
         >>> dataloader = mixture.get_dataloader()
 """
+import copy
+from typing import Tuple, Union
 import numpy as np
+import torch
 # NOTE: Don't delete; imported for side effects.
 import opentau.datasets.grounding.clevr  # noqa: F401
@@ -151,9 +155,13 @@ def make_dataset(
     cfg: DatasetConfig,
     train_cfg: TrainPipelineConfig,
     return_advantage_input: bool = False,
-) -> BaseDataset:
+) -> Union[BaseDataset, Tuple[BaseDataset, BaseDataset]]:
     """Handles the logic of setting up delta timestamps and image transforms before creating a dataset.
+    A train and validation dataset are returned if `train_cfg.val_freq` is greater than 0.
+    The validation dataset is a subset of the train dataset, and is used for evaluation during training.
+    The validation dataset is created by splitting the train dataset into train and validation sets based on `cfg.val_split_ratio`.
     Args:
         cfg (DatasetConfig): A DatasetConfig used to create a LeRobotDataset.
         train_cfg (TrainPipelineConfig): A TrainPipelineConfig config which contains a DatasetConfig and a PreTrainedConfig.
@@ -161,10 +169,11 @@ def make_dataset(
             "episode_end_idx", "current_idx", "last_step", "episode_index", and "timestamp". Defaults to False.
     Raises:
-        NotImplementedError: The MultiLeRobotDataset is currently deactivated.
+        ValueError: If exactly one of `cfg.grounding` and `cfg.repo_id` is not provided.
+        ValueError: If `cfg.grounding` is not a supported grounding dataset.
     Returns:
-        BaseDataset
+        BaseDataset or Tuple[BaseDataset, BaseDataset]: A single dataset or a tuple of (train_dataset, val_dataset) if val_freq > 0.
     """
     image_transforms = ImageTransforms(cfg.image_transforms) if cfg.image_transforms.enable else None
@@ -209,12 +218,20 @@ def make_dataset(
                     dataset.meta.stats[key] = {}
                 dataset.meta.stats[key][stats_type] = np.array(stats, dtype=np.float32)
+    if train_cfg.val_freq > 0:
+        val_size = int(len(dataset) * cfg.val_split_ratio)
+        train_size = len(dataset) - val_size
+        train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])
+        train_dataset.meta = copy.deepcopy(dataset.meta)
+        val_dataset.meta = copy.deepcopy(dataset.meta)
+        return train_dataset, val_dataset
     return dataset
 def make_dataset_mixture(
     cfg: TrainPipelineConfig, return_advantage_input: bool = False
-) -> WeightedDatasetMixture:
+) -> Union[WeightedDatasetMixture, Tuple[WeightedDatasetMixture, WeightedDatasetMixture]]:
     """Creates a dataset mixture from the provided TrainPipelineConfig.
     Args:
@@ -223,10 +240,26 @@ def make_dataset_mixture(
             "episode_end_idx", "current_idx", "last_step", "episode_index", and "timestamp". Defaults to False.
     Returns:
-        WeightedDatasetMixture: An instance of WeightedDatasetMixture containing the datasets.
+        WeightedDatasetMixture or Tuple[WeightedDatasetMixture, WeightedDatasetMixture]: An instance of WeightedDatasetMixture containing the datasets, or a tuple of (train_mixture, val_mixture) if val_freq > 0.
     """
-    datasets = [
-        make_dataset(dataset_cfg, cfg, return_advantage_input=return_advantage_input)
-        for dataset_cfg in cfg.dataset_mixture.datasets
-    ]
-    return WeightedDatasetMixture(cfg, datasets, cfg.dataset_mixture.weights, cfg.dataset_mixture.action_freq)
+    datasets = []
+    val_datasets = []
+    for dataset_cfg in cfg.dataset_mixture.datasets:
+        res = make_dataset(dataset_cfg, cfg, return_advantage_input=return_advantage_input)
+        if isinstance(res, tuple):
+            datasets.append(res[0])
+            val_datasets.append(res[1])
+        else:
+            datasets.append(res)
+    train_mixture = WeightedDatasetMixture(
+        cfg, datasets, cfg.dataset_mixture.weights, cfg.dataset_mixture.action_freq
+    )
+    if val_datasets:
+        val_mixture = WeightedDatasetMixture(
+            cfg, val_datasets, cfg.dataset_mixture.weights, cfg.dataset_mixture.action_freq
+        )
+        return train_mixture, val_mixture
+    return train_mixture

opentau/datasets/lerobot_dataset.py CHANGED Viewed

@@ -150,6 +150,7 @@ from opentau.policies.value.configuration_value import ValueConfig
 from opentau.policies.value.reward import (
     calculate_return_bins_with_equal_width,
 )
+from opentau.utils.accelerate_utils import get_proc_accelerator
 from opentau.utils.utils import on_accelerate_main_proc
@@ -324,8 +325,17 @@ class LeRobotDatasetMetadata(DatasetMetadata):
             if is_valid_version(self.revision):
                 self.revision = get_safe_version(self.repo_id, self.revision)
-            (self.root / "meta").mkdir(exist_ok=True, parents=True)
-            self.pull_from_repo(allow_patterns="meta/")
+            # In distributed training, only rank 0 downloads to avoid race conditions
+            # where other ranks read metadata before the download has finished.
+            acc = get_proc_accelerator()
+            if acc is not None and acc.num_processes > 1:
+                if acc.is_main_process:
+                    (self.root / "meta").mkdir(exist_ok=True, parents=True)
+                    self.pull_from_repo(allow_patterns="meta/")
+                acc.wait_for_everyone()
+            else:
+                (self.root / "meta").mkdir(exist_ok=True, parents=True)
+                self.pull_from_repo(allow_patterns="meta/")
             self.load_metadata()
     def load_metadata(self) -> None:
@@ -633,7 +643,9 @@ class BaseDataset(torch.utils.data.Dataset):
         For example, {"image_key": torch.zeros(2, 3, 224, 224), "image_key_is_pad": [False, True] } will become
         {
             "image_key": torch.zeros(3, 224, 224),
+            "image_key_local": torch.zeros(3, 224, 224),
             "image_key_is_pad: False,
+            "image_key_local_is_pad": True,
         }.
         """
         raise NotImplementedError
@@ -723,14 +735,6 @@ class BaseDataset(torch.utils.data.Dataset):
             if isinstance(value, torch.Tensor) and value.dtype.is_floating_point:
                 standard_item[key] = value.to(dtype=torch.bfloat16)
-        # ensure that non-empty strings contain exactly one newline character at the end of the string
-        for key in ["prompt", "response"]:
-            if standard_item[key].endswith(
-                "\n"
-            ):  # ensure there isn't going to be an extra space at the end after calling replace
-                standard_item[key] = standard_item[key][:-1]
-            standard_item[key] = standard_item[key].replace("\n", " ") + "\n"
         return standard_item
     def resize_with_pad(self, img, width, height, pad_value=0) -> torch.Tensor:
@@ -1787,16 +1791,12 @@ class LeRobotDataset(BaseDataset):
         cam_keys = {v for k, v in name_map.items() if k.startswith("camera")}
         for k in cam_keys:
             images = item.pop(k)
-            assert len(images) == 2, (
-                f"{k} in {self.__class__} is expected to have length 2, got shape={images.shape}"
-            )
-            item[k + "_local"], item[k] = images
+            if len(images) == 2:
+                item[k + "_local"], item[k] = images
-            pads = item.pop(k + "_is_pad")
-            assert len(pads) == 2, (
-                f"{k} in {self.__class__} is expected to have length 2, got shape={pads.shape}"
-            )
-            item[k + "_local_is_pad"], item[k + "_is_pad"] = pads
+            pads = item.get(k + "_is_pad")
+            if hasattr(pads, "__len__") and len(pads) == 2:
+                item[k + "_local_is_pad"], item[k + "_is_pad"] = pads
     @staticmethod
     def compute_delta_params(

opentau/datasets/video_utils.py CHANGED Viewed

@@ -108,6 +108,7 @@ import pyarrow as pa
 import torch
 import torchvision
 from datasets.features.features import register_feature
+from packaging import version
 from PIL import Image
@@ -117,13 +118,17 @@ def get_safe_default_codec() -> str:
     Returns:
         Backend name: "torchcodec" if available, otherwise "pyav".
     """
-    if importlib.util.find_spec("torchcodec"):
-        return "torchcodec"
-    else:
-        logging.warning(
-            "'torchcodec' is not available in your platform, falling back to 'pyav' as a default decoder"
-        )
+    if version.parse(torch.__version__) >= version.parse("2.8.0"):
         return "pyav"
+    else:
+        if importlib.util.find_spec("torchcodec"):
+            return "torchcodec"
+        else:
+            logging.warning(
+                "'torchcodec' is not available in your platform, falling back to 'pyav' as a default decoder"
+            )
+            return "pyav"
 def decode_video_frames(

opentau/policies/pi05/configuration_pi05.py CHANGED Viewed

@@ -49,18 +49,18 @@ class PI05Config(PreTrainedConfig):
             Defaults to identity for visual features and mean-std for state and action.
         max_state_dim: Maximum dimension for state vectors. Shorter vectors are padded. Defaults to 32.
         max_action_dim: Maximum dimension for action vectors. Shorter vectors are padded. Defaults to 32.
+        predict_response: Whether to predict the response. Defaults to False.
         resize_imgs_with_padding: Target size (height, width) for image resizing with padding.
             Defaults to (224, 224).
         empty_cameras: Number of empty camera inputs to add. Used for specific adaptations like
             Aloha simulation. Defaults to 0.
-        tokenizer_max_length: Maximum length for tokenizer. Defaults to 256.
+        prompt_max_length: Maximum length for tokenizer. Defaults to 256.
         discrete_action_max_length: Maximum length for discrete action tokens. Defaults to 32.
         proj_width: Width of the projection layer. Defaults to 1024.
         dropout: Dropout rate. Defaults to 0.1.
         num_steps: Number of flow matching steps for decoding. Defaults to 10.
         init_strategy: Initialization strategy. One of "no_init", "full_he_init", "expert_only_he_init".
             Defaults to "full_he_init".
-        use_cache: Whether to use KV cache during inference. Defaults to True.
         attention_implementation: Attention implementation to use ("eager" or "fa2"). Defaults to "eager".
         freeze_vision_encoder: Whether to freeze the vision encoder during fine-tuning. Defaults to True.
         train_expert_only: Whether to train only the expert module. Defaults to False.
@@ -89,6 +89,7 @@ class PI05Config(PreTrainedConfig):
     # Shorter state and action vectors will be padded
     max_state_dim: int = 32
     max_action_dim: int = 32
+    predict_response: bool = False
     # Image preprocessing
     resize_imgs_with_padding: tuple[int, int] = (224, 224)
@@ -97,8 +98,11 @@ class PI05Config(PreTrainedConfig):
     # left and right wrist cameras in addition to the top camera.
     empty_cameras: int = 0
-    # Tokenizer
-    tokenizer_max_length: int = 256
+    # Language Tokenizer
+    prompt_max_length: int = 256
+    # Response Tokenizer
+    response_max_length: int = 52
     # Maximum length of the action tokens
     discrete_action_max_length: int = 32
@@ -116,8 +120,7 @@ class PI05Config(PreTrainedConfig):
     init_strategy: Literal["no_init", "full_he_init", "expert_only_he_init"] = "full_he_init"
     # Attention utils
-    use_cache: bool = True
-    attention_implementation: str = "eager"  # or fa2
+    attention_implementation: str = "eager"
     # Finetuning settings
     freeze_vision_encoder: bool = True

opentau 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

opentau 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl