mani-skill-nightly 2025.6.7.814__py3-none-any.whl → 2025.6.13.2242__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. mani_skill/agents/robots/lerobot/__init__.py +0 -0
  2. mani_skill/agents/robots/lerobot/manipulator.py +124 -0
  3. mani_skill/agents/robots/so100/so_100.py +62 -11
  4. mani_skill/assets/robots/so100/README.md +1 -1
  5. mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/so100.urdf → so100.urdf} +19 -19
  6. mani_skill/envs/sapien_env.py +5 -1
  7. mani_skill/envs/sim2real_env.py +63 -61
  8. mani_skill/envs/tasks/digital_twins/__init__.py +1 -0
  9. mani_skill/envs/tasks/digital_twins/base_env.py +23 -18
  10. mani_skill/envs/tasks/digital_twins/so100_arm/__init__.py +1 -0
  11. mani_skill/envs/tasks/digital_twins/so100_arm/grasp_cube.py +423 -0
  12. mani_skill/vector/wrappers/gymnasium.py +1 -1
  13. {mani_skill_nightly-2025.6.7.814.dist-info → mani_skill_nightly-2025.6.13.2242.dist-info}/METADATA +4 -5
  14. {mani_skill_nightly-2025.6.7.814.dist-info → mani_skill_nightly-2025.6.13.2242.dist-info}/RECORD +54 -50
  15. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Base.stl +0 -0
  16. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Base.stl.convex.stl +0 -0
  17. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Base_Motor.stl +0 -0
  18. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Base_Motor.stl.convex.stl +0 -0
  19. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Fixed_Jaw.stl +0 -0
  20. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Fixed_Jaw.stl.convex.stl +0 -0
  21. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Fixed_Jaw_Motor.stl +0 -0
  22. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Fixed_Jaw_Motor.stl.convex.stl +0 -0
  23. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Fixed_Jaw_part1.ply +0 -0
  24. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Fixed_Jaw_part1.ply.convex.stl +0 -0
  25. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Fixed_Jaw_part2.ply +0 -0
  26. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Fixed_Jaw_part2.ply.convex.stl +0 -0
  27. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Lower_Arm.stl +0 -0
  28. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Lower_Arm.stl.convex.stl +0 -0
  29. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Lower_Arm_Motor.stl +0 -0
  30. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Lower_Arm_Motor.stl.convex.stl +0 -0
  31. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Moving_Jaw.stl +0 -0
  32. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Moving_Jaw.stl.convex.stl +0 -0
  33. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Moving_Jaw_part1.ply +0 -0
  34. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Moving_Jaw_part1.ply.convex.stl +0 -0
  35. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Moving_Jaw_part2.ply +0 -0
  36. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Moving_Jaw_part2.ply.convex.stl +0 -0
  37. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Moving_Jaw_part3.ply +0 -0
  38. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Moving_Jaw_part3.ply.convex.stl +0 -0
  39. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Rotation_Pitch.stl +0 -0
  40. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Rotation_Pitch.stl.convex.stl +0 -0
  41. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Rotation_Pitch_Motor.stl +0 -0
  42. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Rotation_Pitch_Motor.stl.convex.stl +0 -0
  43. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Upper_Arm.stl +0 -0
  44. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Upper_Arm.stl.convex.stl +0 -0
  45. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Upper_Arm_Motor.stl +0 -0
  46. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Upper_Arm_Motor.stl.convex.stl +0 -0
  47. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Wrist_Pitch_Roll.stl +0 -0
  48. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Wrist_Pitch_Roll.stl.convex.stl +0 -0
  49. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Wrist_Pitch_Roll_Motor.stl +0 -0
  50. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/meshes → meshes}/Wrist_Pitch_Roll_Motor.stl.convex.stl +0 -0
  51. /mani_skill/assets/robots/so100/{SO_5DOF_ARM100_8j/so100.srdf → so100.srdf} +0 -0
  52. {mani_skill_nightly-2025.6.7.814.dist-info → mani_skill_nightly-2025.6.13.2242.dist-info}/LICENSE +0 -0
  53. {mani_skill_nightly-2025.6.7.814.dist-info → mani_skill_nightly-2025.6.13.2242.dist-info}/WHEEL +0 -0
  54. {mani_skill_nightly-2025.6.7.814.dist-info → mani_skill_nightly-2025.6.13.2242.dist-info}/top_level.txt +0 -0
mani_skill/agents/robots/lerobot/manipulator.py
@@ -0,0 +1,124 @@
+ """
+ Code based on https://github.com/huggingface/lerobot for supporting real robot control via the unified LeRobot interface.
+ """
+
+ import time
+ from typing import List, Optional
+
+ import numpy as np
+ import torch
+
+ from mani_skill.agents.base_real_agent import BaseRealAgent
+ from mani_skill.utils import common
+ from mani_skill.utils.structs.types import Array
+
+ try:
+     from lerobot.common.cameras.camera import Camera
+     from lerobot.common.motors.motors_bus import MotorNormMode
+     from lerobot.common.robots.robot import Robot
+     from lerobot.common.utils.robot_utils import busy_wait
+ except ImportError:
+     pass
+
+
+ class LeRobotRealAgent(BaseRealAgent):
+     """
+     LeRobotRealAgent is a general class for controlling real robots via the LeRobot system. You simply just pass in the Robot instance you create via LeRobot and pass it here to make it work with ManiSkill Sim2Real environment interfaces.
+
+     Args:
+         robot (Robot): The Robot instance you create via LeRobot.
+         use_cached_qpos (bool): Whether to cache the fetched qpos values. If True, the qpos will be
+             read from the cache instead of the real robot when possible. This cache is only invalidated when
+             set_target_qpos or set_target_qvel is called. This can be useful if you want to easily have higher frequency (> 30Hz) control since qpos reading from the robot is
+             currently the slowest part of LeRobot for some of the supported motors.
+     """
+
+     def __init__(self, robot: Robot, use_cached_qpos: bool = True, **kwargs):
+         super().__init__(**kwargs)
+         self._captured_sensor_data = None
+         self.real_robot = robot
+         self.use_cached_qpos = use_cached_qpos
+         self._cached_qpos = None
+         self._motor_keys: List[str] = None
+
+         if self.real_robot.name == "so100_follower":
+             self.real_robot.bus.motors["gripper"].norm_mode = MotorNormMode.DEGREES
+
+     def start(self):
+         self.real_robot.connect()
+
+     def stop(self):
+         self.real_robot.disconnect()
+
+     def set_target_qpos(self, qpos: Array):
+         self._cached_qpos = None
+         qpos = common.to_cpu_tensor(qpos).flatten()
+         qpos = torch.rad2deg(qpos)
+         qpos = {f"{self._motor_keys[i]}.pos": qpos[i] for i in range(len(qpos))}
+         # NOTE (stao): It seems the calibration from LeRobot has some offsets in some joints. We fix reading them here to match the expected behavior
+         if self.real_robot.name == "so100_follower":
+             qpos["elbow_flex.pos"] = qpos["elbow_flex.pos"] + 6.8
+         self.real_robot.send_action(qpos)
+
+     def reset(self, qpos: Array):
+         qpos = common.to_cpu_tensor(qpos)
+         freq = 30
+         target_pos = self.qpos
+         max_rad_per_step = 0.025
+         for _ in range(int(20 * freq)):
+             start_loop_t = time.perf_counter()
+             delta_step = (qpos - target_pos).clip(
+                 min=-max_rad_per_step, max=max_rad_per_step
+             )
+             if np.linalg.norm(delta_step) <= 1e-4:
+                 break
+             target_pos += delta_step
+
+             self.set_target_qpos(target_pos)
+             dt_s = time.perf_counter() - start_loop_t
+             busy_wait(1 / freq - dt_s)
+
+     def capture_sensor_data(self, sensor_names: Optional[List[str]] = None):
+         sensor_obs = dict()
+         cameras: dict[str, Camera] = self.real_robot.cameras
+         if sensor_names is None:
+             sensor_names = list(cameras.keys())
+         for name in sensor_names:
+             data = cameras[name].async_read()
+             # until https://github.com/huggingface/lerobot/issues/860 is resolved we temporarily assume this is RGB data only otherwise need to write a few extra if statements to check
+             # if isinstance(cameras[name], IntelRealSenseCamera):
+             sensor_obs[name] = dict(rgb=(common.to_tensor(data)).unsqueeze(0))
+         self._captured_sensor_data = sensor_obs
+
+     def get_sensor_data(self, sensor_names: Optional[List[str]] = None):
+         if self._captured_sensor_data is None:
+             raise RuntimeError(
+                 "No sensor data captured yet. Please call capture_sensor_data() first."
+             )
+         if sensor_names is None:
+             return self._captured_sensor_data
+         else:
+             return {
+                 k: v for k, v in self._captured_sensor_data.items() if k in sensor_names
+             }
+
+     def get_qpos(self):
+         # NOTE (stao): the slowest part of inference is reading the qpos from the robot. Each time it takes about 5-6 milliseconds, meaning control frequency is capped at 200Hz.
+         # and if you factor in other operations like policy inference etc. the max control frequency is typically more like 30-60 Hz.
+         # Moreover on the rare occassions reading qpos can take 40 milliseconds which causes the control step to fall behind the desired control frequency.
+         if self.use_cached_qpos and self._cached_qpos is not None:
+             return self._cached_qpos.clone()
+         qpos_deg = self.real_robot.bus.sync_read("Present_Position")
+
+         # NOTE (stao): It seems the calibration from LeRobot has some offsets in some joints. We fix reading them here to match the expected behavior
+         if self.real_robot.name == "so100_follower":
+             qpos_deg["elbow_flex"] = qpos_deg["elbow_flex"] - 6.8
+         if self._motor_keys is None:
+             self._motor_keys = list(qpos_deg.keys())
+         qpos_deg = common.flatten_state_dict(qpos_deg)
+         qpos = torch.deg2rad(torch.tensor(qpos_deg)).unsqueeze(0)
+         self._cached_qpos = qpos
+         return qpos
+
+     def get_qvel(self):
+         raise NotImplementedError
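
The class above is designed so that any Robot object built with LeRobot can be dropped into ManiSkill's sim2real tooling. A minimal usage sketch follows; the SO100Follower/SO100FollowerConfig names and their import path come from the LeRobot package and may differ between LeRobot releases, the serial port is a placeholder, and BaseRealAgent may expect additional keyword arguments depending on the ManiSkill version.

# Sketch only: wrap a LeRobot robot for use with ManiSkill's real-robot interfaces.
from lerobot.common.robots.so100_follower import SO100Follower, SO100FollowerConfig  # assumed import path

from mani_skill.agents.robots.lerobot.manipulator import LeRobotRealAgent

robot = SO100Follower(SO100FollowerConfig(port="/dev/ttyACM0"))  # placeholder serial port
real_agent = LeRobotRealAgent(robot=robot, use_cached_qpos=True)

real_agent.start()                     # connects the motor bus and cameras
real_agent.capture_sensor_data()       # grab one frame from every attached camera
images = real_agent.get_sensor_data()  # e.g. {"front": {"rgb": tensor of shape (1, H, W, 3)}}
qpos = real_agent.get_qpos()           # joint positions in radians, shape (1, n_joints)
real_agent.stop()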
mani_skill/agents/robots/so100/so_100.py
@@ -18,7 +18,7 @@ from mani_skill.utils.structs.pose import Pose
  @register_agent()
  class SO100(BaseAgent):
      uid = "so100"
-     urdf_path = f"{PACKAGE_ASSET_DIR}/robots/so100/SO_5DOF_ARM100_8j/so100.urdf"
+     urdf_path = f"{PACKAGE_ASSET_DIR}/robots/so100/so100.urdf"
      urdf_config = dict(
          _materials=dict(
              gripper=dict(static_friction=2, dynamic_friction=2, restitution=0.0)
@@ -31,11 +31,7 @@ class SO100(BaseAgent):

      keyframes = dict(
          rest=Keyframe(
-             qpos=np.array([0, 2.2, 3.017, -0.25, 0, 0.6044]),
-             pose=sapien.Pose(q=euler2quat(0, 0, np.pi / 2)),
-         ),
-         elevated_turn=Keyframe(
-             qpos=np.array([0, 2.2, 2.75, -0.25, -np.pi / 2, 1.0]),
+             qpos=np.array([0, -1.5708, 1.5708, 0.66, 0, -1.1]),
              pose=sapien.Pose(q=euler2quat(0, 0, np.pi / 2)),
          ),
          zero=Keyframe(
@@ -44,6 +40,17 @@ class SO100(BaseAgent):
          ),
      )

+     arm_joint_names = [
+         "shoulder_pan",
+         "shoulder_lift",
+         "elbow_flex",
+         "wrist_flex",
+         "wrist_roll",
+     ]
+     gripper_joint_names = [
+         "gripper",
+     ]
+
      @property
      def _controller_configs(self):
          pd_joint_pos = PDJointPosControllerConfig(
@@ -56,16 +63,19 @@ class SO100(BaseAgent):
              normalize_action=False,
          )

+         # max delta permitted of 0.05 since the robot is not as accurate as more expensive arms
+         # and moving too fast can cause the robot to shake too much and damage the hardware
          pd_joint_delta_pos = PDJointPosControllerConfig(
              [joint.name for joint in self.robot.active_joints],
-             -0.1,
-             0.1,
+             [-0.05, -0.05, -0.05, -0.05, -0.05, -0.2],
+             [0.05, 0.05, 0.05, 0.05, 0.05, 0.2],
              stiffness=[1e3] * 6,
              damping=[1e2] * 6,
              force_limit=100,
              use_delta=True,
              use_target=False,
          )
+
          pd_joint_target_delta_pos = copy.deepcopy(pd_joint_delta_pos)
          pd_joint_target_delta_pos.use_target = True

@@ -122,8 +132,49 @@ class SO100(BaseAgent):
          )
          return torch.logical_and(lflag, rflag)

+     def _after_loading_articulation(self):
+         super()._after_loading_articulation()
+         # self.set_colors()
+         self.finger1_link = self.robot.links_map["Fixed_Jaw"]
+         self.finger2_link = self.robot.links_map["Moving_Jaw"]
+         self.finger1_tip = self.robot.links_map["Fixed_Jaw_tip"]
+         self.finger2_tip = self.robot.links_map["Moving_Jaw_tip"]
+
+     @property
+     def tcp_pos(self):
+         # computes the tool center point as the mid point between the the fixed and moving jaw's tips
+         return (self.finger1_tip.pose.p + self.finger2_tip.pose.p) / 2
+
+     def is_grasping(self, object: Actor, min_force=0.5, max_angle=110):
+         """Check if the robot is grasping an object
+
+         Args:
+             object (Actor): The object to check if the robot is grasping
+             min_force (float, optional): Minimum force before the robot is considered to be grasping the object in Newtons. Defaults to 0.5.
+             max_angle (int, optional): Maximum angle of contact to consider grasping. Defaults to 85.
+         """
+         l_contact_forces = self.scene.get_pairwise_contact_forces(
+             self.finger1_link, object
+         )
+         r_contact_forces = self.scene.get_pairwise_contact_forces(
+             self.finger2_link, object
+         )
+         lforce = torch.linalg.norm(l_contact_forces, axis=1)
+         rforce = torch.linalg.norm(r_contact_forces, axis=1)
+
+         # direction to open the gripper
+         ldirection = self.finger1_link.pose.to_transformation_matrix()[..., :3, 1]
+         rdirection = -self.finger2_link.pose.to_transformation_matrix()[..., :3, 1]
+         langle = common.compute_angle_between(ldirection, l_contact_forces)
+         rangle = common.compute_angle_between(rdirection, r_contact_forces)
+         lflag = torch.logical_and(
+             lforce >= min_force, torch.rad2deg(langle) <= max_angle
+         )
+         rflag = torch.logical_and(
+             rforce >= min_force, torch.rad2deg(rangle) <= max_angle
+         )
+         return torch.logical_and(lflag, rflag)
+
      def is_static(self, threshold=0.2):
-         qvel = self.robot.get_qvel()[
-             :, :-2
-         ]  # exclude the gripper joint and gripper rotation joint.
+         qvel = self.robot.get_qvel()[:, :-1]  # exclude the gripper joint
          return torch.max(torch.abs(qvel), 1)[0] <= threshold
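
With the change above, a single pd_joint_delta_pos action can move each of the five arm joints by at most 0.05 rad and the gripper by at most 0.2 rad per control step. A back-of-the-envelope sketch of that clipping (a simplified stand-in for illustration, not ManiSkill's actual controller code):

import numpy as np

# Per-joint delta limits from the new controller config: five arm joints, then the gripper.
delta_low = np.array([-0.05, -0.05, -0.05, -0.05, -0.05, -0.2])
delta_high = np.array([0.05, 0.05, 0.05, 0.05, 0.05, 0.2])

def apply_delta_action(qpos: np.ndarray, action: np.ndarray) -> np.ndarray:
    # Clip the requested joint deltas to the limits and return the new target joint positions.
    return qpos + np.clip(action, delta_low, delta_high)

rest_qpos = np.array([0.0, -1.5708, 1.5708, 0.66, 0.0, -1.1])  # the new "rest" keyframe
target = apply_delta_action(rest_qpos, np.array([0.2, 0.0, 0.0, 0.0, 0.0, 0.3]))
# shoulder_pan moves by only 0.05 rad and the gripper by only 0.2 rad this step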
mani_skill/assets/robots/so100/README.md
@@ -7,4 +7,4 @@ Changes made:
  - Fixed joint tags from continuous to revolute which permit joint limits
  - Fixed joint directions and orientations to match the real robot's joints
  - removed spaces in link names
- - manual decomposition of gripper link collision meshes into simpler meshes
+ - manual decomposition of gripper link collision meshes into simpler meshes
mani_skill/assets/robots/so100/so100.urdf
@@ -124,7 +124,7 @@
      </collision>
    </link>
    <joint
-     name="Rotation"
+     name="shoulder_pan"
      type="revolute">
      <origin
        xyz="0 -0.0452 0.0165"
@@ -137,8 +137,8 @@
        xyz="0 -1 0" />
      <!-- note for the so100 arm there is no well defined effort/velocity limits at the moment -->
      <limit
-       lower="-2.1"
-       upper="2.1"
+       lower="-2.0"
+       upper="2.0"
        effort="0"
        velocity="0" />
    </joint>
@@ -200,20 +200,20 @@
      </collision>
    </link>
    <joint
-     name="Pitch"
+     name="shoulder_lift"
      type="revolute">
      <origin
        xyz="0 0.1025 0.0306"
-       rpy="1.5708 0 0" />
+       rpy="0 0 0" />
      <parent
        link="Rotation_Pitch" />
      <child
        link="Upper_Arm" />
      <axis
-       xyz="-1 0 0" />
+       xyz="1 0 0" />
      <limit
-       lower="-0.1"
-       upper="3.45"
+       lower="-1.5708"
+       upper="1.5708"
        effort="0"
        velocity="0" />
    </joint>
@@ -275,11 +275,11 @@
      </collision>
    </link>
    <joint
-     name="Elbow"
+     name="elbow_flex"
      type="revolute">
      <origin
        xyz="0 0.11257 0.028"
-       rpy="-1.5708 0 0" />
+       rpy="0 0 0" />
      <parent
        link="Upper_Arm" />
      <child
@@ -287,8 +287,8 @@
      <axis
        xyz="1 0 0" />
      <limit
-       lower="-0.2"
-       upper="3.14159"
+       lower="-1.5708"
+       upper="1.5708"
        effort="0"
        velocity="0" />
    </joint>
@@ -350,7 +350,7 @@
      </collision>
    </link>
    <joint
-     name="Wrist_Pitch"
+     name="wrist_flex"
      type="revolute">
      <origin
        xyz="0 0.0052 0.1349"
@@ -425,7 +425,7 @@
      </collision>
    </link>
    <joint
-     name="Wrist_Roll"
+     name="wrist_roll"
      type="revolute">
      <origin
        xyz="0 -0.0601 0"
@@ -435,7 +435,7 @@
      <child
        link="Fixed_Jaw" />
      <axis
-       xyz="0 -1 0" />
+       xyz="0 1 0" />
      <limit
        lower="-3.14159"
        upper="3.14159"
@@ -498,11 +498,11 @@
      </collision>
    </link>
    <joint
-     name="Jaw"
+     name="gripper"
      type="revolute">
      <origin
        xyz="-0.0202 -0.0244 0"
-       rpy="3.1416 0 3.33" />
+       rpy="0 3.14159 -0.9" />
      <parent
        link="Fixed_Jaw" />
      <child
@@ -510,8 +510,8 @@
      <axis
        xyz="0 0 1" />
      <limit
-       lower="0"
-       upper="1.7"
+       lower="-1.1"
+       upper="1.1"
        effort="0"
        velocity="0" />
    </joint>
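
The URDF renames every joint to match the names used by the agent and LeRobot (Rotation -> shoulder_pan, Pitch -> shoulder_lift, Elbow -> elbow_flex, Wrist_Pitch -> wrist_flex, Wrist_Roll -> wrist_roll, Jaw -> gripper) and tightens several limits. A quick sanity-check sketch that parses the packaged file and prints the revolute joints; it assumes PACKAGE_ASSET_DIR is importable from the mani_skill package root, as it is used in so_100.py:

import xml.etree.ElementTree as ET

from mani_skill import PACKAGE_ASSET_DIR  # assumption: exposed at the package root

urdf_path = f"{PACKAGE_ASSET_DIR}/robots/so100/so100.urdf"
root = ET.parse(urdf_path).getroot()
for joint in root.iter("joint"):
    if joint.get("type") != "revolute":
        continue
    axis = joint.find("axis")
    limit = joint.find("limit")
    print(
        joint.get("name"),
        axis.get("xyz") if axis is not None else "(default axis)",
        limit.get("lower") if limit is not None else None,
        limit.get("upper") if limit is not None else None,
    )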
mani_skill/envs/sapien_env.py
@@ -1224,7 +1224,11 @@ class BaseEnv(gym.Env):
          """
          Get environment state dictionary. Override to include task information (e.g., goal)
          """
-         return self.scene.get_sim_state()
+         sim_state = self.scene.get_sim_state()
+         controller_state = self.agent.controller.get_state()
+         if len(controller_state) > 0:
+             sim_state["controller"] = controller_state
+         return sim_state

      def get_state(self):
          """
mani_skill/envs/sim2real_env.py
@@ -26,7 +26,6 @@ class Sim2RealEnv(gym.Env):
      Args:
          sim_env (BaseEnv): The simulation environment that the real environment should be aligned with.
          agent (BaseRealAgent): The real robot agent to control. This must be an object that inherits from BaseRealAgent.
-         obs_mode (str): The observation mode to use.
          real_reset_function (Optional[Callable[[Sim2RealEnv, Optional[int], Optional[dict]], None]]): The function to call to reset the real robot. By default this is None and we use a default reset function which
              calls the simulation reset function and resets the agent/robot qpos to whatever the simulation reset function sampled, then prompts the user to press enter before continuing running.
              This function is given access to the Sim2RealEnv instance, the given seed and options dictionary similar to a standard gym reset function. The default function and example is shown below:
@@ -38,38 +37,40 @@ class Sim2RealEnv(gym.Env):
                  self.agent.reset(qpos=self.base_sim_env.agent.robot.qpos.cpu().flatten())
                  input("Press enter if the environment is reset")

-         sensor_data_processing_function (Optional[Callable[[Dict], Dict]]): The function to call to process the sensor data returned by the BaseRealAgent.get_sensor_data function.
+         sensor_data_preprocessing_function (Optional[Callable[[Dict], Dict]]): The function to call to process the sensor data returned by the BaseRealAgent.get_sensor_data function.
              By default this is None and we use a default processing function which does the following for each sensor type:
              - Camera: Perform a center crop of the real sensor image (rgb or depth) to have the same aspect ratio as the simulation sensor image. Then resize the image to the simulation sensor image shape using cv2.resize
+
+         skip_data_checks (bool): If False, this will reset the sim and real environments once to check if observations are aligned. It is recommended
+             to keep this False.
+         control_freq (Optional[int]): The control frequency of the real robot. By default this is None and we use the same control frequency as the simulation environment.
+
      """

-     metadata = {"render_modes": ["human", "rgb_array", "sensors", "all"]}
+     metadata = {"render_modes": ["rgb_array", "sensors", "all"]}

      def __init__(
          self,
-         sim_env: BaseEnv,
+         sim_env: gym.Env,
          agent: BaseRealAgent,
-         obs_mode: str = "rgb",
          real_reset_function: Optional[
             Callable[["Sim2RealEnv", Optional[int], Optional[dict]], None]
         ] = None,
-         sensor_data_processing_function: Optional[Callable[[Dict], Dict]] = None,
-         # obs_mode: Optional[str] = None,
-         reward_mode: Optional[str] = "none",
-         # control_mode: Optional[str] = None,
+         sensor_data_preprocessing_function: Optional[Callable[[Dict], Dict]] = None,
          render_mode: Optional[str] = "sensors",
-         # robot_uids: BaseRealAgent = None,
+         skip_data_checks: bool = False,
+         control_freq: Optional[int] = None,
      ):
          self.sim_env = sim_env
          self.num_envs = 1
          assert (
-             self.sim_env.backend.sim_backend == "physx_cpu"
+             self.sim_env.unwrapped.backend.sim_backend == "physx_cpu"
          ), "For the Sim2RealEnv we expect the simulation to be using the physx_cpu simulation backend currently in order to correctly align the robot"

          # copy over some sim parameters/settings
-         self.device = self.sim_env.backend.device
-         self.sim_freq = self.sim_env.sim_freq
-         self.control_freq = self.sim_env.control_freq
+         self.device = self.sim_env.unwrapped.backend.device
+         self.sim_freq = self.sim_env.unwrapped.sim_freq
+         self.control_freq = control_freq or self.sim_env.unwrapped.control_freq

          # control timing
          self.control_dt = 1 / self.control_freq
@@ -78,6 +79,8 @@ class Sim2RealEnv(gym.Env):
          self.base_sim_env: BaseEnv = sim_env.unwrapped
          """the unwrapped simulation environment"""

+         obs_mode = self.base_sim_env.obs_mode
+         reward_mode = self.base_sim_env.reward_mode
          self._reward_mode = reward_mode
          self._obs_mode = obs_mode
          self.reward_mode = reward_mode
@@ -141,52 +144,15 @@ class Sim2RealEnv(gym.Env):
          # TODO create real controller class based on sim one?? Or can we just fake the data
          self.agent._sim_agent.controller.qpos

-         self.sensor_data_processing_function = sensor_data_processing_function
-
-         # automatically try and generate a visual observation processing function to align a real camera with the simulated camera
-         if sensor_data_processing_function is None:
-             camera_sensor_names = [
-                 name
-                 for name in self._sensor_names
-                 if isinstance(self.base_sim_env.scene.sensors[name], Camera)
-             ]
-
-             def sensor_data_processing_function(sensor_data: Dict):
-                 import cv2
-
-                 for sensor_name in camera_sensor_names:
-                     sim_sensor_cfg = self.base_sim_env._sensor_configs[sensor_name]
-                     assert isinstance(sim_sensor_cfg, CameraConfig)
-                     target_h, target_w = sim_sensor_cfg.height, sim_sensor_cfg.width
-                     real_sensor_data = sensor_data[sensor_name]
-
-                     # crop to same aspect ratio
-                     for key in ["rgb", "depth"]:
-                         if key in real_sensor_data:
-                             img = real_sensor_data[key][0].numpy()
-                             xy_res = img.shape[:2]
-                             crop_res = np.min(xy_res)
-                             cutoff = (np.max(xy_res) - crop_res) // 2
-                             if xy_res[0] == xy_res[1]:
-                                 pass
-                             elif np.argmax(xy_res) == 0:
-                                 img = img[cutoff:-cutoff, :, :]
-                             else:
-                                 img = img[:, cutoff:-cutoff, :]
-                             real_sensor_data[key] = common.to_tensor(
-                                 cv2.resize(img, (target_w, target_h))
-                             ).unsqueeze(0)
-
-                     sensor_data[sensor_name] = real_sensor_data
-                 return sensor_data
-
-         self.sensor_data_processing_function = sensor_data_processing_function
-
-         sample_sim_obs, _ = self.sim_env.reset()
-         sample_real_obs, _ = self.reset()
-
-         # perform checks to avoid errors in alignments
-         self._check_observations(sample_sim_obs, sample_real_obs)
+         if sensor_data_preprocessing_function is not None:
+             self.preprocess_sensor_data = sensor_data_preprocessing_function
+
+         if not skip_data_checks:
+             sample_sim_obs, _ = self.sim_env.reset()
+             sample_real_obs, _ = self.reset()
+
+             # perform checks to avoid errors in observation space alignment
+             self._check_observations(sample_sim_obs, sample_real_obs)

      @property
      def elapsed_steps(self):
@@ -289,7 +255,7 @@ class Sim2RealEnv(gym.Env):
          data = self.agent.get_sensor_data(self._sensor_names)
          # observation data needs to be processed to be the same shape in simulation
          # default strategy is to do a center crop to the same shape as simulation and then resize image to the same shape as simulation
-         data = self.sensor_data_processing_function(data)
+         data = self.preprocess_sensor_data(data)
          return data

      def _get_obs_with_sensor_data(
@@ -379,3 +345,39 @@ class Sim2RealEnv(gym.Env):

      def close(self):
          self.agent.stop()
+
+     def preprocess_sensor_data(
+         self, sensor_data: Dict, sensor_names: Optional[List[str]] = None
+     ):
+         import cv2
+
+         if sensor_names is None:
+             sensor_names = list(sensor_data.keys())
+         for sensor_name in sensor_names:
+             sim_sensor_cfg = self.base_sim_env._sensor_configs[sensor_name]
+             assert isinstance(sim_sensor_cfg, CameraConfig)
+             target_h, target_w = sim_sensor_cfg.height, sim_sensor_cfg.width
+             real_sensor_data = sensor_data[sensor_name]
+
+             # crop to same aspect ratio
+             for key in ["rgb", "depth"]:
+                 if key in real_sensor_data:
+                     img = real_sensor_data[key][0].numpy()
+                     xy_res = img.shape[:2]
+                     crop_res = np.min(xy_res)
+                     cutoff = (np.max(xy_res) - crop_res) // 2
+                     if xy_res[0] == xy_res[1]:
+                         pass
+                     elif np.argmax(xy_res) == 0:
+                         img = img[cutoff:-cutoff, :, :]
+                     else:
+                         img = img[:, cutoff:-cutoff, :]
+                     real_sensor_data[key] = common.to_tensor(
+                         cv2.resize(img, (target_w, target_h))
+                     ).unsqueeze(0)
+
+             sensor_data[sensor_name] = real_sensor_data
+         return sensor_data
+
+     def __getattr__(self, name):
+         return getattr(self.base_sim_env, name)
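
After these changes Sim2RealEnv no longer takes obs_mode or reward_mode; both are inherited from the wrapped simulation environment, the control frequency can be overridden, and the one-time observation alignment check can be skipped. A usage sketch, assuming a registered task id (PushCube-v1 is only illustrative) and a real_agent such as the LeRobotRealAgent constructed earlier:

import gymnasium as gym

import mani_skill.envs  # noqa: F401  (registers the ManiSkill environments)
from mani_skill.envs.sim2real_env import Sim2RealEnv

sim_env = gym.make("PushCube-v1", obs_mode="rgb", sim_backend="physx_cpu")
real_env = Sim2RealEnv(sim_env=sim_env, agent=real_agent, control_freq=15)

obs, _ = real_env.reset(seed=0)  # also resets/aligns the real robot via the reset function
obs, reward, terminated, truncated, info = real_env.step(real_env.action_space.sample())
real_env.close()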
mani_skill/envs/tasks/digital_twins/__init__.py
@@ -1 +1,2 @@
  from .bridge_dataset_eval import *
+ from .so100_arm import *
mani_skill/envs/tasks/digital_twins/base_env.py
@@ -96,24 +96,29 @@ class BaseDigitalTwinEnv(BaseEnv):
          super()._after_reconfigure(options)
          # after reconfiguration in CPU/GPU sim we have initialized all ids of objects in the scene.
          # and can now get the list of segmentation ids to keep
-         per_scene_ids = []
-         for object in self._objects_to_remove_from_greenscreen:
-             per_scene_ids.append(object.per_scene_id)
-         self._segmentation_ids_to_keep = torch.unique(torch.concatenate(per_scene_ids))
-         self._objects_to_remove_from_greenscreen = []
-
-         # load the overlay images
-         for camera_name in self.rgb_overlay_paths.keys():
-             sensor = self._sensor_configs[camera_name]
-             if isinstance(sensor, CameraConfig):
-                 if isinstance(self._rgb_overlay_images[camera_name], torch.Tensor):
-                     continue
-                 rgb_overlay_img = cv2.resize(
-                     self._rgb_overlay_images[camera_name], (sensor.width, sensor.height)
-                 )
-                 self._rgb_overlay_images[camera_name] = common.to_tensor(
-                     rgb_overlay_img, device=self.device
-                 )
+
+         if self.rgb_overlay_mode != "none":
+             per_scene_ids = []
+             for object in self._objects_to_remove_from_greenscreen:
+                 per_scene_ids.append(object.per_scene_id)
+             self._segmentation_ids_to_keep = torch.unique(
+                 torch.concatenate(per_scene_ids)
+             )
+             self._objects_to_remove_from_greenscreen = []
+
+             # load the overlay images
+             for camera_name in self.rgb_overlay_paths.keys():
+                 sensor = self._sensor_configs[camera_name]
+                 if isinstance(sensor, CameraConfig):
+                     if isinstance(self._rgb_overlay_images[camera_name], torch.Tensor):
+                         continue
+                     rgb_overlay_img = cv2.resize(
+                         self._rgb_overlay_images[camera_name],
+                         (sensor.width, sensor.height),
+                     )
+                     self._rgb_overlay_images[camera_name] = common.to_tensor(
+                         rgb_overlay_img, device=self.device
+                     )

      def _green_sceen_rgb(self, rgb, segmentation, overlay_img):
          """returns green screened RGB data given a batch of RGB and segmentation images and one overlay image"""
mani_skill/envs/tasks/digital_twins/so100_arm/__init__.py
@@ -0,0 +1 @@
+ from .grasp_cube import SO100GraspCubeEnv