mani-skill-nightly 2025.4.5.813__py3-none-any.whl → 2025.4.5.2036__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. mani_skill/agents/base_real_agent.py +202 -0
  2. mani_skill/agents/controllers/base_controller.py +13 -2
  3. mani_skill/agents/controllers/passive_controller.py +2 -0
  4. mani_skill/agents/controllers/pd_joint_pos.py +2 -0
  5. mani_skill/agents/controllers/pd_joint_pos_vel.py +2 -0
  6. mani_skill/agents/controllers/pd_joint_vel.py +2 -0
  7. mani_skill/agents/robots/__init__.py +2 -0
  8. mani_skill/agents/robots/koch/__init__.py +1 -0
  9. mani_skill/agents/robots/koch/koch.py +168 -0
  10. mani_skill/agents/robots/koch/koch_real.py +5 -0
  11. mani_skill/agents/robots/so100/__init__.py +1 -0
  12. mani_skill/agents/robots/so100/so_100.py +118 -0
  13. mani_skill/agents/robots/so100/so_100_real.py +5 -0
  14. mani_skill/assets/robots/koch/LICENSE +507 -0
  15. mani_skill/assets/robots/koch/README.md +8 -0
  16. mani_skill/assets/robots/koch/follower_arm_v1.1.srdf +9 -0
  17. mani_skill/assets/robots/koch/follower_arm_v1.1.urdf +635 -0
  18. mani_skill/assets/robots/koch/meshes/base_link.glb +0 -0
  19. mani_skill/assets/robots/koch/meshes/base_link.stl +0 -0
  20. mani_skill/assets/robots/koch/meshes/centered_base_link.stl +0 -0
  21. mani_skill/assets/robots/koch/meshes/gripper.glb +0 -0
  22. mani_skill/assets/robots/koch/meshes/gripper.stl +0 -0
  23. mani_skill/assets/robots/koch/meshes/gripper_collision_part_1.glb +0 -0
  24. mani_skill/assets/robots/koch/meshes/gripper_collision_part_2.glb +0 -0
  25. mani_skill/assets/robots/koch/meshes/gripper_collision_part_3.glb +0 -0
  26. mani_skill/assets/robots/koch/meshes/link_1.stl +0 -0
  27. mani_skill/assets/robots/koch/meshes/link_1_motor.glb +0 -0
  28. mani_skill/assets/robots/koch/meshes/link_2.stl +0 -0
  29. mani_skill/assets/robots/koch/meshes/link_2_collision_chassis.glb +0 -0
  30. mani_skill/assets/robots/koch/meshes/link_2_collision_motor.glb +0 -0
  31. mani_skill/assets/robots/koch/meshes/link_2_motor.glb +0 -0
  32. mani_skill/assets/robots/koch/meshes/link_2_rotation_connector.glb +0 -0
  33. mani_skill/assets/robots/koch/meshes/link_2_rotation_connector.stl +0 -0
  34. mani_skill/assets/robots/koch/meshes/link_3.stl +0 -0
  35. mani_skill/assets/robots/koch/meshes/link_3_collision_chassis_part_1.glb +0 -0
  36. mani_skill/assets/robots/koch/meshes/link_3_collision_chassis_part_2.glb +0 -0
  37. mani_skill/assets/robots/koch/meshes/link_3_collision_chassis_part_3.glb +0 -0
  38. mani_skill/assets/robots/koch/meshes/link_3_collision_chassis_part_4.glb +0 -0
  39. mani_skill/assets/robots/koch/meshes/link_3_collision_chassis_part_5.glb +0 -0
  40. mani_skill/assets/robots/koch/meshes/link_3_collision_motor.glb +0 -0
  41. mani_skill/assets/robots/koch/meshes/link_3_motor.glb +0 -0
  42. mani_skill/assets/robots/koch/meshes/link_3_motor.stl +0 -0
  43. mani_skill/assets/robots/koch/meshes/link_3_part.glb +0 -0
  44. mani_skill/assets/robots/koch/meshes/link_3_part.stl +0 -0
  45. mani_skill/assets/robots/koch/meshes/link_4.stl +0 -0
  46. mani_skill/assets/robots/koch/meshes/link_4_collision_chassis_part_1.glb +0 -0
  47. mani_skill/assets/robots/koch/meshes/link_4_collision_chassis_part_2.glb +0 -0
  48. mani_skill/assets/robots/koch/meshes/link_4_collision_chassis_part_3.glb +0 -0
  49. mani_skill/assets/robots/koch/meshes/link_4_collision_motor.glb +0 -0
  50. mani_skill/assets/robots/koch/meshes/link_4_motor.glb +0 -0
  51. mani_skill/assets/robots/koch/meshes/link_4_part.glb +0 -0
  52. mani_skill/assets/robots/koch/meshes/link_5.stl +0 -0
  53. mani_skill/assets/robots/koch/meshes/link_5_motor.glb +0 -0
  54. mani_skill/assets/robots/koch/meshes/link_5_part.glb +0 -0
  55. mani_skill/assets/robots/koch/meshes/link_6.stl +0 -0
  56. mani_skill/assets/robots/koch/meshes/link_6_collision_part_2.glb +0 -0
  57. mani_skill/assets/robots/koch/meshes/link_6_collision_part_3.glb +0 -0
  58. mani_skill/assets/robots/koch/meshes/link_6_collision_part_4.glb +0 -0
  59. mani_skill/assets/robots/koch/meshes/link_6_motor.glb +0 -0
  60. mani_skill/assets/robots/koch/meshes/link_6_part.glb +0 -0
  61. mani_skill/assets/robots/so100/LICENSE +201 -0
  62. mani_skill/assets/robots/so100/README.md +10 -0
  63. mani_skill/assets/robots/so100/SO_5DOF_ARM100_8j/meshes/Base.STL +0 -0
  64. mani_skill/assets/robots/so100/SO_5DOF_ARM100_8j/meshes/Fixed_Jaw.STL +0 -0
  65. mani_skill/assets/robots/so100/SO_5DOF_ARM100_8j/meshes/Fixed_Jaw_part1.ply +0 -0
  66. mani_skill/assets/robots/so100/SO_5DOF_ARM100_8j/meshes/Fixed_Jaw_part2.ply +0 -0
  67. mani_skill/assets/robots/so100/SO_5DOF_ARM100_8j/meshes/Lower_Arm.STL +0 -0
  68. mani_skill/assets/robots/so100/SO_5DOF_ARM100_8j/meshes/Moving Jaw.STL +0 -0
  69. mani_skill/assets/robots/so100/SO_5DOF_ARM100_8j/meshes/Moving_Jaw_part1.ply +0 -0
  70. mani_skill/assets/robots/so100/SO_5DOF_ARM100_8j/meshes/Moving_Jaw_part2.ply +0 -0
  71. mani_skill/assets/robots/so100/SO_5DOF_ARM100_8j/meshes/Moving_Jaw_part3.ply +0 -0
  72. mani_skill/assets/robots/so100/SO_5DOF_ARM100_8j/meshes/Rotation_Pitch.STL +0 -0
  73. mani_skill/assets/robots/so100/SO_5DOF_ARM100_8j/meshes/Upper_Arm.STL +0 -0
  74. mani_skill/assets/robots/so100/SO_5DOF_ARM100_8j/meshes/Wrist_Pitch_Roll.STL +0 -0
  75. mani_skill/assets/robots/so100/SO_5DOF_ARM100_8j/original.srdf +8 -0
  76. mani_skill/assets/robots/so100/SO_5DOF_ARM100_8j/original.urdf +470 -0
  77. mani_skill/envs/sapien_env.py +70 -9
  78. mani_skill/envs/sim2real_env.py +381 -0
  79. mani_skill/envs/tasks/digital_twins/base_env.py +74 -74
  80. mani_skill/envs/tasks/digital_twins/bridge_dataset_eval/base_env.py +6 -0
  81. mani_skill/envs/tasks/digital_twins/bridge_dataset_eval/put_on_in_scene.py +14 -1
  82. mani_skill/envs/utils/randomization/__init__.py +1 -0
  83. mani_skill/envs/utils/randomization/camera.py +60 -0
  84. mani_skill/examples/demo_robot.py +1 -0
  85. mani_skill/utils/sapien_utils.py +7 -6
  86. mani_skill/utils/structs/articulation.py +44 -18
  87. {mani_skill_nightly-2025.4.5.813.dist-info → mani_skill_nightly-2025.4.5.2036.dist-info}/METADATA +1 -1
  88. {mani_skill_nightly-2025.4.5.813.dist-info → mani_skill_nightly-2025.4.5.2036.dist-info}/RECORD +91 -19
  89. {mani_skill_nightly-2025.4.5.813.dist-info → mani_skill_nightly-2025.4.5.2036.dist-info}/LICENSE +0 -0
  90. {mani_skill_nightly-2025.4.5.813.dist-info → mani_skill_nightly-2025.4.5.2036.dist-info}/WHEEL +0 -0
  91. {mani_skill_nightly-2025.4.5.813.dist-info → mani_skill_nightly-2025.4.5.2036.dist-info}/top_level.txt +0 -0
mani_skill/envs/sim2real_env.py
@@ -0,0 +1,381 @@
+ import time
+ from typing import Any, Callable, Dict, List, Optional
+
+ import gymnasium as gym
+ import numpy as np
+ import torch
+
+ from mani_skill.agents.base_real_agent import BaseRealAgent
+ from mani_skill.envs.sapien_env import BaseEnv
+ from mani_skill.sensors.camera import Camera, CameraConfig
+ from mani_skill.utils import common
+ from mani_skill.utils.logging_utils import logger
+
+
+ class Sim2RealEnv(gym.Env):
+     """
+     Sim2RealEnv is a class that lets you interface with a real robot and align the real robot and environment with a simulation environment. It tries to ensure the action and observation space
+     are the exact same in the real and simulation environments. Any wrappers you apply to the simulation environment are also used in the Sim2RealEnv automatically.
+
+     There are some caveats in which you may need to override this class / write your own code instead:
+
+     - If you use privileged features in the simulation environment like an object's pose then we cannot retrieve those poses in the real environment. You can for example override the `_get_obs_extra` function to compute those values in the real environment via a perception pipeline.
+
+     - While we align controllers and observation shapes/ordering as much as possible, there can still be distribution shifts between the simulation and real environment. These can include vision gaps (sim images looking not like the real world) and sensor biases and noise.
+
+     Args:
+         sim_env (BaseEnv): The simulation environment that the real environment should be aligned with.
+         agent (BaseRealAgent): The real robot agent to control. This must be an object that inherits from BaseRealAgent.
+         obs_mode (str): The observation mode to use.
+         real_reset_function (Optional[Callable[[Sim2RealEnv, Optional[int], Optional[dict]], None]]): The function to call to reset the real robot. By default this is None and we use a default reset function which
+             calls the simulation reset function and resets the agent/robot qpos to whatever the simulation reset function sampled, then prompts the user to press enter before continuing running.
+             This function is given access to the Sim2RealEnv instance, the given seed and options dictionary similar to a standard gym reset function. The default function and example is shown below:
+
+             .. code-block:: python
+
+                 def real_reset_function(self, seed=None, options=None):
+                     self.sim_env.reset(seed=seed, options=options)
+                     self.agent.reset(qpos=self.base_sim_env.agent.robot.qpos.cpu().flatten())
+                     input("Press enter if the environment is reset")
+
+         sensor_data_processing_function (Optional[Callable[[Dict], Dict]]): The function to call to process the sensor data returned by the BaseRealAgent.get_sensor_data function.
+             By default this is None and we use a default processing function which does the following for each sensor type:
+             - Camera: Perform a center crop of the real sensor image (rgb or depth) to have the same aspect ratio as the simulation sensor image. Then resize the image to the simulation sensor image shape using cv2.resize
+     """
+
+     metadata = {"render_modes": ["human", "rgb_array", "sensors", "all"]}
+
+     def __init__(
+         self,
+         sim_env: BaseEnv,
+         agent: BaseRealAgent,
+         obs_mode: str = "rgb",
+         real_reset_function: Optional[
+             Callable[["Sim2RealEnv", Optional[int], Optional[dict]], None]
+         ] = None,
+         sensor_data_processing_function: Optional[Callable[[Dict], Dict]] = None,
+         # obs_mode: Optional[str] = None,
+         reward_mode: Optional[str] = "none",
+         # control_mode: Optional[str] = None,
+         render_mode: Optional[str] = "sensors",
+         # robot_uids: BaseRealAgent = None,
+     ):
+         self.sim_env = sim_env
+         self.num_envs = 1
+         assert (
+             self.sim_env.backend.sim_backend == "physx_cpu"
+         ), "For the Sim2RealEnv we expect the simulation to be using the physx_cpu simulation backend currently in order to correctly align the robot"
+
+         # copy over some sim parameters/settings
+         self.device = self.sim_env.backend.device
+         self.sim_freq = self.sim_env.sim_freq
+         self.control_freq = self.sim_env.control_freq
+
+         # control timing
+         self.control_dt = 1 / self.control_freq
+         self.last_control_time: Optional[float] = None
+
+         self.base_sim_env: BaseEnv = sim_env.unwrapped
+         """the unwrapped simulation environment"""
+
+         self._reward_mode = reward_mode
+         self._obs_mode = obs_mode
+         self.reward_mode = reward_mode
+         self.obs_mode = obs_mode
+         self.obs_mode_struct = self.base_sim_env.obs_mode_struct
+         self.render_mode = render_mode
+
+         self._elapsed_steps = torch.zeros((1,), dtype=torch.int32)
+
+         # setup spaces
+         self._orig_single_action_space = self.base_sim_env._orig_single_action_space
+         self.action_space = self.sim_env.action_space
+         self.observation_space = self.sim_env.observation_space
+
+         # setup step and reset functions and handle wrappers for the user
+
+         def default_real_reset_function(self: Sim2RealEnv, seed=None, options=None):
+             self.sim_env.reset(seed=seed, options=options)
+             self.agent.reset(qpos=self.base_sim_env.agent.robot.qpos.cpu().flatten())
+             input("Press enter if the environment is reset")
+
+         self.real_reset_function = real_reset_function or default_real_reset_function
+
+         class RealEnvStepReset(gym.Env):
+             def step(dummy_self, action):
+                 ret = self.base_sim_env.__class__.step(self, action)
+                 return ret
+
+             def render(dummy_self):
+                 return self.render()
+
+             def reset(dummy_self, seed=None, options=None):
+                 # TODO: reset controller/agent
+                 return self.get_obs(), {"reconfigure": False}
+
+             @property
+             def unwrapped(dummy_self):
+                 # reference the Sim2RealEnv instance
+                 return self
+
+         cur_env = self.sim_env
+         wrappers: List[gym.Wrapper] = []
+         while isinstance(cur_env, gym.Wrapper):
+             wrappers.append(cur_env)
+             cur_env = cur_env.env
+
+         self._handle_wrappers = len(wrappers) > 0
+         if self._handle_wrappers:
+             self._first_wrapper = wrappers[0]
+             self._last_wrapper = wrappers[-1]
+
+         self._env_with_real_step_reset = RealEnvStepReset()
+         """a simple object that defines the real step/reset functions for gym wrappers to call and use."""
+
+         self._sensor_names = list(self.base_sim_env.scene.sensors.keys())
+         """list of sensors the simulation environment uses"""
+
+         # setup the real agent based on the simulation agent
+         self.agent = agent
+         self.agent._sim_agent = self.base_sim_env.agent
+         # TODO create real controller class based on sim one?? Or can we just fake the data
+         self.agent._sim_agent.controller.qpos
+
+         self.sensor_data_processing_function = sensor_data_processing_function
+
+         # automatically try and generate a visual observation processing function to align a real camera with the simulated camera
+         if sensor_data_processing_function is None:
+             camera_sensor_names = [
+                 name
+                 for name in self._sensor_names
+                 if isinstance(self.base_sim_env.scene.sensors[name], Camera)
+             ]
+
+             def sensor_data_processing_function(sensor_data: Dict):
+                 import cv2
+
+                 for sensor_name in camera_sensor_names:
+                     sim_sensor_cfg = self.base_sim_env._sensor_configs[sensor_name]
+                     assert isinstance(sim_sensor_cfg, CameraConfig)
+                     target_h, target_w = sim_sensor_cfg.height, sim_sensor_cfg.width
+                     real_sensor_data = sensor_data[sensor_name]
+
+                     # crop to same aspect ratio
+                     for key in ["rgb", "depth"]:
+                         if key in real_sensor_data:
+                             img = real_sensor_data[key][0].numpy()
+                             xy_res = img.shape[:2]
+                             crop_res = np.min(xy_res)
+                             cutoff = (np.max(xy_res) - crop_res) // 2
+                             if xy_res[0] == xy_res[1]:
+                                 pass
+                             elif np.argmax(xy_res) == 0:
+                                 img = img[cutoff:-cutoff, :, :]
+                             else:
+                                 img = img[:, cutoff:-cutoff, :]
+                             real_sensor_data[key] = common.to_tensor(
+                                 cv2.resize(img, (target_w, target_h))
+                             ).unsqueeze(0)
+
+                     sensor_data[sensor_name] = real_sensor_data
+                 return sensor_data
+
+             self.sensor_data_processing_function = sensor_data_processing_function
+
+         sample_sim_obs, _ = self.sim_env.reset()
+         sample_real_obs, _ = self.reset()
+
+         # perform checks to avoid errors in alignments
+         self._check_observations(sample_sim_obs, sample_real_obs)
+
+     @property
+     def elapsed_steps(self):
+         return self._elapsed_steps
+
+     def _step_action(self, action):
+         """Re-implementation of the simulated BaseEnv._step_action function for real environments. This uses the simulation agent's
+         controller to compute the joint targets/velocities without stepping the simulator"""
+         action = common.to_tensor(action)
+         if action.shape == self._orig_single_action_space.shape:
+             action = common.batch(action)
+         # NOTE (stao): this won't work for interpolated target joint position control methods at the moment
+         self.base_sim_env.agent.set_action(action)
+
+         # to best ensure whatever signals we send to the simulator robot we also send to the real robot we directly inspect
+         # what drive targets the simulator controller sends and what was set by that controller on the simulated robot
+         sim_articulation = self.agent.controller.articulation
+         if self.last_control_time is None:
+             self.last_control_time = time.perf_counter()
+         else:
+             dt = time.perf_counter() - self.last_control_time
+             if dt < self.control_dt:
+                 time.sleep(self.control_dt - dt)
+             else:
+                 logger.warning(
+                     f"Control dt {self.control_dt} was not reached, actual dt was {dt}"
+                 )
+             self.last_control_time = time.perf_counter()
+         if self.agent.controller.sets_target_qpos:
+             self.agent.set_target_qpos(sim_articulation.drive_targets)
+         if self.agent.controller.sets_target_qvel:
+             self.agent.set_target_qvel(sim_articulation.drive_velocities)
+
+     def step(self, action):
+         """
+         In order to make users able to use most gym environment wrappers without having to write extra code for the real environment
+         we temporarily swap the last wrapper's .env property with the RealEnvStepReset environment that has the real step/reset functions
+         """
+         if self._handle_wrappers:
+             orig_env = self._last_wrapper.env
+             self._last_wrapper.env = self._env_with_real_step_reset
+             ret = self._first_wrapper.step(action)
+             self._last_wrapper.env = orig_env
+         else:
+             ret = self._env_with_real_step_reset.step(action)
+         # ensure sim agent qpos is synced
+         self.base_sim_env.agent.robot.set_qpos(self.agent.robot.qpos)
+         return ret
+
+     def reset(self, seed=None, options=None):
+         self.real_reset_function(self, seed, options)
+         if self._handle_wrappers:
+             orig_env = self._last_wrapper.env
+             self._last_wrapper.env = self._env_with_real_step_reset
+             ret = self._first_wrapper.reset(seed=seed, options=options)
+             self._last_wrapper.env = orig_env
+         else:
+             ret = self._env_with_real_step_reset.reset(seed, options)
+         # sets sim to whatever the real agent reset to in order to sync them. Some controllers use the agent's
+         # current qpos and as this is the sim controller we copy the real world agent qpos so it behaves the same
+         # moreover some properties of the robot like forward kinematic computed poses are done through the simulated robot and so qpos has to be up to date
+         self.base_sim_env.agent.robot.set_qpos(self.agent.robot.qpos)
+         self.agent.controller.reset()
+         return ret
+
+     # -------------------------------------------------------------------------- #
+     # reimplementations of simulation BaseEnv observation related functions
+     # -------------------------------------------------------------------------- #
+     def get_obs(self, info=None, unflattened=False):
+         # uses the original environment's get_obs function. Override this only if you want complete control over the returned observations before any wrappers are applied.
+         return self.base_sim_env.__class__.get_obs(self, info, unflattened)
+
+     def _flatten_raw_obs(self, obs: Any):
+         return self.base_sim_env.__class__._flatten_raw_obs(self, obs)
+
+     def _get_obs_agent(self):
+         # using the original user implemented sim env's _get_obs_agent function in case they modify it e.g. to remove qvel values as they might be too noisy
+         return self.base_sim_env.__class__._get_obs_agent(self)
+
+     def _get_obs_extra(self, info: Dict):
+         # using the original user implemented sim env's _get_obs_extra function in case they modify it e.g. to include engineered features like the tcp_pose of the robot
+         try:
+             return self.base_sim_env.__class__._get_obs_extra(self, info)
+         except:
+             # Print the original error
+             import traceback
+
+             print(f"Error in _get_obs_extra: {traceback.format_exc()}")
+
+             # Print another message
+             print(
+                 "If there is an error above a common cause is that the _get_obs_extra function defined in the simulation environment is using information not available in the real environment or real agent."
+                 "In this case you can override the _get_obs_extra function in the Sim2RealEnv class to compute the desired information in the real environment via a e.g., perception pipeline."
+             )
+             exit(-1)
+
+     def _get_obs_sensor_data(self, apply_texture_transforms: bool = True):
+         # note apply_texture_transforms is not used for real envs, data is expected to already be transformed to standard texture names, types, and shapes.
+         self.agent.capture_sensor_data(self._sensor_names)
+         data = self.agent.get_sensor_data(self._sensor_names)
+         # observation data needs to be processed to be the same shape in simulation
+         # default strategy is to do a center crop to the same shape as simulation and then resize image to the same shape as simulation
+         data = self.sensor_data_processing_function(data)
+         return data
+
+     def _get_obs_with_sensor_data(
+         self, info: Dict, apply_texture_transforms: bool = True
+     ) -> dict:
+         """Get the observation with sensor data"""
+         return self.base_sim_env.__class__._get_obs_with_sensor_data(
+             self, info, apply_texture_transforms
+         )
+
+     def get_sensor_params(self):
+         return self.agent.get_sensor_params(self._sensor_names)
+
+     def get_info(self):
+         info = dict(elapsed_steps=self._elapsed_steps)
+         return info
+
+     # -------------------------------------------------------------------------- #
+     # reimplementations of simulation BaseEnv render related functions.
+     # -------------------------------------------------------------------------- #
+     def render(self):
+         return self.base_sim_env.__class__.render(self)
+
+     def render_sensors(self):
+         return self.base_sim_env.__class__.render_sensors(self)
+
+     def get_sensor_images(self):
+         # used by render_sensors
+         obs = self._get_obs_sensor_data()
+         sensor_images = dict()
+         for name, sensor in self.base_sim_env.scene.sensors.items():
+             if isinstance(sensor, Camera):
+                 sensor_images[name] = sensor.get_images(obs[name])
+         return sensor_images
+
+     # -------------------------------------------------------------------------- #
+     # reimplementations of simulation BaseEnv reward related functions. By default you can leave this alone but if you do want to
+     # support computing rewards in the real world you can override these functions.
+     # -------------------------------------------------------------------------- #
+     def get_reward(self, obs, action, info):
+         return self.base_sim_env.__class__.get_reward(self, obs, action, info)
+
+     def compute_sparse_reward(self, obs: Any, action: torch.Tensor, info: Dict):
+         """
+         Computes the sparse reward. By default this function tries to use the success/fail information in
+         returned by the evaluate function and gives +1 if success, -1 if fail, 0 otherwise"""
+         return self.base_sim_env.__class__.compute_sparse_reward(
+             self, obs, action, info
+         )
+
+     def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
+         raise NotImplementedError()
+
+     def compute_normalized_dense_reward(
+         self, obs: Any, action: torch.Tensor, info: Dict
+     ):
+         raise NotImplementedError()
+
+     # -------------------------------------------------------------------------- #
+     # various checks
+     # -------------------------------------------------------------------------- #
+     def _check_observations(self, sample_sim_obs, sample_real_obs):
+         """checks if the visual observations are aligned in terms of shape and resolution and expected data types"""
+
+         # recursive check if the data is all the same shape
+         def check_observation_match(sim_obs, real_obs, path=[]):
+             """Recursively check if observations match in shape and dtype"""
+             if isinstance(sim_obs, dict):
+                 for key in sim_obs.keys():
+                     if key not in real_obs:
+                         raise KeyError(
+                             f"Key obs[\"{'.'.join(path + [key])}]\"] found in simulation observation but not in real observation"
+                         )
+                     check_observation_match(
+                         sim_obs[key], real_obs[key], path=path + [key]
+                     )
+             else:
+                 assert (
+                     sim_obs.shape == real_obs.shape
+                 ), f"Shape mismatch: obs[\"{'.'.join(path)}\"]: {sim_obs.shape} vs {real_obs.shape}"
+                 assert (
+                     sim_obs.dtype == real_obs.dtype
+                 ), f"Dtype mismatch: obs[\"{'.'.join(path)}\"]: {sim_obs.dtype} vs {real_obs.dtype}"
+
+         # Call the recursive function to check observations
+         check_observation_match(sample_sim_obs, sample_real_obs)
+
+     def close(self):
+         self.agent.stop()
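For context, a minimal sketch of how the new Sim2RealEnv above might be driven. The task id, the keyword arguments passed to gym.make, and the MyRealAgent stub are illustrative assumptions and not part of this diff; only Sim2RealEnv and BaseRealAgent come from the package.

# Hypothetical usage sketch; "MyTask-v1", the gym.make kwargs, and MyRealAgent are placeholders.
import gymnasium as gym

from mani_skill.agents.base_real_agent import BaseRealAgent
from mani_skill.envs.sim2real_env import Sim2RealEnv


class MyRealAgent(BaseRealAgent):
    # implement the hardware I/O that Sim2RealEnv relies on, e.g. capturing camera
    # frames for get_sensor_data and sending set_target_qpos/set_target_qvel to the motors
    ...


sim_env = gym.make("MyTask-v1", obs_mode="rgb", sim_backend="physx_cpu")  # assumed task id/kwargs
real_env = Sim2RealEnv(sim_env=sim_env, agent=MyRealAgent())

obs, _ = real_env.reset()  # the default reset prompts the user before continuing
action = real_env.action_space.sample()
obs, reward, terminated, truncated, info = real_env.step(action)
real_env.close()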
mani_skill/envs/tasks/digital_twins/base_env.py
@@ -1,16 +1,15 @@
  import os
- from typing import Dict, List
+ from typing import Dict, List, Union

  import cv2
- import gymnasium as gym
- import numpy as np
- import sapien.physx as physx
  import torch

- from mani_skill import ASSET_DIR
  from mani_skill.envs.sapien_env import BaseEnv
  from mani_skill.sensors.camera import CameraConfig
  from mani_skill.utils import common, sapien_utils
+ from mani_skill.utils.structs.actor import Actor
+ from mani_skill.utils.structs.articulation import Articulation
+ from mani_skill.utils.structs.link import Link
  from mani_skill.utils.structs.types import SimConfig

@@ -20,33 +19,45 @@ class BaseDigitalTwinEnv(BaseEnv):
      This is based on the [SIMPLER](https://simpler-env.github.io/) and currently has the following tricks for
      making accurate simulated environments of real world datasets

-     Greenscreening: Add a greenscreened real image to the background to make the images more realistic and more closer to the distribution
-     of real world data.
+     Greenscreening: Add a greenscreened real image to the background to make the images more realistic and closer to the distribution
+     of real world data. To use the functionality in your own custom task you can do the following:

-     Note that this is not a general purpose system for building digital twins you can train and then transfer
-     to the real world. This is designed to support fast evaluation in simulation of real world policies.
+     .. code-block:: python
+
+         class MyTask(BaseDigitalTwinEnv):
+             def __init__(self, **kwargs):
+                 self.rgb_overlay_paths = {"camera_name": "path/to/greenscreen/image.png"}
+                 super().__init__(**kwargs)
+             def _load_scene(self, options: dict):
+                 # load your objects as usual e.g. a cube at self.cube
+
+                 # exclude the robot and cube from the greenscreen process
+                 self.remove_object_from_greenscreen(self.robot)
+                 self.remove_object_from_greenscreen(self.cube)
+
+
+     Use `self.remove_object_from_greenscreen(object: Actor | Link | Articulation)` to exclude those objects from the greenscreen process.
      """

      rgb_overlay_paths: Dict[str, str] = None
      """dict mapping camera name to the file path of the greenscreening image"""
      _rgb_overlay_images: Dict[str, torch.Tensor] = dict()
-     rgb_always_overlay_objects: List[str] = []
-     """List of names of actors/links that should be covered by the greenscreen"""
-     rgb_overlay_mode: str = (
-         "background"  # 'background' or 'object' or 'debug' or combinations of them
-     )
-     """which RGB overlay mode to use during the greenscreen process"""
+     """dict mapping camera name to the image torch tensor"""
+     rgb_overlay_mode: str = "background"
+     """which RGB overlay mode to use during the greenscreen process. The default is 'background' which enables greenscreening like normal. The other option is 'debug' mode which
+     will make the opacity of the original render and greenscreen overlay both 50%. The third option is "none" which will not perform any greenscreening."""
+
+     _objects_to_remove_from_greenscreen: List[Union[Actor, Link]] = []
+     """list of articulations/actors/links that should be removed from the greenscreen process"""
+     _segmentation_ids_to_keep: torch.Tensor = None
+     """torch tensor of segmentation ids that reference the objects that should not be greenscreened"""

      def __init__(self, **kwargs):
          # Load the "greenscreen" image, which is used to overlay the background portions of simulation observation
          if self.rgb_overlay_paths is not None:
              for camera_name, path in self.rgb_overlay_paths.items():
                  if not os.path.exists(path):
-                     raise FileNotFoundError(
-                         f"rgb_overlay_path {path} is not found."
-                         "If you installed this repo through 'pip install .' , "
-                         "you can download this directory https://github.com/simpler-env/ManiSkill2_real2sim/tree/main/data to get the real-world image overlay assets. "
-                     )
+                     raise FileNotFoundError(f"rgb_overlay_path {path} is not found.")
                  self._rgb_overlay_images[camera_name] = cv2.cvtColor(
                      cv2.imread(path), cv2.COLOR_BGR2RGB
                  )  # (H, W, 3); float32
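As a rough illustration of the attributes documented in the hunk above, a subclass might point rgb_overlay_paths at a real background image and set rgb_overlay_mode to "debug" while checking camera alignment; the camera name and file path below are placeholders, not values from this diff.

# Hypothetical configuration sketch; camera name and path are placeholders.
from mani_skill.envs.tasks.digital_twins.base_env import BaseDigitalTwinEnv


class MyDigitalTwinTask(BaseDigitalTwinEnv):
    rgb_overlay_paths = {"base_camera": "data/real_background.png"}
    rgb_overlay_mode = "debug"  # 50/50 blend of render and overlay; use "background" for normal greenscreening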
@@ -69,28 +80,29 @@ class BaseDigitalTwinEnv(BaseEnv):
      def _load_scene(self, options: dict):
          """
          Load assets for a digital twin scene in
-
          """

-     def _after_reconfigure(self, options: dict):
-         target_object_actor_ids = [
-             x._objs[0].per_scene_id
-             for x in self.scene.actors.values()
-             if x.name
-             not in ["ground", "goal_site", "", "arena"]
-             + self.rgb_always_overlay_objects
-         ]
-         self.target_object_actor_ids = torch.tensor(
-             target_object_actor_ids, dtype=torch.int16, device=self.device
-         )
-         # get the robot link ids
-         robot_links = self.agent.robot.get_links()
-         self.robot_link_ids = torch.tensor(
-             [x._objs[0].entity.per_scene_id for x in robot_links],
-             dtype=torch.int16,
-             device=self.device,
-         )
+     def remove_object_from_greenscreen(self, object: Union[Articulation, Actor, Link]):
+         """remove an actor/articulation/link from the greenscreen process"""
+         if isinstance(object, Articulation):
+             for link in object.get_links():
+                 self._objects_to_remove_from_greenscreen.append(link)
+         elif isinstance(object, Actor):
+             self._objects_to_remove_from_greenscreen.append(object)
+         elif isinstance(object, Link):
+             self._objects_to_remove_from_greenscreen.append(object)

+     def _after_reconfigure(self, options: dict):
+         super()._after_reconfigure(options)
+         # after reconfiguration in CPU/GPU sim we have initialized all ids of objects in the scene.
+         # and can now get the list of segmentation ids to keep
+         per_scene_ids = []
+         for object in self._objects_to_remove_from_greenscreen:
+             per_scene_ids.append(object.per_scene_id)
+         self._segmentation_ids_to_keep = torch.unique(torch.concatenate(per_scene_ids))
+         self._objects_to_remove_from_greenscreen = []
+
+         # load the overlay images
          for camera_name in self.rgb_overlay_paths.keys():
              sensor = self._sensor_configs[camera_name]
              if isinstance(sensor, CameraConfig):
@@ -106,47 +118,35 @@ class BaseDigitalTwinEnv(BaseEnv):
      def _green_sceen_rgb(self, rgb, segmentation, overlay_img):
          """returns green screened RGB data given a batch of RGB and segmentation images and one overlay image"""
          actor_seg = segmentation[..., 0]
-         mask = torch.ones_like(actor_seg, device=actor_seg.device)
-         if actor_seg.device != self.robot_link_ids.device:
-             # if using CPU simulation, the device of the robot_link_ids and target_object_actor_ids will be CPU first
-             # but for most users who use the sapien_cuda render backend image data will be on the GPU.
-             self.robot_link_ids = self.robot_link_ids.to(actor_seg.device)
-             self.target_object_actor_ids = self.target_object_actor_ids.to(
+         mask = torch.ones_like(actor_seg, device=actor_seg.device, dtype=torch.bool)
+         if self._segmentation_ids_to_keep.device != actor_seg.device:
+             self._segmentation_ids_to_keep = self._segmentation_ids_to_keep.to(
                  actor_seg.device
              )
-         if ("background" in self.rgb_overlay_mode) or (
-             "debug" in self.rgb_overlay_mode
-         ):
-             if ("object" not in self.rgb_overlay_mode) or (
-                 "debug" in self.rgb_overlay_mode
-             ):
-                 # only overlay the background and keep the foregrounds (robot and target objects) rendered in simulation
-                 mask[
-                     torch.isin(
-                         actor_seg,
-                         torch.concatenate(
-                             [self.robot_link_ids, self.target_object_actor_ids]
-                         ),
-                     )
-                 ] = 0
-             else:
-                 # overlay everything except the robot links
-                 mask[np.isin(actor_seg, self.robot_link_ids)] = 0.0
-         else:
-             raise NotImplementedError(self.rgb_overlay_mode)
+         if self.rgb_overlay_mode == "background":
+             # only overlay the background and keep the foregrounds (robot and target objects) rendered in simulation
+             mask[
+                 torch.isin(
+                     actor_seg,
+                     self._segmentation_ids_to_keep,
+                 )
+             ] = 0
          mask = mask[..., None]

          # perform overlay on the RGB observation image
          if "debug" not in self.rgb_overlay_mode:
-             rgb = rgb * (1 - mask) + overlay_img * mask
+             rgb = rgb * (~mask) + overlay_img * mask
          else:
              rgb = rgb * 0.5 + overlay_img * 0.5
+         rgb = rgb.to(torch.uint8)
          return rgb

-     def get_obs(self, info: dict = None):
-         obs = super().get_obs(info)
+     def _get_obs_sensor_data(self, apply_texture_transforms: bool = True):
+         obs = super()._get_obs_sensor_data(apply_texture_transforms)

          # "greenscreen" process
+         if self.rgb_overlay_mode == "none":
+             return obs
          if (
              self.obs_mode_struct.visual.rgb
              and self.obs_mode_struct.visual.segmentation
@@ -156,20 +156,20 @@ class BaseDigitalTwinEnv(BaseEnv):
              for camera_name in self._rgb_overlay_images.keys():
                  # obtain overlay mask based on segmentation info
                  assert (
-                     "segmentation" in obs["sensor_data"][camera_name].keys()
+                     "segmentation" in obs[camera_name].keys()
                  ), "Image overlay requires segment info in the observation!"
                  if (
                      self._rgb_overlay_images[camera_name].device
-                     != obs["sensor_data"][camera_name]["rgb"].device
+                     != obs[camera_name]["rgb"].device
                  ):
                      self._rgb_overlay_images[camera_name] = self._rgb_overlay_images[
                          camera_name
-                     ].to(obs["sensor_data"][camera_name]["rgb"].device)
+                     ].to(obs[camera_name]["rgb"].device)
                  overlay_img = self._rgb_overlay_images[camera_name]
                  green_screened_rgb = self._green_sceen_rgb(
-                     obs["sensor_data"][camera_name]["rgb"],
-                     obs["sensor_data"][camera_name]["segmentation"],
+                     obs[camera_name]["rgb"],
+                     obs[camera_name]["segmentation"],
                      overlay_img,
                  )
-                 obs["sensor_data"][camera_name]["rgb"] = green_screened_rgb
+                 obs[camera_name]["rgb"] = green_screened_rgb
          return obs
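To make the new segmentation-id based masking in _green_sceen_rgb concrete, here is a small self-contained illustration of the same idea on dummy tensors (not library code): pixels whose segmentation id is in the keep-set stay rendered, everything else is replaced by the overlay image.

# Standalone illustration of the torch.isin-based greenscreen compositing above; all values are made up.
import torch

H, W = 4, 4
rgb = torch.randint(0, 256, (1, H, W, 3), dtype=torch.uint8)          # simulated render
overlay_img = torch.full((H, W, 3), 200, dtype=torch.uint8)           # stand-in for the real background image
segmentation = torch.randint(0, 5, (1, H, W, 1), dtype=torch.int32)   # per-pixel object ids
segmentation_ids_to_keep = torch.tensor([2, 3], dtype=torch.int32)    # e.g. robot links + task objects

actor_seg = segmentation[..., 0]
mask = torch.ones_like(actor_seg, dtype=torch.bool)                   # True = replace with overlay
mask[torch.isin(actor_seg, segmentation_ids_to_keep)] = False         # keep foreground pixels
mask = mask[..., None]

composited = (rgb * (~mask) + overlay_img * mask).to(torch.uint8)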
mani_skill/envs/tasks/digital_twins/bridge_dataset_eval/base_env.py
@@ -158,6 +158,8 @@ class BaseBridgeEnv(BaseDigitalTwinEnv):
      SUPPORTED_OBS_MODES = ["rgb+segmentation"]
      SUPPORTED_REWARD_MODES = ["none"]
      scene_setting: Literal["flat_table", "sink"] = "flat_table"
+     objects_excluded_from_greenscreening: List[str] = []
+     """object ids that should not be greenscreened"""

      obj_static_friction = 0.5
      obj_dynamic_friction = 0.5
@@ -344,6 +346,10 @@ class BaseBridgeEnv(BaseDigitalTwinEnv):
                  raise ValueError(f"Model {model_id} does not have bbox info.")
          self.episode_model_bbox_sizes = model_bbox_sizes

+         for obj_name in self.objects_excluded_from_greenscreening:
+             self.remove_object_from_greenscreen(self.objs[obj_name])
+         self.remove_object_from_greenscreen(self.agent.robot)
+
      def _initialize_episode(self, env_idx: torch.Tensor, options: dict):
          # NOTE: this part of code is not GPU parallelized
          with torch.device(self.device):
mani_skill/envs/tasks/digital_twins/bridge_dataset_eval/put_on_in_scene.py
@@ -16,6 +16,10 @@ from mani_skill.utils.registration import register_env
  )
  class PutCarrotOnPlateInScene(BaseBridgeEnv):
      scene_setting = "flat_table"
+     objects_excluded_from_greenscreening = [
+         "bridge_carrot_generated_modified",
+         "bridge_plate_objaverse_larger",
+     ]

      def __init__(self, **kwargs):
          xy_center = np.array([-0.16, 0.00])
@@ -74,7 +78,7 @@ class PutCarrotOnPlateInScene(BaseBridgeEnv):
  )
  class PutEggplantInBasketScene(BaseBridgeEnv):
      scene_setting = "sink"
-     rgb_always_overlay_objects = ["sink", "dummy_sink_target_plane"]
+     objects_excluded_from_greenscreening = ["eggplant"]

      def __init__(self, **kwargs):
          source_obj_name = "eggplant"
@@ -154,6 +158,10 @@ class PutEggplantInBasketScene(BaseBridgeEnv):
  )
  class StackGreenCubeOnYellowCubeBakedTexInScene(BaseBridgeEnv):
      MODEL_JSON = "info_bridge_custom_baked_tex_v0.json"
+     objects_excluded_from_greenscreening = [
+         "baked_green_cube_3cm",
+         "baked_yellow_cube_3cm",
+     ]

      def __init__(
          self,
@@ -213,6 +221,11 @@ class StackGreenCubeOnYellowCubeBakedTexInScene(BaseBridgeEnv):
      asset_download_ids=["bridge_v2_real2sim"],
  )
  class PutSpoonOnTableClothInScene(BaseBridgeEnv):
+     objects_excluded_from_greenscreening = [
+         "table_cloth_generated_shorter",
+         "bridge_spoon_generated_modified",
+     ]
+
      def __init__(
          self,
          **kwargs,