mani-skill-nightly 2025.10.22.143__py3-none-any.whl → 2025.10.22.157__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mani-skill-nightly has been flagged by the registry as potentially problematic.

Files changed (122)
  1. mani_skill/agents/base_agent.py +20 -14
  2. mani_skill/agents/base_real_agent.py +6 -6
  3. mani_skill/agents/controllers/base_controller.py +6 -6
  4. mani_skill/agents/controllers/pd_joint_pos.py +2 -2
  5. mani_skill/agents/controllers/utils/kinematics.py +27 -12
  6. mani_skill/agents/multi_agent.py +5 -5
  7. mani_skill/agents/registration.py +3 -4
  8. mani_skill/agents/robots/allegro_hand/allegro.py +1 -2
  9. mani_skill/agents/robots/allegro_hand/allegro_touch.py +3 -3
  10. mani_skill/agents/robots/dclaw/dclaw.py +2 -3
  11. mani_skill/agents/robots/fetch/fetch.py +2 -2
  12. mani_skill/agents/robots/floating_ability_hand/floating_ability_hand.py +10 -13
  13. mani_skill/agents/robots/floating_robotiq_2f_85_gripper/floating_robotiq_2f_85_gripper.py +2 -2
  14. mani_skill/agents/robots/lerobot/manipulator.py +4 -4
  15. mani_skill/agents/robots/panda/panda_stick.py +2 -2
  16. mani_skill/agents/robots/trifingerpro/trifingerpro.py +1 -2
  17. mani_skill/agents/robots/xarm/xarm7_ability.py +2 -2
  18. mani_skill/agents/utils.py +2 -2
  19. mani_skill/envs/minimal_template.py +4 -4
  20. mani_skill/envs/sapien_env.py +32 -32
  21. mani_skill/envs/scene.py +27 -27
  22. mani_skill/envs/scenes/base_env.py +3 -3
  23. mani_skill/envs/sim2real_env.py +10 -10
  24. mani_skill/envs/tasks/control/ant.py +6 -6
  25. mani_skill/envs/tasks/control/cartpole.py +4 -4
  26. mani_skill/envs/tasks/control/hopper.py +7 -7
  27. mani_skill/envs/tasks/control/humanoid.py +20 -20
  28. mani_skill/envs/tasks/dexterity/insert_flower.py +41 -23
  29. mani_skill/envs/tasks/dexterity/rotate_single_object_in_hand.py +6 -6
  30. mani_skill/envs/tasks/dexterity/rotate_valve.py +5 -5
  31. mani_skill/envs/tasks/digital_twins/base_env.py +4 -4
  32. mani_skill/envs/tasks/digital_twins/bridge_dataset_eval/base_env.py +22 -12
  33. mani_skill/envs/tasks/digital_twins/so100_arm/grasp_cube.py +4 -4
  34. mani_skill/envs/tasks/drawing/draw.py +1 -3
  35. mani_skill/envs/tasks/drawing/draw_svg.py +6 -8
  36. mani_skill/envs/tasks/drawing/draw_triangle.py +1 -2
  37. mani_skill/envs/tasks/empty_env.py +1 -3
  38. mani_skill/envs/tasks/fmb/fmb.py +1 -2
  39. mani_skill/envs/tasks/humanoid/humanoid_pick_place.py +7 -7
  40. mani_skill/envs/tasks/humanoid/humanoid_stand.py +5 -5
  41. mani_skill/envs/tasks/humanoid/transport_box.py +4 -4
  42. mani_skill/envs/tasks/mobile_manipulation/open_cabinet_drawer.py +8 -8
  43. mani_skill/envs/tasks/mobile_manipulation/robocasa/kitchen.py +2 -3
  44. mani_skill/envs/tasks/quadruped/quadruped_reach.py +5 -5
  45. mani_skill/envs/tasks/quadruped/quadruped_spin.py +5 -5
  46. mani_skill/envs/tasks/rotate_cube.py +4 -4
  47. mani_skill/envs/tasks/tabletop/assembling_kits.py +2 -2
  48. mani_skill/envs/tasks/tabletop/lift_peg_upright.py +4 -4
  49. mani_skill/envs/tasks/tabletop/peg_insertion_side.py +4 -4
  50. mani_skill/envs/tasks/tabletop/pick_clutter_ycb.py +4 -4
  51. mani_skill/envs/tasks/tabletop/pick_cube.py +4 -4
  52. mani_skill/envs/tasks/tabletop/pick_single_ycb.py +5 -5
  53. mani_skill/envs/tasks/tabletop/place_sphere.py +4 -4
  54. mani_skill/envs/tasks/tabletop/plug_charger.py +2 -2
  55. mani_skill/envs/tasks/tabletop/poke_cube.py +4 -4
  56. mani_skill/envs/tasks/tabletop/pull_cube.py +5 -5
  57. mani_skill/envs/tasks/tabletop/pull_cube_tool.py +4 -4
  58. mani_skill/envs/tasks/tabletop/push_cube.py +6 -6
  59. mani_skill/envs/tasks/tabletop/push_t.py +4 -4
  60. mani_skill/envs/tasks/tabletop/roll_ball.py +4 -4
  61. mani_skill/envs/tasks/tabletop/stack_cube.py +4 -4
  62. mani_skill/envs/tasks/tabletop/stack_pyramid.py +44 -25
  63. mani_skill/envs/tasks/tabletop/turn_faucet.py +4 -4
  64. mani_skill/envs/tasks/tabletop/two_robot_pick_cube.py +4 -4
  65. mani_skill/envs/tasks/tabletop/two_robot_stack_cube.py +4 -4
  66. mani_skill/envs/template.py +4 -4
  67. mani_skill/envs/utils/observations/observations.py +2 -3
  68. mani_skill/envs/utils/randomization/batched_rng.py +7 -7
  69. mani_skill/envs/utils/randomization/samplers.py +2 -2
  70. mani_skill/examples/benchmarking/envs/maniskill/franka_move.py +2 -2
  71. mani_skill/examples/benchmarking/envs/maniskill/franka_pick_cube.py +2 -2
  72. mani_skill/examples/benchmarking/profiling.py +2 -2
  73. mani_skill/examples/demo_random_action.py +1 -1
  74. mani_skill/render/shaders.py +5 -5
  75. mani_skill/sensors/base_sensor.py +1 -2
  76. mani_skill/sensors/camera.py +4 -4
  77. mani_skill/utils/assets/data.py +3 -3
  78. mani_skill/utils/building/_mjcf_loader.py +11 -11
  79. mani_skill/utils/building/actor_builder.py +4 -4
  80. mani_skill/utils/building/articulation_builder.py +3 -3
  81. mani_skill/utils/building/mjcf_loader.py +6 -6
  82. mani_skill/utils/building/urdf_loader.py +6 -6
  83. mani_skill/utils/common.py +2 -2
  84. mani_skill/utils/geometry/bounding_cylinder.py +4 -4
  85. mani_skill/utils/geometry/geometry.py +1 -3
  86. mani_skill/utils/geometry/trimesh_utils.py +1 -3
  87. mani_skill/utils/gym_utils.py +2 -4
  88. mani_skill/utils/registration.py +6 -6
  89. mani_skill/utils/sapien_utils.py +21 -21
  90. mani_skill/utils/scene_builder/ai2thor/constants.py +1 -2
  91. mani_skill/utils/scene_builder/ai2thor/scene_builder.py +9 -9
  92. mani_skill/utils/scene_builder/control/planar/scene_builder.py +2 -4
  93. mani_skill/utils/scene_builder/kitchen_counter/scene_builder.py +1 -2
  94. mani_skill/utils/scene_builder/registration.py +1 -2
  95. mani_skill/utils/scene_builder/replicacad/rearrange/scene_builder.py +16 -16
  96. mani_skill/utils/scene_builder/replicacad/scene_builder.py +15 -15
  97. mani_skill/utils/scene_builder/robocasa/fixtures/windows.py +2 -4
  98. mani_skill/utils/scene_builder/robocasa/scene_builder.py +5 -5
  99. mani_skill/utils/scene_builder/scene_builder.py +15 -15
  100. mani_skill/utils/scene_builder/table/scene_builder.py +1 -2
  101. mani_skill/utils/structs/actor.py +6 -6
  102. mani_skill/utils/structs/articulation.py +32 -30
  103. mani_skill/utils/structs/articulation_joint.py +6 -6
  104. mani_skill/utils/structs/base.py +14 -9
  105. mani_skill/utils/structs/drive.py +2 -2
  106. mani_skill/utils/structs/link.py +10 -8
  107. mani_skill/utils/structs/pose.py +3 -3
  108. mani_skill/utils/structs/render_camera.py +4 -4
  109. mani_skill/utils/visualization/jupyter_utils.py +1 -3
  110. mani_skill/utils/visualization/misc.py +5 -5
  111. mani_skill/utils/wrappers/cached_reset.py +5 -3
  112. mani_skill/utils/wrappers/flatten.py +1 -2
  113. mani_skill/utils/wrappers/record.py +10 -8
  114. mani_skill/utils/wrappers/visual_encoders.py +2 -2
  115. mani_skill/vector/wrappers/gymnasium.py +23 -13
  116. mani_skill/vector/wrappers/sb3.py +5 -5
  117. {mani_skill_nightly-2025.10.22.143.dist-info → mani_skill_nightly-2025.10.22.157.dist-info}/METADATA +1 -1
  118. {mani_skill_nightly-2025.10.22.143.dist-info → mani_skill_nightly-2025.10.22.157.dist-info}/RECORD +122 -122
  119. {mani_skill_nightly-2025.10.22.143.dist-info → mani_skill_nightly-2025.10.22.157.dist-info}/WHEEL +0 -0
  120. {mani_skill_nightly-2025.10.22.143.dist-info → mani_skill_nightly-2025.10.22.157.dist-info}/licenses/LICENSE +0 -0
  121. {mani_skill_nightly-2025.10.22.143.dist-info → mani_skill_nightly-2025.10.22.157.dist-info}/licenses/LICENSE-3RD-PARTY +0 -0
  122. {mani_skill_nightly-2025.10.22.143.dist-info → mani_skill_nightly-2025.10.22.157.dist-info}/top_level.txt +0 -0
mani_skill/envs/tasks/humanoid/humanoid_pick_place.py
@@ -1,6 +1,6 @@
  import copy
  import os
- from typing import Any, Dict
+ from typing import Any

  import numpy as np
  import sapien
@@ -65,7 +65,7 @@ class HumanoidPickPlaceEnv(BaseEnv):
  "fail": torch.zeros(self.num_envs, device=self.device, dtype=bool),
  }

- def _get_obs_extra(self, info: Dict):
+ def _get_obs_extra(self, info: dict):
  return dict()


@@ -100,7 +100,7 @@ class HumanoidPlaceAppleInBowl(HumanoidPickPlaceEnv):
  100,
  )

- def _load_scene(self, options: Dict):
+ def _load_scene(self, options: dict):
  super()._load_scene(options)
  scale = self.kitchen_scene_scale
  builder = self.scene.create_actor_builder()
@@ -149,7 +149,7 @@ class HumanoidPlaceAppleInBowl(HumanoidPickPlaceEnv):
  "is_grasped": is_grasped,
  }

- def _get_obs_extra(self, info: Dict):
+ def _get_obs_extra(self, info: dict):
  # in reality some people hack is_grasped into observations by checking if the gripper can close fully or not
  obs = dict(
  is_grasped=info["is_grasped"],
@@ -168,7 +168,7 @@ class HumanoidPlaceAppleInBowl(HumanoidPickPlaceEnv):
  """a dense reward that rewards the agent for opening their hand"""
  return 1 - torch.tanh(self.agent.right_hand_dist_to_open_grasp())

- def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
+ def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
  tcp_to_obj_dist = torch.linalg.norm(
  self.apple.pose.p - self.agent.right_tcp.pose.p, axis=1
  )
@@ -201,7 +201,7 @@ class HumanoidPlaceAppleInBowl(HumanoidPickPlaceEnv):
  return reward

  def compute_normalized_dense_reward(
- self, obs: Any, action: torch.Tensor, info: Dict
+ self, obs: Any, action: torch.Tensor, info: dict
  ):
  return self.compute_dense_reward(obs=obs, action=action, info=info) / 10

@@ -254,7 +254,7 @@ class UnitreeG1PlaceAppleInBowlEnv(HumanoidPlaceAppleInBowl):
  scene_config=SceneConfig(contact_offset=0.01),
  )

- def _initialize_episode(self, env_idx: torch.Tensor, options: Dict):
+ def _initialize_episode(self, env_idx: torch.Tensor, options: dict):
  super()._initialize_episode(env_idx, options)
  with torch.device(self.device):
  b = len(env_idx)
mani_skill/envs/tasks/humanoid/humanoid_stand.py
@@ -1,4 +1,4 @@
- from typing import Any, Dict, Union
+ from typing import Any, Union

  import numpy as np
  import sapien
@@ -46,17 +46,17 @@ class HumanoidStandEnv(BaseEnv):
  self.agent.is_fallen()
  return {"is_standing": is_standing, "fail": ~is_standing}

- def _get_obs_extra(self, info: Dict):
+ def _get_obs_extra(self, info: dict):
  return dict()

- def compute_sparse_reward(self, obs: Any, action: torch.Tensor, info: Dict):
+ def compute_sparse_reward(self, obs: Any, action: torch.Tensor, info: dict):
  return info["is_standing"]

- # def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
+ # def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
  # return torch.zeros(self.num_envs, device=self.device)

  # def compute_normalized_dense_reward(
- # self, obs: Any, action: torch.Tensor, info: Dict
+ # self, obs: Any, action: torch.Tensor, info: dict
  # ):
  # max_reward = 1.0
  # return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
mani_skill/envs/tasks/humanoid/transport_box.py
@@ -1,7 +1,7 @@
  import copy
  import os
  from pathlib import Path
- from typing import Any, Dict
+ from typing import Any

  import numpy as np
  import sapien
@@ -226,7 +226,7 @@ class TransportBoxEnv(BaseEnv):
  "facing_table_with_box": facing_table_with_box,
  }

- def _get_obs_extra(self, info: Dict):
+ def _get_obs_extra(self, info: dict):
  obs = dict(
  right_tcp_pose=self.agent.right_tcp.pose.raw_pose,
  left_tcp_pose=self.agent.left_tcp.pose.raw_pose,
@@ -252,7 +252,7 @@ class TransportBoxEnv(BaseEnv):
  torch.tensor([0.165, 0.07, 0.05], device=self.device)
  )

- def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
+ def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
  # Stage 1, move to face the box on the table. Succeeds if facing_table_with_box
  reward = 1 - torch.tanh((self.agent.robot.qpos[:, 0] + 1.4).abs())

@@ -306,6 +306,6 @@ class TransportBoxEnv(BaseEnv):
  return reward

  def compute_normalized_dense_reward(
- self, obs: Any, action: torch.Tensor, info: Dict
+ self, obs: Any, action: torch.Tensor, info: dict
  ):
  return self.compute_dense_reward(obs, action, info) / 5
mani_skill/envs/tasks/mobile_manipulation/open_cabinet_drawer.py
@@ -1,4 +1,4 @@
- from typing import Any, Dict, List, Optional, Union
+ from typing import Any, Optional, Union

  import numpy as np
  import sapien
@@ -125,15 +125,15 @@ class OpenCabinetDrawerEnv(BaseEnv):
  group=2, bit_idx=CABINET_COLLISION_BIT, bit=1
  )

- def _load_cabinets(self, joint_types: List[str]):
+ def _load_cabinets(self, joint_types: list[str]):
  # we sample random cabinet model_ids with numpy as numpy is always deterministic based on seed, regardless of
  # GPU/CPU simulation backends. This is useful for replaying demonstrations.
  model_ids = self._batched_episode_rng.choice(self.all_model_ids)
  link_ids = self._batched_episode_rng.randint(0, 2**31)

- self._cabinets: List[Articulation] = []
- handle_links: List[List[Link]] = []
- handle_links_meshes: List[List[trimesh.Trimesh]] = []
+ self._cabinets: list[Articulation] = []
+ handle_links: list[list[Link]] = []
+ handle_links_meshes: list[list[trimesh.Trimesh]] = []
  for i, model_id in enumerate(model_ids):
  # partnet-mobility is a dataset source and the ids are the ones we sampled
  # we provide tools to easily create the articulation builder like so by querying
@@ -320,7 +320,7 @@ class OpenCabinetDrawerEnv(BaseEnv):
  "open_enough": open_enough,
  }

- def _get_obs_extra(self, info: Dict):
+ def _get_obs_extra(self, info: dict):
  obs = dict(
  tcp_pose=self.agent.tcp.pose.raw_pose,
  )
@@ -333,7 +333,7 @@ class OpenCabinetDrawerEnv(BaseEnv):
  )
  return obs

- def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
+ def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
  tcp_to_handle_dist = torch.linalg.norm(
  self.agent.tcp.pose.p - info["handle_link_pos"], axis=1
  )
@@ -352,7 +352,7 @@ class OpenCabinetDrawerEnv(BaseEnv):
  return reward

  def compute_normalized_dense_reward(
- self, obs: Any, action: torch.Tensor, info: Dict
+ self, obs: Any, action: torch.Tensor, info: dict
  ):
  max_reward = 5.0
  return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
mani_skill/envs/tasks/mobile_manipulation/robocasa/kitchen.py
@@ -1,5 +1,4 @@
  from copy import deepcopy
- from typing import Dict

  import numpy as np
  import sapien
@@ -55,7 +54,7 @@ class RoboCasaKitchenEnv(BaseEnv):
  overrides the default gripper. Should either be single str if same gripper type is to be used for all
  robots or else it should be a list of the same length as "robots" param

- initialization_noise (dict or list of dict): Dict containing the initialization noise parameters.
+ initialization_noise (dict or list of dict): dict containing the initialization noise parameters.
  The expected keys and corresponding value types are specified below:

  :`'magnitude'`: The scale factor of uni-variate random noise applied to each of a robot's given initial
@@ -456,7 +455,7 @@ class RoboCasaKitchenEnv(BaseEnv):
  def evaluate(self):
  return {}

- def _get_obs_extra(self, info: Dict):
+ def _get_obs_extra(self, info: dict):
  return dict()

  """
mani_skill/envs/tasks/quadruped/quadruped_reach.py
@@ -1,4 +1,4 @@
- from typing import Any, Dict, List
+ from typing import Any

  import numpy as np
  import sapien
@@ -21,7 +21,7 @@ class QuadrupedReachEnv(BaseEnv):
  agent: ANYmalC
  default_qpos: torch.Tensor

- _UNDESIRED_CONTACT_LINK_NAMES: List[str] = None
+ _UNDESIRED_CONTACT_LINK_NAMES: list[str] = None

  def __init__(self, *args, robot_uids="anymal-c", **kwargs):
  super().__init__(*args, robot_uids=robot_uids, **kwargs)
@@ -110,7 +110,7 @@ class QuadrupedReachEnv(BaseEnv):
  "is_fallen": is_fallen,
  }

- def _get_obs_extra(self, info: Dict):
+ def _get_obs_extra(self, info: dict):
  obs = dict(
  root_linear_velocity=self.agent.robot.root_linear_velocity,
  root_angular_velocity=self.agent.robot.root_angular_velocity,
@@ -130,7 +130,7 @@ class QuadrupedReachEnv(BaseEnv):
  contact_exists = torch.norm(forces, dim=-1).max(-1).values > threshold
  return contact_exists

- def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
+ def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
  robot_to_goal_dist = info["robot_to_goal_dist"]
  reaching_reward = 1 - torch.tanh(1 * robot_to_goal_dist)

@@ -151,7 +151,7 @@ class QuadrupedReachEnv(BaseEnv):
  return reward

  def compute_normalized_dense_reward(
- self, obs: Any, action: torch.Tensor, info: Dict
+ self, obs: Any, action: torch.Tensor, info: dict
  ):
  max_reward = 3.0
  return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
mani_skill/envs/tasks/quadruped/quadruped_spin.py
@@ -1,4 +1,4 @@
- from typing import Any, Dict, List
+ from typing import Any

  import numpy as np
  import sapien
@@ -21,7 +21,7 @@ class QuadrupedSpinEnv(BaseEnv):
  agent: ANYmalC
  default_qpos: torch.Tensor

- _UNDESIRED_CONTACT_LINK_NAMES: List[str] = None
+ _UNDESIRED_CONTACT_LINK_NAMES: list[str] = None

  def __init__(self, *args, robot_uids="anymal-c", **kwargs):
  super().__init__(*args, robot_uids=robot_uids, **kwargs)
@@ -86,7 +86,7 @@ class QuadrupedSpinEnv(BaseEnv):
  "is_fallen": is_fallen,
  }

- def _get_obs_extra(self, info: Dict):
+ def _get_obs_extra(self, info: dict):
  obs = dict(
  root_linear_velocity=self.agent.robot.root_linear_velocity,
  root_angular_velocity=self.agent.robot.root_angular_velocity,
@@ -100,7 +100,7 @@ class QuadrupedSpinEnv(BaseEnv):
  contact_exists = torch.norm(forces, dim=-1).max(-1).values > threshold
  return contact_exists

- def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
+ def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
  rotation_reward = self.agent.robot.root_angular_velocity[:, 2]
  # various penalties:
  lin_vel_z_l2 = torch.square(self.agent.robot.root_linear_velocity[:, 2])
@@ -119,7 +119,7 @@ class QuadrupedSpinEnv(BaseEnv):
  return reward

  def compute_normalized_dense_reward(
- self, obs: Any, action: torch.Tensor, info: Dict
+ self, obs: Any, action: torch.Tensor, info: dict
  ):
  max_reward = 2.0
  return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
mani_skill/envs/tasks/rotate_cube.py
@@ -1,4 +1,4 @@
- from typing import Any, Dict, Tuple
+ from typing import Any, Tuple

  import numpy as np
  import torch
@@ -250,7 +250,7 @@ class RotateCubeEnv(BaseEnv):
  )
  )

- def _get_obs_extra(self, info: Dict):
+ def _get_obs_extra(self, info: dict):
  obs = dict(
  goal_pos=self.obj_goal.pose.p,
  goal_q=self.obj_goal.pose.q,
@@ -262,7 +262,7 @@ class RotateCubeEnv(BaseEnv):
  )
  return obs

- def compute_dense_reward(self, obs: Any, action: Array, info: Dict):
+ def compute_dense_reward(self, obs: Any, action: Array, info: dict):
  obj_pos = self.obj.pose.p
  obj_q = self.obj.pose.q
  goal_pos = self.obj_goal.pose.p
@@ -341,7 +341,7 @@ class RotateCubeEnv(BaseEnv):
  total_reward[info["success"]] = 15
  return total_reward

- def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict):
+ def compute_normalized_dense_reward(self, obs: Any, action: Array, info: dict):
  self.max_reward = 15
  dense_reward = self.compute_dense_reward(obs=obs, action=action, info=info)
  norm_dense_reward = dense_reward / (2 * self.max_reward) + 0.5
mani_skill/envs/tasks/tabletop/assembling_kits.py
@@ -1,5 +1,5 @@
  from pathlib import Path
- from typing import Dict, Union
+ from typing import Union

  import numpy as np
  import sapien.core as sapien
@@ -278,7 +278,7 @@ class AssemblingKitsEnv(BaseEnv):
  "success": pos_correct & rot_correct & in_slot,
  }

- def _get_obs_extra(self, info: Dict):
+ def _get_obs_extra(self, info: dict):
  obs = dict(
  tcp_pose=self.agent.tcp.pose.raw_pose,
  )
mani_skill/envs/tasks/tabletop/lift_peg_upright.py
@@ -1,4 +1,4 @@
- from typing import Any, Dict, Union
+ from typing import Any, Union

  import numpy as np
  import sapien
@@ -98,7 +98,7 @@ class LiftPegUprightEnv(BaseEnv):
  "success": is_peg_upright & close_to_table,
  }

- def _get_obs_extra(self, info: Dict):
+ def _get_obs_extra(self, info: dict):
  obs = dict(
  tcp_pose=self.agent.tcp.pose.raw_pose,
  )
@@ -108,7 +108,7 @@ class LiftPegUprightEnv(BaseEnv):
  )
  return obs

- def compute_dense_reward(self, obs: Any, action: Array, info: Dict):
+ def compute_dense_reward(self, obs: Any, action: Array, info: dict):
  # rotation reward as cosine similarity between peg direction vectors
  # peg center of mass to end of peg, (1,0,0), rotated by peg pose rotation
  # dot product with its goal orientation: (0,0,1) or (0,0,-1)
@@ -139,6 +139,6 @@ class LiftPegUprightEnv(BaseEnv):
  reward[info["success"]] = 3
  return reward

- def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict):
+ def compute_normalized_dense_reward(self, obs: Any, action: Array, info: dict):
  max_reward = 3.0
  return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
mani_skill/envs/tasks/tabletop/peg_insertion_side.py
@@ -1,4 +1,4 @@
- from typing import Any, Dict, Union
+ from typing import Any, Union

  import numpy as np
  import sapien
@@ -286,7 +286,7 @@ class PegInsertionSideEnv(BaseEnv):
  success, peg_head_pos_at_hole = self.has_peg_inserted()
  return dict(success=success, peg_head_pos_at_hole=peg_head_pos_at_hole)

- def _get_obs_extra(self, info: Dict):
+ def _get_obs_extra(self, info: dict):
  obs = dict(tcp_pose=self.agent.tcp.pose.raw_pose)
  if self.obs_mode_struct.use_state:
  obs.update(
@@ -297,7 +297,7 @@ class PegInsertionSideEnv(BaseEnv):
  )
  return obs

- def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
+ def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
  # Stage 1: Encourage gripper to be rotated to be lined up with the peg

  # Stage 2: Encourage gripper to move close to peg tail and grasp it
@@ -355,6 +355,6 @@ class PegInsertionSideEnv(BaseEnv):
  return reward

  def compute_normalized_dense_reward(
- self, obs: Any, action: torch.Tensor, info: Dict
+ self, obs: Any, action: torch.Tensor, info: dict
  ):
  return self.compute_dense_reward(obs, action, info) / 10
mani_skill/envs/tasks/tabletop/pick_clutter_ycb.py
@@ -1,5 +1,5 @@
  import os
- from typing import Dict, List, Union
+ from typing import Union

  import numpy as np
  import sapien
@@ -51,7 +51,7 @@ class PickClutterEnv(BaseEnv):
  "To download default json:"
  "`python -m mani_skill.utils.download_asset pick_clutter_ycb`."
  )
- self._episodes: List[Dict] = load_json(episode_json)
+ self._episodes: list[dict] = load_json(episode_json)
  if reconfiguration_freq is None:
  if num_envs == 1:
  reconfiguration_freq = 1
@@ -110,7 +110,7 @@ class PickClutterEnv(BaseEnv):
  # sample some clutter configurations
  eps_idxs = self._batched_episode_rng.randint(0, len(self._episodes))

- self.selectable_target_objects: List[List[Actor]] = []
+ self.selectable_target_objects: list[list[Actor]] = []
  """for each sub-scene, a list of objects that can be selected as targets"""
  all_objects = []

@@ -182,7 +182,7 @@ class PickClutterEnv(BaseEnv):
  "fail": torch.zeros(self.num_envs, device=self.device, dtype=bool),
  }

- def _get_obs_extra(self, info: Dict):
+ def _get_obs_extra(self, info: dict):

  return dict()

mani_skill/envs/tasks/tabletop/pick_cube.py
@@ -1,4 +1,4 @@
- from typing import Any, Dict, Union
+ from typing import Any, Union

  import numpy as np
  import sapien
@@ -129,7 +129,7 @@ class PickCubeEnv(BaseEnv):
  goal_xyz[:, 2] = torch.rand((b)) * self.max_goal_height + xyz[:, 2]
  self.goal_site.set_pose(Pose.create_from_pq(goal_xyz))

- def _get_obs_extra(self, info: Dict):
+ def _get_obs_extra(self, info: dict):
  # in reality some people hack is_grasped into observations by checking if the gripper can close fully or not
  obs = dict(
  is_grasped=info["is_grasped"],
@@ -158,7 +158,7 @@ class PickCubeEnv(BaseEnv):
  "is_grasped": is_grasped,
  }

- def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
+ def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
  tcp_to_obj_dist = torch.linalg.norm(
  self.cube.pose.p - self.agent.tcp_pose.p, axis=1
  )
@@ -186,7 +186,7 @@ class PickCubeEnv(BaseEnv):
  return reward

  def compute_normalized_dense_reward(
- self, obs: Any, action: torch.Tensor, info: Dict
+ self, obs: Any, action: torch.Tensor, info: dict
  ):
  return self.compute_dense_reward(obs=obs, action=action, info=info) / 5

mani_skill/envs/tasks/tabletop/pick_single_ycb.py
@@ -1,4 +1,4 @@
- from typing import Any, Dict, List, Union
+ from typing import Any, Union

  import numpy as np
  import sapien
@@ -134,7 +134,7 @@ class PickSingleYCBEnv(BaseEnv):
  or set reconfiguration_freq to be >= 1."""
  )

- self._objs: List[Actor] = []
+ self._objs: list[Actor] = []
  self.obj_heights = []
  for i, model_id in enumerate(model_ids):
  # TODO: before official release we will finalize a metadata dataclass that these build functions should return.
@@ -212,7 +212,7 @@ class PickSingleYCBEnv(BaseEnv):
  success=torch.logical_and(is_obj_placed, is_robot_static),
  )

- def _get_obs_extra(self, info: Dict):
+ def _get_obs_extra(self, info: dict):
  obs = dict(
  tcp_pose=self.agent.tcp.pose.raw_pose,
  goal_pos=self.goal_site.pose.p,
@@ -227,7 +227,7 @@ class PickSingleYCBEnv(BaseEnv):
  )
  return obs

- def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
+ def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
  tcp_to_obj_dist = torch.linalg.norm(
  self.obj.pose.p - self.agent.tcp.pose.p, axis=1
  )
@@ -254,6 +254,6 @@ class PickSingleYCBEnv(BaseEnv):
  return reward

  def compute_normalized_dense_reward(
- self, obs: Any, action: torch.Tensor, info: Dict
+ self, obs: Any, action: torch.Tensor, info: dict
  ):
  return self.compute_dense_reward(obs=obs, action=action, info=info) / 6
mani_skill/envs/tasks/tabletop/place_sphere.py
@@ -1,4 +1,4 @@
- from typing import Any, Dict, Union
+ from typing import Any, Union

  import gymnasium as gym
  import matplotlib.pyplot as plt
@@ -200,7 +200,7 @@ class PlaceSphereEnv(BaseEnv):
  "success": success,
  }

- def _get_obs_extra(self, info: Dict):
+ def _get_obs_extra(self, info: dict):
  obs = dict(
  is_grasped=info["is_obj_grasped"],
  tcp_pose=self.agent.tcp.pose.raw_pose,
@@ -213,7 +213,7 @@ class PlaceSphereEnv(BaseEnv):
  )
  return obs

- def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
+ def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
  # reaching reward
  tcp_pose = self.agent.tcp.pose.p
  obj_pos = self.obj.pose.p
@@ -252,7 +252,7 @@ class PlaceSphereEnv(BaseEnv):
  reward[info["success"]] = 13
  return reward

- def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict):
+ def compute_normalized_dense_reward(self, obs: Any, action: Array, info: dict):
  # this should be equal to compute_dense_reward / max possible reward
  max_reward = 13.0
  return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
mani_skill/envs/tasks/tabletop/plug_charger.py
@@ -1,4 +1,4 @@
- from typing import Dict, Union
+ from typing import Union

  import numpy as np
  import sapien
@@ -271,7 +271,7 @@ class PlugChargerEnv(BaseEnv):
  success=success,
  )

- def _get_obs_extra(self, info: Dict):
+ def _get_obs_extra(self, info: dict):
  obs = dict(tcp_pose=self.agent.tcp.pose.raw_pose)
  if self.obs_mode_struct.use_state:
  obs.update(
mani_skill/envs/tasks/tabletop/poke_cube.py
@@ -1,4 +1,4 @@
- from typing import Any, Dict, Union
+ from typing import Any, Union

  import numpy as np
  import sapien
@@ -140,7 +140,7 @@ class PokeCubeEnv(BaseEnv):
  goal_region_pose = Pose.create_from_pq(p=goal_region_xyz, q=goal_region_q)
  self.goal_region.set_pose(goal_region_pose)

- def _get_obs_extra(self, info: Dict):
+ def _get_obs_extra(self, info: dict):
  obs = dict(
  tcp_pose=self.agent.tcp.pose.raw_pose,
  )
@@ -190,7 +190,7 @@ class PokeCubeEnv(BaseEnv):
  "head_to_cube_dist": head_to_cube_dist,
  }

- def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
+ def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
  # reach peg
  tcp_pos = self.agent.tcp.pose.p
  tgt_tcp_pose = self.peg.pose
@@ -224,7 +224,7 @@ class PokeCubeEnv(BaseEnv):
  return reward

  def compute_normalized_dense_reward(
- self, obs: Any, action: torch.Tensor, info: Dict
+ self, obs: Any, action: torch.Tensor, info: dict
  ):
  max_reward = 10.0
  return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
mani_skill/envs/tasks/tabletop/pull_cube.py
@@ -1,4 +1,4 @@
- from typing import Any, Dict, Union
+ from typing import Any, Union

  import numpy as np
  import sapien
@@ -43,7 +43,7 @@ class PullCubeEnv(BaseEnv):

  @property
  def _default_sensor_configs(self):
- pose = look_at(eye=[-0.5,0.0,0.25], target=[0.2,0.0,-0.5])
+ pose = look_at(eye=[-0.5, 0.0, 0.25], target=[0.2, 0.0, -0.5])
  return [CameraConfig("base_camera", pose, 128, 128, np.pi / 2, 0.01, 100)]

  @property
@@ -114,7 +114,7 @@ class PullCubeEnv(BaseEnv):
  "success": is_obj_placed,
  }

- def _get_obs_extra(self, info: Dict):
+ def _get_obs_extra(self, info: dict):
  obs = dict(
  tcp_pose=self.agent.tcp.pose.raw_pose,
  goal_pos=self.goal_region.pose.p,
@@ -125,7 +125,7 @@ class PullCubeEnv(BaseEnv):
  )
  return obs

- def compute_dense_reward(self, obs: Any, action: Array, info: Dict):
+ def compute_dense_reward(self, obs: Any, action: Array, info: dict):
  # grippers should close and pull from behind the cube, not grip it
  # distance to backside of cube (+ 2*0.005) sufficiently encourages this
  tcp_pull_pos = self.obj.pose.p + torch.tensor(
@@ -146,6 +146,6 @@ class PullCubeEnv(BaseEnv):
  reward[info["success"]] = 3
  return reward

- def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict):
+ def compute_normalized_dense_reward(self, obs: Any, action: Array, info: dict):
  max_reward = 3.0
  return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
mani_skill/envs/tasks/tabletop/pull_cube_tool.py
@@ -1,4 +1,4 @@
- from typing import Any, Dict, Union
+ from typing import Any, Union

  import numpy as np
  import sapien
@@ -176,7 +176,7 @@ class PullCubeToolEnv(BaseEnv):
  cube_pose = Pose.create_from_pq(p=cube_xyz, q=cube_q)
  self.cube.set_pose(cube_pose)

- def _get_obs_extra(self, info: Dict):
+ def _get_obs_extra(self, info: dict):
  obs = dict(
  tcp_pose=self.agent.tcp.pose.raw_pose,
  )
@@ -217,7 +217,7 @@ class PullCubeToolEnv(BaseEnv):
  ),
  }

- def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
+ def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):

  tcp_pos = self.agent.tcp.pose.p
  cube_pos = self.cube.pose.p
@@ -272,7 +272,7 @@ class PullCubeToolEnv(BaseEnv):
  return reward

  def compute_normalized_dense_reward(
- self, obs: Any, action: torch.Tensor, info: Dict
+ self, obs: Any, action: torch.Tensor, info: dict
  ):
  """
  Normalizes the dense reward by the maximum possible reward (success bonus)