mani-skill-nightly 2025.10.21.2011__py3-none-any.whl → 2025.10.22.157__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mani-skill-nightly might be problematic.
- mani_skill/agents/base_agent.py +20 -14
- mani_skill/agents/base_real_agent.py +6 -6
- mani_skill/agents/controllers/base_controller.py +6 -6
- mani_skill/agents/controllers/pd_joint_pos.py +2 -2
- mani_skill/agents/controllers/utils/kinematics.py +27 -12
- mani_skill/agents/multi_agent.py +5 -5
- mani_skill/agents/registration.py +3 -4
- mani_skill/agents/robots/allegro_hand/allegro.py +1 -2
- mani_skill/agents/robots/allegro_hand/allegro_touch.py +3 -3
- mani_skill/agents/robots/dclaw/dclaw.py +2 -3
- mani_skill/agents/robots/fetch/fetch.py +2 -2
- mani_skill/agents/robots/floating_ability_hand/floating_ability_hand.py +10 -13
- mani_skill/agents/robots/floating_robotiq_2f_85_gripper/floating_robotiq_2f_85_gripper.py +2 -2
- mani_skill/agents/robots/lerobot/manipulator.py +4 -4
- mani_skill/agents/robots/panda/panda_stick.py +2 -2
- mani_skill/agents/robots/trifingerpro/trifingerpro.py +1 -2
- mani_skill/agents/robots/xarm/xarm7_ability.py +2 -2
- mani_skill/agents/utils.py +2 -2
- mani_skill/envs/minimal_template.py +4 -4
- mani_skill/envs/sapien_env.py +36 -33
- mani_skill/envs/scene.py +27 -27
- mani_skill/envs/scenes/base_env.py +3 -3
- mani_skill/envs/sim2real_env.py +10 -10
- mani_skill/envs/tasks/control/ant.py +6 -6
- mani_skill/envs/tasks/control/cartpole.py +4 -4
- mani_skill/envs/tasks/control/hopper.py +7 -7
- mani_skill/envs/tasks/control/humanoid.py +20 -20
- mani_skill/envs/tasks/dexterity/insert_flower.py +41 -23
- mani_skill/envs/tasks/dexterity/rotate_single_object_in_hand.py +6 -6
- mani_skill/envs/tasks/dexterity/rotate_valve.py +5 -5
- mani_skill/envs/tasks/digital_twins/base_env.py +4 -4
- mani_skill/envs/tasks/digital_twins/bridge_dataset_eval/base_env.py +22 -12
- mani_skill/envs/tasks/digital_twins/so100_arm/grasp_cube.py +4 -4
- mani_skill/envs/tasks/drawing/draw.py +1 -3
- mani_skill/envs/tasks/drawing/draw_svg.py +6 -8
- mani_skill/envs/tasks/drawing/draw_triangle.py +1 -2
- mani_skill/envs/tasks/empty_env.py +1 -3
- mani_skill/envs/tasks/fmb/fmb.py +1 -2
- mani_skill/envs/tasks/humanoid/humanoid_pick_place.py +7 -7
- mani_skill/envs/tasks/humanoid/humanoid_stand.py +5 -5
- mani_skill/envs/tasks/humanoid/transport_box.py +4 -4
- mani_skill/envs/tasks/mobile_manipulation/open_cabinet_drawer.py +8 -8
- mani_skill/envs/tasks/mobile_manipulation/robocasa/kitchen.py +2 -3
- mani_skill/envs/tasks/quadruped/quadruped_reach.py +5 -5
- mani_skill/envs/tasks/quadruped/quadruped_spin.py +5 -5
- mani_skill/envs/tasks/rotate_cube.py +4 -4
- mani_skill/envs/tasks/tabletop/assembling_kits.py +2 -2
- mani_skill/envs/tasks/tabletop/lift_peg_upright.py +4 -4
- mani_skill/envs/tasks/tabletop/peg_insertion_side.py +4 -4
- mani_skill/envs/tasks/tabletop/pick_clutter_ycb.py +4 -4
- mani_skill/envs/tasks/tabletop/pick_cube.py +4 -4
- mani_skill/envs/tasks/tabletop/pick_single_ycb.py +5 -5
- mani_skill/envs/tasks/tabletop/place_sphere.py +4 -4
- mani_skill/envs/tasks/tabletop/plug_charger.py +2 -2
- mani_skill/envs/tasks/tabletop/poke_cube.py +4 -4
- mani_skill/envs/tasks/tabletop/pull_cube.py +5 -5
- mani_skill/envs/tasks/tabletop/pull_cube_tool.py +4 -4
- mani_skill/envs/tasks/tabletop/push_cube.py +6 -6
- mani_skill/envs/tasks/tabletop/push_t.py +4 -4
- mani_skill/envs/tasks/tabletop/roll_ball.py +4 -4
- mani_skill/envs/tasks/tabletop/stack_cube.py +4 -4
- mani_skill/envs/tasks/tabletop/stack_pyramid.py +44 -25
- mani_skill/envs/tasks/tabletop/turn_faucet.py +4 -4
- mani_skill/envs/tasks/tabletop/two_robot_pick_cube.py +4 -4
- mani_skill/envs/tasks/tabletop/two_robot_stack_cube.py +4 -4
- mani_skill/envs/template.py +4 -4
- mani_skill/envs/utils/observations/observations.py +2 -3
- mani_skill/envs/utils/randomization/batched_rng.py +7 -7
- mani_skill/envs/utils/randomization/samplers.py +2 -2
- mani_skill/examples/benchmarking/envs/maniskill/franka_move.py +2 -2
- mani_skill/examples/benchmarking/envs/maniskill/franka_pick_cube.py +2 -2
- mani_skill/examples/benchmarking/profiling.py +2 -2
- mani_skill/examples/demo_random_action.py +1 -1
- mani_skill/render/shaders.py +5 -5
- mani_skill/sensors/base_sensor.py +1 -2
- mani_skill/sensors/camera.py +4 -4
- mani_skill/trajectory/replay_trajectory.py +0 -1
- mani_skill/utils/assets/data.py +3 -3
- mani_skill/utils/building/_mjcf_loader.py +11 -11
- mani_skill/utils/building/actor_builder.py +4 -4
- mani_skill/utils/building/articulation_builder.py +3 -3
- mani_skill/utils/building/mjcf_loader.py +6 -6
- mani_skill/utils/building/urdf_loader.py +6 -6
- mani_skill/utils/common.py +2 -2
- mani_skill/utils/geometry/bounding_cylinder.py +4 -4
- mani_skill/utils/geometry/geometry.py +1 -3
- mani_skill/utils/geometry/trimesh_utils.py +1 -3
- mani_skill/utils/gym_utils.py +2 -4
- mani_skill/utils/registration.py +6 -6
- mani_skill/utils/sapien_utils.py +21 -21
- mani_skill/utils/scene_builder/ai2thor/constants.py +1 -2
- mani_skill/utils/scene_builder/ai2thor/scene_builder.py +9 -9
- mani_skill/utils/scene_builder/control/planar/scene_builder.py +2 -4
- mani_skill/utils/scene_builder/kitchen_counter/scene_builder.py +1 -2
- mani_skill/utils/scene_builder/registration.py +1 -2
- mani_skill/utils/scene_builder/replicacad/rearrange/scene_builder.py +16 -16
- mani_skill/utils/scene_builder/replicacad/scene_builder.py +15 -15
- mani_skill/utils/scene_builder/robocasa/fixtures/windows.py +2 -4
- mani_skill/utils/scene_builder/robocasa/scene_builder.py +5 -5
- mani_skill/utils/scene_builder/scene_builder.py +15 -15
- mani_skill/utils/scene_builder/table/scene_builder.py +1 -2
- mani_skill/utils/structs/actor.py +6 -6
- mani_skill/utils/structs/articulation.py +32 -30
- mani_skill/utils/structs/articulation_joint.py +6 -6
- mani_skill/utils/structs/base.py +14 -9
- mani_skill/utils/structs/drive.py +2 -2
- mani_skill/utils/structs/link.py +10 -8
- mani_skill/utils/structs/pose.py +3 -3
- mani_skill/utils/structs/render_camera.py +4 -4
- mani_skill/utils/structs/types.py +3 -1
- mani_skill/utils/visualization/jupyter_utils.py +1 -3
- mani_skill/utils/visualization/misc.py +5 -5
- mani_skill/utils/wrappers/cached_reset.py +5 -3
- mani_skill/utils/wrappers/flatten.py +1 -2
- mani_skill/utils/wrappers/record.py +10 -8
- mani_skill/utils/wrappers/visual_encoders.py +2 -2
- mani_skill/vector/wrappers/gymnasium.py +23 -13
- mani_skill/vector/wrappers/sb3.py +5 -5
- {mani_skill_nightly-2025.10.21.2011.dist-info → mani_skill_nightly-2025.10.22.157.dist-info}/METADATA +1 -1
- {mani_skill_nightly-2025.10.21.2011.dist-info → mani_skill_nightly-2025.10.22.157.dist-info}/RECORD +124 -124
- {mani_skill_nightly-2025.10.21.2011.dist-info → mani_skill_nightly-2025.10.22.157.dist-info}/WHEEL +0 -0
- {mani_skill_nightly-2025.10.21.2011.dist-info → mani_skill_nightly-2025.10.22.157.dist-info}/licenses/LICENSE +0 -0
- {mani_skill_nightly-2025.10.21.2011.dist-info → mani_skill_nightly-2025.10.22.157.dist-info}/licenses/LICENSE-3RD-PARTY +0 -0
- {mani_skill_nightly-2025.10.21.2011.dist-info → mani_skill_nightly-2025.10.22.157.dist-info}/top_level.txt +0 -0
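The bulk of this release is a type-annotation cleanup: `typing.Dict` and `typing.List` annotations are swapped for the builtin generics `dict` and `list` (PEP 585), and the now-unused `typing` imports are trimmed. A minimal before/after sketch of the pattern, using a hypothetical task class rather than any file from the package:

```python
from typing import Any

import torch


class ExampleTaskEnv:
    """Hypothetical stand-in for a ManiSkill BaseEnv subclass; illustration only."""

    # 2025.10.21.2011 style:  def _get_obs_extra(self, info: Dict):  (needs `from typing import Dict`)
    # 2025.10.22.157 style:   builtin generic, no typing import required
    def _get_obs_extra(self, info: dict) -> dict:
        return dict(is_grasped=info.get("is_grasped", False))

    # container annotations change the same way, e.g. List[List[Actor]] -> list[list[Actor]]
    def compute_normalized_dense_reward(
        self, obs: Any, action: torch.Tensor, info: dict
    ) -> torch.Tensor:
        # the real tasks divide their staged dense reward by a fixed maximum; zeros here as a placeholder
        return torch.zeros(action.shape[0])
```

The per-file hunks below all follow this template.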
mani_skill/envs/tasks/drawing/draw_triangle.py
CHANGED
@@ -1,5 +1,4 @@
 import math
-from typing import Dict

 import numpy as np
 import sapien
@@ -309,7 +308,7 @@ class DrawTriangleEnv(BaseEnv):
         out = self.success_check()
         return {"success": out}

-    def _get_obs_extra(self, info: Dict):
+    def _get_obs_extra(self, info: dict):
         obs = dict(
             tcp_pose=self.agent.tcp.pose.raw_pose,
         )
mani_skill/envs/tasks/empty_env.py
CHANGED
@@ -1,5 +1,3 @@
-from typing import Dict
-
 import numpy as np
 import sapien
 import torch
@@ -47,5 +45,5 @@ class EmptyEnv(BaseEnv):
     def evaluate(self):
         return {}

-    def _get_obs_extra(self, info: Dict):
+    def _get_obs_extra(self, info: dict):
         return dict()
mani_skill/envs/tasks/fmb/fmb.py
CHANGED
@@ -1,5 +1,4 @@
 import os.path as osp
-from typing import Dict

 import numpy as np
 import sapien
@@ -178,7 +177,7 @@ class FMBAssembly1Env(BaseEnv):
         )
         return {"success": bridge_placed}

-    def _get_obs_extra(self, info: Dict):
+    def _get_obs_extra(self, info: dict):
         obs = dict(tcp_pose=self.agent.tcp.pose.raw_pose)
         if self.obs_mode_struct.use_state:
             obs.update(
mani_skill/envs/tasks/humanoid/humanoid_pick_place.py
CHANGED
@@ -1,6 +1,6 @@
 import copy
 import os
-from typing import Any, Dict
+from typing import Any

 import numpy as np
 import sapien
@@ -65,7 +65,7 @@ class HumanoidPickPlaceEnv(BaseEnv):
             "fail": torch.zeros(self.num_envs, device=self.device, dtype=bool),
         }

-    def _get_obs_extra(self, info: Dict):
+    def _get_obs_extra(self, info: dict):
         return dict()


@@ -100,7 +100,7 @@ class HumanoidPlaceAppleInBowl(HumanoidPickPlaceEnv):
             100,
         )

-    def _load_scene(self, options: Dict):
+    def _load_scene(self, options: dict):
         super()._load_scene(options)
         scale = self.kitchen_scene_scale
         builder = self.scene.create_actor_builder()
@@ -149,7 +149,7 @@ class HumanoidPlaceAppleInBowl(HumanoidPickPlaceEnv):
             "is_grasped": is_grasped,
         }

-    def _get_obs_extra(self, info: Dict):
+    def _get_obs_extra(self, info: dict):
         # in reality some people hack is_grasped into observations by checking if the gripper can close fully or not
         obs = dict(
             is_grasped=info["is_grasped"],
@@ -168,7 +168,7 @@ class HumanoidPlaceAppleInBowl(HumanoidPickPlaceEnv):
         """a dense reward that rewards the agent for opening their hand"""
         return 1 - torch.tanh(self.agent.right_hand_dist_to_open_grasp())

-    def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
+    def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
         tcp_to_obj_dist = torch.linalg.norm(
             self.apple.pose.p - self.agent.right_tcp.pose.p, axis=1
         )
@@ -201,7 +201,7 @@ class HumanoidPlaceAppleInBowl(HumanoidPickPlaceEnv):
         return reward

     def compute_normalized_dense_reward(
-        self, obs: Any, action: torch.Tensor, info: Dict
+        self, obs: Any, action: torch.Tensor, info: dict
     ):
         return self.compute_dense_reward(obs=obs, action=action, info=info) / 10

@@ -254,7 +254,7 @@ class UnitreeG1PlaceAppleInBowlEnv(HumanoidPlaceAppleInBowl):
             scene_config=SceneConfig(contact_offset=0.01),
         )

-    def _initialize_episode(self, env_idx: torch.Tensor, options: Dict):
+    def _initialize_episode(self, env_idx: torch.Tensor, options: dict):
         super()._initialize_episode(env_idx, options)
         with torch.device(self.device):
             b = len(env_idx)
mani_skill/envs/tasks/humanoid/humanoid_stand.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Union
+from typing import Any, Union

 import numpy as np
 import sapien
@@ -46,17 +46,17 @@ class HumanoidStandEnv(BaseEnv):
             self.agent.is_fallen()
         return {"is_standing": is_standing, "fail": ~is_standing}

-    def _get_obs_extra(self, info: Dict):
+    def _get_obs_extra(self, info: dict):
         return dict()

-    def compute_sparse_reward(self, obs: Any, action: torch.Tensor, info: Dict):
+    def compute_sparse_reward(self, obs: Any, action: torch.Tensor, info: dict):
         return info["is_standing"]

-    # def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
+    # def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
     #     return torch.zeros(self.num_envs, device=self.device)

     # def compute_normalized_dense_reward(
-    #     self, obs: Any, action: torch.Tensor, info: Dict
+    #     self, obs: Any, action: torch.Tensor, info: dict
     # ):
     #     max_reward = 1.0
     #     return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
mani_skill/envs/tasks/humanoid/transport_box.py
CHANGED
@@ -1,7 +1,7 @@
 import copy
 import os
 from pathlib import Path
-from typing import Any, Dict
+from typing import Any

 import numpy as np
 import sapien
@@ -226,7 +226,7 @@ class TransportBoxEnv(BaseEnv):
             "facing_table_with_box": facing_table_with_box,
         }

-    def _get_obs_extra(self, info: Dict):
+    def _get_obs_extra(self, info: dict):
         obs = dict(
             right_tcp_pose=self.agent.right_tcp.pose.raw_pose,
             left_tcp_pose=self.agent.left_tcp.pose.raw_pose,
@@ -252,7 +252,7 @@ class TransportBoxEnv(BaseEnv):
             torch.tensor([0.165, 0.07, 0.05], device=self.device)
         )

-    def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
+    def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
         # Stage 1, move to face the box on the table. Succeeds if facing_table_with_box
         reward = 1 - torch.tanh((self.agent.robot.qpos[:, 0] + 1.4).abs())

@@ -306,6 +306,6 @@ class TransportBoxEnv(BaseEnv):
         return reward

     def compute_normalized_dense_reward(
-        self, obs: Any, action: torch.Tensor, info: Dict
+        self, obs: Any, action: torch.Tensor, info: dict
     ):
         return self.compute_dense_reward(obs, action, info) / 5
mani_skill/envs/tasks/mobile_manipulation/open_cabinet_drawer.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Optional, Union

 import numpy as np
 import sapien
@@ -125,15 +125,15 @@ class OpenCabinetDrawerEnv(BaseEnv):
             group=2, bit_idx=CABINET_COLLISION_BIT, bit=1
         )

-    def _load_cabinets(self, joint_types: List[str]):
+    def _load_cabinets(self, joint_types: list[str]):
         # we sample random cabinet model_ids with numpy as numpy is always deterministic based on seed, regardless of
         # GPU/CPU simulation backends. This is useful for replaying demonstrations.
         model_ids = self._batched_episode_rng.choice(self.all_model_ids)
         link_ids = self._batched_episode_rng.randint(0, 2**31)

-        self._cabinets: List[Articulation] = []
-        handle_links: List[List[Link]] = []
-        handle_links_meshes: List[List[trimesh.Trimesh]] = []
+        self._cabinets: list[Articulation] = []
+        handle_links: list[list[Link]] = []
+        handle_links_meshes: list[list[trimesh.Trimesh]] = []
         for i, model_id in enumerate(model_ids):
             # partnet-mobility is a dataset source and the ids are the ones we sampled
             # we provide tools to easily create the articulation builder like so by querying
@@ -320,7 +320,7 @@ class OpenCabinetDrawerEnv(BaseEnv):
             "open_enough": open_enough,
         }

-    def _get_obs_extra(self, info: Dict):
+    def _get_obs_extra(self, info: dict):
         obs = dict(
             tcp_pose=self.agent.tcp.pose.raw_pose,
         )
@@ -333,7 +333,7 @@ class OpenCabinetDrawerEnv(BaseEnv):
         )
         return obs

-    def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
+    def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
         tcp_to_handle_dist = torch.linalg.norm(
             self.agent.tcp.pose.p - info["handle_link_pos"], axis=1
         )
@@ -352,7 +352,7 @@ class OpenCabinetDrawerEnv(BaseEnv):
         return reward

     def compute_normalized_dense_reward(
-        self, obs: Any, action: torch.Tensor, info: Dict
+        self, obs: Any, action: torch.Tensor, info: dict
     ):
         max_reward = 5.0
         return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
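The `_load_cabinets` hunk above keeps a comment worth underlining: cabinet model ids are drawn with a seeded NumPy RNG so that episodes replay identically on CPU and GPU simulation backends. A rough sketch of that idea, with a hypothetical helper rather than the package's `_batched_episode_rng`:

```python
import numpy as np


def sample_model_ids(seed: int, all_model_ids: list[str], num_envs: int) -> list[str]:
    # one generator per sub-scene, seeded by (seed, env index), so the draw for
    # environment i never depends on how many environments run in parallel
    rngs = [np.random.default_rng([seed, i]) for i in range(num_envs)]
    return [str(rng.choice(all_model_ids)) for rng in rngs]


# same seed -> same cabinets, regardless of simulation backend or batch layout
assert sample_model_ids(0, ["cab_a", "cab_b", "cab_c"], 4) == sample_model_ids(
    0, ["cab_a", "cab_b", "cab_c"], 4
)
```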
mani_skill/envs/tasks/mobile_manipulation/robocasa/kitchen.py
CHANGED
@@ -1,5 +1,4 @@
 from copy import deepcopy
-from typing import Dict

 import numpy as np
 import sapien
@@ -55,7 +54,7 @@ class RoboCasaKitchenEnv(BaseEnv):
             overrides the default gripper. Should either be single str if same gripper type is to be used for all
             robots or else it should be a list of the same length as "robots" param

-        initialization_noise (dict or list of dict): Dict containing the initialization noise parameters.
+        initialization_noise (dict or list of dict): dict containing the initialization noise parameters.
             The expected keys and corresponding value types are specified below:

             :`'magnitude'`: The scale factor of uni-variate random noise applied to each of a robot's given initial
@@ -456,7 +455,7 @@ class RoboCasaKitchenEnv(BaseEnv):
     def evaluate(self):
         return {}

-    def _get_obs_extra(self, info: Dict):
+    def _get_obs_extra(self, info: dict):
         return dict()

     """
mani_skill/envs/tasks/quadruped/quadruped_reach.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Any, Dict, List
+from typing import Any

 import numpy as np
 import sapien
@@ -21,7 +21,7 @@ class QuadrupedReachEnv(BaseEnv):
     agent: ANYmalC
     default_qpos: torch.Tensor

-    _UNDESIRED_CONTACT_LINK_NAMES: List[str] = None
+    _UNDESIRED_CONTACT_LINK_NAMES: list[str] = None

     def __init__(self, *args, robot_uids="anymal-c", **kwargs):
         super().__init__(*args, robot_uids=robot_uids, **kwargs)
@@ -110,7 +110,7 @@ class QuadrupedReachEnv(BaseEnv):
             "is_fallen": is_fallen,
         }

-    def _get_obs_extra(self, info: Dict):
+    def _get_obs_extra(self, info: dict):
         obs = dict(
             root_linear_velocity=self.agent.robot.root_linear_velocity,
             root_angular_velocity=self.agent.robot.root_angular_velocity,
@@ -130,7 +130,7 @@ class QuadrupedReachEnv(BaseEnv):
         contact_exists = torch.norm(forces, dim=-1).max(-1).values > threshold
         return contact_exists

-    def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
+    def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
         robot_to_goal_dist = info["robot_to_goal_dist"]
         reaching_reward = 1 - torch.tanh(1 * robot_to_goal_dist)

@@ -151,7 +151,7 @@ class QuadrupedReachEnv(BaseEnv):
         return reward

     def compute_normalized_dense_reward(
-        self, obs: Any, action: torch.Tensor, info: Dict
+        self, obs: Any, action: torch.Tensor, info: dict
     ):
         max_reward = 3.0
         return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
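The reward hunks in QuadrupedReachEnv above (and most of the tabletop tasks below) share one shape: distances are squashed with `1 - tanh(k * dist)` so each stage contributes at most 1, and `compute_normalized_dense_reward` divides the staged sum by a fixed `max_reward`. A standalone check of that shaping with illustrative values:

```python
import torch

dist = torch.tensor([0.0, 0.1, 0.5, 2.0])
reaching_reward = 1 - torch.tanh(1 * dist)  # 1 at the goal, decaying toward 0
print(reaching_reward)  # tensor([1.0000, 0.9003, 0.5379, 0.0360])

# the tasks then sum several such stage rewards and divide by the stage count
# (max_reward = 3.0 in QuadrupedReachEnv), keeping the normalized reward bounded
max_reward = 3.0
normalized = reaching_reward / max_reward
```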
mani_skill/envs/tasks/quadruped/quadruped_spin.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Any, Dict, List
+from typing import Any

 import numpy as np
 import sapien
@@ -21,7 +21,7 @@ class QuadrupedSpinEnv(BaseEnv):
     agent: ANYmalC
     default_qpos: torch.Tensor

-    _UNDESIRED_CONTACT_LINK_NAMES: List[str] = None
+    _UNDESIRED_CONTACT_LINK_NAMES: list[str] = None

     def __init__(self, *args, robot_uids="anymal-c", **kwargs):
         super().__init__(*args, robot_uids=robot_uids, **kwargs)
@@ -86,7 +86,7 @@ class QuadrupedSpinEnv(BaseEnv):
             "is_fallen": is_fallen,
         }

-    def _get_obs_extra(self, info: Dict):
+    def _get_obs_extra(self, info: dict):
         obs = dict(
             root_linear_velocity=self.agent.robot.root_linear_velocity,
             root_angular_velocity=self.agent.robot.root_angular_velocity,
@@ -100,7 +100,7 @@ class QuadrupedSpinEnv(BaseEnv):
         contact_exists = torch.norm(forces, dim=-1).max(-1).values > threshold
         return contact_exists

-    def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
+    def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
         rotation_reward = self.agent.robot.root_angular_velocity[:, 2]
         # various penalties:
         lin_vel_z_l2 = torch.square(self.agent.robot.root_linear_velocity[:, 2])
@@ -119,7 +119,7 @@ class QuadrupedSpinEnv(BaseEnv):
         return reward

     def compute_normalized_dense_reward(
-        self, obs: Any, action: torch.Tensor, info: Dict
+        self, obs: Any, action: torch.Tensor, info: dict
     ):
         max_reward = 2.0
         return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
mani_skill/envs/tasks/rotate_cube.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Tuple
+from typing import Any, Tuple

 import numpy as np
 import torch
@@ -250,7 +250,7 @@ class RotateCubeEnv(BaseEnv):
             )
         )

-    def _get_obs_extra(self, info: Dict):
+    def _get_obs_extra(self, info: dict):
         obs = dict(
             goal_pos=self.obj_goal.pose.p,
             goal_q=self.obj_goal.pose.q,
@@ -262,7 +262,7 @@ class RotateCubeEnv(BaseEnv):
         )
         return obs

-    def compute_dense_reward(self, obs: Any, action: Array, info: Dict):
+    def compute_dense_reward(self, obs: Any, action: Array, info: dict):
         obj_pos = self.obj.pose.p
         obj_q = self.obj.pose.q
         goal_pos = self.obj_goal.pose.p
@@ -341,7 +341,7 @@ class RotateCubeEnv(BaseEnv):
         total_reward[info["success"]] = 15
         return total_reward

-    def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict):
+    def compute_normalized_dense_reward(self, obs: Any, action: Array, info: dict):
         self.max_reward = 15
         dense_reward = self.compute_dense_reward(obs=obs, action=action, info=info)
         norm_dense_reward = dense_reward / (2 * self.max_reward) + 0.5
mani_skill/envs/tasks/tabletop/assembling_kits.py
CHANGED
@@ -1,5 +1,5 @@
 from pathlib import Path
-from typing import Dict, Union
+from typing import Union

 import numpy as np
 import sapien.core as sapien
@@ -278,7 +278,7 @@ class AssemblingKitsEnv(BaseEnv):
             "success": pos_correct & rot_correct & in_slot,
         }

-    def _get_obs_extra(self, info: Dict):
+    def _get_obs_extra(self, info: dict):
         obs = dict(
             tcp_pose=self.agent.tcp.pose.raw_pose,
         )
mani_skill/envs/tasks/tabletop/lift_peg_upright.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Union
+from typing import Any, Union

 import numpy as np
 import sapien
@@ -98,7 +98,7 @@ class LiftPegUprightEnv(BaseEnv):
             "success": is_peg_upright & close_to_table,
         }

-    def _get_obs_extra(self, info: Dict):
+    def _get_obs_extra(self, info: dict):
         obs = dict(
             tcp_pose=self.agent.tcp.pose.raw_pose,
         )
@@ -108,7 +108,7 @@ class LiftPegUprightEnv(BaseEnv):
             )
         return obs

-    def compute_dense_reward(self, obs: Any, action: Array, info: Dict):
+    def compute_dense_reward(self, obs: Any, action: Array, info: dict):
         # rotation reward as cosine similarity between peg direction vectors
         # peg center of mass to end of peg, (1,0,0), rotated by peg pose rotation
         # dot product with its goal orientation: (0,0,1) or (0,0,-1)
@@ -139,6 +139,6 @@ class LiftPegUprightEnv(BaseEnv):
         reward[info["success"]] = 3
         return reward

-    def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict):
+    def compute_normalized_dense_reward(self, obs: Any, action: Array, info: dict):
         max_reward = 3.0
         return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
mani_skill/envs/tasks/tabletop/peg_insertion_side.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Union
+from typing import Any, Union

 import numpy as np
 import sapien
@@ -286,7 +286,7 @@ class PegInsertionSideEnv(BaseEnv):
         success, peg_head_pos_at_hole = self.has_peg_inserted()
         return dict(success=success, peg_head_pos_at_hole=peg_head_pos_at_hole)

-    def _get_obs_extra(self, info: Dict):
+    def _get_obs_extra(self, info: dict):
         obs = dict(tcp_pose=self.agent.tcp.pose.raw_pose)
         if self.obs_mode_struct.use_state:
             obs.update(
@@ -297,7 +297,7 @@ class PegInsertionSideEnv(BaseEnv):
             )
         return obs

-    def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
+    def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
         # Stage 1: Encourage gripper to be rotated to be lined up with the peg

         # Stage 2: Encourage gripper to move close to peg tail and grasp it
@@ -355,6 +355,6 @@ class PegInsertionSideEnv(BaseEnv):
         return reward

     def compute_normalized_dense_reward(
-        self, obs: Any, action: torch.Tensor, info: Dict
+        self, obs: Any, action: torch.Tensor, info: dict
     ):
         return self.compute_dense_reward(obs, action, info) / 10
mani_skill/envs/tasks/tabletop/pick_clutter_ycb.py
CHANGED
@@ -1,5 +1,5 @@
 import os
-from typing import Dict, List, Union
+from typing import Union

 import numpy as np
 import sapien
@@ -51,7 +51,7 @@ class PickClutterEnv(BaseEnv):
                 "To download default json:"
                 "`python -m mani_skill.utils.download_asset pick_clutter_ycb`."
             )
-        self._episodes: List[Dict] = load_json(episode_json)
+        self._episodes: list[dict] = load_json(episode_json)
         if reconfiguration_freq is None:
             if num_envs == 1:
                 reconfiguration_freq = 1
@@ -110,7 +110,7 @@ class PickClutterEnv(BaseEnv):
         # sample some clutter configurations
         eps_idxs = self._batched_episode_rng.randint(0, len(self._episodes))

-        self.selectable_target_objects: List[List[Actor]] = []
+        self.selectable_target_objects: list[list[Actor]] = []
         """for each sub-scene, a list of objects that can be selected as targets"""
         all_objects = []

@@ -182,7 +182,7 @@ class PickClutterEnv(BaseEnv):
             "fail": torch.zeros(self.num_envs, device=self.device, dtype=bool),
         }

-    def _get_obs_extra(self, info: Dict):
+    def _get_obs_extra(self, info: dict):

         return dict()

mani_skill/envs/tasks/tabletop/pick_cube.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Union
+from typing import Any, Union

 import numpy as np
 import sapien
@@ -129,7 +129,7 @@ class PickCubeEnv(BaseEnv):
             goal_xyz[:, 2] = torch.rand((b)) * self.max_goal_height + xyz[:, 2]
             self.goal_site.set_pose(Pose.create_from_pq(goal_xyz))

-    def _get_obs_extra(self, info: Dict):
+    def _get_obs_extra(self, info: dict):
         # in reality some people hack is_grasped into observations by checking if the gripper can close fully or not
         obs = dict(
             is_grasped=info["is_grasped"],
@@ -158,7 +158,7 @@ class PickCubeEnv(BaseEnv):
             "is_grasped": is_grasped,
         }

-    def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
+    def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
         tcp_to_obj_dist = torch.linalg.norm(
             self.cube.pose.p - self.agent.tcp_pose.p, axis=1
         )
@@ -186,7 +186,7 @@ class PickCubeEnv(BaseEnv):
         return reward

     def compute_normalized_dense_reward(
-        self, obs: Any, action: torch.Tensor, info: Dict
+        self, obs: Any, action: torch.Tensor, info: dict
     ):
         return self.compute_dense_reward(obs=obs, action=action, info=info) / 5

mani_skill/envs/tasks/tabletop/pick_single_ycb.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Union
+from typing import Any, Union

 import numpy as np
 import sapien
@@ -134,7 +134,7 @@ class PickSingleYCBEnv(BaseEnv):
                 or set reconfiguration_freq to be >= 1."""
             )

-        self._objs: List[Actor] = []
+        self._objs: list[Actor] = []
         self.obj_heights = []
         for i, model_id in enumerate(model_ids):
             # TODO: before official release we will finalize a metadata dataclass that these build functions should return.
@@ -212,7 +212,7 @@ class PickSingleYCBEnv(BaseEnv):
             success=torch.logical_and(is_obj_placed, is_robot_static),
         )

-    def _get_obs_extra(self, info: Dict):
+    def _get_obs_extra(self, info: dict):
         obs = dict(
             tcp_pose=self.agent.tcp.pose.raw_pose,
             goal_pos=self.goal_site.pose.p,
@@ -227,7 +227,7 @@ class PickSingleYCBEnv(BaseEnv):
         )
         return obs

-    def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
+    def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
         tcp_to_obj_dist = torch.linalg.norm(
             self.obj.pose.p - self.agent.tcp.pose.p, axis=1
         )
@@ -254,6 +254,6 @@ class PickSingleYCBEnv(BaseEnv):
         return reward

     def compute_normalized_dense_reward(
-        self, obs: Any, action: torch.Tensor, info: Dict
+        self, obs: Any, action: torch.Tensor, info: dict
     ):
         return self.compute_dense_reward(obs=obs, action=action, info=info) / 6
mani_skill/envs/tasks/tabletop/place_sphere.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Union
+from typing import Any, Union

 import gymnasium as gym
 import matplotlib.pyplot as plt
@@ -200,7 +200,7 @@ class PlaceSphereEnv(BaseEnv):
             "success": success,
         }

-    def _get_obs_extra(self, info: Dict):
+    def _get_obs_extra(self, info: dict):
         obs = dict(
             is_grasped=info["is_obj_grasped"],
             tcp_pose=self.agent.tcp.pose.raw_pose,
@@ -213,7 +213,7 @@ class PlaceSphereEnv(BaseEnv):
         )
         return obs

-    def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
+    def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
         # reaching reward
         tcp_pose = self.agent.tcp.pose.p
         obj_pos = self.obj.pose.p
@@ -252,7 +252,7 @@ class PlaceSphereEnv(BaseEnv):
         reward[info["success"]] = 13
         return reward

-    def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict):
+    def compute_normalized_dense_reward(self, obs: Any, action: Array, info: dict):
         # this should be equal to compute_dense_reward / max possible reward
         max_reward = 13.0
         return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
mani_skill/envs/tasks/tabletop/plug_charger.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Dict, Union
+from typing import Union

 import numpy as np
 import sapien
@@ -271,7 +271,7 @@ class PlugChargerEnv(BaseEnv):
             success=success,
         )

-    def _get_obs_extra(self, info: Dict):
+    def _get_obs_extra(self, info: dict):
         obs = dict(tcp_pose=self.agent.tcp.pose.raw_pose)
         if self.obs_mode_struct.use_state:
             obs.update(
mani_skill/envs/tasks/tabletop/poke_cube.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Union
+from typing import Any, Union

 import numpy as np
 import sapien
@@ -140,7 +140,7 @@ class PokeCubeEnv(BaseEnv):
             goal_region_pose = Pose.create_from_pq(p=goal_region_xyz, q=goal_region_q)
             self.goal_region.set_pose(goal_region_pose)

-    def _get_obs_extra(self, info: Dict):
+    def _get_obs_extra(self, info: dict):
         obs = dict(
             tcp_pose=self.agent.tcp.pose.raw_pose,
         )
@@ -190,7 +190,7 @@ class PokeCubeEnv(BaseEnv):
             "head_to_cube_dist": head_to_cube_dist,
         }

-    def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
+    def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
         # reach peg
         tcp_pos = self.agent.tcp.pose.p
         tgt_tcp_pose = self.peg.pose
@@ -224,7 +224,7 @@ class PokeCubeEnv(BaseEnv):
         return reward

     def compute_normalized_dense_reward(
-        self, obs: Any, action: torch.Tensor, info: Dict
+        self, obs: Any, action: torch.Tensor, info: dict
     ):
         max_reward = 10.0
         return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
mani_skill/envs/tasks/tabletop/pull_cube.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Union
+from typing import Any, Union

 import numpy as np
 import sapien
@@ -43,7 +43,7 @@ class PullCubeEnv(BaseEnv):

     @property
     def _default_sensor_configs(self):
-        pose = look_at(eye=[-0.5,0.0,0.25], target=[0.2,0.0
+        pose = look_at(eye=[-0.5, 0.0, 0.25], target=[0.2, 0.0, -0.5])
         return [CameraConfig("base_camera", pose, 128, 128, np.pi / 2, 0.01, 100)]

     @property
@@ -114,7 +114,7 @@ class PullCubeEnv(BaseEnv):
             "success": is_obj_placed,
         }

-    def _get_obs_extra(self, info: Dict):
+    def _get_obs_extra(self, info: dict):
         obs = dict(
             tcp_pose=self.agent.tcp.pose.raw_pose,
             goal_pos=self.goal_region.pose.p,
@@ -125,7 +125,7 @@ class PullCubeEnv(BaseEnv):
         )
         return obs

-    def compute_dense_reward(self, obs: Any, action: Array, info: Dict):
+    def compute_dense_reward(self, obs: Any, action: Array, info: dict):
         # grippers should close and pull from behind the cube, not grip it
         # distance to backside of cube (+ 2*0.005) sufficiently encourages this
         tcp_pull_pos = self.obj.pose.p + torch.tensor(
@@ -146,6 +146,6 @@ class PullCubeEnv(BaseEnv):
         reward[info["success"]] = 3
         return reward

-    def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict):
+    def compute_normalized_dense_reward(self, obs: Any, action: Array, info: dict):
         max_reward = 3.0
         return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
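The sensor-config hunk in PullCubeEnv above builds its camera pose with `look_at(eye=..., target=...)`. A look-at pose is just a position plus a rotation whose forward axis points from the eye toward the target; a generic NumPy sketch of that construction (not ManiSkill's own `look_at` implementation) is:

```python
import numpy as np


def look_at_rotation(eye, target, up=(0.0, 0.0, 1.0)) -> np.ndarray:
    """3x3 rotation whose x-axis points from eye toward target (z-up, x-forward convention)."""
    eye, target, up = (np.asarray(v, dtype=float) for v in (eye, target, up))
    forward = target - eye
    forward /= np.linalg.norm(forward)
    left = np.cross(up, forward)
    left /= np.linalg.norm(left)
    true_up = np.cross(forward, left)
    return np.stack([forward, left, true_up], axis=1)  # columns are the camera frame axes


# the eye/target values from the updated PullCubeEnv sensor config
R = look_at_rotation(eye=[-0.5, 0.0, 0.25], target=[0.2, 0.0, -0.5])
```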