mani-skill-nightly 2025.10.21.2011__py3-none-any.whl → 2025.10.22.157__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mani_skill/agents/base_agent.py +20 -14
- mani_skill/agents/base_real_agent.py +6 -6
- mani_skill/agents/controllers/base_controller.py +6 -6
- mani_skill/agents/controllers/pd_joint_pos.py +2 -2
- mani_skill/agents/controllers/utils/kinematics.py +27 -12
- mani_skill/agents/multi_agent.py +5 -5
- mani_skill/agents/registration.py +3 -4
- mani_skill/agents/robots/allegro_hand/allegro.py +1 -2
- mani_skill/agents/robots/allegro_hand/allegro_touch.py +3 -3
- mani_skill/agents/robots/dclaw/dclaw.py +2 -3
- mani_skill/agents/robots/fetch/fetch.py +2 -2
- mani_skill/agents/robots/floating_ability_hand/floating_ability_hand.py +10 -13
- mani_skill/agents/robots/floating_robotiq_2f_85_gripper/floating_robotiq_2f_85_gripper.py +2 -2
- mani_skill/agents/robots/lerobot/manipulator.py +4 -4
- mani_skill/agents/robots/panda/panda_stick.py +2 -2
- mani_skill/agents/robots/trifingerpro/trifingerpro.py +1 -2
- mani_skill/agents/robots/xarm/xarm7_ability.py +2 -2
- mani_skill/agents/utils.py +2 -2
- mani_skill/envs/minimal_template.py +4 -4
- mani_skill/envs/sapien_env.py +36 -33
- mani_skill/envs/scene.py +27 -27
- mani_skill/envs/scenes/base_env.py +3 -3
- mani_skill/envs/sim2real_env.py +10 -10
- mani_skill/envs/tasks/control/ant.py +6 -6
- mani_skill/envs/tasks/control/cartpole.py +4 -4
- mani_skill/envs/tasks/control/hopper.py +7 -7
- mani_skill/envs/tasks/control/humanoid.py +20 -20
- mani_skill/envs/tasks/dexterity/insert_flower.py +41 -23
- mani_skill/envs/tasks/dexterity/rotate_single_object_in_hand.py +6 -6
- mani_skill/envs/tasks/dexterity/rotate_valve.py +5 -5
- mani_skill/envs/tasks/digital_twins/base_env.py +4 -4
- mani_skill/envs/tasks/digital_twins/bridge_dataset_eval/base_env.py +22 -12
- mani_skill/envs/tasks/digital_twins/so100_arm/grasp_cube.py +4 -4
- mani_skill/envs/tasks/drawing/draw.py +1 -3
- mani_skill/envs/tasks/drawing/draw_svg.py +6 -8
- mani_skill/envs/tasks/drawing/draw_triangle.py +1 -2
- mani_skill/envs/tasks/empty_env.py +1 -3
- mani_skill/envs/tasks/fmb/fmb.py +1 -2
- mani_skill/envs/tasks/humanoid/humanoid_pick_place.py +7 -7
- mani_skill/envs/tasks/humanoid/humanoid_stand.py +5 -5
- mani_skill/envs/tasks/humanoid/transport_box.py +4 -4
- mani_skill/envs/tasks/mobile_manipulation/open_cabinet_drawer.py +8 -8
- mani_skill/envs/tasks/mobile_manipulation/robocasa/kitchen.py +2 -3
- mani_skill/envs/tasks/quadruped/quadruped_reach.py +5 -5
- mani_skill/envs/tasks/quadruped/quadruped_spin.py +5 -5
- mani_skill/envs/tasks/rotate_cube.py +4 -4
- mani_skill/envs/tasks/tabletop/assembling_kits.py +2 -2
- mani_skill/envs/tasks/tabletop/lift_peg_upright.py +4 -4
- mani_skill/envs/tasks/tabletop/peg_insertion_side.py +4 -4
- mani_skill/envs/tasks/tabletop/pick_clutter_ycb.py +4 -4
- mani_skill/envs/tasks/tabletop/pick_cube.py +4 -4
- mani_skill/envs/tasks/tabletop/pick_single_ycb.py +5 -5
- mani_skill/envs/tasks/tabletop/place_sphere.py +4 -4
- mani_skill/envs/tasks/tabletop/plug_charger.py +2 -2
- mani_skill/envs/tasks/tabletop/poke_cube.py +4 -4
- mani_skill/envs/tasks/tabletop/pull_cube.py +5 -5
- mani_skill/envs/tasks/tabletop/pull_cube_tool.py +4 -4
- mani_skill/envs/tasks/tabletop/push_cube.py +6 -6
- mani_skill/envs/tasks/tabletop/push_t.py +4 -4
- mani_skill/envs/tasks/tabletop/roll_ball.py +4 -4
- mani_skill/envs/tasks/tabletop/stack_cube.py +4 -4
- mani_skill/envs/tasks/tabletop/stack_pyramid.py +44 -25
- mani_skill/envs/tasks/tabletop/turn_faucet.py +4 -4
- mani_skill/envs/tasks/tabletop/two_robot_pick_cube.py +4 -4
- mani_skill/envs/tasks/tabletop/two_robot_stack_cube.py +4 -4
- mani_skill/envs/template.py +4 -4
- mani_skill/envs/utils/observations/observations.py +2 -3
- mani_skill/envs/utils/randomization/batched_rng.py +7 -7
- mani_skill/envs/utils/randomization/samplers.py +2 -2
- mani_skill/examples/benchmarking/envs/maniskill/franka_move.py +2 -2
- mani_skill/examples/benchmarking/envs/maniskill/franka_pick_cube.py +2 -2
- mani_skill/examples/benchmarking/profiling.py +2 -2
- mani_skill/examples/demo_random_action.py +1 -1
- mani_skill/render/shaders.py +5 -5
- mani_skill/sensors/base_sensor.py +1 -2
- mani_skill/sensors/camera.py +4 -4
- mani_skill/trajectory/replay_trajectory.py +0 -1
- mani_skill/utils/assets/data.py +3 -3
- mani_skill/utils/building/_mjcf_loader.py +11 -11
- mani_skill/utils/building/actor_builder.py +4 -4
- mani_skill/utils/building/articulation_builder.py +3 -3
- mani_skill/utils/building/mjcf_loader.py +6 -6
- mani_skill/utils/building/urdf_loader.py +6 -6
- mani_skill/utils/common.py +2 -2
- mani_skill/utils/geometry/bounding_cylinder.py +4 -4
- mani_skill/utils/geometry/geometry.py +1 -3
- mani_skill/utils/geometry/trimesh_utils.py +1 -3
- mani_skill/utils/gym_utils.py +2 -4
- mani_skill/utils/registration.py +6 -6
- mani_skill/utils/sapien_utils.py +21 -21
- mani_skill/utils/scene_builder/ai2thor/constants.py +1 -2
- mani_skill/utils/scene_builder/ai2thor/scene_builder.py +9 -9
- mani_skill/utils/scene_builder/control/planar/scene_builder.py +2 -4
- mani_skill/utils/scene_builder/kitchen_counter/scene_builder.py +1 -2
- mani_skill/utils/scene_builder/registration.py +1 -2
- mani_skill/utils/scene_builder/replicacad/rearrange/scene_builder.py +16 -16
- mani_skill/utils/scene_builder/replicacad/scene_builder.py +15 -15
- mani_skill/utils/scene_builder/robocasa/fixtures/windows.py +2 -4
- mani_skill/utils/scene_builder/robocasa/scene_builder.py +5 -5
- mani_skill/utils/scene_builder/scene_builder.py +15 -15
- mani_skill/utils/scene_builder/table/scene_builder.py +1 -2
- mani_skill/utils/structs/actor.py +6 -6
- mani_skill/utils/structs/articulation.py +32 -30
- mani_skill/utils/structs/articulation_joint.py +6 -6
- mani_skill/utils/structs/base.py +14 -9
- mani_skill/utils/structs/drive.py +2 -2
- mani_skill/utils/structs/link.py +10 -8
- mani_skill/utils/structs/pose.py +3 -3
- mani_skill/utils/structs/render_camera.py +4 -4
- mani_skill/utils/structs/types.py +3 -1
- mani_skill/utils/visualization/jupyter_utils.py +1 -3
- mani_skill/utils/visualization/misc.py +5 -5
- mani_skill/utils/wrappers/cached_reset.py +5 -3
- mani_skill/utils/wrappers/flatten.py +1 -2
- mani_skill/utils/wrappers/record.py +10 -8
- mani_skill/utils/wrappers/visual_encoders.py +2 -2
- mani_skill/vector/wrappers/gymnasium.py +23 -13
- mani_skill/vector/wrappers/sb3.py +5 -5
- {mani_skill_nightly-2025.10.21.2011.dist-info → mani_skill_nightly-2025.10.22.157.dist-info}/METADATA +1 -1
- {mani_skill_nightly-2025.10.21.2011.dist-info → mani_skill_nightly-2025.10.22.157.dist-info}/RECORD +124 -124
- {mani_skill_nightly-2025.10.21.2011.dist-info → mani_skill_nightly-2025.10.22.157.dist-info}/WHEEL +0 -0
- {mani_skill_nightly-2025.10.21.2011.dist-info → mani_skill_nightly-2025.10.22.157.dist-info}/licenses/LICENSE +0 -0
- {mani_skill_nightly-2025.10.21.2011.dist-info → mani_skill_nightly-2025.10.22.157.dist-info}/licenses/LICENSE-3RD-PARTY +0 -0
- {mani_skill_nightly-2025.10.21.2011.dist-info → mani_skill_nightly-2025.10.22.157.dist-info}/top_level.txt +0 -0
mani_skill/envs/sapien_env.py
CHANGED
@@ -2,7 +2,7 @@ import copy
 import gc
 import os
 from functools import cached_property
-from typing import Any,
+from typing import Any, Optional, Sequence, Tuple, Union

 import dacite
 import gymnasium as gym
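Nearly every hunk in this release follows the same pattern: annotations written with `typing.Dict`, `typing.List`, etc. are replaced by the built-in generics (`dict[...]`, `list[...]`) available since Python 3.9, and the now-unused names are dropped from the `typing` imports. The removed names are cut off in this diff rendering, so the exact old form below is an assumption; a minimal before/after sketch of the pattern:

```python
# Before (assumed; the removed imports are truncated in the diff view):
# from typing import Dict, List, Union
# _sensors: Dict[str, "BaseSensor"]
# _hidden_objects: List[Union["Actor", "Articulation"]] = []

# After, matching the + lines in this release (Python 3.9+ built-in generics):
from typing import Union

_sensors: dict[str, "BaseSensor"]
_hidden_objects: list[Union["Actor", "Articulation"]] = []
```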
@@ -80,7 +80,7 @@ class BaseEnv(gym.Env):

 viewer_camera_configs (dict): configurations of the viewer camera in the GUI to override any environment defaults. Similar usage as @sensor_configs.

-robot_uids (Union[str, BaseAgent,
+robot_uids (Union[str, BaseAgent, list[Union[str, BaseAgent]]]): list of robots to instantiate and control in the environment.

 sim_config (Union[SimConfig, dict]): Configurations for simulation if used that override the environment defaults. If given
 a dictionary, it can just override specific attributes e.g. ``sim_config=dict(scene_config=dict(solver_iterations=25))``. If

@@ -118,7 +118,7 @@ class BaseEnv(gym.Env):
 """

 # fmt: off
-SUPPORTED_ROBOTS:
+SUPPORTED_ROBOTS: list[Union[str, Tuple[str]]] = None
 """Override this to enforce which robots or tuples of robots together are supported in the task. During env creation,
 setting robot_uids auto loads all desired robots into the scene, but not all tasks are designed to support some robot setups"""
 SUPPORTED_OBS_MODES = ("state", "state_dict", "none", "sensor_data", "any_textures", "pointcloud")

@@ -142,27 +142,27 @@ class BaseEnv(gym.Env):
 single_action_space: gym.Space
 """the unbatched action space of the environment"""

-_sensors:
+_sensors: dict[str, BaseSensor]
 """all sensors configured in this environment"""
-_sensor_configs:
+_sensor_configs: dict[str, BaseSensorConfig]
 """all sensor configurations parsed from self._sensor_configs and agent._sensor_configs"""
-_agent_sensor_configs:
+_agent_sensor_configs: dict[str, BaseSensorConfig]
 """all agent sensor configs parsed from agent._sensor_configs"""
-_human_render_cameras:
+_human_render_cameras: dict[str, Camera]
 """cameras used for rendering the current environment retrievable via `env.render_rgb_array()`. These are not used to generate observations"""
-_default_human_render_camera_configs:
+_default_human_render_camera_configs: dict[str, CameraConfig]
 """all camera configurations for cameras used for human render"""
-_human_render_camera_configs:
+_human_render_camera_configs: dict[str, CameraConfig]
 """all camera configurations parsed from self._human_render_camera_configs"""

-_hidden_objects:
+_hidden_objects: list[Union[Actor, Articulation]] = []
 """list of objects that are hidden during rendering when generating visual observations / running render_cameras()"""

 _main_rng: np.random.RandomState = None
 """main rng generator that generates episode seed sequences. For internal use only"""
 _batched_main_rng: BatchedRNG = None
 """the batched main RNG that generates episode seed sequences. For internal use only"""
-_main_seed:
+_main_seed: list[int] = None
 """main seed list for _main_rng and _batched_main_rng. _main_rng uses _main_seed[0]. For internal use only"""
 _episode_rng: np.random.RandomState = None
 """the numpy RNG that you can use to generate random numpy data. It is not recommended to use this. Instead use the _batched_episode_rng which helps ensure GPU and CPU simulation generate the same data with the same seeds."""

@@ -201,7 +201,7 @@ class BaseEnv(gym.Env):
 sensor_configs: Optional[dict] = dict(),
 human_render_camera_configs: Optional[dict] = dict(),
 viewer_camera_configs: Optional[dict] = dict(),
-robot_uids: Union[str, BaseAgent,
+robot_uids: Union[str, BaseAgent, list[Union[str, BaseAgent]]] = None,
 sim_config: Union[SimConfig, dict] = dict(),
 reconfiguration_freq: Optional[int] = None,
 sim_backend: str = "auto",

@@ -439,7 +439,7 @@ class BaseEnv(gym.Env):
 def _default_sensor_configs(
 self,
 ) -> Union[
-BaseSensorConfig, Sequence[BaseSensorConfig],
+BaseSensorConfig, Sequence[BaseSensorConfig], dict[str, BaseSensorConfig]
 ]:
 """Add default (non-agent) sensors to the environment by returning sensor configurations. These can be overriden by the user at
 env creation time"""

@@ -448,7 +448,7 @@ class BaseEnv(gym.Env):
 def _default_human_render_camera_configs(
 self,
 ) -> Union[
-CameraConfig, Sequence[CameraConfig],
+CameraConfig, Sequence[CameraConfig], dict[str, CameraConfig]
 ]:
 """Add default cameras for rendering when using render_mode='rgb_array'. These can be overriden by the user at env creation time """
 return []

@@ -498,7 +498,7 @@ class BaseEnv(gym.Env):
 """The current observation mode. This affects the observation returned by env.get_obs()"""
 return self._obs_mode

-def get_obs(self, info: Optional[
+def get_obs(self, info: Optional[dict] = None, unflattened: bool = False):
 """
 Return the current observation of the environment. User may call this directly to get the current observation
 as opposed to taking a step with actions in the environment.

@@ -509,7 +509,7 @@ class BaseEnv(gym.Env):
 data in the info object by overriding the `self.evaluate` function.

 Args:
-info (
+info (dict): The info object of the environment. Generally should always be the result of `self.get_info()`.
 If this is None (the default), this function will call `self.get_info()` itself
 unflattened (bool): Whether to return the observation without flattening even if the observation mode (`self.obs_mode`) asserts to return a flattened observation.
 """
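The `get_obs` signature gains an `unflattened` flag alongside the `info` dict (see the hunk above). A small usage sketch, assuming an already constructed `env` instance of a `BaseEnv` subclass:

```python
# Query the current observation without stepping the environment. With
# unflattened=True the nested observation dict is returned even when the
# configured obs_mode would normally flatten it into a single state vector.
info = env.get_info()
obs = env.get_obs(info, unflattened=True)
```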
@@ -543,7 +543,7 @@ class BaseEnv(gym.Env):
 obs["state"] = common.flatten_state_dict(data, use_torch=True, device=self.device)
 return obs

-def _get_obs_state_dict(self, info:
+def _get_obs_state_dict(self, info: dict):
 """Get (ground-truth) state-based observations."""
 return dict(
 agent=self._get_obs_agent(),

@@ -555,7 +555,7 @@ class BaseEnv(gym.Env):
 Controller state is also included although most default controllers do not have any state."""
 return self.agent.get_proprioception()

-def _get_obs_extra(self, info:
+def _get_obs_extra(self, info: dict):
 """Get task-relevant extra observations. Usually defined on a task by task basis"""
 return dict()

@@ -564,11 +564,11 @@ class BaseEnv(gym.Env):
 for sensor in self._sensors.values():
 sensor.capture()

-def get_sensor_images(self) ->
+def get_sensor_images(self) -> dict[str, dict[str, torch.Tensor]]:
 """Get image (RGB) visualizations of what sensors currently sense. This function calls self._get_obs_sensor_data() internally which automatically hides objects and updates the render"""
 return self.scene.get_sensor_images(self._get_obs_sensor_data())

-def get_sensor_params(self) ->
+def get_sensor_params(self) -> dict[str, dict[str, torch.Tensor]]:
 """Get all sensor parameters."""
 params = dict()
 for name, sensor in self._sensors.items():

@@ -624,7 +624,7 @@ class BaseEnv(gym.Env):
 torch.cuda.synchronize()
 return sensor_obs

-def _get_obs_with_sensor_data(self, info:
+def _get_obs_with_sensor_data(self, info: dict, apply_texture_transforms: bool = True) -> dict:
 """Get the observation with sensor data"""
 return dict(
 agent=self._get_obs_agent(),

@@ -645,7 +645,7 @@ class BaseEnv(gym.Env):
 def reward_mode(self):
 return self._reward_mode

-def get_reward(self, obs: Any, action: torch.Tensor, info:
+def get_reward(self, obs: Any, action: torch.Tensor, info: dict):
 """
 Compute the reward for environment at its current state. observation data, the most recent action, and the info dictionary (generated by the self.evaluate() function)
 are provided as inputs. By default the observation data will be in its most raw form, a dictionary (no flattening, wrappers etc.)

@@ -653,7 +653,7 @@ class BaseEnv(gym.Env):
 Args:
 obs (Any): The observation data.
 action (torch.Tensor): The most recent action.
-info (
+info (dict): The info dictionary.
 """
 if self._reward_mode == "sparse":
 reward = self.compute_sparse_reward(obs=obs, action=action, info=info)

@@ -669,7 +669,7 @@ class BaseEnv(gym.Env):
 raise NotImplementedError(self._reward_mode)
 return reward

-def compute_sparse_reward(self, obs: Any, action: torch.Tensor, info:
+def compute_sparse_reward(self, obs: Any, action: torch.Tensor, info: dict):
 """

 Computes the sparse reward. By default this function tries to use the success/fail information in

@@ -678,7 +678,7 @@ class BaseEnv(gym.Env):
 Args:
 obs (Any): The observation data. By default the observation data will be in its most raw form, a dictionary (no flattening, wrappers etc.)
 action (torch.Tensor): The most recent action.
-info (
+info (dict): The info dictionary.
 """
 if "success" in info:
 if "fail" in info:

@@ -695,19 +695,19 @@ class BaseEnv(gym.Env):
 reward = torch.zeros(self.num_envs, dtype=torch.float, device=self.device)
 return reward

-def compute_dense_reward(self, obs: Any, action: torch.Tensor, info:
+def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
 """
 Compute the dense reward.

 Args:
 obs (Any): The observation data. By default the observation data will be in its most raw form, a dictionary (no flattening, wrappers etc.)
 action (torch.Tensor): The most recent action.
-info (
+info (dict): The info dictionary.
 """
 raise NotImplementedError()

 def compute_normalized_dense_reward(
-self, obs: Any, action: torch.Tensor, info:
+self, obs: Any, action: torch.Tensor, info: dict
 ):
 """
 Compute the normalized dense reward.

@@ -715,7 +715,7 @@ class BaseEnv(gym.Env):
 Args:
 obs (Any): The observation data. By default the observation data will be in its most raw form, a dictionary (no flattening, wrappers etc.)
 action (torch.Tensor): The most recent action.
-info (
+info (dict): The info dictionary.
 """
 raise NotImplementedError()

@@ -1035,7 +1035,7 @@ class BaseEnv(gym.Env):
 # Step
 # -------------------------------------------------------------------------- #

-def step(self, action: Union[None, np.ndarray, torch.Tensor,
+def step(self, action: Union[None, np.ndarray, torch.Tensor, dict]):
 """
 Take a step through the environment with an action. Actions are automatically clipped to the action space.

@@ -1067,7 +1067,7 @@ class BaseEnv(gym.Env):
 )

 def _step_action(
-self, action: Union[None, np.ndarray, torch.Tensor,
+self, action: Union[None, np.ndarray, torch.Tensor, dict]
 ) -> Union[None, torch.Tensor]:
 set_action = False
 action_is_unbatched = False
@@ -1169,7 +1169,10 @@ class BaseEnv(gym.Env):
 def _set_scene_config(self):
 physx.set_shape_config(contact_offset=self.sim_config.scene_config.contact_offset, rest_offset=self.sim_config.scene_config.rest_offset)
 physx.set_body_config(solver_position_iterations=self.sim_config.scene_config.solver_position_iterations, solver_velocity_iterations=self.sim_config.scene_config.solver_velocity_iterations, sleep_threshold=self.sim_config.scene_config.sleep_threshold)
-
+gravity = self.sim_config.scene_config.gravity
+if not isinstance(gravity, np.ndarray):
+gravity = np.array(gravity)
+physx.set_scene_config(gravity=gravity, bounce_threshold=self.sim_config.scene_config.bounce_threshold, enable_pcm=self.sim_config.scene_config.enable_pcm, enable_tgs=self.sim_config.scene_config.enable_tgs, enable_ccd=self.sim_config.scene_config.enable_ccd, enable_enhanced_determinism=self.sim_config.scene_config.enable_enhanced_determinism, enable_friction_every_iteration=self.sim_config.scene_config.enable_friction_every_iteration, cpu_workers=self.sim_config.scene_config.cpu_workers )
 physx.set_default_material(**self.sim_config.default_materials_config.dict())

 def _setup_scene(self):
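The only behavioral change in this file is in `_set_scene_config`: `scene_config.gravity` is now coerced to a NumPy array before being passed to `physx.set_scene_config`, so gravity supplied as a plain tuple or list through the `sim_config` override path (see the class docstring hunk above) no longer needs to be an ndarray. A hedged usage sketch; the task id and gravity value are placeholders, not part of this diff:

```python
import gymnasium as gym
import mani_skill.envs  # noqa: F401  (registers the ManiSkill task ids)

# gravity given as a plain tuple; after this release _set_scene_config wraps it
# with np.array(...) before handing it to physx.set_scene_config
env = gym.make(
    "PickCube-v1",  # placeholder task id
    sim_config=dict(scene_config=dict(gravity=(0.0, 0.0, -9.81))),
)
obs, _ = env.reset(seed=0)
```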
@@ -1283,7 +1286,7 @@ class BaseEnv(gym.Env):
 """
 return common.flatten_state_dict(self.get_state_dict(), use_torch=True)

-def set_state_dict(self, state:
+def set_state_dict(self, state: dict, env_idx: torch.Tensor = None):
 """
 Set environment state with a state dictionary. Override to include task information (e.g., goal)

mani_skill/envs/scene.py
CHANGED
@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 from functools import cached_property
-from typing import Any,
+from typing import Any, Optional, Tuple, Union

 import numpy as np
 import sapien

@@ -33,8 +33,8 @@ if SAPIEN_RENDER_SYSTEM == "3.1":

 @dataclass
 class StateDictRegistry:
-actors:
-articulations:
+actors: dict[str, Actor]
+articulations: dict[str, Articulation]


 class ManiSkillScene:

@@ -48,7 +48,7 @@ class ManiSkillScene:

 def __init__(
 self,
-sub_scenes: Optional[
+sub_scenes: Optional[list[sapien.Scene]] = None,
 sim_config: SimConfig = SimConfig(),
 debug_mode: bool = True,
 device: Device = None,

@@ -75,18 +75,18 @@ class ManiSkillScene:
 self.backend = backend # references the backend object stored in BaseEnv class

 self.render_system_group: sapien.render.RenderSystemGroup = None
-self.camera_groups:
+self.camera_groups: dict[str, sapien.render.RenderCameraGroup] = dict()

-self.actors:
-self.articulations:
+self.actors: dict[str, Actor] = dict()
+self.articulations: dict[str, Articulation] = dict()

-self.actor_views:
+self.actor_views: dict[str, Actor] = dict()
 """views of actors in any sub-scenes created by using Actor.merge and queryable as if it were a single Actor"""
-self.articulation_views:
+self.articulation_views: dict[str, Articulation] = dict()
 """views of articulations in any sub-scenes created by using Articulation.merge and queryable as if it were a single Articulation"""

-self.sensors:
-self.human_render_cameras:
+self.sensors: dict[str, BaseSensor] = dict()
+self.human_render_cameras: dict[str, Camera] = dict()
 self._sensors_initialized = False
 self._human_render_cameras_initialized = False

@@ -97,12 +97,12 @@ class ManiSkillScene:
 self._needs_fetch = False
 """Used internally to raise some errors ahead of time of when there may be undefined behaviors"""

-self.pairwise_contact_queries:
+self.pairwise_contact_queries: dict[
 str, physx.PhysxGpuContactPairImpulseQuery
 ] = dict()
 """dictionary mapping pairwise contact query keys to GPU contact queries. Used in GPU simulation only to cache queries as
 query creation will pause any GPU sim computation"""
-self._pairwise_contact_query_unique_hashes:
+self._pairwise_contact_query_unique_hashes: dict[str, int] = dict()
 """maps keys in self.pairwise_contact_queries to unique hashes dependent on the actual objects involved in the query.
 This is used to determine automatically when to rebuild contact queries as keys for self.pairwise_contact_queries are kept
 non-unique between episode resets in order to be easily rebuilt and deallocate old queries. This essentially acts as a way

@@ -203,7 +203,7 @@ class ManiSkillScene:
 height,
 near,
 far,
-fovy: Union[float,
+fovy: Union[float, list, None] = None,
 intrinsic: Union[Array, None] = None,
 mount: Union[Actor, Link, None] = None,
 ) -> RenderCamera:

@@ -225,7 +225,7 @@ class ManiSkillScene:
 height,
 near,
 far,
-fovy: Union[float,
+fovy: Union[float, list, None] = None,
 intrinsic: Union[Array, None] = None,
 mount: Union[Actor, Link, None] = None,
 ) -> RenderCamera:

@@ -304,7 +304,7 @@ class ManiSkillScene:
 height,
 near,
 far,
-fovy: Union[float,
+fovy: Union[float, list, None] = None,
 intrinsic: Union[Array, None] = None,
 mount: Union[Actor, Link, None] = None,
 ) -> RenderCamera:

@@ -583,7 +583,7 @@ class ManiSkillScene:
 shadow_near=0.1,
 shadow_far=10.0,
 shadow_map_size=2048,
-scene_idxs: Optional[
+scene_idxs: Optional[list[int]] = None,
 ):
 if scene_idxs is None:
 scene_idxs = list(range(len(self.sub_scenes)))

@@ -619,7 +619,7 @@ class ManiSkillScene:
 shadow_near=-10.0,
 shadow_far=10.0,
 shadow_map_size=2048,
-scene_idxs: Optional[
+scene_idxs: Optional[list[int]] = None,
 ):
 if scene_idxs is None:
 scene_idxs = list(range(len(self.sub_scenes)))

@@ -664,7 +664,7 @@ class ManiSkillScene:
 shadow_near=0.1,
 shadow_far=10.0,
 shadow_map_size=2048,
-scene_idxs: Optional[
+scene_idxs: Optional[list[int]] = None,
 ):
 if scene_idxs is None:
 scene_idxs = list(range(len(self.sub_scenes)))

@@ -873,7 +873,7 @@ class ManiSkillScene:
 del state_dict["articulations"]
 return state_dict

-def set_sim_state(self, state:
+def set_sim_state(self, state: dict, env_idx: torch.Tensor = None):
 if env_idx is not None:
 prev_reset_mask = self._reset_mask.clone()
 # safe guard against setting the wrong states

@@ -908,7 +908,7 @@ class ManiSkillScene:
 for scene in self.sub_scenes:
 scene.update_render()
 self.px.gpu_init()
-self.non_static_actors:
+self.non_static_actors: list[Actor] = []
 # find non static actors, and set data indices that are now available after gpu_init was called
 for actor in self.actors.values():
 if actor.px_body_type == "static":

@@ -990,7 +990,7 @@ class ManiSkillScene:
 # ---------------------------------------------------------------------------- #
 def _get_all_render_bodies(
 self,
-) ->
+) -> list[Tuple[sapien.render.RenderBodyComponent, int]]:
 all_render_bodies = []
 for actor in self.actors.values():
 if actor.px_body_type == "static":

@@ -1078,13 +1078,13 @@ class ManiSkillScene:

 self.render_system_group = sync_manager

-def _gpu_setup_sensors(self, sensors:
+def _gpu_setup_sensors(self, sensors: dict[str, BaseSensor]):
 if SAPIEN_RENDER_SYSTEM == "3.1":
 self._sapien_31_gpu_setup_sensors(sensors)
 else:
 self._sapien_gpu_setup_sensors(sensors)

-def _sapien_gpu_setup_sensors(self, sensors:
+def _sapien_gpu_setup_sensors(self, sensors: dict[str, BaseSensor]):
 for name, sensor in sensors.items():
 if isinstance(sensor, Camera):
 try:

@@ -1126,8 +1126,8 @@ class ManiSkillScene:
 )

 def get_sensor_images(
-self, obs:
-) ->
+self, obs: dict[str, Any]
+) -> dict[str, dict[str, torch.Tensor]]:
 """Get raw sensor data as images for visualization purposes."""
 sensor_data = dict()
 for name, sensor in self.sensors.items():

@@ -1136,7 +1136,7 @@ class ManiSkillScene:

 def get_human_render_camera_images(
 self, camera_name: str = None
-) ->
+) -> dict[str, torch.Tensor]:
 image_data = dict()
 if self.gpu_sim_enabled:
 if self.parallel_in_single_scene:
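`ManiSkillScene.actor_views` and `articulation_views` (now annotated as `dict[str, Actor]` / `dict[str, Articulation]`) hold merged views created with `Actor.merge` / `Articulation.merge`, as the docstrings above note. A hedged sketch of how such a view typically comes about inside a task's `_load_scene`; the builder options and names are illustrative, not from this diff:

```python
from mani_skill.utils.structs import Actor

def _load_scene(self, options: dict):
    # Build one cube per parallel sub-scene, each restricted to its own scene index,
    # then merge them into a single batched Actor view. The merged view is also
    # registered under self.scene.actor_views["cube"].
    cubes: list[Actor] = []
    for i in range(self.num_envs):
        builder = self.scene.create_actor_builder()
        builder.add_box_collision(half_size=[0.02] * 3)
        builder.add_box_visual(half_size=[0.02] * 3)
        builder.set_scene_idxs([i])
        cubes.append(builder.build(name=f"cube-{i}"))
    self.cube = Actor.merge(cubes, name="cube")
```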
mani_skill/envs/scenes/base_env.py
CHANGED

@@ -1,4 +1,4 @@
-from typing import Any,
+from typing import Any, Union

 import numpy as np
 import sapien as sapien

@@ -142,11 +142,11 @@ class SceneManipulationEnv(BaseEnv):
 def evaluate(self) -> dict:
 return dict()

-def compute_dense_reward(self, obs: Any, action: torch.Tensor, info:
+def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
 return 0

 def compute_normalized_dense_reward(
-self, obs: Any, action: torch.Tensor, info:
+self, obs: Any, action: torch.Tensor, info: dict
 ):
 return self.compute_dense_reward(obs=obs, action=action, info=info) / 1

mani_skill/envs/sim2real_env.py
CHANGED
@@ -1,5 +1,5 @@
 import time
-from typing import Any, Callable,
+from typing import Any, Callable, Optional

 import gymnasium as gym
 import numpy as np

@@ -37,7 +37,7 @@ class Sim2RealEnv(gym.Env):
 self.agent.reset(qpos=self.base_sim_env.agent.robot.qpos.cpu().flatten())
 input("Press enter if the environment is reset")

-sensor_data_preprocessing_function (Optional[Callable[[
+sensor_data_preprocessing_function (Optional[Callable[[dict], dict]]): The function to call to process the sensor data returned by the BaseRealAgent.get_sensor_data function.
 By default this is None and we use a default processing function which does the following for each sensor type:
 - Camera: Perform a center crop of the real sensor image (rgb or depth) to have the same aspect ratio as the simulation sensor image. Then resize the image to the simulation sensor image shape using cv2.resize

@@ -56,7 +56,7 @@ class Sim2RealEnv(gym.Env):
 real_reset_function: Optional[
 Callable[["Sim2RealEnv", Optional[int], Optional[dict]], None]
 ] = None,
-sensor_data_preprocessing_function: Optional[Callable[[
+sensor_data_preprocessing_function: Optional[Callable[[dict], dict]] = None,
 render_mode: Optional[str] = "sensors",
 skip_data_checks: bool = False,
 control_freq: Optional[int] = None,

@@ -122,7 +122,7 @@ class Sim2RealEnv(gym.Env):
 return self

 cur_env = self.sim_env
-wrappers:
+wrappers: list[gym.Wrapper] = []
 while isinstance(cur_env, gym.Wrapper):
 wrappers.append(cur_env)
 cur_env = cur_env.env

@@ -232,7 +232,7 @@ class Sim2RealEnv(gym.Env):
 # using the original user implemented sim env's _get_obs_agent function in case they modify it e.g. to remove qvel values as they might be too noisy
 return self.base_sim_env.__class__._get_obs_agent(self)

-def _get_obs_extra(self, info:
+def _get_obs_extra(self, info: dict):
 # using the original user implemented sim env's _get_obs_extra function in case they modify it e.g. to include engineered features like the tcp_pose of the robot
 try:
 return self.base_sim_env.__class__._get_obs_extra(self, info)

@@ -259,7 +259,7 @@ class Sim2RealEnv(gym.Env):
 return data

 def _get_obs_with_sensor_data(
-self, info:
+self, info: dict, apply_texture_transforms: bool = True
 ) -> dict:
 """Get the observation with sensor data"""
 return self.base_sim_env.__class__._get_obs_with_sensor_data(

@@ -298,7 +298,7 @@ class Sim2RealEnv(gym.Env):
 def get_reward(self, obs, action, info):
 return self.base_sim_env.__class__.get_reward(self, obs, action, info)

-def compute_sparse_reward(self, obs: Any, action: torch.Tensor, info:
+def compute_sparse_reward(self, obs: Any, action: torch.Tensor, info: dict):
 """
 Computes the sparse reward. By default this function tries to use the success/fail information in
 returned by the evaluate function and gives +1 if success, -1 if fail, 0 otherwise"""

@@ -306,11 +306,11 @@ class Sim2RealEnv(gym.Env):
 self, obs, action, info
 )

-def compute_dense_reward(self, obs: Any, action: torch.Tensor, info:
+def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
 raise NotImplementedError()

 def compute_normalized_dense_reward(
-self, obs: Any, action: torch.Tensor, info:
+self, obs: Any, action: torch.Tensor, info: dict
 ):
 raise NotImplementedError()

@@ -347,7 +347,7 @@ class Sim2RealEnv(gym.Env):
 self.agent.stop()

 def preprocess_sensor_data(
-self, sensor_data:
+self, sensor_data: dict, sensor_names: Optional[list[str]] = None
 ):
 import cv2

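The docstring hunk above describes the default `sensor_data_preprocessing_function` behavior: center-crop the real camera image to the simulated camera's aspect ratio, then `cv2.resize` it to the simulated shape. A hedged sketch of a custom replacement, assuming sensor data arrives as a dict of per-camera dicts holding HWC uint8 `rgb` frames and that the simulated cameras are 128x128:

```python
import cv2
import numpy as np

def my_preprocessing(sensor_data: dict) -> dict:
    """Square center-crop each real RGB frame, then resize to the (assumed) 128x128 sim camera."""
    out = {}
    for name, data in sensor_data.items():
        rgb: np.ndarray = data["rgb"]  # assumption: HWC uint8 real camera frame
        h, w = rgb.shape[:2]
        side = min(h, w)
        top, left = (h - side) // 2, (w - side) // 2
        cropped = rgb[top:top + side, left:left + side]
        out[name] = dict(data, rgb=cv2.resize(cropped, (128, 128)))
    return out

# real_env = Sim2RealEnv(sim_env=sim_env, agent=real_agent,
#                        sensor_data_preprocessing_function=my_preprocessing)
```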
mani_skill/envs/tasks/control/ant.py
CHANGED

@@ -1,5 +1,5 @@
 import os
-from typing import Any,
+from typing import Any, Optional, Union

 import numpy as np
 import sapien

@@ -152,7 +152,7 @@ class AntEnv(BaseEnv):
 link.name for link in self.active_links if "foot" in link.name
 ]

-def _initialize_episode(self, env_idx: torch.Tensor, options:
+def _initialize_episode(self, env_idx: torch.Tensor, options: dict):
 with torch.device(self.device):
 b = len(env_idx)
 # set agent root pose - torso now centered at dummy root at (0,0,0)

@@ -235,7 +235,7 @@ class AntEnv(BaseEnv):
 )

 # cache re-used computation
-def evaluate(self) ->
+def evaluate(self) -> dict:
 link_angvels, link_linvels, cmass_linvel = self.get_vels
 return dict(
 link_angvels=link_angvels,

@@ -243,7 +243,7 @@ class AntEnv(BaseEnv):
 cmass_linvel=cmass_linvel,
 )

-def _get_obs_extra(self, info:
+def _get_obs_extra(self, info: dict):
 obs = super()._get_obs_extra(info)
 if self.obs_mode_struct.use_state:
 obs.update(

@@ -282,14 +282,14 @@ class AntEnv(BaseEnv):
 .view(-1)
 )

-def compute_dense_reward(self, obs: Any, action: torch.Tensor, info:
+def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
 small_control = (4 + self.control_rew(action)) / 5
 return (
 small_control * self.move_x_rew(info, self.move_speed) * self.standing_rew()
 )

 def compute_normalized_dense_reward(
-self, obs: Any, action: torch.Tensor, info:
+self, obs: Any, action: torch.Tensor, info: dict
 ):
 return self.compute_dense_reward(obs, action, info)

mani_skill/envs/tasks/control/cartpole.py
CHANGED

@@ -1,7 +1,7 @@
 """Adapted from https://github.com/google-deepmind/dm_control/blob/main/dm_control/suite/cartpole.py"""

 import os
-from typing import Any,
+from typing import Any, Optional, Union

 import numpy as np
 import sapien

@@ -132,7 +132,7 @@ class CartpoleEnv(BaseEnv):
 def evaluate(self):
 return dict()

-def _get_obs_extra(self, info:
+def _get_obs_extra(self, info: dict):
 obs = dict(
 velocity=self.agent.robot.links_map["pole_1"].linear_velocity,
 angular_velocity=self.agent.robot.links_map["pole_1"].angular_velocity,

@@ -143,7 +143,7 @@ class CartpoleEnv(BaseEnv):
 def pole_angle_cosine(self):
 return torch.cos(self.agent.robot.joints_map["hinge_1"].qpos)

-def compute_dense_reward(self, obs: Any, action: Array, info:
+def compute_dense_reward(self, obs: Any, action: Array, info: dict):
 cart_pos = self.agent.robot.links_map["cart"].pose.p[
 :, 0
 ] # (B, ), we only care about x position

@@ -169,7 +169,7 @@ class CartpoleEnv(BaseEnv):
 reward = upright * centered * small_control * small_velocity
 return reward

-def compute_normalized_dense_reward(self, obs: Any, action: Array, info:
+def compute_normalized_dense_reward(self, obs: Any, action: Array, info: dict):
 # this should be equal to compute_dense_reward / max possible reward
 max_reward = 1.0
 return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
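The task-side changes all touch the same two hooks that `BaseEnv` defines: `compute_dense_reward(obs, action, info: dict)` and `compute_normalized_dense_reward(...)`, where the normalized variant is by convention the dense reward divided by its maximum value (spelled out in the CartPole hunk above). A minimal sketch of a custom task following that convention; the reward terms and info keys are illustrative, not from this diff:

```python
from typing import Any

import torch

from mani_skill.envs.sapien_env import BaseEnv


class MyTaskEnv(BaseEnv):
    # scene loading, evaluate(), _get_obs_extra() omitted for brevity

    def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
        # hypothetical shaping: reaching term in [0, 1] plus a +1 success bonus,
        # assuming evaluate() puts "tcp_to_obj_dist" and "success" into info
        reward = 1.0 - torch.tanh(5.0 * info["tcp_to_obj_dist"])
        reward[info["success"]] += 1.0
        return reward

    def compute_normalized_dense_reward(self, obs: Any, action: torch.Tensor, info: dict):
        max_reward = 2.0  # maximum possible value of the dense reward above
        return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
```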