gr-libs 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evaluation/analyze_results_cross_alg_cross_domain.py +277 -0
- evaluation/create_minigrid_map_image.py +34 -0
- evaluation/file_system.py +42 -0
- evaluation/generate_experiments_results.py +92 -0
- evaluation/generate_experiments_results_new_ver1.py +254 -0
- evaluation/generate_experiments_results_new_ver2.py +331 -0
- evaluation/generate_task_specific_statistics_plots.py +272 -0
- evaluation/get_plans_images.py +47 -0
- evaluation/increasing_and_decreasing_.py +63 -0
- gr_libs/__init__.py +2 -0
- gr_libs/environment/__init__.py +0 -0
- gr_libs/environment/environment.py +227 -0
- gr_libs/environment/utils/__init__.py +0 -0
- gr_libs/environment/utils/utils.py +17 -0
- gr_libs/metrics/__init__.py +0 -0
- gr_libs/metrics/metrics.py +224 -0
- gr_libs/ml/__init__.py +6 -0
- gr_libs/ml/agent.py +56 -0
- gr_libs/ml/base/__init__.py +1 -0
- gr_libs/ml/base/rl_agent.py +54 -0
- gr_libs/ml/consts.py +22 -0
- gr_libs/ml/neural/__init__.py +3 -0
- gr_libs/ml/neural/deep_rl_learner.py +395 -0
- gr_libs/ml/neural/utils/__init__.py +2 -0
- gr_libs/ml/neural/utils/dictlist.py +33 -0
- gr_libs/ml/neural/utils/penv.py +57 -0
- gr_libs/ml/planner/__init__.py +0 -0
- gr_libs/ml/planner/mcts/__init__.py +0 -0
- gr_libs/ml/planner/mcts/mcts_model.py +330 -0
- gr_libs/ml/planner/mcts/utils/__init__.py +2 -0
- gr_libs/ml/planner/mcts/utils/node.py +33 -0
- gr_libs/ml/planner/mcts/utils/tree.py +102 -0
- gr_libs/ml/sequential/__init__.py +1 -0
- gr_libs/ml/sequential/lstm_model.py +192 -0
- gr_libs/ml/tabular/__init__.py +3 -0
- gr_libs/ml/tabular/state.py +21 -0
- gr_libs/ml/tabular/tabular_q_learner.py +453 -0
- gr_libs/ml/tabular/tabular_rl_agent.py +126 -0
- gr_libs/ml/utils/__init__.py +6 -0
- gr_libs/ml/utils/env.py +7 -0
- gr_libs/ml/utils/format.py +100 -0
- gr_libs/ml/utils/math.py +13 -0
- gr_libs/ml/utils/other.py +24 -0
- gr_libs/ml/utils/storage.py +127 -0
- gr_libs/recognizer/__init__.py +0 -0
- gr_libs/recognizer/gr_as_rl/__init__.py +0 -0
- gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +102 -0
- gr_libs/recognizer/graml/__init__.py +0 -0
- gr_libs/recognizer/graml/gr_dataset.py +134 -0
- gr_libs/recognizer/graml/graml_recognizer.py +266 -0
- gr_libs/recognizer/recognizer.py +46 -0
- gr_libs/recognizer/utils/__init__.py +1 -0
- gr_libs/recognizer/utils/format.py +13 -0
- gr_libs-0.1.3.dist-info/METADATA +197 -0
- gr_libs-0.1.3.dist-info/RECORD +62 -0
- gr_libs-0.1.3.dist-info/WHEEL +5 -0
- gr_libs-0.1.3.dist-info/top_level.txt +3 -0
- tutorials/graml_minigrid_tutorial.py +30 -0
- tutorials/graml_panda_tutorial.py +32 -0
- tutorials/graml_parking_tutorial.py +38 -0
- tutorials/graml_point_maze_tutorial.py +43 -0
- tutorials/graql_minigrid_tutorial.py +29 -0
@@ -0,0 +1,32 @@
|
|
1
|
+
|
2
|
+
import numpy as np
|
3
|
+
from stable_baselines3 import PPO, SAC
|
4
|
+
from gr_libs.environment.environment import PANDA, GCEnvProperty, PandaProperty
|
5
|
+
from gr_libs.environment.utils.utils import domain_to_env_property
|
6
|
+
from gr_libs.metrics.metrics import stochastic_amplified_selection
|
7
|
+
from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
|
8
|
+
from gr_libs.ml.utils.format import random_subset_with_order
|
9
|
+
from gr_libs import GCGraml
|
10
|
+
|
11
|
+
# TODO: make these tutorials into pytests.

# Goal the acting agent is trained towards. It is also the first dynamic goal
# handed to the recognizer, and the value printed as the ground truth at the end.
TRUE_GOAL = [-0.1, -0.1, 0.1]

recognizer = GCGraml(
    domain_name=PANDA,
    env_name="PandaMyReachDense",
)

# Domain learning: 29 sampled base goals (range(1, 30)) and a single
# (SAC, 800000) training configuration are passed to the recognizer.
recognizer.domain_learning_phase(
    base_goals=[np.array([PandaProperty.sample_goal()]) for _ in range(1, 30)],
    train_configs=[(SAC, 800000)],
)

# Goals adaptation: the candidate goals the recognizer chooses between.
recognizer.goals_adaptation_phase(
    dynamic_goals=[
        np.array([TRUE_GOAL]),
        np.array([[-0.1, 0.1, 0.1]]),
        np.array([[0.2, 0.2, 0.1]]),
    ]
)

# The actor is trained with PPO, which differs from the SAC configuration
# given to the recognizer above.
property_type = domain_to_env_property(PANDA)
env_property = property_type("PandaMyReachDense")
problem_name = env_property.goal_to_problem_str(np.array([TRUE_GOAL]))
actor = DeepRLAgent(domain_name=PANDA, problem_name=problem_name, algorithm=PPO, num_timesteps=400000)
actor.learn()

# The sample is generated stochastically to simulate suboptimal behavior;
# noise is added to the action values.
full_sequence = actor.generate_observation(
    action_selection_method=stochastic_amplified_selection,
    random_optimalism=True,  # the noise that's added to the actions
)

# Half of the full sequence length is requested, with is_consecutive=False.
# NOTE(review): the trailing 0.5 passed to inference_phase presumably mirrors
# that observed fraction - confirm against the recognizer's signature.
partial_sequence = random_subset_with_order(full_sequence, int(0.5 * len(full_sequence)), is_consecutive=False)
closest_goal = recognizer.inference_phase(partial_sequence, np.array([TRUE_GOAL]), 0.5)
print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: {TRUE_GOAL}")
|
@@ -0,0 +1,38 @@
|
|
1
|
+
|
2
|
+
from stable_baselines3 import PPO, SAC, TD3
|
3
|
+
from gr_libs.environment.environment import EnvProperty, GCEnvProperty, ParkingProperty
|
4
|
+
from gr_libs.metrics.metrics import stochastic_amplified_selection
|
5
|
+
from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
|
6
|
+
from gr_libs.ml.utils.format import random_subset_with_order
|
7
|
+
from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml, GCGraml
|
8
|
+
|
9
|
+
# Consider extracting all of these into a per-domain "default variables" module (for parking, point_maze, and every other domain), which would simplify things like the problem_list_to_str_tuple function, the sizes of inputs, etc.
|
10
|
+
# Problem the acting agent is trained on. Its goal index (8) is also among the
# dynamic goals handed to the recognizer below.
ACTOR_PROBLEM = "Parking-S-14-PC--GI-8-v0"

# NOTE(review): the panda tutorial shipped alongside this one passes
# domain_name= to GCGraml/DeepRLAgent, while this script passes env_name=,
# problems= and gc_goal_set= - confirm these keyword names against the
# current gr_libs signatures.
recognizer = GCGraml(
    env_name="parking",  # TODO change to macros which are importable from some info or env module of enums.
    problems=[ParkingProperty("parking-v0")],
    train_configs=[(PPO, 400000)],
    gc_goal_set=[f"Parking-S-14-PC--GI-{i}-v0" for i in range(1, 21)],
)
recognizer.domain_learning_phase()
recognizer.goals_adaptation_phase(
    # TODO detach the goal from the environment instance in every gym env, add the ability to alter it from outside.
    dynamic_goals_problems=[
        ParkingProperty(f"Parking-S-14-PC--GI-{i}-v0") for i in (1, 4, 8, 11, 14, 18, 21)
    ],
    # TODO: dynamic_train_configs (e.g. [(SAC, 400000) for _ in range(7)]) is for expert
    # sequence generation; require it only if the sequence generation method is EXPERT.
)

# The actor is trained with TD3, which differs from the PPO configuration
# given to the recognizer above.
actor = DeepRLAgent(env_name="parking", problem_name=ACTOR_PROBLEM, algorithm=TD3, num_timesteps=400000)
actor.learn()

# The sample is generated stochastically to simulate suboptimal behavior;
# noise is added to the action values.
full_sequence = actor.generate_observation(
    action_selection_method=stochastic_amplified_selection,
    random_optimalism=True,  # the noise that's added to the actions
)

# Half of the full sequence length is requested, with is_consecutive=False.
partial_sequence = random_subset_with_order(full_sequence, int(0.5 * len(full_sequence)), is_consecutive=False)
closest_goal = recognizer.inference_phase(partial_sequence, ParkingProperty(ACTOR_PROBLEM).str_to_goal(), 0.5)
print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: 8")
|
@@ -0,0 +1,43 @@
|
|
1
|
+
|
2
|
+
from stable_baselines3 import SAC, TD3
|
3
|
+
from gr_libs.environment.utils.format import maze_str_to_goal
|
4
|
+
from gr_libs.metrics.metrics import stochastic_amplified_selection
|
5
|
+
from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
|
6
|
+
from gr_libs.ml.utils.format import random_subset_with_order
|
7
|
+
from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml
|
8
|
+
|
9
|
+
# Consider extracting all of these into a "default point_maze (or any other domain) variables" module, which would simplify things like the problem_list_to_str_tuple function, the sizes of inputs, etc.
|
10
|
+
# NOTE(review): maze_str_to_goal is imported from gr_libs.environment.utils.format,
# but the file listing for this wheel only shows gr_libs/environment/utils/utils.py -
# confirm the import path before running this tutorial.
# NOTE(review): the panda tutorial shipped alongside this one passes domain_name=
# to DeepRLAgent, while this script passes env_name= - confirm the keyword name.

# Problem the acting agent is trained on; it is also the first dynamic goal below.
ACTOR_PROBLEM = "PointMaze-FourRoomsEnvDense-11x11-Goal-4x4"

# Base problems for the domain learning phase (plain strings, one per goal).
base_problems = [
    "PointMaze-FourRoomsEnvDense-11x11-Goal-9x1",
    "PointMaze-FourRoomsEnv-11x11-Goal-9x9",  # this one doesn't work with dense rewards because of encountering local minima
    "PointMaze-FourRoomsEnvDense-11x11-Goal-1x9",
    "PointMaze-FourRoomsEnvDense-11x11-Goal-3x3",
    "PointMaze-FourRoomsEnvDense-11x11-Goal-3x4",
    "PointMaze-FourRoomsEnvDense-11x11-Goal-8x2",
    "PointMaze-FourRoomsEnvDense-11x11-Goal-3x7",
    "PointMaze-FourRoomsEnvDense-11x11-Goal-2x8",
]

# Candidate goals for the goals adaptation phase.
dynamic_problems = [
    ACTOR_PROBLEM,
    "PointMaze-FourRoomsEnvDense-11x11-Goal-7x3",
    "PointMaze-FourRoomsEnvDense-11x11-Goal-3x7",
]

recognizer = ExpertBasedGraml(
    env_name="point_maze",  # TODO change to macros which are importable from some info or env module of enums.
    problems=base_problems,
    task_str_to_goal=maze_str_to_goal,
    method=DeepRLAgent,
    collect_statistics=False,
    # One training configuration per base problem, so the two lists cannot drift apart.
    train_configs=[(SAC, 200000) for _ in base_problems],
)
recognizer.domain_learning_phase()
recognizer.goals_adaptation_phase(
    dynamic_goals_problems=dynamic_problems,
    # For expert sequence generation, one configuration per dynamic problem.
    # TODO change to require this only if sequence generation method is EXPERT.
    dynamic_train_configs=[(SAC, 200000) for _ in dynamic_problems],
)

# TD3 is different from recognizer and expert algorithms, which are SAC.
actor = DeepRLAgent(env_name="point_maze", problem_name=ACTOR_PROBLEM, algorithm=TD3, num_timesteps=200000)
actor.learn()

# The sample is generated stochastically to simulate suboptimal behavior;
# noise is added to the action values.
full_sequence = actor.generate_observation(
    action_selection_method=stochastic_amplified_selection,
    random_optimalism=True,  # the noise that's added to the actions
)

# Half of the full sequence length is requested (is_consecutive is left at its default).
partial_sequence = random_subset_with_order(full_sequence, int(0.5 * len(full_sequence)))
closest_goal = recognizer.inference_phase(partial_sequence, maze_str_to_goal(ACTOR_PROBLEM), 0.5)
print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (4, 4)")
|
@@ -0,0 +1,29 @@
|
|
1
|
+
from gr_libs.environment.environment import QLEARNING
|
2
|
+
from gr_libs.metrics.metrics import stochastic_amplified_selection
|
3
|
+
from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
|
4
|
+
from gr_libs.ml.utils.format import random_subset_with_order
|
5
|
+
from gr_libs import Graql
|
6
|
+
|
7
|
+
# Goal the acting agent is trained towards. It is also the first dynamic goal
# handed to the recognizer, and the value printed as the ground truth at the end.
ACTOR_GOAL = (11, 1)

recognizer = Graql(
    domain_name="minigrid",
    env_name="MiniGrid-SimpleCrossingS13N4",
)

# Graql doesn't have a domain learning phase, so we skip it.

dynamic_goals = [ACTOR_GOAL, (11, 11), (1, 11)]
recognizer.goals_adaptation_phase(
    dynamic_goals=dynamic_goals,
    # For expert sequence generation, one configuration per dynamic goal.
    dynamic_train_configs=[(QLEARNING, 100000) for _ in dynamic_goals],
)

# The actor is configured with QLEARNING, the same algorithm constant used in
# the recognizer's dynamic_train_configs above.
actor = TabularQLearner(
    domain_name="minigrid",
    problem_name=f"MiniGrid-SimpleCrossingS13N4-DynamicGoal-{ACTOR_GOAL[0]}x{ACTOR_GOAL[1]}-v0",
    algorithm=QLEARNING,
    num_timesteps=100000,
)
actor.learn()

# The sample is generated stochastically to simulate suboptimal behavior;
# noise is added to the action values.
full_sequence = actor.generate_observation(
    action_selection_method=stochastic_amplified_selection,
    random_optimalism=True,  # the noise that's added to the actions
)

# Half of the full sequence length is requested, with is_consecutive=False.
partial_sequence = random_subset_with_order(full_sequence, int(0.5 * len(full_sequence)), is_consecutive=False)
closest_goal = recognizer.inference_phase(partial_sequence, ACTOR_GOAL, 0.5)
print(f"closest_goal returned by Graql: {closest_goal}\nactual goal actor aimed towards: {ACTOR_GOAL}")
|