gr-libs 0.1.7.post0__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evaluation/analyze_results_cross_alg_cross_domain.py +236 -246
- evaluation/create_minigrid_map_image.py +10 -6
- evaluation/file_system.py +16 -5
- evaluation/generate_experiments_results.py +123 -74
- evaluation/generate_experiments_results_new_ver1.py +227 -243
- evaluation/generate_experiments_results_new_ver2.py +317 -317
- evaluation/generate_task_specific_statistics_plots.py +481 -253
- evaluation/get_plans_images.py +41 -26
- evaluation/increasing_and_decreasing_.py +97 -56
- gr_libs/__init__.py +2 -1
- gr_libs/_version.py +2 -2
- gr_libs/environment/__init__.py +16 -8
- gr_libs/environment/environment.py +167 -39
- gr_libs/environment/utils/utils.py +22 -12
- gr_libs/metrics/__init__.py +5 -0
- gr_libs/metrics/metrics.py +76 -34
- gr_libs/ml/__init__.py +2 -0
- gr_libs/ml/agent.py +21 -6
- gr_libs/ml/base/__init__.py +1 -1
- gr_libs/ml/base/rl_agent.py +13 -10
- gr_libs/ml/consts.py +1 -1
- gr_libs/ml/neural/deep_rl_learner.py +433 -352
- gr_libs/ml/neural/utils/__init__.py +1 -1
- gr_libs/ml/neural/utils/dictlist.py +3 -3
- gr_libs/ml/neural/utils/penv.py +5 -2
- gr_libs/ml/planner/mcts/mcts_model.py +524 -302
- gr_libs/ml/planner/mcts/utils/__init__.py +1 -1
- gr_libs/ml/planner/mcts/utils/node.py +11 -7
- gr_libs/ml/planner/mcts/utils/tree.py +14 -10
- gr_libs/ml/sequential/__init__.py +1 -1
- gr_libs/ml/sequential/lstm_model.py +256 -175
- gr_libs/ml/tabular/state.py +7 -7
- gr_libs/ml/tabular/tabular_q_learner.py +123 -73
- gr_libs/ml/tabular/tabular_rl_agent.py +20 -19
- gr_libs/ml/utils/__init__.py +8 -2
- gr_libs/ml/utils/format.py +78 -70
- gr_libs/ml/utils/math.py +2 -1
- gr_libs/ml/utils/other.py +1 -1
- gr_libs/ml/utils/storage.py +88 -28
- gr_libs/problems/consts.py +1549 -1227
- gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +145 -80
- gr_libs/recognizer/graml/gr_dataset.py +209 -110
- gr_libs/recognizer/graml/graml_recognizer.py +431 -240
- gr_libs/recognizer/recognizer.py +38 -27
- gr_libs/recognizer/utils/__init__.py +1 -1
- gr_libs/recognizer/utils/format.py +8 -3
- {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/METADATA +1 -1
- gr_libs-0.1.8.dist-info/RECORD +70 -0
- {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/WHEEL +1 -1
- tests/test_gcdraco.py +10 -0
- tests/test_graml.py +8 -4
- tests/test_graql.py +2 -1
- tutorials/gcdraco_panda_tutorial.py +66 -0
- tutorials/gcdraco_parking_tutorial.py +61 -0
- tutorials/graml_minigrid_tutorial.py +42 -12
- tutorials/graml_panda_tutorial.py +35 -14
- tutorials/graml_parking_tutorial.py +37 -20
- tutorials/graml_point_maze_tutorial.py +33 -13
- tutorials/graql_minigrid_tutorial.py +31 -15
- gr_libs-0.1.7.post0.dist-info/RECORD +0 -67
- {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/top_level.txt +0 -0
gr_libs/recognizer/recognizer.py
CHANGED
@@ -4,42 +4,53 @@ from gr_libs.environment.environment import EnvProperty, SUPPORTED_DOMAINS
 from gr_libs.environment.utils.utils import domain_to_env_property
 from gr_libs.ml.base.rl_agent import RLAgent
 
+
 class Recognizer(ABC):
+    def __init__(
+        self,
+        domain_name: str,
+        env_name: str,
+        collect_statistics=False,
+        rl_agent_type: Type[RLAgent] = None,
+        **kwargs,
+    ):
+        assert domain_name in SUPPORTED_DOMAINS
+        self.rl_agent_type = rl_agent_type
+        self.domain_name = domain_name
+        self.env_prop_type = domain_to_env_property(self.domain_name)
+        self.env_prop = self.env_prop_type(env_name)
+        self.collect_statistics = collect_statistics
+
+    @abstractmethod
+    def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
+        pass
 
 class LearningRecognizer(Recognizer):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    def domain_learning_phase(self, base_goals: List[str], train_configs: List):
+        self.original_train_configs = train_configs
 
-    def domain_learning_phase(self, base_goals: List[str], train_configs: List):
-        self.original_train_configs = train_configs
 
 # a recognizer that needs to train agents for every new goal as part of the goal adaptation phase (that's why it needs dynamic train configs)
 class GaAgentTrainerRecognizer(Recognizer):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    @abstractmethod
+    def goals_adaptation_phase(self, dynamic_goals: List[str], dynamic_train_configs):
+        pass
 
-        pass
+    def domain_learning_phase(self, base_goals: List[str], train_configs: List):
+        super().domain_learning_phase(base_goals, train_configs)
 
-    def domain_learning_phase(self, base_goals: List[str], train_configs: List):
-        super().domain_learning_phase(base_goals, train_configs)
 
 class GaAdaptingRecognizer(Recognizer):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
 
+    @abstractmethod
+    def goals_adaptation_phase(self, dynamic_goals: List[str]):
+        pass
gr_libs/recognizer/utils/__init__.py
CHANGED
@@ -1 +1 @@
-from .format import recognizer_str_to_obj
+from .format import recognizer_str_to_obj
gr_libs/recognizer/utils/format.py
CHANGED
@@ -1,6 +1,11 @@
-from gr_libs.recognizer.graml.graml_recognizer import
+from gr_libs.recognizer.graml.graml_recognizer import (
+    GCGraml,
+    ExpertBasedGraml,
+    MCTSBasedGraml,
+)
 from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Graql, Draco, GCDraco
 
+
 def recognizer_str_to_obj(recognizer_str: str):
     recognizer_map = {
         "GCGraml": GCGraml,
@@ -8,6 +13,6 @@ def recognizer_str_to_obj(recognizer_str: str):
         "MCTSBasedGraml": MCTSBasedGraml,
         "Graql": Graql,
         "Draco": Draco,
-        "GCDraco": GCDraco
+        "GCDraco": GCDraco,
     }
-    return recognizer_map.get(recognizer_str)
+    return recognizer_map.get(recognizer_str)
gr_libs-0.1.8.dist-info/RECORD
ADDED
@@ -0,0 +1,70 @@
+evaluation/analyze_results_cross_alg_cross_domain.py,sha256=ioAySQ92yWMg9rNUxy5TQ-viPMTRjIPqBLwYmUIgaCA,11346
+evaluation/create_minigrid_map_image.py,sha256=l8MukZBGV63EnEjdGhbdH-9sXR7kTbfPnEzE5ZJkw6w,1257
+evaluation/file_system.py,sha256=Asaqq0_4CFVhQ8VyfEuaDBQK3-QkjMJIUfx5SA8y1Co,1645
+evaluation/generate_experiments_results.py,sha256=RIlztqGh9OtZg6usLRypEJJdPmZA_M3x6bHi-T0WCxs,5257
+evaluation/generate_experiments_results_new_ver1.py,sha256=yGg8MFU-9xwdYsOyZCehGwTvWnwoudN7ctpUOLmi070,10305
+evaluation/generate_experiments_results_new_ver2.py,sha256=ERJRuAY446QGOvqRlbUWYELF_fFO4GgOeMcdTXIOvrY,14267
+evaluation/generate_task_specific_statistics_plots.py,sha256=IIL-4qSbR2YLih8vvOIkz-poIq7p7_NuEZG1xR10vXw,19126
+evaluation/get_plans_images.py,sha256=F2Tez4ZeFsU22R8x0pjeUQ2GMQrpbN6g8XCHDPrz_F8,2730
+evaluation/increasing_and_decreasing_.py,sha256=MscBjQwGauBdRoFxgHfLpcd-iu6WqNgmD_iHx4wfh2U,3866
+gr_libs/__init__.py,sha256=MpvF14G0wRxRm9dxz97p5JxRhIbAfyFc3MJ1S8YRsNM,297
+gr_libs/_version.py,sha256=AjUi5zEL_BoWoXMXR1FnWc3mD6FHX7snDXjDHVLoens,511
+gr_libs/environment/__init__.py,sha256=mttRtyD8jXs9qMTjqruKO5JohrFhlS4QPEDYw5Se2MA,1150
+gr_libs/environment/environment.py,sha256=4_LbPuIDdPI-yM0wnQFL_xqrC9VOa9Q_8ejkIUKTbvs,10445
+gr_libs/environment/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gr_libs/environment/utils/utils.py,sha256=4K5yy4bs-dNRsfl3KUctQw4Kx-lzYuN_8JzI-xxk_Ng,630
+gr_libs/metrics/__init__.py,sha256=cNIyyAB4YJHAG5wzIh8ZW1792vYrt6iW9i2mkBJUa9Q,110
+gr_libs/metrics/metrics.py,sha256=Yj3qYRAyrQeTxHK_shIjrgAKic4vMibK6ClyKavTzZ0,9016
+gr_libs/ml/__init__.py,sha256=r2SBzpSNpR6wynSn_npew8CCz0E_PBwQbuBGWFfIKRQ,235
+gr_libs/ml/agent.py,sha256=ea1yRltKX0LSpRMnpAQLRKvvKoLMQz9MgMeWBPhQISw,2095
+gr_libs/ml/consts.py,sha256=vsEB1nk5V_qP3FjNlv4vBKeTTFngV3RNaNp6fWnmEz0,366
+gr_libs/ml/base/__init__.py,sha256=nofgF53Gho5KlAV6BWTi0jfQACDynp6bq3kctm1R6aM,69
+gr_libs/ml/base/rl_agent.py,sha256=OIrcdtgSHk7ZcUdSyQHbPnwU9T1SqGRQkOzx5rSt8LY,1600
+gr_libs/ml/neural/__init__.py,sha256=g-0D5oFX8W52To4OR8vO8kDoBLSxAupVqwcQw8XjT5E,180
+gr_libs/ml/neural/deep_rl_learner.py,sha256=mv89WoCCGHcrnyEOzHldCMlQeUyAIbGoYHMQmahzM0w,21808
+gr_libs/ml/neural/utils/__init__.py,sha256=xbJ40_o7rTrzS9LXidjurGaRMdMjvSUnXsTjbJf9kR8,107
+gr_libs/ml/neural/utils/dictlist.py,sha256=ORFez_KmaCzraStF97hxdgCAAALP4Er8u3e9RcqlvhM,1030
+gr_libs/ml/neural/utils/penv.py,sha256=v_yy2E05ZyspH-95trnjB0es10A2i13iBr3Zub_goZA,1897
+gr_libs/ml/planner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gr_libs/ml/planner/mcts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gr_libs/ml/planner/mcts/mcts_model.py,sha256=zbYKhb_49iS2I9Tgh7NtAf1iwM5lH6ZC3y3-uDvr40I,26466
+gr_libs/ml/planner/mcts/utils/__init__.py,sha256=0ccEf23-6VIenUSrlVFCq0VNVDkCHHNzBw7jR09UiO4,46
+gr_libs/ml/planner/mcts/utils/node.py,sha256=LcbBjzURMDF4v_Lvz24dyhhW5xb1xQKWdPkue-2lNLM,1056
+gr_libs/ml/planner/mcts/utils/tree.py,sha256=49i1ZLYV-9w-lh09Mb4qd3zQ92EqpyYWn5cQA5mo1F0,3409
+gr_libs/ml/sequential/__init__.py,sha256=HTtnXxcqxF7e_uPh0--kOhHZtF2YasvgKJDIig38NkM,62
+gr_libs/ml/sequential/lstm_model.py,sha256=1MESuStASSTptJ5dWIUyex76o7UNria2yBPoza3sCOo,10868
+gr_libs/ml/tabular/__init__.py,sha256=jAfjfTFZLLlVm1KUiJdxdnaNGFp1J2KBU89q_vvradM,177
+gr_libs/ml/tabular/state.py,sha256=ImpIrYWF80PB-4EeQ2Q9nO7jMZ2s0hGbgsir1ZtsO88,700
+gr_libs/ml/tabular/tabular_q_learner.py,sha256=CSzyN0qVBPFUqiJ_uJSTemh7FddeEJVar4Zu8z2IZ6I,19060
+gr_libs/ml/tabular/tabular_rl_agent.py,sha256=JzFgVhiILjhgA_aBbsJYgQaFUPEJOxlHTcsPKrg-h4E,3658
+gr_libs/ml/utils/__init__.py,sha256=eYkoOi-rIjxog1ikMqrCmXVOiSP9XHQh66fwWfBsfKs,284
+gr_libs/ml/utils/env.py,sha256=AWVN0OXYmFU-J3FUiwvEAIY93Suf1oL6VNcxtyWJraM,171
+gr_libs/ml/utils/format.py,sha256=xh2TKzVZsy1XmK8JCk_MJhB5zn37VIHVx060Rd-YqKE,3581
+gr_libs/ml/utils/math.py,sha256=4U_F67eS6xuS7fN9hNtZFcFPObmSN2soi-2O3AGVgVs,442
+gr_libs/ml/utils/other.py,sha256=QM44H4Bx1ajaz594P23sQ7tJ0JDraABeQSD23ygWf9w,506
+gr_libs/ml/utils/storage.py,sha256=a5F-KXRMdGGHPTKSQpGeNRHfZyv9m6j7ZhuLLNNjTvE,4947
+gr_libs/problems/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gr_libs/problems/consts.py,sha256=tzad2l2DVnQxHen1AS8rPYmGiy02x_53NOrjmv9KL9E,62096
+gr_libs/recognizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gr_libs/recognizer/recognizer.py,sha256=Q4EpAQFgahkDMdhm8Dgsu2aRB8EetpxxELtrTuv-ri4,1897
+gr_libs/recognizer/gr_as_rl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py,sha256=LG_nVjujzcmC9euyouTB9CywvCHpuqe4CPB8jTlzgfA,6743
+gr_libs/recognizer/graml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gr_libs/recognizer/graml/gr_dataset.py,sha256=c0-n4kzMQN78DUQV_7egz_oyHC9EMmmYixMRA2DFO3k,10106
+gr_libs/recognizer/graml/graml_recognizer.py,sha256=qm6kdRX8HpKOb6S3D_8oWj6HA4pzk1jnsA9BekTOKyE,19867
+gr_libs/recognizer/utils/__init__.py,sha256=MvXPRyr30W5C_n-Dul3aheE_9SWy2aIMGINKWj36mfM,42
+gr_libs/recognizer/utils/format.py,sha256=O1NbaZ_3bgl5jKgcGwdA0R1NWGm9Ha0EG6mHJcivpTY,512
+tests/test_gcdraco.py,sha256=o4badhWKXMhysPVt0a2jjvKDR-mfzt5KqyRyIutZnS0,292
+tests/test_graml.py,sha256=1h9LjRbss_fpqViDX7KBjLKfu4EZstREOnVHQCDJct4,576
+tests/test_graql.py,sha256=VM6o6wHuf2y76YQo7pbBrerbBZe4MwYv9sFvj1Y-nZ0,146
+tutorials/gcdraco_panda_tutorial.py,sha256=hfZ--4Q0JEwFTnmZ9PStIhJxk3fHAgnP1XhE_Mq4nS4,2287
+tutorials/gcdraco_parking_tutorial.py,sha256=iWhfRGFFqUXUEKNXEWww1SIoEIo8dJ8GOp5FzFfAlKE,2097
+tutorials/graml_minigrid_tutorial.py,sha256=eHNe5G5gMktWb-3Z_nHvQP6O5UNbFVQpCp6HUnJsxYQ,2204
+tutorials/graml_panda_tutorial.py,sha256=8zpPyoA8GI0bTjGI3mKCBW2eZwlZ2dQ5NNtVcg9t5rU,2241
+tutorials/graml_parking_tutorial.py,sha256=EuQ_j1KqJmMaFEgINvNSlSe6TKkL3XP-mh0M0ZC6IIA,2011
+tutorials/graml_point_maze_tutorial.py,sha256=dgvz1qnE6k2YOE_5dyRDF8MQYRAuLGYcdpvi-NiNoSs,2186
+tutorials/graql_minigrid_tutorial.py,sha256=pznoOcO8PU-VwfH9sxlJBOQskmilfrHg9nky1m2Adz0,1940
+gr_libs-0.1.8.dist-info/METADATA,sha256=-DZi3tJBEZF7hdoh6L5PRWycjIihDp4iKVALsBM7VME,9614
+gr_libs-0.1.8.dist-info/WHEEL,sha256=GHB6lJx2juba1wDgXDNlMTyM13ckjBMKf-OnwgKOCtA,91
+gr_libs-0.1.8.dist-info/top_level.txt,sha256=fJQF8Q8Dfh_D3pA2mhNodazNjzW6b3oWfnx6Jdo-pBU,35
+gr_libs-0.1.8.dist-info/RECORD,,
tests/test_gcdraco.py
ADDED
@@ -0,0 +1,10 @@
+from tutorials.gcdraco_panda_tutorial import run_gcdraco_panda_tutorial
+from tutorials.gcdraco_parking_tutorial import run_gcdraco_parking_tutorial
+
+
+def test_gcdraco_panda_tutorial():
+    run_gcdraco_panda_tutorial()
+
+
+def test_gcdraco_parking_tutorial():
+    run_gcdraco_parking_tutorial()
tests/test_graml.py
CHANGED
@@ -3,14 +3,18 @@ from tutorials.graml_panda_tutorial import run_graml_panda_tutorial
 from tutorials.graml_parking_tutorial import run_graml_parking_tutorial
 from tutorials.graml_point_maze_tutorial import run_graml_point_maze_tutorial
 
+
 def test_graml_minigrid_tutorial():
+    run_graml_minigrid_tutorial()
+
 
 def test_graml_panda_tutorial():
+    run_graml_panda_tutorial()
+
 
 def test_graml_parking_tutorial():
+    run_graml_parking_tutorial()
+
 
 def test_graml_point_maze_tutorial():
+    run_graml_point_maze_tutorial()
tests/test_graql.py
CHANGED
tutorials/gcdraco_panda_tutorial.py
ADDED
@@ -0,0 +1,66 @@
+from stable_baselines3 import PPO, SAC
+from gr_libs.metrics import (
+    stochastic_amplified_selection,
+    mean_p_value,
+    mean_wasserstein_distance,
+)
+from gr_libs import GCDraco
+from gr_libs.environment.utils.utils import domain_to_env_property
+import numpy as np
+from gr_libs.environment.environment import PANDA, PandaProperty
+from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
+from gr_libs.ml.utils.format import random_subset_with_order
+import gr_envs.panda_scripts
+
+
+def run_gcdraco_panda_tutorial():
+    recognizer = GCDraco(
+        domain_name=PANDA,
+        env_name="PandaMyReachDense",
+        evaluation_function=mean_wasserstein_distance,  # or mean_p_value
+    )
+
+    recognizer.domain_learning_phase(
+        base_goals=[np.array([PandaProperty.sample_goal()]) for _ in range(30)],
+        train_configs=[(SAC, 800000)],
+    )
+
+    recognizer.goals_adaptation_phase(
+        dynamic_goals=[
+            np.array([[-0.1, -0.1, 0.1]]),
+            np.array([[-0.1, 0.1, 0.1]]),
+            np.array([[0.2, 0.2, 0.1]]),
+        ]
+    )
+
+    # TD3 is different from recognizer and expert algorithms, which are SAC #
+    property_type = domain_to_env_property(PANDA)
+    env_property = property_type("PandaMyReachDense")
+    problem_name = env_property.goal_to_problem_str(np.array([[-0.1, -0.1, 0.1]]))
+    actor = DeepRLAgent(
+        domain_name=PANDA,
+        problem_name=problem_name,
+        env_prop=env_property,
+        algorithm=PPO,
+        num_timesteps=400000,
+    )
+    actor.learn()
+    # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+    full_sequence = actor.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True,  # the noise that's added to the actions
+        with_dict=True,
+    )
+    partial_sequence = random_subset_with_order(
+        full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False
+    )
+    closest_goal = recognizer.inference_phase(
+        partial_sequence, np.array([[-0.1, -0.1, 0.1]]), 0.5
+    )
+    print(
+        f"closest_goal returned by GCDRACO: {closest_goal}\nactual goal actor aimed towards: [-0.1, -0.1, 0.1]"
+    )
+
+
+if __name__ == "__main__":
+    run_gcdraco_panda_tutorial()
tutorials/gcdraco_parking_tutorial.py
ADDED
@@ -0,0 +1,61 @@
+from stable_baselines3 import PPO, TD3
+from gr_libs.metrics import (
+    stochastic_amplified_selection,
+    mean_p_value,
+    mean_wasserstein_distance,
+)
+from gr_libs import GCDraco
+from gr_libs.environment.utils.utils import domain_to_env_property
+from gr_libs.environment.environment import PARKING, ParkingProperty
+from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
+from gr_libs.ml.utils.format import random_subset_with_order
+import gr_envs
+
+
+def run_gcdraco_parking_tutorial():
+    recognizer = GCDraco(
+        domain_name=PARKING,
+        env_name="Parking-S-14-PC-",
+        evaluation_function=mean_wasserstein_distance,  # or mean_p_value
+    )
+
+    recognizer.domain_learning_phase([i for i in range(1, 21)], [(PPO, 200000)])
+    recognizer.goals_adaptation_phase(
+        dynamic_goals=["1", "11", "21"]
+        # no need for expert sequence generation since GCRL is used
+    )
+
+    property_type = domain_to_env_property(PARKING)
+    env_property = property_type("Parking-S-14-PC-")
+
+    # TD3 is different from recognizer and expert algorithms, which are SAC #
+    actor = DeepRLAgent(
+        domain_name="parking",
+        problem_name="Parking-S-14-PC--GI-11-v0",
+        env_prop=env_property,
+        algorithm=TD3,
+        num_timesteps=400000,
+    )
+    actor.learn()
+    # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+    full_sequence = actor.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True,  # the noise that's added to the actions
+        with_dict=True,
+    )
+
+    partial_sequence = random_subset_with_order(
+        full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False
+    )
+    closest_goal = recognizer.inference_phase(
+        partial_sequence,
+        ParkingProperty("Parking-S-14-PC--GI-11-v0").str_to_goal(),
+        0.5,
+    )
+    print(
+        f"closest_goal returned by GCDRACO: {closest_goal}\nactual goal actor aimed towards: 11"
+    )
+
+
+if __name__ == "__main__":
+    run_gcdraco_parking_tutorial()
tutorials/graml_minigrid_tutorial.py
CHANGED
@@ -1,34 +1,64 @@
 from gr_libs.environment.environment import MINIGRID, QLEARNING
+from gr_libs.environment.utils.utils import domain_to_env_property
 from gr_libs.metrics.metrics import stochastic_amplified_selection
 from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
 from gr_libs.ml.utils.format import random_subset_with_order
 from gr_libs import ExpertBasedGraml
 
+
 def run_graml_minigrid_tutorial():
     recognizer = ExpertBasedGraml(
-        domain_name=MINIGRID,
-        env_name="MiniGrid-SimpleCrossingS13N4"
+        domain_name=MINIGRID, env_name="MiniGrid-SimpleCrossingS13N4"
     )
 
-    recognizer.domain_learning_phase(
+    recognizer.domain_learning_phase(
+        base_goals=[
+            (11, 1),
+            (11, 11),
+            (1, 11),
+            (7, 11),
+            (8, 1),
+            (10, 6),
+            (6, 9),
+            (11, 3),
+            (11, 5),
+        ],
+        train_configs=[(QLEARNING, 100000) for _ in range(9)],
+    )
 
     recognizer.goals_adaptation_phase(
-        dynamic_goals
-        dynamic_train_configs=[
+        dynamic_goals=[(11, 1), (11, 11), (1, 11)],
+        dynamic_train_configs=[
+            (QLEARNING, 100000) for _ in range(3)
+        ],  # for expert sequence generation.
     )
+
+    property_type = domain_to_env_property(MINIGRID)
+    env_property = property_type("MiniGrid-SimpleCrossingS13N4")
+
     # TD3 is different from recognizer and expert algorithms, which are SAC #
-    actor = TabularQLearner(
+    actor = TabularQLearner(
+        domain_name="minigrid",
+        problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0",
+        env_prop=env_property,
+        algorithm=QLEARNING,
+        num_timesteps=100000,
+    )
     actor.learn()
     # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
     full_sequence = actor.generate_observation(
         action_selection_method=stochastic_amplified_selection,
-        random_optimalism=True,
+        random_optimalism=True,  # the noise that's added to the actions
+    )
+
+    partial_sequence = random_subset_with_order(
+        full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False
+    )
+    closest_goal = recognizer.inference_phase(partial_sequence, (11, 1), 0.5)
+    print(
+        f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (11, 1)"
     )
 
-    partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
-    closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
-    print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
 
 if __name__ == "__main__":
-    run_graml_minigrid_tutorial()
+    run_graml_minigrid_tutorial()
tutorials/graml_panda_tutorial.py
CHANGED
@@ -1,41 +1,62 @@
-
 import numpy as np
 from stable_baselines3 import PPO, SAC
 import gr_libs.environment.environment
-from gr_libs.environment.environment import
+from gr_libs.environment.environment import (
+    PANDA,
+    EnvProperty,
+    GCEnvProperty,
+    PandaProperty,
+)
 from gr_libs.environment.utils.utils import domain_to_env_property
 from gr_libs.metrics.metrics import stochastic_amplified_selection
 from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
 from gr_libs.ml.utils.format import random_subset_with_order
 from gr_libs import GCGraml
 
+
 def run_graml_panda_tutorial():
-    recognizer = GCGraml(
-        domain_name=PANDA,
-        env_name="PandaMyReachDense"
+    recognizer = GCGraml(  # TODO make these tutorials into pytests
+        domain_name=PANDA, env_name="PandaMyReachDense"
     )
     recognizer.domain_learning_phase(
-        base_goals=[np.array([PandaProperty.sample_goal()]) for _ in range(1,30)],
-        train_configs=[(SAC, 800000)]
+        base_goals=[np.array([PandaProperty.sample_goal()]) for _ in range(1, 30)],
+        train_configs=[(SAC, 800000)],
    )
     recognizer.goals_adaptation_phase(
-        dynamic_goals=[
+        dynamic_goals=[
+            np.array([[-0.1, -0.1, 0.1]]),
+            np.array([[-0.1, 0.1, 0.1]]),
+            np.array([[0.2, 0.2, 0.1]]),
+        ]
     )
     # TD3 is different from recognizer and expert algorithms, which are SAC #
     property_type = domain_to_env_property(PANDA)
     env_property = property_type("PandaMyReachDense")
     problem_name = env_property.goal_to_problem_str(np.array([[-0.1, -0.1, 0.1]]))
-    actor = DeepRLAgent(
+    actor = DeepRLAgent(
+        domain_name=PANDA,
+        problem_name=problem_name,
+        env_prop=env_property,
+        algorithm=PPO,
+        num_timesteps=400000,
+    )
     actor.learn()
     # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
     full_sequence = actor.generate_observation(
         action_selection_method=stochastic_amplified_selection,
-        random_optimalism=True,
+        random_optimalism=True,  # the noise that's added to the actions
+    )
+
+    partial_sequence = random_subset_with_order(
+        full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False
+    )
+    closest_goal = recognizer.inference_phase(
+        partial_sequence, np.array([[-0.1, -0.1, 0.1]]), 0.5
+    )
+    print(
+        f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: [-0.1, -0.1, 0.1]"
     )
 
-    partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
-    closest_goal = recognizer.inference_phase(partial_sequence, np.array([[-0.1, -0.1, 0.1]]), 0.5)
-    print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: [-0.1, -0.1, 0.1]")
 
 if __name__ == "__main__":
-    run_graml_panda_tutorial()
+    run_graml_panda_tutorial()
tutorials/graml_parking_tutorial.py
CHANGED
@@ -1,39 +1,56 @@
-
 from stable_baselines3 import PPO, SAC, TD3
-from gr_libs.environment.environment import
+from gr_libs.environment.environment import (
+    PARKING,
+    EnvProperty,
+    GCEnvProperty,
+    ParkingProperty,
+)
+from gr_libs.environment.utils.utils import domain_to_env_property
 from gr_libs.metrics.metrics import stochastic_amplified_selection
-from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
+from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
 from gr_libs.ml.utils.format import random_subset_with_order
 from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml, GCGraml
+
 
 def run_graml_parking_tutorial():
-    recognizer = GCGraml(
-    )
-    recognizer.domain_learning_phase(
-        [i for i in range(1,21)],
-        [(PPO, 200000)]
-    )
+    recognizer = GCGraml(domain_name=PARKING, env_name="Parking-S-14-PC-")
+
+    recognizer.domain_learning_phase([i for i in range(1, 21)], [(PPO, 200000)])
     recognizer.goals_adaptation_phase(
-        dynamic_goals
+        dynamic_goals=["1", "11", "21"]
        # no need for expert sequence generation since GCRL is used
     )
 
+    property_type = domain_to_env_property(PARKING)
+    env_property = property_type("Parking-S-14-PC-")
+
     # TD3 is different from recognizer and expert algorithms, which are SAC #
-    actor = DeepRLAgent(
+    actor = DeepRLAgent(
+        domain_name="parking",
+        problem_name="Parking-S-14-PC--GI-11-v0",
+        env_prop=env_property,
+        algorithm=TD3,
+        num_timesteps=400000,
+    )
     actor.learn()
     # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
     full_sequence = actor.generate_observation(
         action_selection_method=stochastic_amplified_selection,
-        random_optimalism=True,
+        random_optimalism=True,  # the noise that's added to the actions
+    )
+
+    partial_sequence = random_subset_with_order(
+        full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False
+    )
+    closest_goal = recognizer.inference_phase(
+        partial_sequence,
+        ParkingProperty("Parking-S-14-PC--GI-11-v0").str_to_goal(),
+        0.5,
+    )
+    print(
+        f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: 11"
     )
 
-    partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
-    closest_goal = recognizer.inference_phase(partial_sequence, ParkingProperty("Parking-S-14-PC--GI-11-v0").str_to_goal(), 0.5)
-    print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: 11")
 
 if __name__ == "__main__":
-    run_graml_parking_tutorial()
+    run_graml_parking_tutorial()