gr-libs 0.1.7.post0__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. evaluation/analyze_results_cross_alg_cross_domain.py +236 -246
  2. evaluation/create_minigrid_map_image.py +10 -6
  3. evaluation/file_system.py +16 -5
  4. evaluation/generate_experiments_results.py +123 -74
  5. evaluation/generate_experiments_results_new_ver1.py +227 -243
  6. evaluation/generate_experiments_results_new_ver2.py +317 -317
  7. evaluation/generate_task_specific_statistics_plots.py +481 -253
  8. evaluation/get_plans_images.py +41 -26
  9. evaluation/increasing_and_decreasing_.py +97 -56
  10. gr_libs/__init__.py +2 -1
  11. gr_libs/_version.py +2 -2
  12. gr_libs/environment/__init__.py +16 -8
  13. gr_libs/environment/environment.py +167 -39
  14. gr_libs/environment/utils/utils.py +22 -12
  15. gr_libs/metrics/__init__.py +5 -0
  16. gr_libs/metrics/metrics.py +76 -34
  17. gr_libs/ml/__init__.py +2 -0
  18. gr_libs/ml/agent.py +21 -6
  19. gr_libs/ml/base/__init__.py +1 -1
  20. gr_libs/ml/base/rl_agent.py +13 -10
  21. gr_libs/ml/consts.py +1 -1
  22. gr_libs/ml/neural/deep_rl_learner.py +433 -352
  23. gr_libs/ml/neural/utils/__init__.py +1 -1
  24. gr_libs/ml/neural/utils/dictlist.py +3 -3
  25. gr_libs/ml/neural/utils/penv.py +5 -2
  26. gr_libs/ml/planner/mcts/mcts_model.py +524 -302
  27. gr_libs/ml/planner/mcts/utils/__init__.py +1 -1
  28. gr_libs/ml/planner/mcts/utils/node.py +11 -7
  29. gr_libs/ml/planner/mcts/utils/tree.py +14 -10
  30. gr_libs/ml/sequential/__init__.py +1 -1
  31. gr_libs/ml/sequential/lstm_model.py +256 -175
  32. gr_libs/ml/tabular/state.py +7 -7
  33. gr_libs/ml/tabular/tabular_q_learner.py +123 -73
  34. gr_libs/ml/tabular/tabular_rl_agent.py +20 -19
  35. gr_libs/ml/utils/__init__.py +8 -2
  36. gr_libs/ml/utils/format.py +78 -70
  37. gr_libs/ml/utils/math.py +2 -1
  38. gr_libs/ml/utils/other.py +1 -1
  39. gr_libs/ml/utils/storage.py +88 -28
  40. gr_libs/problems/consts.py +1549 -1227
  41. gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +145 -80
  42. gr_libs/recognizer/graml/gr_dataset.py +209 -110
  43. gr_libs/recognizer/graml/graml_recognizer.py +431 -240
  44. gr_libs/recognizer/recognizer.py +38 -27
  45. gr_libs/recognizer/utils/__init__.py +1 -1
  46. gr_libs/recognizer/utils/format.py +8 -3
  47. {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/METADATA +1 -1
  48. gr_libs-0.1.8.dist-info/RECORD +70 -0
  49. {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/WHEEL +1 -1
  50. tests/test_gcdraco.py +10 -0
  51. tests/test_graml.py +8 -4
  52. tests/test_graql.py +2 -1
  53. tutorials/gcdraco_panda_tutorial.py +66 -0
  54. tutorials/gcdraco_parking_tutorial.py +61 -0
  55. tutorials/graml_minigrid_tutorial.py +42 -12
  56. tutorials/graml_panda_tutorial.py +35 -14
  57. tutorials/graml_parking_tutorial.py +37 -20
  58. tutorials/graml_point_maze_tutorial.py +33 -13
  59. tutorials/graql_minigrid_tutorial.py +31 -15
  60. gr_libs-0.1.7.post0.dist-info/RECORD +0 -67
  61. {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/top_level.txt +0 -0
gr_libs/recognizer/recognizer.py CHANGED
@@ -4,42 +4,53 @@ from gr_libs.environment.environment import EnvProperty, SUPPORTED_DOMAINS
 from gr_libs.environment.utils.utils import domain_to_env_property
 from gr_libs.ml.base.rl_agent import RLAgent
 
+
 class Recognizer(ABC):
-    def __init__(self, domain_name: str, env_name:str, collect_statistics=False, rl_agent_type: Type[RLAgent]=None):
-        assert domain_name in SUPPORTED_DOMAINS
-        self.rl_agent_type = rl_agent_type
-        self.domain_name = domain_name
-        self.env_prop_type = domain_to_env_property(self.domain_name)
-        self.env_prop = self.env_prop_type(env_name)
-        self.collect_statistics = collect_statistics
-
-    @abstractmethod
-    def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
-        pass
+    def __init__(
+        self,
+        domain_name: str,
+        env_name: str,
+        collect_statistics=False,
+        rl_agent_type: Type[RLAgent] = None,
+        **kwargs,
+    ):
+        assert domain_name in SUPPORTED_DOMAINS
+        self.rl_agent_type = rl_agent_type
+        self.domain_name = domain_name
+        self.env_prop_type = domain_to_env_property(self.domain_name)
+        self.env_prop = self.env_prop_type(env_name)
+        self.collect_statistics = collect_statistics
+
+    @abstractmethod
+    def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
+        pass
+
 
 class LearningRecognizer(Recognizer):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    def domain_learning_phase(self, base_goals: List[str], train_configs: List):
+        self.original_train_configs = train_configs
 
-    def domain_learning_phase(self, base_goals: List[str], train_configs: List):
-        self.original_train_configs = train_configs
 
 # a recognizer that needs to train agents for every new goal as part of the goal adaptation phase (that's why it needs dynamic train configs)
 class GaAgentTrainerRecognizer(Recognizer):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    @abstractmethod
+    def goals_adaptation_phase(self, dynamic_goals: List[str], dynamic_train_configs):
+        pass
 
-    @abstractmethod
-    def goals_adaptation_phase(self, dynamic_goals: List[str], dynamic_train_configs):
-        pass
+    def domain_learning_phase(self, base_goals: List[str], train_configs: List):
+        super().domain_learning_phase(base_goals, train_configs)
 
-    def domain_learning_phase(self, base_goals: List[str], train_configs: List):
-        super().domain_learning_phase(base_goals, train_configs)
 
 class GaAdaptingRecognizer(Recognizer):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
 
-    @abstractmethod
-    def goals_adaptation_phase(self, dynamic_goals: List[str]):
-        pass
+    @abstractmethod
+    def goals_adaptation_phase(self, dynamic_goals: List[str]):
+        pass
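The reworked base class above now threads **kwargs through Recognizer.__init__, so concrete recognizers can accept extra options without changing the base signature, and GaAgentTrainerRecognizer declares goals_adaptation_phase as abstract while delegating domain_learning_phase to the base class. A minimal sketch of a subclass against this 0.1.8 interface; the DummyRecognizer name and its trivial method bodies are illustrative only and not part of the package:

    from typing import List

    from gr_libs.recognizer.recognizer import GaAgentTrainerRecognizer


    class DummyRecognizer(GaAgentTrainerRecognizer):
        # Illustrative subclass: shows which abstract methods a concrete
        # recognizer must still provide under the 0.1.8 base classes.
        def goals_adaptation_phase(self, dynamic_goals: List[str], dynamic_train_configs):
            self.dynamic_goals = dynamic_goals  # placeholder logic

        def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
            return str(true_goal)  # placeholder logic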
gr_libs/recognizer/utils/__init__.py CHANGED
@@ -1 +1 @@
-from .format import recognizer_str_to_obj
+from .format import recognizer_str_to_obj
gr_libs/recognizer/utils/format.py CHANGED
@@ -1,6 +1,11 @@
-from gr_libs.recognizer.graml.graml_recognizer import GCGraml, ExpertBasedGraml, MCTSBasedGraml
+from gr_libs.recognizer.graml.graml_recognizer import (
+    GCGraml,
+    ExpertBasedGraml,
+    MCTSBasedGraml,
+)
 from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Graql, Draco, GCDraco
 
+
 def recognizer_str_to_obj(recognizer_str: str):
     recognizer_map = {
         "GCGraml": GCGraml,
@@ -8,6 +13,6 @@ def recognizer_str_to_obj(recognizer_str: str):
         "MCTSBasedGraml": MCTSBasedGraml,
         "Graql": Graql,
         "Draco": Draco,
-        "GCDraco": GCDraco
+        "GCDraco": GCDraco,
     }
-    return recognizer_map.get(recognizer_str)
+    return recognizer_map.get(recognizer_str)
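recognizer_str_to_obj maps a recognizer's name to its class. A short usage sketch, illustrative only; the lookup strings are the keys shown in the map above:

    from gr_libs.recognizer.utils import recognizer_str_to_obj

    recognizer_cls = recognizer_str_to_obj("GCDraco")        # the GCDraco class
    missing = recognizer_str_to_obj("NoSuchRecognizer")      # None, since dict.get() is used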
{gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: gr_libs
-Version: 0.1.7.post0
+Version: 0.1.8
 Summary: Package with goal recognition frameworks baselines
 Author: Ben Nageris
 Author-email: Matan Shamir <matan.shamir@live.biu.ac.il>, Osher Elhadad <osher.elhadad@live.biu.ac.il>
gr_libs-0.1.8.dist-info/RECORD ADDED
@@ -0,0 +1,70 @@
+evaluation/analyze_results_cross_alg_cross_domain.py,sha256=ioAySQ92yWMg9rNUxy5TQ-viPMTRjIPqBLwYmUIgaCA,11346
+evaluation/create_minigrid_map_image.py,sha256=l8MukZBGV63EnEjdGhbdH-9sXR7kTbfPnEzE5ZJkw6w,1257
+evaluation/file_system.py,sha256=Asaqq0_4CFVhQ8VyfEuaDBQK3-QkjMJIUfx5SA8y1Co,1645
+evaluation/generate_experiments_results.py,sha256=RIlztqGh9OtZg6usLRypEJJdPmZA_M3x6bHi-T0WCxs,5257
+evaluation/generate_experiments_results_new_ver1.py,sha256=yGg8MFU-9xwdYsOyZCehGwTvWnwoudN7ctpUOLmi070,10305
+evaluation/generate_experiments_results_new_ver2.py,sha256=ERJRuAY446QGOvqRlbUWYELF_fFO4GgOeMcdTXIOvrY,14267
+evaluation/generate_task_specific_statistics_plots.py,sha256=IIL-4qSbR2YLih8vvOIkz-poIq7p7_NuEZG1xR10vXw,19126
+evaluation/get_plans_images.py,sha256=F2Tez4ZeFsU22R8x0pjeUQ2GMQrpbN6g8XCHDPrz_F8,2730
+evaluation/increasing_and_decreasing_.py,sha256=MscBjQwGauBdRoFxgHfLpcd-iu6WqNgmD_iHx4wfh2U,3866
+gr_libs/__init__.py,sha256=MpvF14G0wRxRm9dxz97p5JxRhIbAfyFc3MJ1S8YRsNM,297
+gr_libs/_version.py,sha256=AjUi5zEL_BoWoXMXR1FnWc3mD6FHX7snDXjDHVLoens,511
+gr_libs/environment/__init__.py,sha256=mttRtyD8jXs9qMTjqruKO5JohrFhlS4QPEDYw5Se2MA,1150
+gr_libs/environment/environment.py,sha256=4_LbPuIDdPI-yM0wnQFL_xqrC9VOa9Q_8ejkIUKTbvs,10445
+gr_libs/environment/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gr_libs/environment/utils/utils.py,sha256=4K5yy4bs-dNRsfl3KUctQw4Kx-lzYuN_8JzI-xxk_Ng,630
+gr_libs/metrics/__init__.py,sha256=cNIyyAB4YJHAG5wzIh8ZW1792vYrt6iW9i2mkBJUa9Q,110
+gr_libs/metrics/metrics.py,sha256=Yj3qYRAyrQeTxHK_shIjrgAKic4vMibK6ClyKavTzZ0,9016
+gr_libs/ml/__init__.py,sha256=r2SBzpSNpR6wynSn_npew8CCz0E_PBwQbuBGWFfIKRQ,235
+gr_libs/ml/agent.py,sha256=ea1yRltKX0LSpRMnpAQLRKvvKoLMQz9MgMeWBPhQISw,2095
+gr_libs/ml/consts.py,sha256=vsEB1nk5V_qP3FjNlv4vBKeTTFngV3RNaNp6fWnmEz0,366
+gr_libs/ml/base/__init__.py,sha256=nofgF53Gho5KlAV6BWTi0jfQACDynp6bq3kctm1R6aM,69
+gr_libs/ml/base/rl_agent.py,sha256=OIrcdtgSHk7ZcUdSyQHbPnwU9T1SqGRQkOzx5rSt8LY,1600
+gr_libs/ml/neural/__init__.py,sha256=g-0D5oFX8W52To4OR8vO8kDoBLSxAupVqwcQw8XjT5E,180
+gr_libs/ml/neural/deep_rl_learner.py,sha256=mv89WoCCGHcrnyEOzHldCMlQeUyAIbGoYHMQmahzM0w,21808
+gr_libs/ml/neural/utils/__init__.py,sha256=xbJ40_o7rTrzS9LXidjurGaRMdMjvSUnXsTjbJf9kR8,107
+gr_libs/ml/neural/utils/dictlist.py,sha256=ORFez_KmaCzraStF97hxdgCAAALP4Er8u3e9RcqlvhM,1030
+gr_libs/ml/neural/utils/penv.py,sha256=v_yy2E05ZyspH-95trnjB0es10A2i13iBr3Zub_goZA,1897
+gr_libs/ml/planner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gr_libs/ml/planner/mcts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gr_libs/ml/planner/mcts/mcts_model.py,sha256=zbYKhb_49iS2I9Tgh7NtAf1iwM5lH6ZC3y3-uDvr40I,26466
+gr_libs/ml/planner/mcts/utils/__init__.py,sha256=0ccEf23-6VIenUSrlVFCq0VNVDkCHHNzBw7jR09UiO4,46
+gr_libs/ml/planner/mcts/utils/node.py,sha256=LcbBjzURMDF4v_Lvz24dyhhW5xb1xQKWdPkue-2lNLM,1056
+gr_libs/ml/planner/mcts/utils/tree.py,sha256=49i1ZLYV-9w-lh09Mb4qd3zQ92EqpyYWn5cQA5mo1F0,3409
+gr_libs/ml/sequential/__init__.py,sha256=HTtnXxcqxF7e_uPh0--kOhHZtF2YasvgKJDIig38NkM,62
+gr_libs/ml/sequential/lstm_model.py,sha256=1MESuStASSTptJ5dWIUyex76o7UNria2yBPoza3sCOo,10868
+gr_libs/ml/tabular/__init__.py,sha256=jAfjfTFZLLlVm1KUiJdxdnaNGFp1J2KBU89q_vvradM,177
+gr_libs/ml/tabular/state.py,sha256=ImpIrYWF80PB-4EeQ2Q9nO7jMZ2s0hGbgsir1ZtsO88,700
+gr_libs/ml/tabular/tabular_q_learner.py,sha256=CSzyN0qVBPFUqiJ_uJSTemh7FddeEJVar4Zu8z2IZ6I,19060
+gr_libs/ml/tabular/tabular_rl_agent.py,sha256=JzFgVhiILjhgA_aBbsJYgQaFUPEJOxlHTcsPKrg-h4E,3658
+gr_libs/ml/utils/__init__.py,sha256=eYkoOi-rIjxog1ikMqrCmXVOiSP9XHQh66fwWfBsfKs,284
+gr_libs/ml/utils/env.py,sha256=AWVN0OXYmFU-J3FUiwvEAIY93Suf1oL6VNcxtyWJraM,171
+gr_libs/ml/utils/format.py,sha256=xh2TKzVZsy1XmK8JCk_MJhB5zn37VIHVx060Rd-YqKE,3581
+gr_libs/ml/utils/math.py,sha256=4U_F67eS6xuS7fN9hNtZFcFPObmSN2soi-2O3AGVgVs,442
+gr_libs/ml/utils/other.py,sha256=QM44H4Bx1ajaz594P23sQ7tJ0JDraABeQSD23ygWf9w,506
+gr_libs/ml/utils/storage.py,sha256=a5F-KXRMdGGHPTKSQpGeNRHfZyv9m6j7ZhuLLNNjTvE,4947
+gr_libs/problems/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gr_libs/problems/consts.py,sha256=tzad2l2DVnQxHen1AS8rPYmGiy02x_53NOrjmv9KL9E,62096
+gr_libs/recognizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gr_libs/recognizer/recognizer.py,sha256=Q4EpAQFgahkDMdhm8Dgsu2aRB8EetpxxELtrTuv-ri4,1897
+gr_libs/recognizer/gr_as_rl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py,sha256=LG_nVjujzcmC9euyouTB9CywvCHpuqe4CPB8jTlzgfA,6743
+gr_libs/recognizer/graml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gr_libs/recognizer/graml/gr_dataset.py,sha256=c0-n4kzMQN78DUQV_7egz_oyHC9EMmmYixMRA2DFO3k,10106
+gr_libs/recognizer/graml/graml_recognizer.py,sha256=qm6kdRX8HpKOb6S3D_8oWj6HA4pzk1jnsA9BekTOKyE,19867
+gr_libs/recognizer/utils/__init__.py,sha256=MvXPRyr30W5C_n-Dul3aheE_9SWy2aIMGINKWj36mfM,42
+gr_libs/recognizer/utils/format.py,sha256=O1NbaZ_3bgl5jKgcGwdA0R1NWGm9Ha0EG6mHJcivpTY,512
+tests/test_gcdraco.py,sha256=o4badhWKXMhysPVt0a2jjvKDR-mfzt5KqyRyIutZnS0,292
+tests/test_graml.py,sha256=1h9LjRbss_fpqViDX7KBjLKfu4EZstREOnVHQCDJct4,576
+tests/test_graql.py,sha256=VM6o6wHuf2y76YQo7pbBrerbBZe4MwYv9sFvj1Y-nZ0,146
+tutorials/gcdraco_panda_tutorial.py,sha256=hfZ--4Q0JEwFTnmZ9PStIhJxk3fHAgnP1XhE_Mq4nS4,2287
+tutorials/gcdraco_parking_tutorial.py,sha256=iWhfRGFFqUXUEKNXEWww1SIoEIo8dJ8GOp5FzFfAlKE,2097
+tutorials/graml_minigrid_tutorial.py,sha256=eHNe5G5gMktWb-3Z_nHvQP6O5UNbFVQpCp6HUnJsxYQ,2204
+tutorials/graml_panda_tutorial.py,sha256=8zpPyoA8GI0bTjGI3mKCBW2eZwlZ2dQ5NNtVcg9t5rU,2241
+tutorials/graml_parking_tutorial.py,sha256=EuQ_j1KqJmMaFEgINvNSlSe6TKkL3XP-mh0M0ZC6IIA,2011
+tutorials/graml_point_maze_tutorial.py,sha256=dgvz1qnE6k2YOE_5dyRDF8MQYRAuLGYcdpvi-NiNoSs,2186
+tutorials/graql_minigrid_tutorial.py,sha256=pznoOcO8PU-VwfH9sxlJBOQskmilfrHg9nky1m2Adz0,1940
+gr_libs-0.1.8.dist-info/METADATA,sha256=-DZi3tJBEZF7hdoh6L5PRWycjIihDp4iKVALsBM7VME,9614
+gr_libs-0.1.8.dist-info/WHEEL,sha256=GHB6lJx2juba1wDgXDNlMTyM13ckjBMKf-OnwgKOCtA,91
+gr_libs-0.1.8.dist-info/top_level.txt,sha256=fJQF8Q8Dfh_D3pA2mhNodazNjzW6b3oWfnx6Jdo-pBU,35
+gr_libs-0.1.8.dist-info/RECORD,,
{gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.1.0)
+Generator: setuptools (80.3.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
tests/test_gcdraco.py ADDED
@@ -0,0 +1,10 @@
+from tutorials.gcdraco_panda_tutorial import run_gcdraco_panda_tutorial
+from tutorials.gcdraco_parking_tutorial import run_gcdraco_parking_tutorial
+
+
+def test_gcdraco_panda_tutorial():
+    run_gcdraco_panda_tutorial()
+
+
+def test_gcdraco_parking_tutorial():
+    run_gcdraco_parking_tutorial()
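The new test module wraps the tutorial entry points directly, so the GC-DRACO coverage runs through the standard pytest flow. A sketch of invoking just these tests programmatically, assuming pytest is installed; equivalent to running pytest tests/test_gcdraco.py from the repository root:

    import pytest

    # Collect and run only the new GC-DRACO tests.
    raise SystemExit(pytest.main(["tests/test_gcdraco.py"]))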
tests/test_graml.py CHANGED
@@ -3,14 +3,18 @@ from tutorials.graml_panda_tutorial import run_graml_panda_tutorial
 from tutorials.graml_parking_tutorial import run_graml_parking_tutorial
 from tutorials.graml_point_maze_tutorial import run_graml_point_maze_tutorial
 
+
 def test_graml_minigrid_tutorial():
-    run_graml_minigrid_tutorial()
+    run_graml_minigrid_tutorial()
+
 
 def test_graml_panda_tutorial():
-    run_graml_panda_tutorial()
+    run_graml_panda_tutorial()
+
 
 def test_graml_parking_tutorial():
-    run_graml_parking_tutorial()
+    run_graml_parking_tutorial()
+
 
 def test_graml_point_maze_tutorial():
-    run_graml_point_maze_tutorial()
+    run_graml_point_maze_tutorial()
tests/test_graql.py CHANGED
@@ -1,4 +1,5 @@
 from tutorials.graql_minigrid_tutorial import run_graql_minigrid_tutorial
 
+
 def test_graql_minigrid_tutorial():
-    run_graql_minigrid_tutorial()
+    run_graql_minigrid_tutorial()
tutorials/gcdraco_panda_tutorial.py ADDED
@@ -0,0 +1,66 @@
+from stable_baselines3 import PPO, SAC
+from gr_libs.metrics import (
+    stochastic_amplified_selection,
+    mean_p_value,
+    mean_wasserstein_distance,
+)
+from gr_libs import GCDraco
+from gr_libs.environment.utils.utils import domain_to_env_property
+import numpy as np
+from gr_libs.environment.environment import PANDA, PandaProperty
+from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
+from gr_libs.ml.utils.format import random_subset_with_order
+import gr_envs.panda_scripts
+
+
+def run_gcdraco_panda_tutorial():
+    recognizer = GCDraco(
+        domain_name=PANDA,
+        env_name="PandaMyReachDense",
+        evaluation_function=mean_wasserstein_distance,  # or mean_p_value
+    )
+
+    recognizer.domain_learning_phase(
+        base_goals=[np.array([PandaProperty.sample_goal()]) for _ in range(30)],
+        train_configs=[(SAC, 800000)],
+    )
+
+    recognizer.goals_adaptation_phase(
+        dynamic_goals=[
+            np.array([[-0.1, -0.1, 0.1]]),
+            np.array([[-0.1, 0.1, 0.1]]),
+            np.array([[0.2, 0.2, 0.1]]),
+        ]
+    )
+
+    # TD3 is different from recognizer and expert algorithms, which are SAC #
+    property_type = domain_to_env_property(PANDA)
+    env_property = property_type("PandaMyReachDense")
+    problem_name = env_property.goal_to_problem_str(np.array([[-0.1, -0.1, 0.1]]))
+    actor = DeepRLAgent(
+        domain_name=PANDA,
+        problem_name=problem_name,
+        env_prop=env_property,
+        algorithm=PPO,
+        num_timesteps=400000,
+    )
+    actor.learn()
+    # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+    full_sequence = actor.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True,  # the noise that's added to the actions
+        with_dict=True,
+    )
+    partial_sequence = random_subset_with_order(
+        full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False
+    )
+    closest_goal = recognizer.inference_phase(
+        partial_sequence, np.array([[-0.1, -0.1, 0.1]]), 0.5
+    )
+    print(
+        f"closest_goal returned by GCDRACO: {closest_goal}\nactual goal actor aimed towards: [-0.1, -0.1, 0.1]"
+    )
+
+
+if __name__ == "__main__":
+    run_gcdraco_panda_tutorial()
tutorials/gcdraco_parking_tutorial.py ADDED
@@ -0,0 +1,61 @@
+from stable_baselines3 import PPO, TD3
+from gr_libs.metrics import (
+    stochastic_amplified_selection,
+    mean_p_value,
+    mean_wasserstein_distance,
+)
+from gr_libs import GCDraco
+from gr_libs.environment.utils.utils import domain_to_env_property
+from gr_libs.environment.environment import PARKING, ParkingProperty
+from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
+from gr_libs.ml.utils.format import random_subset_with_order
+import gr_envs
+
+
+def run_gcdraco_parking_tutorial():
+    recognizer = GCDraco(
+        domain_name=PARKING,
+        env_name="Parking-S-14-PC-",
+        evaluation_function=mean_wasserstein_distance,  # or mean_p_value
+    )
+
+    recognizer.domain_learning_phase([i for i in range(1, 21)], [(PPO, 200000)])
+    recognizer.goals_adaptation_phase(
+        dynamic_goals=["1", "11", "21"]
+        # no need for expert sequence generation since GCRL is used
+    )
+
+    property_type = domain_to_env_property(PARKING)
+    env_property = property_type("Parking-S-14-PC-")
+
+    # TD3 is different from recognizer and expert algorithms, which are SAC #
+    actor = DeepRLAgent(
+        domain_name="parking",
+        problem_name="Parking-S-14-PC--GI-11-v0",
+        env_prop=env_property,
+        algorithm=TD3,
+        num_timesteps=400000,
+    )
+    actor.learn()
+    # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+    full_sequence = actor.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True,  # the noise that's added to the actions
+        with_dict=True,
+    )
+
+    partial_sequence = random_subset_with_order(
+        full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False
+    )
+    closest_goal = recognizer.inference_phase(
+        partial_sequence,
+        ParkingProperty("Parking-S-14-PC--GI-11-v0").str_to_goal(),
+        0.5,
+    )
+    print(
+        f"closest_goal returned by GCDRACO: {closest_goal}\nactual goal actor aimed towards: 11"
+    )
+
+
+if __name__ == "__main__":
+    run_gcdraco_parking_tutorial()
tutorials/graml_minigrid_tutorial.py CHANGED
@@ -1,34 +1,64 @@
 from gr_libs.environment.environment import MINIGRID, QLEARNING
+from gr_libs.environment.utils.utils import domain_to_env_property
 from gr_libs.metrics.metrics import stochastic_amplified_selection
 from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
 from gr_libs.ml.utils.format import random_subset_with_order
 from gr_libs import ExpertBasedGraml
 
+
 def run_graml_minigrid_tutorial():
     recognizer = ExpertBasedGraml(
-        domain_name=MINIGRID,
-        env_name="MiniGrid-SimpleCrossingS13N4"
+        domain_name=MINIGRID, env_name="MiniGrid-SimpleCrossingS13N4"
     )
 
-    recognizer.domain_learning_phase(base_goals=[(11,1), (11,11), (1,11), (7,11), (8,1), (10,6), (6,9), (11,3), (11,5)],
-        train_configs=[(QLEARNING, 100000) for _ in range(9)])
+    recognizer.domain_learning_phase(
+        base_goals=[
+            (11, 1),
+            (11, 11),
+            (1, 11),
+            (7, 11),
+            (8, 1),
+            (10, 6),
+            (6, 9),
+            (11, 3),
+            (11, 5),
+        ],
+        train_configs=[(QLEARNING, 100000) for _ in range(9)],
+    )
 
     recognizer.goals_adaptation_phase(
-        dynamic_goals = [(11,1), (11,11), (1,11)],
-        dynamic_train_configs=[(QLEARNING, 100000) for _ in range(3)] # for expert sequence generation.
+        dynamic_goals=[(11, 1), (11, 11), (1, 11)],
+        dynamic_train_configs=[
+            (QLEARNING, 100000) for _ in range(3)
+        ],  # for expert sequence generation.
     )
+
+    property_type = domain_to_env_property(MINIGRID)
+    env_property = property_type("MiniGrid-SimpleCrossingS13N4")
+
     # TD3 is different from recognizer and expert algorithms, which are SAC #
-    actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
+    actor = TabularQLearner(
+        domain_name="minigrid",
+        problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0",
+        env_prop=env_property,
+        algorithm=QLEARNING,
+        num_timesteps=100000,
+    )
     actor.learn()
     # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
     full_sequence = actor.generate_observation(
         action_selection_method=stochastic_amplified_selection,
-        random_optimalism=True, # the noise that's added to the actions
+        random_optimalism=True,  # the noise that's added to the actions
+    )
+
+    partial_sequence = random_subset_with_order(
+        full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False
+    )
+    closest_goal = recognizer.inference_phase(partial_sequence, (11, 1), 0.5)
+    print(
+        f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (11, 1)"
     )
 
-    partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
-    closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
-    print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
 
 if __name__ == "__main__":
-    run_graml_minigrid_tutorial()
+    run_graml_minigrid_tutorial()
tutorials/graml_panda_tutorial.py CHANGED
@@ -1,41 +1,62 @@
-
 import numpy as np
 from stable_baselines3 import PPO, SAC
 import gr_libs.environment.environment
-from gr_libs.environment.environment import PANDA, EnvProperty, GCEnvProperty, PandaProperty
+from gr_libs.environment.environment import (
+    PANDA,
+    EnvProperty,
+    GCEnvProperty,
+    PandaProperty,
+)
 from gr_libs.environment.utils.utils import domain_to_env_property
 from gr_libs.metrics.metrics import stochastic_amplified_selection
 from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
 from gr_libs.ml.utils.format import random_subset_with_order
 from gr_libs import GCGraml
 
+
 def run_graml_panda_tutorial():
-    recognizer = GCGraml( # TODO make these tutorials into pytests
-        domain_name=PANDA,
-        env_name="PandaMyReachDense"
+    recognizer = GCGraml(  # TODO make these tutorials into pytests
+        domain_name=PANDA, env_name="PandaMyReachDense"
     )
     recognizer.domain_learning_phase(
-        base_goals=[np.array([PandaProperty.sample_goal()]) for _ in range(1,30)],
-        train_configs=[(SAC, 800000)]
+        base_goals=[np.array([PandaProperty.sample_goal()]) for _ in range(1, 30)],
+        train_configs=[(SAC, 800000)],
     )
     recognizer.goals_adaptation_phase(
-        dynamic_goals=[np.array([[-0.1, -0.1, 0.1]]), np.array([[-0.1, 0.1, 0.1]]), np.array([[0.2, 0.2, 0.1]])]
+        dynamic_goals=[
+            np.array([[-0.1, -0.1, 0.1]]),
+            np.array([[-0.1, 0.1, 0.1]]),
+            np.array([[0.2, 0.2, 0.1]]),
+        ]
     )
     # TD3 is different from recognizer and expert algorithms, which are SAC #
     property_type = domain_to_env_property(PANDA)
     env_property = property_type("PandaMyReachDense")
     problem_name = env_property.goal_to_problem_str(np.array([[-0.1, -0.1, 0.1]]))
-    actor = DeepRLAgent(domain_name=PANDA, problem_name=problem_name, algorithm=PPO, num_timesteps=400000)
+    actor = DeepRLAgent(
+        domain_name=PANDA,
+        problem_name=problem_name,
+        env_prop=env_property,
+        algorithm=PPO,
+        num_timesteps=400000,
+    )
     actor.learn()
     # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
     full_sequence = actor.generate_observation(
         action_selection_method=stochastic_amplified_selection,
-        random_optimalism=True, # the noise that's added to the actions
+        random_optimalism=True,  # the noise that's added to the actions
+    )
+
+    partial_sequence = random_subset_with_order(
+        full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False
+    )
+    closest_goal = recognizer.inference_phase(
+        partial_sequence, np.array([[-0.1, -0.1, 0.1]]), 0.5
+    )
+    print(
+        f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: [-0.1, -0.1, 0.1]"
     )
 
-    partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
-    closest_goal = recognizer.inference_phase(partial_sequence, np.array([[-0.1, -0.1, 0.1]]), 0.5)
-    print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: [-0.1, -0.1, 0.1]")
 
 if __name__ == "__main__":
-    run_graml_panda_tutorial()
+    run_graml_panda_tutorial()
tutorials/graml_parking_tutorial.py CHANGED
@@ -1,39 +1,56 @@
-
 from stable_baselines3 import PPO, SAC, TD3
-from gr_libs.environment.environment import PARKING, EnvProperty, GCEnvProperty, ParkingProperty
+from gr_libs.environment.environment import (
+    PARKING,
+    EnvProperty,
+    GCEnvProperty,
+    ParkingProperty,
+)
+from gr_libs.environment.utils.utils import domain_to_env_property
 from gr_libs.metrics.metrics import stochastic_amplified_selection
-from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
+from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
 from gr_libs.ml.utils.format import random_subset_with_order
 from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml, GCGraml
-import gr_libs.environment.environment
+
 
 def run_graml_parking_tutorial():
-    recognizer = GCGraml(
-        domain_name=PARKING,
-        env_name="Parking-S-14-PC-"
-    )
-
-    recognizer.domain_learning_phase(
-        [i for i in range(1,21)],
-        [(PPO, 200000)]
-    )
+    recognizer = GCGraml(domain_name=PARKING, env_name="Parking-S-14-PC-")
+
+    recognizer.domain_learning_phase([i for i in range(1, 21)], [(PPO, 200000)])
     recognizer.goals_adaptation_phase(
-        dynamic_goals = ["1", "11", "21"]
+        dynamic_goals=["1", "11", "21"]
        # no need for expert sequence generation since GCRL is used
     )
 
+    property_type = domain_to_env_property(PARKING)
+    env_property = property_type("Parking-S-14-PC-")
+
     # TD3 is different from recognizer and expert algorithms, which are SAC #
-    actor = DeepRLAgent(domain_name="parking", problem_name="Parking-S-14-PC--GI-11-v0", algorithm=TD3, num_timesteps=400000)
+    actor = DeepRLAgent(
+        domain_name="parking",
+        problem_name="Parking-S-14-PC--GI-11-v0",
+        env_prop=env_property,
+        algorithm=TD3,
+        num_timesteps=400000,
+    )
     actor.learn()
     # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
     full_sequence = actor.generate_observation(
         action_selection_method=stochastic_amplified_selection,
-        random_optimalism=True, # the noise that's added to the actions
+        random_optimalism=True,  # the noise that's added to the actions
+    )
+
+    partial_sequence = random_subset_with_order(
+        full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False
+    )
+    closest_goal = recognizer.inference_phase(
+        partial_sequence,
+        ParkingProperty("Parking-S-14-PC--GI-11-v0").str_to_goal(),
+        0.5,
+    )
+    print(
+        f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: 11"
     )
 
-    partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
-    closest_goal = recognizer.inference_phase(partial_sequence, ParkingProperty("Parking-S-14-PC--GI-11-v0").str_to_goal(), 0.5)
-    print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: 11")
 
 if __name__ == "__main__":
-    run_graml_parking_tutorial()
+    run_graml_parking_tutorial()