gr-libs 0.1.4__py3-none-any.whl → 0.1.6.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- CI/README.md +12 -0
- CI/docker_build_context/Dockerfile +15 -0
- gr_libs/_version.py +21 -0
- gr_libs/environment/__init__.py +22 -0
- gr_libs/environment/environment.py +1 -3
- gr_libs/metrics/metrics.py +1 -2
- gr_libs/ml/neural/deep_rl_learner.py +10 -12
- gr_libs/problems/__init__.py +0 -0
- gr_libs/problems/consts.py +1244 -0
- gr_libs/recognizer/graml/graml_recognizer.py +1 -2
- gr_libs/recognizer/recognizer.py +3 -4
- {gr_libs-0.1.4.dist-info → gr_libs-0.1.6.post1.dist-info}/METADATA +22 -1
- {gr_libs-0.1.4.dist-info → gr_libs-0.1.6.post1.dist-info}/RECORD +22 -15
- {gr_libs-0.1.4.dist-info → gr_libs-0.1.6.post1.dist-info}/top_level.txt +2 -0
- tests/test_graml.py +16 -0
- tests/test_graql.py +4 -0
- tutorials/graml_minigrid_tutorial.py +26 -22
- tutorials/graml_panda_tutorial.py +31 -22
- tutorials/graml_parking_tutorial.py +30 -30
- tutorials/graml_point_maze_tutorial.py +31 -35
- tutorials/graql_minigrid_tutorial.py +25 -20
- {gr_libs-0.1.4.dist-info → gr_libs-0.1.6.post1.dist-info}/WHEEL +0 -0
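The sections below reproduce the per-file diffs. If you want to regenerate a comparison like this one locally, the sketch below does it with the standard library only, assuming both wheels have already been downloaded (for example with `pip download gr_libs==0.1.4 --no-deps` and `pip download gr_libs==0.1.6.post1 --no-deps`); the local file names are assumptions.

```python
import difflib
import zipfile

OLD_WHEEL = "gr_libs-0.1.4-py3-none-any.whl"        # assumed local path
NEW_WHEEL = "gr_libs-0.1.6.post1-py3-none-any.whl"  # assumed local path

def read_member(wheel_path: str, member: str) -> list[str]:
    """Return a text member of a wheel as lines, or [] if the member is absent."""
    with zipfile.ZipFile(wheel_path) as zf:
        if member not in zf.namelist():
            return []
        return zf.read(member).decode("utf-8", errors="replace").splitlines(keepends=True)

# Union of member names, so files added or removed between versions still show up.
with zipfile.ZipFile(OLD_WHEEL) as old, zipfile.ZipFile(NEW_WHEEL) as new:
    members = sorted(set(old.namelist()) | set(new.namelist()))

for member in members:
    diff = difflib.unified_diff(
        read_member(OLD_WHEEL, member),
        read_member(NEW_WHEEL, member),
        fromfile=f"0.1.4/{member}",
        tofile=f"0.1.6.post1/{member}",
    )
    print("".join(diff), end="")
```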
gr_libs/recognizer/graml/graml_recognizer.py
CHANGED
```diff
@@ -103,7 +103,6 @@ class Graml(LearningRecognizer):
         self.plans_dict[f"{true_goal}_true"] = true_sequence
 
         with open(embeddings_path + f'/{true_goal}_{percentage}_plans_dict.pkl', 'wb') as plans_file:
-            # TODO erase AGENT_BASED macros
             to_dump = {}
             for goal, obss in self.plans_dict.items():
                 if goal == f"{true_goal}_true":
@@ -243,7 +242,7 @@ class GCGraml(Graml, GaAdaptingRecognizer):
         if num_timesteps != None: kwargs["num_timesteps"] = num_timesteps
         gc_agent = self.rl_agent_type(**kwargs)
         gc_agent.learn()
-        self.agents.append(ContextualAgent(problem_name=self.env_prop.name, problem_goal="general", agent=gc_agent))
+        self.agents.append(ContextualAgent(problem_name=self.env_prop.name, problem_goal="general", agent=gc_agent))
 
     def generate_sequences_library(self, goal: str) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
         problem_name = self.env_prop.goal_to_problem_str(goal)
```
gr_libs/recognizer/recognizer.py
CHANGED
```diff
@@ -1,6 +1,5 @@
 from abc import ABC, abstractmethod
 from typing import List, Type
-
 from gr_libs.environment.environment import EnvProperty, SUPPORTED_DOMAINS
 from gr_libs.environment.utils.utils import domain_to_env_property
 from gr_libs.ml.base.rl_agent import RLAgent
@@ -18,7 +17,7 @@ class Recognizer(ABC):
     def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
         pass
 
-class LearningRecognizer(Recognizer):
+class LearningRecognizer(Recognizer):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
@@ -26,7 +25,7 @@ class LearningRecognizer(Recognizer): # TODO add a class diagram with the inheri
         self.original_train_configs = train_configs
 
 # a recognizer that needs to train agents for every new goal as part of the goal adaptation phase (that's why it needs dynamic train configs)
-class GaAgentTrainerRecognizer(Recognizer):
+class GaAgentTrainerRecognizer(Recognizer):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
@@ -37,7 +36,7 @@ class GaAgentTrainerRecognizer(Recognizer): # TODO add a class diagram with the
     def domain_learning_phase(self, base_goals: List[str], train_configs: List):
         super().domain_learning_phase(base_goals, train_configs)
 
-class GaAdaptingRecognizer(Recognizer):
+class GaAdaptingRecognizer(Recognizer):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
```
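For orientation, the recognizer hunks above expose the class hierarchy the rest of this diff builds on. The sketch below is assembled only from the names and signatures visible in the hunks; the bodies are placeholders, not the package's implementation.

```python
from abc import ABC, abstractmethod
from typing import List

class Recognizer(ABC):
    @abstractmethod
    def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
        """Map an observed (possibly partial) sequence to the most likely goal."""

class LearningRecognizer(Recognizer):
    """Trains on base goals up front and keeps the original train configs."""
    def domain_learning_phase(self, base_goals: List[str], train_configs: List):
        self.original_train_configs = train_configs  # visible in the hunk above

# a recognizer that needs to train agents for every new goal as part of the
# goal adaptation phase (that's why it needs dynamic train configs)
class GaAgentTrainerRecognizer(Recognizer):
    def domain_learning_phase(self, base_goals: List[str], train_configs: List):
        ...  # placeholder: per-goal agent training happens here in the package

class GaAdaptingRecognizer(Recognizer):
    """Adapts to new goals without training a fresh agent per goal."""
```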
{gr_libs-0.1.4.dist-info → gr_libs-0.1.6.post1.dist-info}/METADATA
CHANGED
````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: gr_libs
-Version: 0.1.4
+Version: 0.1.6.post1
 Summary: Package with goal recognition frameworks baselines
 Author: Ben Nageris
 Author-email: Matan Shamir <matan.shamir@live.biu.ac.il>, Osher Elhadad <osher.elhadad@live.biu.ac.il>
@@ -17,6 +17,7 @@ Requires-Dist: torchvision
 Requires-Dist: rl_zoo3
 Requires-Dist: stable_baselines3[extra]
 Requires-Dist: sb3_contrib
+Requires-Dist: pytest
 Provides-Extra: minigrid
 Requires-Dist: gr_envs[minigrid]; extra == "minigrid"
 Provides-Extra: highway
@@ -111,6 +112,25 @@ After installing GRLib, you will have access to custom Gym environments, allowin
 
 Tutorials demonstrating basic ODGR scenarios is available in the sub-package `tutorials`. These tutorials walk through the initialization and deployment process, showcasing how different GR algorithms adapt to emerging goals in various Gym environments.
 
+## Working with an initial dataset of trained agents
+gr_libs also includes a library of trained agents for the various supported environments within the package.
+To get the dataset of trained agents, you can run:
+```sh
+python download_dataset.py
+```
+
+An alternative is to use our docker image, which includes the dataset in it.
+You can:
+1. pull the image:
+```sh
+docker pull ghcr.io/MatanShamir1/gr_test_base:latest
+```
+2. run a container:
+```sh
+docker run -it ghcr.io/MatanShamir1/gr_test_base:latest bash
+```
+3. don't forget to install the package from within the container, go back to 'Setup' for that.
+
 ### Method 1: Writing a Custom Script
 
 1. **Create a recognizer**
@@ -118,6 +138,7 @@ Tutorials demonstrating basic ODGR scenarios is available in the sub-package `tu
    Specify the domain name and specific environment for the recognizer, effectively telling it the domain theory - the collection of states and actions in the environment.
 
    ```python
+   import gr_libs.environment # Triggers gym env registration - you must run it!
   recognizer = Graql(
       domain_name="minigrid",
       env_name="MiniGrid-SimpleCrossingS13N4"
````
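The version bump above is what lands in the installed dist-info, so it is also the quickest way to tell which of the two releases an environment actually has; a standard-library check (nothing here is gr_libs-specific beyond the distribution name):

```python
from importlib.metadata import version

# Reads the Version field of the installed METADATA shown in the hunk above.
print(version("gr_libs"))  # "0.1.6.post1" after upgrading, "0.1.4" before
```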
{gr_libs-0.1.4.dist-info → gr_libs-0.1.6.post1.dist-info}/RECORD
CHANGED
```diff
@@ -1,3 +1,5 @@
+CI/README.md,sha256=CbWNAWrXFFwYq3sWAORhoQIE5busoNyYh_rFWVH1enw,800
+CI/docker_build_context/Dockerfile,sha256=Rk7LYTxOW7VVJcmNa8csZ4BwkunMYIiHX4WVSuMam50,311
 evaluation/analyze_results_cross_alg_cross_domain.py,sha256=s_DDh4rNfRnvQ0PDa2d5411jYOa7CaI1YeB8Dpup7QU,9803
 evaluation/create_minigrid_map_image.py,sha256=jaSW3n3tY222iFUeAMqedBP9cvD88GCzPrQ6_XHv5oQ,1242
 evaluation/file_system.py,sha256=SSYnj8QGFkq-8V_0s7x2MWbD88aFaoFY4Ogc_Pt8m6U,1601
@@ -8,19 +10,20 @@ evaluation/generate_task_specific_statistics_plots.py,sha256=rBsqaMe2irP_Cfo-icw
 evaluation/get_plans_images.py,sha256=BT-bGWuOPUAYpZVDwk7YMRBLdgKaDbNOBjMrtcl1Vjk,2346
 evaluation/increasing_and_decreasing_.py,sha256=fu1hkEjhOQC3jEsjiS7emW_UPRpVFCaae0d0E2MGZqI,2991
 gr_libs/__init__.py,sha256=-uKsQiHIL7yojbDwlTR-I8sj1WX9XT52PoFbPjtUTKo,145
-gr_libs/
-gr_libs/environment/
+gr_libs/_version.py,sha256=C8Me-BH17Mqlv65Ba3Tqc5gFEzabp8fxxyIA9C_XdDQ,517
+gr_libs/environment/__init__.py,sha256=HFVGBcufWf8-ahCo6h_s2pFEyvDy59cFg8z908RgdYo,1038
+gr_libs/environment/environment.py,sha256=d6ZbiAQ4H1aLrUFI8sm0BN9DVW3JtzpkodSi_70Z_PY,6780
 gr_libs/environment/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 gr_libs/environment/utils/utils.py,sha256=4yM3s30KjyuEmWR8UuICE5rR03zsLi3tzqNDvBkdPcU,537
 gr_libs/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-gr_libs/metrics/metrics.py,sha256=
+gr_libs/metrics/metrics.py,sha256=4bnvs5suv-QrK9i1NuOzkE_E8uIzS1nlEazNDRXvZGs,8700
 gr_libs/ml/__init__.py,sha256=jrjxYqvSRgWwFWw7XQP9DzOwvmprMZ2umwT7t-DYtDU,233
 gr_libs/ml/agent.py,sha256=DSnK8nRx9SS76fAOZZEEvA68_meLjzm9lfQpMUXmGQU,1957
 gr_libs/ml/consts.py,sha256=mrbZk8n6QoGzLGaKmaxq4QlAsBbk4fhkCgXLuO9jXKw,365
 gr_libs/ml/base/__init__.py,sha256=MfIYhl_UqH8H7YoTCih8wBFA_gpTOUFq8Ph0_Nq0XQk,68
 gr_libs/ml/base/rl_agent.py,sha256=u9rnb-ma9iDM5b_BlwjcTJGSFezIGrxXINw6b-Dbl8s,1598
 gr_libs/ml/neural/__init__.py,sha256=g-0D5oFX8W52To4OR8vO8kDoBLSxAupVqwcQw8XjT5E,180
-gr_libs/ml/neural/deep_rl_learner.py,sha256=
+gr_libs/ml/neural/deep_rl_learner.py,sha256=b41_b4GVlYqxhjrr1_YMcGdU9iwcMXsf3zH8D2kEucs,20659
 gr_libs/ml/neural/utils/__init__.py,sha256=bJgPfRnmfDQxdnb0OyRGwzgebEc1PnlO7-GpqszPBcc,106
 gr_libs/ml/neural/utils/dictlist.py,sha256=WpHfdWpVZ_T3PcSnOQUC--ro_tsS0dvam2WG3LcsHDw,1039
 gr_libs/ml/neural/utils/penv.py,sha256=R1uW8sePQqvTlJjpAuMx16eDU6TuGAjQF3hTR1QasMo,1862
@@ -42,22 +45,26 @@ gr_libs/ml/utils/format.py,sha256=nu7RzVwn_raG_fqqmnqlJgUjtA0yzKztkB3a5QZnRYo,30
 gr_libs/ml/utils/math.py,sha256=n62zssVOLHnUb4dPofAoFhoLOKl5n_xBzaKQOUQBoNc,440
 gr_libs/ml/utils/other.py,sha256=HKUfeLBbd4DgJxSTs3ya9KQ85Acx4TjycRrtGD9WQ3s,505
 gr_libs/ml/utils/storage.py,sha256=oCdvL_ypCglnSJsyyXzNyV_UJASTfioa3yJhFlFso64,4277
+gr_libs/problems/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gr_libs/problems/consts.py,sha256=ON7yfKTAKETg7i3okDYuOzEU7KWvynyubl0m7TlU6Hs,38808
 gr_libs/recognizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-gr_libs/recognizer/recognizer.py,sha256=
+gr_libs/recognizer/recognizer.py,sha256=ZrApJVdBQxKRYhhDiWLCNGmlxgi674nwgb30BgVggC8,1705
 gr_libs/recognizer/recognizer_doc.md,sha256=RnTvbZhl2opvU7-QT4pULCV5HCdJTw2dsu8WQOOiR3E,2521
 gr_libs/recognizer/gr_as_rl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py,sha256=84GdfohC2dZoNH_QEo7GpSt8nZWdfqSRKCTY99X_iME,5215
 gr_libs/recognizer/graml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 gr_libs/recognizer/graml/gr_dataset.py,sha256=lG6m3ulxFELpH1oURnlcmNDWOrxyuzvlAR28ZTqB7L8,7224
-gr_libs/recognizer/graml/graml_recognizer.py,sha256=
+gr_libs/recognizer/graml/graml_recognizer.py,sha256=SGs7rtkA73lbCv9HISa6dfjVUJUhlH54QriVsoGVRss,15672
 gr_libs/recognizer/utils/__init__.py,sha256=ewSroxL7aATvvm-Xzc1_-61mP2LU2U28YaOEqvVVDB0,41
 gr_libs/recognizer/utils/format.py,sha256=e0AnqtPeYoJsV9Z7cEBpgbzTM0hLNxFIjn07fQ3YbQw,492
-
-
-tutorials/
-tutorials/
-tutorials/
-
-
-gr_libs-0.1.
-gr_libs-0.1.
+tests/test_graml.py,sha256=ZJB2jqtf4Q2-KZredkJq90teqmHBIvigCAQpvR5G110,559
+tests/test_graql.py,sha256=-onMi13e2wStOmB5bYv2f3Ita3QFFiw416XMBkby0OI,141
+tutorials/graml_minigrid_tutorial.py,sha256=ONvxFi79R7d8dcd6gy083Z_yy9A2flhGTDIDRxurdx8,1782
+tutorials/graml_panda_tutorial.py,sha256=wtv_lsw0vsU7j45GKeWecTfE7jzfh4iVGEVnQyaWthM,2063
+tutorials/graml_parking_tutorial.py,sha256=46-sfxmYA9jLRSpqIF9z69MLSfOSTJarfjlQ_Igq294,1769
+tutorials/graml_point_maze_tutorial.py,sha256=mYq3IxYbf9jidq-4VdT3MdStV80Q5lytFv6Xzzn22Ys,1835
+tutorials/graql_minigrid_tutorial.py,sha256=Jb0TCUhiZQkFeafJWUTPnCISd4FKfPrqP-xfHiqCGKE,1635
+gr_libs-0.1.6.post1.dist-info/METADATA,sha256=UPwlwVlbGTpTsUhYwWH5hYr-hSBpcWjrFIA7sWg0Kj4,9620
+gr_libs-0.1.6.post1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+gr_libs-0.1.6.post1.dist-info/top_level.txt,sha256=rL-bbK-KnLzVbLIUCdN1riH58lup3jG0NJ3LTt_qSwo,38
+gr_libs-0.1.6.post1.dist-info/RECORD,,
```
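Each RECORD row has the form `path,sha256=<urlsafe base64 digest without padding>,size`, which is why the new entries above carry both a hash and a byte count. Below is a small sketch for spot-checking one row against an unpacked wheel; the local path is an assumption.

```python
import base64
import hashlib
from pathlib import Path

def record_entry(path: Path) -> str:
    """Compute the RECORD-style hash and size fields for a file."""
    digest = hashlib.sha256(path.read_bytes()).digest()
    b64 = base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")
    return f"sha256={b64},{path.stat().st_size}"

# Example: check the new problems/consts.py against its row above.
print(record_entry(Path("gr_libs/problems/consts.py")))
# expected: sha256=ON7yfKTAKETg7i3okDYuOzEU7KWvynyubl0m7TlU6Hs,38808
```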
tests/test_graml.py
ADDED
```diff
@@ -0,0 +1,16 @@
+from tutorials.graml_minigrid_tutorial import run_graml_minigrid_tutorial
+from tutorials.graml_panda_tutorial import run_graml_panda_tutorial
+from tutorials.graml_parking_tutorial import run_graml_parking_tutorial
+from tutorials.graml_point_maze_tutorial import run_graml_point_maze_tutorial
+
+def test_graml_minigrid_tutorial():
+    run_graml_minigrid_tutorial()
+
+def test_graml_panda_tutorial():
+    run_graml_panda_tutorial()
+
+def test_graml_parking_tutorial():
+    run_graml_parking_tutorial()
+
+def test_graml_point_maze_tutorial():
+    run_graml_point_maze_tutorial()
```
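With `pytest` now declared in Requires-Dist, these tests can be run straight from a source checkout; a minimal invocation sketch (running from the repository root is an assumption, since the module imports from the `tutorials` package, and each test executes a full tutorial, so expect long RL training runtimes):

```python
import sys
import pytest

# Run only the new GRAML tutorial tests, quietly, and propagate the exit code.
sys.exit(pytest.main(["-q", "tests/test_graml.py"]))
```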
tests/test_graql.py
ADDED
tutorials/graml_minigrid_tutorial.py
CHANGED
```diff
@@ -1,30 +1,34 @@
-from gr_libs.environment.environment import QLEARNING
+from gr_libs.environment.environment import MINIGRID, QLEARNING
 from gr_libs.metrics.metrics import stochastic_amplified_selection
 from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
 from gr_libs.ml.utils.format import random_subset_with_order
 from gr_libs import ExpertBasedGraml
 
-
-
-
-
+def run_graml_minigrid_tutorial():
+    recognizer = ExpertBasedGraml(
+        domain_name=MINIGRID,
+        env_name="MiniGrid-SimpleCrossingS13N4"
+    )
 
-recognizer.domain_learning_phase(base_goals=[(11,1), (11,11), (1,11), (7,11), (8,1), (10,6), (6,9), (11,3), (11,5)],
-
+    recognizer.domain_learning_phase(base_goals=[(11,1), (11,11), (1,11), (7,11), (8,1), (10,6), (6,9), (11,3), (11,5)],
+                                     train_configs=[(QLEARNING, 100000) for _ in range(9)])
 
-recognizer.goals_adaptation_phase(
-
-
-)
-# TD3 is different from recognizer and expert algorithms, which are SAC #
-actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
-actor.learn()
-# sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
-full_sequence = actor.generate_observation(
-
-
-)
+    recognizer.goals_adaptation_phase(
+        dynamic_goals = [(11,1), (11,11), (1,11)],
+        dynamic_train_configs=[(QLEARNING, 100000) for _ in range(3)] # for expert sequence generation.
+    )
+    # TD3 is different from recognizer and expert algorithms, which are SAC #
+    actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
+    actor.learn()
+    # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+    full_sequence = actor.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True, # the noise that's added to the actions
+    )
 
-partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
-closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
-print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
+    partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
+    closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
+    print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
+
+if __name__ == "__main__":
+    run_graml_minigrid_tutorial()
```
tutorials/graml_panda_tutorial.py
CHANGED
```diff
@@ -1,32 +1,41 @@
 
 import numpy as np
 from stable_baselines3 import PPO, SAC
-
+import gr_libs.environment.environment
+from gr_libs.environment.environment import PANDA, EnvProperty, GCEnvProperty, PandaProperty
 from gr_libs.environment.utils.utils import domain_to_env_property
 from gr_libs.metrics.metrics import stochastic_amplified_selection
 from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
 from gr_libs.ml.utils.format import random_subset_with_order
 from gr_libs import GCGraml
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-)
+def run_graml_panda_tutorial():
+    recognizer = GCGraml( # TODO make these tutorials into pytests
+        domain_name=PANDA,
+        env_name="PandaMyReachDense"
+    )
+    recognizer.domain_learning_phase(
+        base_goals=[np.array([PandaProperty.sample_goal()]) for _ in range(1,30)],
+        train_configs=[(SAC, 800000)]
+    )
+    recognizer.goals_adaptation_phase(
+        dynamic_goals=[np.array([[-0.1, -0.1, 0.1]]), np.array([[-0.1, 0.1, 0.1]]), np.array([[0.2, 0.2, 0.1]])]
+    )
+    # TD3 is different from recognizer and expert algorithms, which are SAC #
+    property_type = domain_to_env_property(PANDA)
+    env_property = property_type("PandaMyReachDense")
+    problem_name = env_property.goal_to_problem_str(np.array([[-0.1, -0.1, 0.1]]))
+    actor = DeepRLAgent(domain_name=PANDA, problem_name=problem_name, algorithm=PPO, num_timesteps=400000)
+    actor.learn()
+    # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+    full_sequence = actor.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True, # the noise that's added to the actions
+    )
 
-partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
-closest_goal = recognizer.inference_phase(partial_sequence, np.array([[-0.1, -0.1, 0.1]]), 0.5)
-print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: [-0.1, -0.1, 0.1]")
+    partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
+    closest_goal = recognizer.inference_phase(partial_sequence, np.array([[-0.1, -0.1, 0.1]]), 0.5)
+    print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: [-0.1, -0.1, 0.1]")
+
+if __name__ == "__main__":
+    run_graml_panda_tutorial()
```
tutorials/graml_parking_tutorial.py
CHANGED
```diff
@@ -1,38 +1,38 @@
 
 from stable_baselines3 import PPO, SAC, TD3
-from gr_libs.environment.environment import EnvProperty, GCEnvProperty, ParkingProperty
+from gr_libs.environment.environment import PARKING, EnvProperty, GCEnvProperty, ParkingProperty
 from gr_libs.metrics.metrics import stochastic_amplified_selection
 from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
 from gr_libs.ml.utils.format import random_subset_with_order
 from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml, GCGraml
 
-
-recognizer = GCGraml(
-
-
-
-
-
-
-
-
-
-
-
-                            "Parking-S-14-PC--GI-18-v0",
-                            "Parking-S-14-PC--GI-21-v0"]] # TODO detach the goal from the environment instance in every gym env, add the ability to alter it from outside.
-    #dynamic_train_configs=[(SAC, 400000) for _ in range(7)] # for expert sequence generation. TODO change to require this only if sequence generation method is EXPERT.
-)
-# TD3 is different from recognizer and expert algorithms, which are SAC #
-actor = DeepRLAgent(env_name="parking", problem_name="Parking-S-14-PC--GI-8-v0", algorithm=TD3, num_timesteps=400000)
-actor.learn()
-# sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
-full_sequence = actor.generate_observation(
-    action_selection_method=stochastic_amplified_selection,
-    random_optimalism=True, # the noise that's added to the actions
-)
+def run_graml_parking_tutorial():
+    recognizer = GCGraml(
+        domain_name=PARKING,
+        env_name="Parking-S-14-PC-"
+    )
+
+    recognizer.domain_learning_phase(
+        [i for i in range(1,21)],
+        [(PPO, 200000)]
+    )
+    recognizer.goals_adaptation_phase(
+        dynamic_goals = ["1", "11", "21"]
+        # no need for expert sequence generation since GCRL is used
+    )
 
-
-
-
+    # TD3 is different from recognizer and expert algorithms, which are SAC #
+    actor = DeepRLAgent(domain_name="parking", problem_name="Parking-S-14-PC--GI-11-v0", algorithm=TD3, num_timesteps=400000)
+    actor.learn()
+    # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+    full_sequence = actor.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True, # the noise that's added to the actions
+    )
+
+    partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
+    closest_goal = recognizer.inference_phase(partial_sequence, ParkingProperty("Parking-S-14-PC--GI-11-v0").str_to_goal(), 0.5)
+    print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: 11")
+
+if __name__ == "__main__":
+    run_graml_parking_tutorial()
```
tutorials/graml_point_maze_tutorial.py
CHANGED
```diff
@@ -1,43 +1,39 @@
 
 from stable_baselines3 import SAC, TD3
-from gr_libs.environment.
+from gr_libs.environment.environment import POINT_MAZE, PointMazeProperty
 from gr_libs.metrics.metrics import stochastic_amplified_selection
 from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
 from gr_libs.ml.utils.format import random_subset_with_order
 from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml
 
-
-recognizer = ExpertBasedGraml(
-
-
-
-                 ("PointMaze-FourRoomsEnvDense-11x11-Goal-1x9"),
-                 ("PointMaze-FourRoomsEnvDense-11x11-Goal-3x3"),
-                 ("PointMaze-FourRoomsEnvDense-11x11-Goal-3x4"),
-                 ("PointMaze-FourRoomsEnvDense-11x11-Goal-8x2"),
-                 ("PointMaze-FourRoomsEnvDense-11x11-Goal-3x7"),
-                 ("PointMaze-FourRoomsEnvDense-11x11-Goal-2x8")],
-    task_str_to_goal=maze_str_to_goal,
-    method=DeepRLAgent,
-    collect_statistics=False,
-    train_configs=[(SAC, 200000) for _ in range(8)],
-)
-recognizer.domain_learning_phase()
-recognizer.goals_adaptation_phase(
-    dynamic_goals_problems = ["PointMaze-FourRoomsEnvDense-11x11-Goal-4x4",
-                              "PointMaze-FourRoomsEnvDense-11x11-Goal-7x3",
-                              "PointMaze-FourRoomsEnvDense-11x11-Goal-3x7"],
-    dynamic_train_configs=[(SAC, 200000) for _ in range(3)] # for expert sequence generation. TODO change to require this only if sequence generation method is EXPERT.
-)
-# TD3 is different from recognizer and expert algorithms, which are SAC #
-actor = DeepRLAgent(env_name="point_maze", problem_name="PointMaze-FourRoomsEnvDense-11x11-Goal-4x4", algorithm=TD3, num_timesteps=200000)
-actor.learn()
-# sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
-full_sequence = actor.generate_observation(
-    action_selection_method=stochastic_amplified_selection,
-    random_optimalism=True, # the noise that's added to the actions
-)
+def run_graml_point_maze_tutorial():
+    recognizer = ExpertBasedGraml(
+        domain_name=POINT_MAZE,
+        env_name="PointMaze-FourRoomsEnvDense-11x11"
+    )
 
-
-
-
+    recognizer.domain_learning_phase(
+        [(9,1), (9,9), (1,9), (3,3), (3,4), (8,2), (3,7), (2,8)],
+        [(SAC, 200000) for _ in range(8)]
+    )
+
+    recognizer.goals_adaptation_phase(
+        dynamic_goals = [(4,4), (7,3), (3,7)],
+        dynamic_train_configs=[(SAC, 200000) for _ in range(3)] # for expert sequence generation.
+    )
+
+    # TD3 is different from recognizer and expert algorithms, which are SAC #
+    actor = DeepRLAgent(domain_name="point_maze", problem_name="PointMaze-FourRoomsEnvDense-11x11-Goal-4x4", algorithm=TD3, num_timesteps=200000)
+    actor.learn()
+    # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+    full_sequence = actor.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True, # the noise that's added to the actions
+    )
+
+    partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)))
+    closest_goal = recognizer.inference_phase(partial_sequence, PointMazeProperty("PointMaze-FourRoomsEnvDense-11x11-Goal-4x4").str_to_goal(), 0.5)
+    print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (4, 4)")
+
+if __name__ == "__main__":
+    run_graml_point_maze_tutorial()
```
tutorials/graql_minigrid_tutorial.py
CHANGED
```diff
@@ -4,26 +4,31 @@ from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
 from gr_libs.ml.utils.format import random_subset_with_order
 from gr_libs import Graql
 
-
-
-
-
+def run_graql_minigrid_tutorial():
+    recognizer = Graql(
+        domain_name="minigrid",
+        env_name="MiniGrid-SimpleCrossingS13N4"
+    )
 
-#Graql doesn't have a domain learning phase, so we skip it
+    #Graql doesn't have a domain learning phase, so we skip it
 
-recognizer.goals_adaptation_phase(
-
-
-)
-# TD3 is different from recognizer and expert algorithms, which are SAC #
-actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
-actor.learn()
-# sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
-full_sequence = actor.generate_observation(
-
-
-)
+    recognizer.goals_adaptation_phase(
+        dynamic_goals = [(11,1), (11,11), (1,11)],
+        dynamic_train_configs=[(QLEARNING, 100000) for _ in range(3)] # for expert sequence generation.
+    )
+    # TD3 is different from recognizer and expert algorithms, which are SAC #
+    actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
+    actor.learn()
+    # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+    full_sequence = actor.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True, # the noise that's added to the actions
+    )
 
-partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
-closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
-print(f"closest_goal returned by Graql: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
+    partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
+    closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
+    print(f"closest_goal returned by Graql: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
+    return closest_goal, (11,1)
+
+if __name__ == "__main__":
+    run_graql_minigrid_tutorial()
```
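Unlike the GRAML tutorials, this one now returns both the recognized goal and the goal the actor actually pursued, which makes an assertion-style check straightforward. Below is a sketch of how a test could use that return value (this exact test is not part of the diff):

```python
from tutorials.graql_minigrid_tutorial import run_graql_minigrid_tutorial

def test_graql_minigrid_tutorial():
    # run_graql_minigrid_tutorial() returns (recognized_goal, actual_goal) as of this version.
    recognized_goal, actual_goal = run_graql_minigrid_tutorial()
    assert recognized_goal == actual_goal
```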
{gr_libs-0.1.4.dist-info → gr_libs-0.1.6.post1.dist-info}/WHEEL
File without changes