gr-libs 0.1.8__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gr_libs/__init__.py +3 -1
- gr_libs/_version.py +2 -2
- gr_libs/all_experiments.py +260 -0
- gr_libs/environment/__init__.py +14 -1
- gr_libs/environment/_utils/__init__.py +0 -0
- gr_libs/environment/{utils → _utils}/utils.py +1 -1
- gr_libs/environment/environment.py +278 -23
- gr_libs/evaluation/__init__.py +1 -0
- gr_libs/evaluation/generate_experiments_results.py +100 -0
- gr_libs/metrics/__init__.py +2 -0
- gr_libs/metrics/metrics.py +166 -31
- gr_libs/ml/__init__.py +1 -6
- gr_libs/ml/base/__init__.py +3 -1
- gr_libs/ml/base/rl_agent.py +68 -3
- gr_libs/ml/neural/__init__.py +1 -3
- gr_libs/ml/neural/deep_rl_learner.py +241 -84
- gr_libs/ml/neural/utils/__init__.py +1 -2
- gr_libs/ml/planner/mcts/{utils → _utils}/tree.py +1 -1
- gr_libs/ml/planner/mcts/mcts_model.py +71 -34
- gr_libs/ml/sequential/__init__.py +0 -1
- gr_libs/ml/sequential/{lstm_model.py → _lstm_model.py} +11 -14
- gr_libs/ml/tabular/__init__.py +1 -3
- gr_libs/ml/tabular/tabular_q_learner.py +27 -9
- gr_libs/ml/tabular/tabular_rl_agent.py +22 -9
- gr_libs/ml/utils/__init__.py +2 -9
- gr_libs/ml/utils/format.py +13 -90
- gr_libs/ml/utils/math.py +3 -2
- gr_libs/ml/utils/other.py +2 -2
- gr_libs/ml/utils/storage.py +41 -94
- gr_libs/odgr_executor.py +263 -0
- gr_libs/problems/consts.py +570 -292
- gr_libs/recognizer/{utils → _utils}/format.py +2 -2
- gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +127 -36
- gr_libs/recognizer/graml/{gr_dataset.py → _gr_dataset.py} +11 -11
- gr_libs/recognizer/graml/graml_recognizer.py +186 -35
- gr_libs/recognizer/recognizer.py +59 -10
- gr_libs/tutorials/draco_panda_tutorial.py +58 -0
- gr_libs/tutorials/draco_parking_tutorial.py +56 -0
- {tutorials → gr_libs/tutorials}/gcdraco_panda_tutorial.py +11 -11
- {tutorials → gr_libs/tutorials}/gcdraco_parking_tutorial.py +6 -8
- {tutorials → gr_libs/tutorials}/graml_minigrid_tutorial.py +18 -14
- {tutorials → gr_libs/tutorials}/graml_panda_tutorial.py +11 -12
- {tutorials → gr_libs/tutorials}/graml_parking_tutorial.py +8 -10
- {tutorials → gr_libs/tutorials}/graml_point_maze_tutorial.py +17 -3
- {tutorials → gr_libs/tutorials}/graql_minigrid_tutorial.py +2 -2
- {gr_libs-0.1.8.dist-info → gr_libs-0.2.5.dist-info}/METADATA +95 -29
- gr_libs-0.2.5.dist-info/RECORD +72 -0
- {gr_libs-0.1.8.dist-info → gr_libs-0.2.5.dist-info}/WHEEL +1 -1
- gr_libs-0.2.5.dist-info/top_level.txt +2 -0
- tests/test_draco.py +14 -0
- tests/test_gcdraco.py +2 -2
- tests/test_graml.py +4 -4
- tests/test_graql.py +1 -1
- tests/test_odgr_executor_expertbasedgraml.py +14 -0
- tests/test_odgr_executor_gcdraco.py +14 -0
- tests/test_odgr_executor_gcgraml.py +14 -0
- tests/test_odgr_executor_graql.py +14 -0
- evaluation/analyze_results_cross_alg_cross_domain.py +0 -267
- evaluation/create_minigrid_map_image.py +0 -38
- evaluation/file_system.py +0 -53
- evaluation/generate_experiments_results.py +0 -141
- evaluation/generate_experiments_results_new_ver1.py +0 -238
- evaluation/generate_experiments_results_new_ver2.py +0 -331
- evaluation/generate_task_specific_statistics_plots.py +0 -500
- evaluation/get_plans_images.py +0 -62
- evaluation/increasing_and_decreasing_.py +0 -104
- gr_libs/ml/neural/utils/penv.py +0 -60
- gr_libs-0.1.8.dist-info/RECORD +0 -70
- gr_libs-0.1.8.dist-info/top_level.txt +0 -4
- /gr_libs/{environment/utils/__init__.py → _evaluation/_generate_experiments_results.py} +0 -0
- /gr_libs/ml/planner/mcts/{utils → _utils}/__init__.py +0 -0
- /gr_libs/ml/planner/mcts/{utils → _utils}/node.py +0 -0
- /gr_libs/recognizer/{utils → _utils}/__init__.py +0 -0
gr_libs/recognizer/{utils → _utils}/format.py
@@ -1,9 +1,9 @@
+from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Draco, GCDraco, Graql
 from gr_libs.recognizer.graml.graml_recognizer import (
-    GCGraml,
     ExpertBasedGraml,
+    GCGraml,
     MCTSBasedGraml,
 )
-from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Graql, Draco, GCDraco


 def recognizer_str_to_obj(recognizer_str: str):
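For orientation, here is a minimal sketch of what a string-to-class resolver like recognizer_str_to_obj typically does, built only from the classes imported above; the function body itself is outside this hunk, so treat the mapping below as an assumption rather than the shipped implementation.

from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Draco, GCDraco, Graql
from gr_libs.recognizer.graml.graml_recognizer import (
    ExpertBasedGraml,
    GCGraml,
    MCTSBasedGraml,
)

# Hypothetical name-to-class table, for illustration only.
_RECOGNIZERS = {
    cls.__name__: cls
    for cls in (Graql, Draco, GCDraco, ExpertBasedGraml, MCTSBasedGraml, GCGraml)
}


def recognizer_str_to_obj(recognizer_str: str):
    """Resolve a recognizer class from its name, e.g. "GCDraco" -> GCDraco."""
    return _RECOGNIZERS[recognizer_str]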
gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py
@@ -1,14 +1,9 @@
-from abc import abstractmethod
 import os
+
 import dill
-from typing import List, Type, Callable
 import numpy as np
-
-from gr_libs.
-from gr_libs.metrics.metrics import (
-    kl_divergence_norm_softmax,
-    mean_wasserstein_distance,
-)
+
+from gr_libs.metrics.metrics import kl_divergence_norm_softmax
 from gr_libs.ml.base import RLAgent
 from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
 from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
@@ -22,11 +17,40 @@ from gr_libs.recognizer.recognizer import (


 class GRAsRL(Recognizer):
+    """
+    GRAsRL class represents a goal recognition framework that uses reinforcement learning.
+    It inherits from the Recognizer class and implements the goal recognition process, including the
+    goal adaptation and the inference phase. It trains agents for each new goal, which makes it impractical
+    for real-time environments where goals might change.
+
+    Attributes:
+        agents (dict): A dictionary that maps problem names to RLAgent instances.
+        active_goals (List[str]): A list of active goals.
+        active_problems (List[str]): A list of active problem names.
+        action_space (gym.Space): The action space of the RLAgent.
+
+    Methods:
+        goals_adaptation_phase: Performs the goals adaptation phase.
+        prepare_inf_sequence: Prepares the inference sequence for goal-directed problems.
+        inference_phase: Performs the inference phase and returns the recognized goal.
+        choose_agent: Returns the RLAgent for a given problem name.
+    """
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.agents = {}  # consider changing to ContextualAgent

-    def goals_adaptation_phase(self, dynamic_goals:
+    def goals_adaptation_phase(self, dynamic_goals: list[str], dynamic_train_configs):
+        """
+        Performs the goals adaptation phase.
+
+        Args:
+            dynamic_goals (List[str]): A list of dynamic goals.
+            dynamic_train_configs: The dynamic training configurations.
+
+        Returns:
+            None
+        """
         super().goals_adaptation_phase(dynamic_goals, dynamic_train_configs)
         dynamic_goals_problems = [
             self.env_prop.goal_to_problem_str(goal) for goal in dynamic_goals
@@ -48,18 +72,46 @@ class GRAsRL(Recognizer):
             self.agents[problem_name] = agent
         self.action_space = next(iter(self.agents.values())).env.action_space

+    def prepare_inf_sequence(self, problem_name: str, inf_sequence):
+        """
+        Prepares the inference sequence for goal-directed problems.
+
+        Args:
+            problem_name (str): The name of the problem.
+            inf_sequence: The inference sequence.
+
+        Returns:
+            The prepared inference sequence.
+        """
+        if not self.env_prop.use_goal_directed_problem():
+            for obs in inf_sequence:
+                obs[0]["desired_goal"] = np.array(
+                    [self.env_prop.str_to_goal(problem_name)],
+                    dtype=obs[0]["desired_goal"].dtype,
+                )
+            return inf_sequence
+        return inf_sequence
+
     def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
+        """
+        Performs the inference phase and returns the recognized goal.
+
+        Args:
+            inf_sequence: The inference sequence.
+            true_goal: The true goal.
+            percentage: The percentage.
+
+        Returns:
+            The recognized goal as a string.
+        """
         scores = []
         for problem_name in self.active_problems:
             agent = self.choose_agent(problem_name)
             if self.env_prop.gc_adaptable():
-                assert (
-                    self.__class__.__name__ == "GCDraco"
-                ), "This recognizer is not compatible with goal conditioned problems."
                 inf_sequence = self.prepare_inf_sequence(problem_name, inf_sequence)
             score = self.evaluation_function(inf_sequence, agent, self.action_space)
             scores.append(score)
-
+
         if self.collect_statistics:
             results_path = get_gr_as_rl_experiment_confidence_path(
                 domain_name=self.env_prop.domain_name,
@@ -82,10 +134,24 @@ class GRAsRL(Recognizer):
         return str(self.active_goals[true_goal_index])

     def choose_agent(self, problem_name: str) -> RLAgent:
+        """
+        Returns the RLAgent for a given problem name.
+
+        Args:
+            problem_name (str): The name of the problem.
+
+        Returns:
+            The RLAgent instance.
+        """
         return self.agents[problem_name]


 class Graql(GRAsRL, GaAgentTrainerRecognizer):
+    """
+    Graql extends the GRAsRL framework and GaAgentTrainerRecognizer, since it trains new agents for every new goal and it adheres
+    to the goal recognition as reinforcement learning framework. It uses a tabular Q-learning agent for discrete state and action spaces.
+    """
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         assert (
@@ -93,12 +159,31 @@ class Graql(GRAsRL, GaAgentTrainerRecognizer):
             and self.env_prop.is_state_discrete()
             and self.env_prop.is_action_discrete()
         )
-        if self.rl_agent_type
+        if self.rl_agent_type is None:
             self.rl_agent_type = TabularQLearner
         self.evaluation_function = kl_divergence_norm_softmax


 class Draco(GRAsRL, GaAgentTrainerRecognizer):
+    """
+    Draco class represents a recognizer agent trained using the GRAsRL framework.
+    Like Graql, it trains new agents for every new goal and adheres to the goal recognition as reinforcement learning framework.
+    It uses a deep reinforcement learning agent for continuous state and action spaces.
+
+    Args:
+        *args: Variable length argument list.
+        **kwargs: Arbitrary keyword arguments.
+
+    Attributes:
+        rl_agent_type (type): Type of the reinforcement learning agent.
+        evaluation_function (callable): Function used for evaluation.
+
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # Add any additional initialization code here
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         assert (
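Read together, the GRAsRL, Graql, and Draco docstrings above describe a two-call runtime contract: goals_adaptation_phase(dynamic_goals, dynamic_train_configs) trains one agent per goal, then inference_phase(inf_sequence, true_goal, percentage) scores a partial observation sequence against every active goal. The following hedged usage sketch shows only that call order; the constructor keyword arguments, goal strings, train configs, and observation sequence are placeholders, not values taken from this diff.

from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Graql

# Placeholder construction; the actual constructor kwargs come from the Recognizer base class.
recognizer = Graql(domain_name="minigrid", env_name="MiniGrid-SimpleCrossingS13N4-v0")

# One agent is trained (or loaded) per dynamic goal.
recognizer.goals_adaptation_phase(
    dynamic_goals=["(11, 1)", "(1, 11)"],        # placeholder goal strings
    dynamic_train_configs=[(None, None)] * 2,    # placeholder per-goal train configs
)

# A recorded partial observation sequence would normally come from the environment.
inf_sequence = []  # placeholder
recognized_goal = recognizer.inference_phase(inf_sequence, true_goal="(11, 1)", percentage=0.5)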
@@ -108,15 +193,25 @@ class Draco(GRAsRL, GaAgentTrainerRecognizer):
         if self.rl_agent_type == None:
             self.rl_agent_type = DeepRLAgent
         self.evaluation_function = kwargs.get("evaluation_function")
-
-
-            or type(self.evaluation_function) != Callable
-        )
+        if self.evaluation_function is None:
+            from gr_libs.metrics.metrics import mean_wasserstein_distance

+            self.evaluation_function = mean_wasserstein_distance
+        assert callable(
+            self.evaluation_function
+        ), "Evaluation function must be a callable function."
+
+
+class GCDraco(GRAsRL, LearningRecognizer, GaAdaptingRecognizer):
+    """
+    GCDraco recognizer uses goal-conditioned reinforcement learning using the Draco algorithm.
+    It inherits from GRAsRL, LearningRecognizer, and GaAdaptingRecognizer.
+    It is designed for environments with continuous state and action spaces.
+    It uses a goal-conditioned deep reinforcement learning agent for training and inference, which
+    enables it to adapt to new goals during the goal adaptation phase without requiring retraining,
+    making it suitable for dynamic environments.
+    """

-class GCDraco(
-    GRAsRL, LearningRecognizer, GaAdaptingRecognizer
-):  # TODO problem: it gets 2 goal_adaptation phase from parents, one with configs and one without.
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         assert (
@@ -127,12 +222,18 @@ class GCDraco(
         if self.rl_agent_type == None:
             self.rl_agent_type = GCDeepRLAgent
         self.evaluation_function = kwargs.get("evaluation_function")
-
-
-
-
+        if self.evaluation_function is None:
+            from gr_libs.metrics.metrics import mean_wasserstein_distance
+
+            self.evaluation_function = mean_wasserstein_distance
+        assert callable(
+            self.evaluation_function
+        ), "Evaluation function must be a callable function."

-    def domain_learning_phase(self,
+    def domain_learning_phase(self, problems):
+        base = problems["gc"]
+        base_goals = base["goals"]
+        train_configs = base["train_configs"]
         super().domain_learning_phase(base_goals, train_configs)
         agent_kwargs = {
             "domain_name": self.env_prop.domain_name,
@@ -155,13 +256,3 @@ class GCDraco(

     def choose_agent(self, problem_name: str) -> RLAgent:
         return next(iter(self.agents.values()))
-
-    def prepare_inf_sequence(self, problem_name: str, inf_sequence):
-        if not self.env_prop.use_goal_directed_problem():
-            for obs in inf_sequence:
-                obs[0]["desired_goal"] = np.array(
-                    [self.env_prop.str_to_goal(problem_name)],
-                    dtype=obs[0]["desired_goal"].dtype,
-                )
-            return inf_sequence
-        return inf_sequence
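Two API changes are visible in the hunks above: Draco and GCDraco now fall back to mean_wasserstein_distance when no evaluation_function is passed, and GCDraco.domain_learning_phase now takes a single problems mapping and reads its "gc" entry. Below is a hedged sketch of the new call shape; the constructor kwargs, goal identifiers, and train config values are placeholders.

from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import GCDraco

# evaluation_function may be omitted entirely; the new code then uses mean_wasserstein_distance.
recognizer = GCDraco(domain_name="parking", env_name="parking-v0")  # placeholder kwargs

# domain_learning_phase now receives one dict keyed by problem kind ("gc" for GCDraco).
recognizer.domain_learning_phase(
    {
        "gc": {
            "goals": ["1", "4", "8"],            # placeholder goal identifiers
            "train_configs": [(None, 200_000)],  # placeholder algorithm/steps pair
        }
    }
)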
gr_libs/recognizer/graml/{gr_dataset.py → _gr_dataset.py}
@@ -1,16 +1,16 @@
-import
-from torch.utils.data import Dataset
+import os
 import random
 from types import MethodType
-
+
+import dill
+import numpy as np
+import torch
+from torch.utils.data import Dataset
+
 from gr_libs.environment.environment import EnvProperty
 from gr_libs.metrics.metrics import measure_average_sequence_distance
 from gr_libs.ml.base.rl_agent import ContextualAgent
 from gr_libs.ml.utils import get_siamese_dataset_path
-from gr_libs.ml.base import RLAgent
-import os
-import dill
-import torch


 class GRDataset(Dataset):
@@ -30,19 +30,19 @@ class GRDataset(Dataset):
 def check_diff_goals(first_agent_goal, second_agent_goal):
     try:
         assert first_agent_goal != second_agent_goal
-    except Exception
+    except Exception:
         try:
             assert any(first_agent_goal != second_agent_goal)
-        except Exception
+        except Exception:
             for arr1, arr2 in zip(first_agent_goal, second_agent_goal):
                 assert any(elm1 != elm2 for elm1, elm2 in zip(arr1, arr2))


 def generate_datasets(
     num_samples,
-    agents:
+    agents: list[ContextualAgent],
     observation_creation_method: MethodType,
-    problems:
+    problems: list[str],
     env_prop: EnvProperty,
     recognizer_name: str,
     gc_goal_set=None,
gr_libs/recognizer/graml/graml_recognizer.py
@@ -1,39 +1,49 @@
-
-
+""" Collection of recognizers that use GRAML methods: metric learning for ODGR. """
+
 import os
-from
-
-
-from typing import List, Tuple
+from abc import abstractmethod
+
+import dill
 import numpy as np
-from torch.utils.data import DataLoader
-from torch.nn.utils.rnn import pad_sequence
 import torch
+from torch.nn.utils.rnn import pad_sequence
+from torch.utils.data import DataLoader
+
+from gr_libs.environment.environment import EnvProperty
+from gr_libs.metrics import metrics
+from gr_libs.ml import utils
+from gr_libs.ml.base import ContextualAgent
 from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
 from gr_libs.ml.planner.mcts import mcts_model
-import
+from gr_libs.ml.sequential._lstm_model import LstmObservations, train_metric_model
 from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
-from gr_libs.recognizer.graml.gr_dataset import GRDataset, generate_datasets
-from gr_libs.ml.sequential.lstm_model import LstmObservations, train_metric_model
 from gr_libs.ml.utils.format import random_subset_with_order
 from gr_libs.ml.utils.storage import (
     get_and_create,
-    get_lstm_model_dir,
     get_embeddings_result_path,
+    get_lstm_model_dir,
     get_policy_sequences_result_path,
 )
-from gr_libs.
+from gr_libs.recognizer.graml._gr_dataset import GRDataset, generate_datasets
 from gr_libs.recognizer.recognizer import (
     GaAdaptingRecognizer,
     GaAgentTrainerRecognizer,
     LearningRecognizer,
-
-)  # import first, very dependent
+)

 ### TODO IMPLEMENT MORE SELECTION METHODS, MAKE SURE action_probs IS AS IT SEEMS: list of action-probability 'es ###


 def collate_fn(batch):
+    """
+    Collates a batch of data for training or evaluation.
+
+    Args:
+        batch (list): A list of tuples, where each tuple contains the first traces, second traces, and the label indicating whether the goals are the same.
+
+    Returns:
+        tuple: A tuple containing the padded first traces, padded second traces, labels, lengths of first traces, and lengths of second traces.
+    """
     first_traces, second_traces, is_same_goals = zip(*batch)
     # torch.stack takes tensor tuples (fixed size) and stacks them up in a matrix
     first_traces_padded = pad_sequence(
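The new collate_fn docstring says the function pads variable-length trace pairs and returns them with their original lengths and the same-goal labels. Below is a self-contained sketch of that padding pattern using torch.nn.utils.rnn.pad_sequence; it illustrates the idea only and is not the package's exact implementation.

import torch
from torch.nn.utils.rnn import pad_sequence


def collate_trace_pairs(batch):
    """Pad (first_trace, second_trace, is_same_goal) tuples to a common length."""
    first_traces, second_traces, is_same_goals = zip(*batch)
    lengths_first = torch.tensor([len(t) for t in first_traces])
    lengths_second = torch.tensor([len(t) for t in second_traces])
    # batch_first=True -> tensors of shape (batch, max_len, feature_dim)
    first_padded = pad_sequence(first_traces, batch_first=True)
    second_padded = pad_sequence(second_traces, batch_first=True)
    labels = torch.tensor(is_same_goals, dtype=torch.float32)
    return first_padded, second_padded, labels, lengths_first, lengths_second


# Example: two pairs of traces with different lengths and 3-dimensional observations.
batch = [
    (torch.randn(5, 3), torch.randn(7, 3), 1.0),
    (torch.randn(2, 3), torch.randn(4, 3), 0.0),
]
first_padded, second_padded, labels, len_first, len_second = collate_trace_pairs(batch)
print(first_padded.shape, second_padded.shape, labels, len_first, len_second)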
@@ -68,17 +78,52 @@ def save_weights(model: LstmObservations, path):


 class Graml(LearningRecognizer):
+    """
+    The Graml class is a subclass of LearningRecognizer and represents a recognizer that uses the Graml algorithm for goal recognition.
+    Graml learns a metric over observation sequences, over time: using a GC or a collection of agents, it creates a dataset and learns
+    the metric on it during the domain learning phase. During the goals adaptation phase, it creates or receives a library of sequences for each goal,
+    and maintains embeddings of them for the inference phase. The inference phase uses the learned metric to find the closest goal to a given sequence.
+
+    Attributes:
+        agents (list[ContextualAgent]): A list of contextual agents associated with the recognizer.
+        train_func: The function used for training the metric model.
+        collate_func: The function used for collating data in the training process.
+
+    Methods:
+        train_agents_on_base_goals(base_goals: list[str], train_configs: list): Trains the agents on the given base goals and train configurations.
+        domain_learning_phase(base_goals: list[str], train_configs: list): Performs the domain learning phase of the Graml algorithm.
+        goals_adaptation_phase(dynamic_goals: list[EnvProperty], save_fig=False): Performs the goals adaptation phase of the Graml algorithm.
+        get_goal_plan(goal): Retrieves the plan associated with the given goal.
+        dump_plans(true_sequence, true_goal, percentage): Dumps the plans to a file.
+        create_embeddings_dict(): Creates the embeddings dictionary for the plans.
+        inference_phase(inf_sequence, true_goal, percentage) -> str: Performs the inference phase of the Graml algorithm and returns the closest goal.
+        generate_sequences_library(goal: str, save_fig=False) -> list[list[tuple[np.ndarray, np.ndarray]]]: Generates the sequences library for the given goal.
+        update_sequences_library_inference_phase(inf_sequence) -> list[list[tuple[np.ndarray, np.ndarray]]]: Updates the sequences library during the inference phase.
+    """
+
     def __init__(self, *args, **kwargs):
+        """
+        Initialize the GramlRecognizer object.
+
+        Args:
+            *args: Variable length argument list.
+            **kwargs: Arbitrary keyword arguments.
+
+        Attributes:
+            agents (list[ContextualAgent]): List of contextual agents.
+            train_func: Training function for the metric model.
+            collate_func: Collate function for data batching.
+        """
         super().__init__(*args, **kwargs)
-        self.agents:
+        self.agents: list[ContextualAgent] = []
         self.train_func = train_metric_model
         self.collate_func = collate_fn

     @abstractmethod
-    def train_agents_on_base_goals(self, base_goals:
+    def train_agents_on_base_goals(self, base_goals: list[str], train_configs: list):
         pass

-    def domain_learning_phase(self, base_goals:
+    def domain_learning_phase(self, base_goals: list[str], train_configs: list):
         super().domain_learning_phase(base_goals, train_configs)
         self.train_agents_on_base_goals(base_goals, train_configs)
         # train the network so it will find a metric for the observations of the base agents such that traces of agents to different goals are far from one another
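The Graml docstring above lays out a three-phase flow: domain learning fits the LSTM metric model over traces of the base agents, goals adaptation builds and embeds a sequence library per dynamic goal, and inference embeds the observed sequence and returns the closest goal. Below is a hedged sketch of that call order using the base-class signatures listed in the Methods section; the recognizer instance and every argument value are placeholders, and note that the BGGraml and GCGraml hunks further down override domain_learning_phase to take a problems dict instead.

# `recognizer` stands for an already-constructed Graml subclass; all values are placeholders.
base_goals = ["g1", "g2", "g3"]        # placeholder base goals
train_configs = [(None, None)] * 3     # placeholder per-goal train configs
dynamic_goals = []                     # placeholder list of dynamic goals
inf_sequence = []                      # placeholder recorded partial observation sequence

# 1) Domain learning: train/collect base agents, build the pair dataset, fit the metric model.
recognizer.domain_learning_phase(base_goals, train_configs)

# 2) Goals adaptation: build a sequence library for each dynamic goal and embed it.
recognizer.goals_adaptation_phase(dynamic_goals=dynamic_goals, save_fig=False)

# 3) Inference: embed the observed partial sequence and return the closest goal.
closest_goal = recognizer.inference_phase(inf_sequence, true_goal="g1", percentage=0.5)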
@@ -130,7 +175,7 @@ class Graml(LearningRecognizer):
         )
         save_weights(model=self.model, path=self.model_file_path)

-    def goals_adaptation_phase(self, dynamic_goals:
+    def goals_adaptation_phase(self, dynamic_goals: list[EnvProperty], save_fig=False):
         self.is_first_inf_since_new_goals = True
         self.current_goals = dynamic_goals
         # start by training each rl agent on the base goal set
@@ -245,13 +290,13 @@ class Graml(LearningRecognizer):
     @abstractmethod
     def generate_sequences_library(
         self, goal: str, save_fig=False
-    ) ->
+    ) -> list[list[tuple[np.ndarray, np.ndarray]]]:
         pass

     # this function duplicates every sequence and creates a consecutive and non-consecutive version of it
     def update_sequences_library_inference_phase(
         self, inf_sequence
-    ) ->
+    ) -> list[list[tuple[np.ndarray, np.ndarray]]]:
         new_plans_dict = {}
         for goal, obss in self.plans_dict.items():
             new_obss = []
@@ -281,17 +326,27 @@ class Graml(LearningRecognizer):


 class BGGraml(Graml):
+    """
+    BGGraml class represents a goal-directed agent for the BGGraml algorithm.
+
+    It extends the Graml class and provides additional methods for training agents on base goals.
+    """
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)

-    def domain_learning_phase(self,
-
-
-
-
+    def domain_learning_phase(self, problems):
+        # Always use 'bg' for BGGraml
+        base = problems["bg"]
+        base_goals = base["goals"]
+        train_configs = base["train_configs"]
+        assert len(base_goals) == len(
+            train_configs
+        ), "base_goals and train_configs should have the same length"
+        super().domain_learning_phase(base_goals, train_configs)

     # In case we need goal-directed agent for every goal
-    def train_agents_on_base_goals(self, base_goals:
+    def train_agents_on_base_goals(self, base_goals: list[str], train_configs: list):
         self.original_problems = [
             self.env_prop.goal_to_problem_str(g) for g in base_goals
         ]
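BGGraml.domain_learning_phase reads the "bg" entry of the same problems mapping and asserts one train config per base goal. A minimal sketch of the expected shape follows; the goals and config tuples are placeholders.

# Only the dict layout and the one-config-per-goal requirement come from the hunk above;
# the concrete values are placeholders.
problems = {
    "bg": {
        "goals": ["g1", "g2", "g3"],
        "train_configs": [(None, None), (None, None), (None, None)],  # same length as goals
    }
}

# `recognizer` is an already-constructed BGGraml subclass (ExpertBasedGraml or MCTSBasedGraml).
recognizer.domain_learning_phase(problems)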
@@ -316,14 +371,40 @@ class BGGraml(Graml):


 class MCTSBasedGraml(BGGraml, GaAdaptingRecognizer):
+    """
+    MCTSBasedGraml is a class that represents a recognizer based on the MCTS algorithm.
+    It inherits from BGGraml and GaAdaptingRecognizer classes.
+
+    Attributes:
+        rl_agent_type (type): The type of reinforcement learning agent used.
+    """
+
     def __init__(self, *args, **kwargs):
+        """
+        Initialize the GramlRecognizer object.
+
+        Args:
+            *args: Variable length argument list.
+            **kwargs: Arbitrary keyword arguments.
+
+        """
         super().__init__(*args, **kwargs)
         if self.rl_agent_type == None:
             self.rl_agent_type = TabularQLearner

     def generate_sequences_library(
         self, goal: str, save_fig=False
-    ) ->
+    ) -> list[list[tuple[np.ndarray, np.ndarray]]]:
+        """
+        Generates a library of sequences for a given goal.
+
+        Args:
+            goal (str): The goal for which to generate sequences.
+            save_fig (bool, optional): Whether to save the generated figure. Defaults to False.
+
+        Returns:
+            list[list[tuple[np.ndarray, np.ndarray]]]: The generated sequences library.
+        """
         problem_name = self.env_prop.goal_to_problem_str(goal)
         img_path = os.path.join(
             get_policy_sequences_result_path(
@@ -342,7 +423,29 @@ class MCTSBasedGraml(BGGraml, GaAdaptingRecognizer):


 class ExpertBasedGraml(BGGraml, GaAgentTrainerRecognizer):
+    """
+    ExpertBasedGraml class represents a Graml recognizer that uses expert knowledge to generate sequences library and adapt goals.
+
+    Args:
+        *args: Variable length argument list.
+        **kwargs: Arbitrary keyword arguments.
+
+    Attributes:
+        rl_agent_type (type): The type of reinforcement learning agent used.
+        env_prop (EnvironmentProperties): The environment properties.
+        dynamic_train_configs_dict (dict): The dynamic training configurations for each problem.
+
+    """
+
     def __init__(self, *args, **kwargs):
+        """
+        Initialize the GRAML Recognizer.
+
+        Args:
+            *args: Variable length argument list.
+            **kwargs: Arbitrary keyword arguments.
+
+        """
         super().__init__(*args, **kwargs)
         if self.rl_agent_type == None:
             if self.env_prop.is_state_discrete() and self.env_prop.is_action_discrete():
@@ -352,7 +455,18 @@ class ExpertBasedGraml(BGGraml, GaAgentTrainerRecognizer):

     def generate_sequences_library(
         self, goal: str, save_fig=False
-    ) ->
+    ) -> list[list[tuple[np.ndarray, np.ndarray]]]:
+        """
+        Generates a sequences library for a given goal.
+
+        Args:
+            goal (str): The goal for which to generate the sequences library.
+            save_fig (bool, optional): Whether to save the figure. Defaults to False.
+
+        Returns:
+            list[list[tuple[np.ndarray, np.ndarray]]]: The generated sequences library.
+
+        """
         problem_name = self.env_prop.goal_to_problem_str(goal)
         kwargs = {
             "domain_name": self.domain_name,
@@ -377,7 +491,18 @@ class ExpertBasedGraml(BGGraml, GaAgentTrainerRecognizer):
             agent_kwargs["fig_path"] = fig_path
         return [agent.generate_observation(**agent_kwargs)]

-    def goals_adaptation_phase(self, dynamic_goals:
+    def goals_adaptation_phase(self, dynamic_goals: list[str], dynamic_train_configs):
+        """
+        Performs the goals adaptation phase.
+
+        Args:
+            dynamic_goals (list[str]): The dynamic goals.
+            dynamic_train_configs: The dynamic training configurations.
+
+        Returns:
+            The result of the goals adaptation phase.
+
+        """
         self.dynamic_goals_problems = [
             self.env_prop.goal_to_problem_str(g) for g in dynamic_goals
         ]
@@ -391,6 +516,28 @@ class ExpertBasedGraml(BGGraml, GaAgentTrainerRecognizer):


 class GCGraml(Graml, GaAdaptingRecognizer):
+    """
+    GCGraml class represents a recognizer that uses the GCDeepRLAgent for domain learning and sequence generation.
+    It makes its adaptation phase quicker and requires fewer assumptions, but the assumption of a GC agent is still needed and may result
+    in less optimal policies that generate the observations in the synthetic dataset, which could eventually lead to a less optimal metric.
+
+    Args:
+        Graml (class): Base class for Graml recognizers.
+        GaAdaptingRecognizer (class): Base class for GA adapting recognizers.
+
+    Attributes:
+        rl_agent_type (class): The type of RL agent to be used for learning and generation.
+        env_prop (object): The environment properties.
+        agents (list): List of contextual agents.
+
+    Methods:
+        __init__: Initializes the GCGraml recognizer.
+        domain_learning_phase: Performs the domain learning phase.
+        train_agents_on_base_goals: Trains the RL agents on the base goals.
+        generate_sequences_library: Generates sequences library for a specific goal.
+
+    """
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         if self.rl_agent_type == None:
@@ -401,14 +548,18 @@ class GCGraml(Graml, GaAdaptingRecognizer):
             and not self.env_prop.is_action_discrete()
         )

-    def domain_learning_phase(self,
+    def domain_learning_phase(self, problems):
+        # Always use 'gc' for GCGraml
+        base = problems["gc"]
+        base_goals = base["goals"]
+        train_configs = base["train_configs"]
         assert (
             len(train_configs) == 1
-        ), "
-
+        ), "GCGraml should only have one train config for the base goals, it uses a single agent"
+        super().domain_learning_phase(base_goals, train_configs)

     # In case we need goal-directed agent for every goal
-    def train_agents_on_base_goals(self, base_goals:
+    def train_agents_on_base_goals(self, base_goals: list[str], train_configs: list):
         self.gc_goal_set = base_goals
         self.original_problems = self.env_prop.name  # needed for gr_dataset
         # start by training each rl agent on the base goal set
@@ -432,7 +583,7 @@ class GCGraml(Graml, GaAdaptingRecognizer):

     def generate_sequences_library(
         self, goal: str, save_fig=False
-    ) ->
+    ) -> list[list[tuple[np.ndarray, np.ndarray]]]:
         problem_name = self.env_prop.goal_to_problem_str(goal)
         kwargs = {
             "domain_name": self.domain_name,