gr-libs 0.1.6.post1__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evaluation/analyze_results_cross_alg_cross_domain.py +236 -246
- evaluation/create_minigrid_map_image.py +10 -6
- evaluation/file_system.py +16 -5
- evaluation/generate_experiments_results.py +123 -74
- evaluation/generate_experiments_results_new_ver1.py +227 -243
- evaluation/generate_experiments_results_new_ver2.py +317 -317
- evaluation/generate_task_specific_statistics_plots.py +481 -253
- evaluation/get_plans_images.py +41 -26
- evaluation/increasing_and_decreasing_.py +97 -56
- gr_libs/__init__.py +6 -1
- gr_libs/_version.py +2 -2
- gr_libs/environment/__init__.py +17 -9
- gr_libs/environment/environment.py +167 -39
- gr_libs/environment/utils/utils.py +22 -12
- gr_libs/metrics/__init__.py +5 -0
- gr_libs/metrics/metrics.py +76 -34
- gr_libs/ml/__init__.py +2 -0
- gr_libs/ml/agent.py +21 -6
- gr_libs/ml/base/__init__.py +1 -1
- gr_libs/ml/base/rl_agent.py +13 -10
- gr_libs/ml/consts.py +1 -1
- gr_libs/ml/neural/deep_rl_learner.py +433 -352
- gr_libs/ml/neural/utils/__init__.py +1 -1
- gr_libs/ml/neural/utils/dictlist.py +3 -3
- gr_libs/ml/neural/utils/penv.py +5 -2
- gr_libs/ml/planner/mcts/mcts_model.py +524 -302
- gr_libs/ml/planner/mcts/utils/__init__.py +1 -1
- gr_libs/ml/planner/mcts/utils/node.py +11 -7
- gr_libs/ml/planner/mcts/utils/tree.py +14 -10
- gr_libs/ml/sequential/__init__.py +1 -1
- gr_libs/ml/sequential/lstm_model.py +256 -175
- gr_libs/ml/tabular/state.py +7 -7
- gr_libs/ml/tabular/tabular_q_learner.py +123 -73
- gr_libs/ml/tabular/tabular_rl_agent.py +20 -19
- gr_libs/ml/utils/__init__.py +8 -2
- gr_libs/ml/utils/format.py +78 -70
- gr_libs/ml/utils/math.py +2 -1
- gr_libs/ml/utils/other.py +1 -1
- gr_libs/ml/utils/storage.py +95 -28
- gr_libs/problems/consts.py +1549 -1227
- gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +145 -80
- gr_libs/recognizer/graml/gr_dataset.py +209 -110
- gr_libs/recognizer/graml/graml_recognizer.py +431 -231
- gr_libs/recognizer/recognizer.py +38 -27
- gr_libs/recognizer/utils/__init__.py +1 -1
- gr_libs/recognizer/utils/format.py +8 -3
- {gr_libs-0.1.6.post1.dist-info → gr_libs-0.1.8.dist-info}/METADATA +1 -1
- gr_libs-0.1.8.dist-info/RECORD +70 -0
- {gr_libs-0.1.6.post1.dist-info → gr_libs-0.1.8.dist-info}/WHEEL +1 -1
- {gr_libs-0.1.6.post1.dist-info → gr_libs-0.1.8.dist-info}/top_level.txt +0 -1
- tests/test_gcdraco.py +10 -0
- tests/test_graml.py +8 -4
- tests/test_graql.py +2 -1
- tutorials/gcdraco_panda_tutorial.py +66 -0
- tutorials/gcdraco_parking_tutorial.py +61 -0
- tutorials/graml_minigrid_tutorial.py +42 -12
- tutorials/graml_panda_tutorial.py +35 -14
- tutorials/graml_parking_tutorial.py +37 -19
- tutorials/graml_point_maze_tutorial.py +33 -13
- tutorials/graql_minigrid_tutorial.py +31 -15
- CI/README.md +0 -12
- CI/docker_build_context/Dockerfile +0 -15
- gr_libs/recognizer/recognizer_doc.md +0 -61
- gr_libs-0.1.6.post1.dist-info/RECORD +0 -70
@@ -16,250 +16,450 @@ from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
 from gr_libs.recognizer.graml.gr_dataset import GRDataset, generate_datasets
 from gr_libs.ml.sequential.lstm_model import LstmObservations, train_metric_model
 from gr_libs.ml.utils.format import random_subset_with_order
-from gr_libs.ml.utils.storage import
+from gr_libs.ml.utils.storage import (
+    get_and_create,
+    get_lstm_model_dir,
+    get_embeddings_result_path,
+    get_policy_sequences_result_path,
+)
 from gr_libs.metrics import metrics
-from gr_libs.recognizer.recognizer import
+from gr_libs.recognizer.recognizer import (
+    GaAdaptingRecognizer,
+    GaAgentTrainerRecognizer,
+    LearningRecognizer,
+    Recognizer,
+)  # import first, very dependent
 
 ### TODO IMPLEMENT MORE SELECTION METHODS, MAKE SURE action_probs IS AS IT SEEMS: list of action-probability 'es ###
 
+
 def collate_fn(batch):
-    [old lines 26-44 removed; not expanded in this diff view]
+    first_traces, second_traces, is_same_goals = zip(*batch)
+    # torch.stack takes tensor tuples (fixed size) and stacks them up in a matrix
+    first_traces_padded = pad_sequence(
+        [torch.stack(sequence) for sequence in first_traces], batch_first=True
+    )
+    second_traces_padded = pad_sequence(
+        [torch.stack(sequence) for sequence in second_traces], batch_first=True
+    )
+    first_traces_lengths = [len(trace) for trace in first_traces]
+    second_traces_lengths = [len(trace) for trace in second_traces]
+    return (
+        first_traces_padded.to(utils.device),
+        second_traces_padded.to(utils.device),
+        torch.stack(is_same_goals).to(utils.device),
+        first_traces_lengths,
+        second_traces_lengths,
+    )
+
+
+def load_weights(loaded_model: LstmObservations, path):
+    # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    loaded_model.load_state_dict(torch.load(path, map_location=utils.device))
+    loaded_model.to(utils.device)  # Ensure model is on the right device
+    return loaded_model
+
+
+def save_weights(model: LstmObservations, path):
+    directory = os.path.dirname(path)
+    if not os.path.exists(directory):
+        os.makedirs(directory)
+    torch.save(model.state_dict(), path)
+
 
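The new `collate_fn` pads variable-length trace pairs with `pad_sequence` so they can be batched for the LSTM, and hands back the original lengths so the padding can be ignored later. The snippet below is a minimal, self-contained sketch of just that padding step with dummy per-step feature tensors; it omits the `utils.device` transfer and the `is_same_goals` labels from the code above.

```python
import torch
from torch.nn.utils.rnn import pad_sequence

# Two traces of different lengths, each a list of per-step feature tensors.
trace_a = [torch.randn(4) for _ in range(3)]  # 3 steps, 4 features per step
trace_b = [torch.randn(4) for _ in range(5)]  # 5 steps, 4 features per step

# Stack each trace into a (steps, features) tensor, then pad to the longest trace.
padded = pad_sequence(
    [torch.stack(trace) for trace in (trace_a, trace_b)], batch_first=True
)
lengths = [len(trace) for trace in (trace_a, trace_b)]

print(padded.shape)  # torch.Size([2, 5, 4]) -- the shorter trace is zero-padded
print(lengths)       # [3, 5] -- returned alongside the batch, as in collate_fn above
```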
 class Graml(LearningRecognizer):
-    [old lines 47-168 removed; not expanded in this diff view]
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.agents: List[ContextualAgent] = []
+        self.train_func = train_metric_model
+        self.collate_func = collate_fn
+
+    @abstractmethod
+    def train_agents_on_base_goals(self, base_goals: List[str], train_configs: List):
+        pass
+
+    def domain_learning_phase(self, base_goals: List[str], train_configs: List):
+        super().domain_learning_phase(base_goals, train_configs)
+        self.train_agents_on_base_goals(base_goals, train_configs)
+        # train the network so it will find a metric for the observations of the base agents such that traces of agents to different goals are far from one another
+        self.model_directory = get_lstm_model_dir(
+            domain_name=self.env_prop.domain_name,
+            env_name=self.env_prop.name,
+            model_name=self.env_prop.problem_list_to_str_tuple(self.original_problems),
+            recognizer=self.__class__.__name__,
+        )
+        last_path = r"lstm_model.pth"
+        self.model_file_path = os.path.join(self.model_directory, last_path)
+        self.model = LstmObservations(
+            input_size=self.env_prop.get_lstm_props().input_size,
+            hidden_size=self.env_prop.get_lstm_props().hidden_size,
+        )
+        self.model.to(utils.device)
+
+        if os.path.exists(self.model_file_path):
+            print(f"Loading pre-existing lstm model in {self.model_file_path}")
+            load_weights(loaded_model=self.model, path=self.model_file_path)
+        else:
+            print(f"{self.model_file_path} doesn't exist, training the model")
+            train_samples, dev_samples = generate_datasets(
+                num_samples=self.env_prop.get_lstm_props().num_samples,
+                agents=self.agents,
+                observation_creation_method=metrics.stochastic_amplified_selection,
+                problems=self.original_problems,
+                env_prop=self.env_prop,
+                gc_goal_set=self.gc_goal_set if hasattr(self, "gc_goal_set") else None,
+                recognizer_name=self.__class__.__name__,
+            )
+
+            train_dataset = GRDataset(len(train_samples), train_samples)
+            dev_dataset = GRDataset(len(dev_samples), dev_samples)
+            self.train_func(
+                self.model,
+                train_loader=DataLoader(
+                    train_dataset,
+                    batch_size=self.env_prop.get_lstm_props().batch_size,
+                    shuffle=False,
+                    collate_fn=self.collate_func,
+                ),
+                dev_loader=DataLoader(
+                    dev_dataset,
+                    batch_size=self.env_prop.get_lstm_props().batch_size,
+                    shuffle=False,
+                    collate_fn=self.collate_func,
+                ),
+            )
+            save_weights(model=self.model, path=self.model_file_path)
+
+    def goals_adaptation_phase(self, dynamic_goals: List[EnvProperty], save_fig=False):
+        self.is_first_inf_since_new_goals = True
+        self.current_goals = dynamic_goals
+        # start by training each rl agent on the base goal set
+        self.embeddings_dict = (
+            {}
+        )  # relevant if the embedding of the plan occurs during the goals adaptation phase
+        self.plans_dict = (
+            {}
+        )  # relevant if the embedding of the plan occurs during the inference phase
+        for goal in self.current_goals:
+            obss = self.generate_sequences_library(goal, save_fig=save_fig)
+            self.plans_dict[str(goal)] = obss
+
+    def get_goal_plan(self, goal):
+        assert (
+            self.plans_dict
+        ), "plans_dict wasn't created during goals_adaptation_phase and now inference phase can't return the plans. when inference_same_length, keep the plans and not their embeddings during goals_adaptation_phase."
+        return self.plans_dict[goal]
+
+    def dump_plans(self, true_sequence, true_goal, percentage):
+        assert (
+            self.plans_dict
+        ), "plans_dict wasn't created during goals_adaptation_phase and now inference phase can't return the plans. when inference_same_length, keep the plans and not their embeddings during goals_adaptation_phase."
+        # Arrange storage
+        embeddings_path = get_and_create(
+            get_embeddings_result_path(
+                domain_name=self.env_prop.domain_name,
+                env_name=self.env_prop.name,
+                recognizer=self.__class__.__name__,
+            )
+        )
+        self.plans_dict[f"{true_goal}_true"] = true_sequence
+
+        with open(
+            embeddings_path + f"/{true_goal}_{percentage}_plans_dict.pkl", "wb"
+        ) as plans_file:
+            to_dump = {}
+            for goal, obss in self.plans_dict.items():
+                if goal == f"{true_goal}_true":
+                    to_dump[goal] = self.agents[0].agent.simplify_observation(obss)
+                else:
+                    to_dump[goal] = []
+                    for obs in obss:
+                        addition = (
+                            self.agents[0].agent.simplify_observation(obs)
+                            if self.is_first_inf_since_new_goals
+                            else obs
+                        )
+                        to_dump[goal].append(addition)
+            dill.dump(to_dump, plans_file)
+        self.plans_dict.pop(f"{true_goal}_true")
+
+    def create_embeddings_dict(self):
+        for goal, obss in self.plans_dict.items():
+            self.embeddings_dict[goal] = []
+            for cons_seq, non_cons_seq in obss:
+                self.embeddings_dict[goal].append(
+                    (
+                        self.model.embed_sequence(cons_seq),
+                        self.model.embed_sequence(non_cons_seq),
+                    )
+                )
+
+    def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
+        embeddings_path = get_and_create(
+            get_embeddings_result_path(
+                domain_name=self.env_prop.domain_name,
+                env_name=self.env_prop.name,
+                recognizer=self.__class__.__name__,
+            )
+        )
+        simplified_inf_sequence = self.agents[0].agent.simplify_observation(
+            inf_sequence
+        )
+        new_embedding = self.model.embed_sequence(simplified_inf_sequence)
+        assert (
+            self.plans_dict
+        ), "plans_dict wasn't created during goals_adaptation_phase and now inference phase can't embed the plans. when inference_same_length, keep the plans and not their embeddings during goals_adaptation_phase."
+        if self.is_first_inf_since_new_goals:
+            self.is_first_inf_since_new_goals = False
+            self.update_sequences_library_inference_phase(inf_sequence)
+            self.create_embeddings_dict()
+
+        closest_goal, greatest_similarity = None, 0
+        for goal, embeddings in self.embeddings_dict.items():
+            sum_curr_similarities = 0
+            for cons_embedding, non_cons_embedding in embeddings:
+                sum_curr_similarities += max(
+                    torch.exp(-torch.sum(torch.abs(cons_embedding - new_embedding))),
+                    torch.exp(
+                        -torch.sum(torch.abs(non_cons_embedding - new_embedding))
+                    ),
+                )
+            mean_similarity = sum_curr_similarities / len(embeddings)
+            if mean_similarity > greatest_similarity:
+                closest_goal = goal
+                greatest_similarity = mean_similarity
+
+        self.embeddings_dict[f"{true_goal}_true"] = new_embedding
+        if self.collect_statistics:
+            with open(
+                os.path.join(
+                    embeddings_path, f"{true_goal}_{percentage}_embeddings_dict.pkl"
+                ),
+                "wb",
+            ) as embeddings_file:
+                dill.dump(self.embeddings_dict, embeddings_file)
+            self.embeddings_dict.pop(f"{true_goal}_true")
+
+        return closest_goal
+
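In `inference_phase` above, each candidate goal is scored with an exponential kernel over the L1 distance between embeddings, `exp(-sum(|a - b|))`, and the goal with the highest mean similarity is returned. The snippet below is an illustrative sketch of that scoring rule only; the two tensors are hypothetical stand-ins for `self.model.embed_sequence(...)` outputs.

```python
import torch

# Hypothetical embeddings: one stored for a candidate goal, one for the observed sequence.
goal_embedding = torch.tensor([0.2, -1.0, 0.5])
new_embedding = torch.tensor([0.1, -0.8, 0.4])

# Exponential kernel over the L1 distance: identical embeddings score 1.0,
# and the score decays toward 0 as the embeddings drift apart.
similarity = torch.exp(-torch.sum(torch.abs(goal_embedding - new_embedding)))
print(float(similarity))  # exp(-0.4) ≈ 0.67
```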
+    @abstractmethod
+    def generate_sequences_library(
+        self, goal: str, save_fig=False
+    ) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
+        pass
+
+    # this function duplicates every sequence and creates a consecutive and non-consecutive version of it
+    def update_sequences_library_inference_phase(
+        self, inf_sequence
+    ) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
+        new_plans_dict = {}
+        for goal, obss in self.plans_dict.items():
+            new_obss = []
+            for obs in obss:
+                consecutive_partial_obs = random_subset_with_order(
+                    obs, len(inf_sequence), is_consecutive=True
+                )
+                non_consecutive_partial_obs = random_subset_with_order(
+                    obs, len(inf_sequence), is_consecutive=False
+                )
+                simplified_consecutive_partial_obs = self.agents[
+                    0
+                ].agent.simplify_observation(consecutive_partial_obs)
+                simplified_non_consecutive_partial_obs = self.agents[
+                    0
+                ].agent.simplify_observation(non_consecutive_partial_obs)
+                new_obss.append(
+                    (
+                        simplified_consecutive_partial_obs,
+                        simplified_non_consecutive_partial_obs,
+                    )
+                )
+            new_plans_dict[goal] = (
+                new_obss  # override old full observations with new partial observations with consecutive and non-consecutive versions.
+            )
+        self.plans_dict = new_plans_dict
+
 
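`update_sequences_library_inference_phase` above shortens every stored plan to the length of the observed sequence in two ways: a consecutive window and an order-preserving non-consecutive subset, both produced by `random_subset_with_order`. The toy function below only illustrates that distinction on a plain list; it is not the implementation in `gr_libs.ml.utils.format`, and its name is invented for this sketch.

```python
import random

def illustrative_ordered_subset(seq, k, is_consecutive):
    """Toy stand-in for random_subset_with_order: k elements of seq, original order kept."""
    if k >= len(seq):
        return list(seq)
    if is_consecutive:
        start = random.randrange(len(seq) - k + 1)  # one contiguous window
        return list(seq[start:start + k])
    indices = sorted(random.sample(range(len(seq)), k))  # scattered indices, order preserved
    return [seq[i] for i in indices]

trace = list(range(10))
print(illustrative_ordered_subset(trace, 4, is_consecutive=True))   # e.g. [3, 4, 5, 6]
print(illustrative_ordered_subset(trace, 4, is_consecutive=False))  # e.g. [0, 2, 5, 9]
```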
 class BGGraml(Graml):
-    [old lines 171-188 removed; not expanded in this diff view]
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    def domain_learning_phase(self, base_goals: List[str], train_configs: List):
+        assert len(train_configs) == len(
+            base_goals
+        ), "There should be train configs for every goal in BGGraml."
+        return super().domain_learning_phase(base_goals, train_configs)
+
+    # In case we need goal-directed agent for every goal
+    def train_agents_on_base_goals(self, base_goals: List[str], train_configs: List):
+        self.original_problems = [
+            self.env_prop.goal_to_problem_str(g) for g in base_goals
+        ]
+        # start by training each rl agent on the base goal set
+        for (problem, goal), (algorithm, num_timesteps) in zip(
+            zip(self.original_problems, base_goals), train_configs
+        ):
+            kwargs = {
+                "domain_name": self.domain_name,
+                "problem_name": problem,
+                "env_prop": self.env_prop,
+            }
+            if algorithm != None:
+                kwargs["algorithm"] = algorithm
+            if num_timesteps != None:
+                kwargs["num_timesteps"] = num_timesteps
+            agent = self.rl_agent_type(**kwargs)
+            agent.learn()
+            self.agents.append(
+                ContextualAgent(problem_name=problem, problem_goal=goal, agent=agent)
+            )
+
 
 class MCTSBasedGraml(BGGraml, GaAdaptingRecognizer):
-    [old lines 191-193 removed; not expanded in this diff view]
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        if self.rl_agent_type == None:
+            self.rl_agent_type = TabularQLearner
+
+    def generate_sequences_library(
+        self, goal: str, save_fig=False
+    ) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
+        problem_name = self.env_prop.goal_to_problem_str(goal)
+        img_path = os.path.join(
+            get_policy_sequences_result_path(
+                self.env_prop.domain_name, recognizer=self.__class__.__name__
+            ),
+            problem_name + "_MCTS",
+        )
+        return mcts_model.plan(
+            self.env_prop.name,
+            problem_name,
+            goal,
+            save_fig=save_fig,
+            img_path=img_path,
+            env_prop=self.env_prop,
+        )
 
-    def generate_sequences_library(self, goal: str) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
-        problem_name = self.env_prop.goal_to_problem_str(goal)
-        img_path = os.path.join(get_policy_sequences_result_path(self.env_prop.domain_name, recognizer=self.__class__.__name__), problem_name + "_MCTS")
-        return mcts_model.plan(self.env_prop.name, problem_name, goal, save_fig=True, img_path=img_path, env_prop=self.env_prop)
 
 class ExpertBasedGraml(BGGraml, GaAgentTrainerRecognizer):
-    [old lines 201-222 removed; not expanded in this diff view]
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        if self.rl_agent_type == None:
+            if self.env_prop.is_state_discrete() and self.env_prop.is_action_discrete():
+                self.rl_agent_type = TabularQLearner
+            else:
+                self.rl_agent_type = DeepRLAgent
+
+    def generate_sequences_library(
+        self, goal: str, save_fig=False
+    ) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
+        problem_name = self.env_prop.goal_to_problem_str(goal)
+        kwargs = {
+            "domain_name": self.domain_name,
+            "problem_name": problem_name,
+            "env_prop": self.env_prop,
+        }
+        if self.dynamic_train_configs_dict[problem_name][0] != None:
+            kwargs["algorithm"] = self.dynamic_train_configs_dict[problem_name][0]
+        if self.dynamic_train_configs_dict[problem_name][1] != None:
+            kwargs["num_timesteps"] = self.dynamic_train_configs_dict[problem_name][1]
+        agent = self.rl_agent_type(**kwargs)
+        agent.learn()
+        agent_kwargs = {
+            "action_selection_method": metrics.greedy_selection,
+            "random_optimalism": False,
+            "save_fig": save_fig,
+        }
+        if save_fig:
+            fig_path = get_and_create(
+                f"{os.path.abspath(os.path.join(get_policy_sequences_result_path(domain_name=self.env_prop.domain_name, env_name=self.env_prop.name, recognizer=self.__class__.__name__), problem_name))}_bg_sequence"
+            )
+            agent_kwargs["fig_path"] = fig_path
+        return [agent.generate_observation(**agent_kwargs)]
+
+    def goals_adaptation_phase(self, dynamic_goals: List[str], dynamic_train_configs):
+        self.dynamic_goals_problems = [
+            self.env_prop.goal_to_problem_str(g) for g in dynamic_goals
+        ]
+        self.dynamic_train_configs_dict = {
+            problem: config
+            for problem, config in zip(
+                self.dynamic_goals_problems, dynamic_train_configs
+            )
+        }
+        return super().goals_adaptation_phase(dynamic_goals)
+
 
 class GCGraml(Graml, GaAdaptingRecognizer):
-    [old lines 225-265 removed; not expanded in this diff view]
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        if self.rl_agent_type == None:
+            self.rl_agent_type = GCDeepRLAgent
+        assert (
+            self.env_prop.gc_adaptable()
+            and not self.env_prop.is_state_discrete()
+            and not self.env_prop.is_action_discrete()
+        )
+
+    def domain_learning_phase(self, base_goals: List[str], train_configs: List):
+        assert (
+            len(train_configs) == 1
+        ), "There should be one train config for the sole gc agent in GCGraml."
+        return super().domain_learning_phase(base_goals, train_configs)
+
+    # In case we need goal-directed agent for every goal
+    def train_agents_on_base_goals(self, base_goals: List[str], train_configs: List):
+        self.gc_goal_set = base_goals
+        self.original_problems = self.env_prop.name  # needed for gr_dataset
+        # start by training each rl agent on the base goal set
+        kwargs = {
+            "domain_name": self.domain_name,
+            "problem_name": self.env_prop.name,
+            "env_prop": self.env_prop,
+        }
+        algorithm, num_timesteps = train_configs[0]  # should only be one, was asserted
+        if algorithm != None:
+            kwargs["algorithm"] = algorithm
+        if num_timesteps != None:
+            kwargs["num_timesteps"] = num_timesteps
+        gc_agent = self.rl_agent_type(**kwargs)
+        gc_agent.learn()
+        self.agents.append(
+            ContextualAgent(
+                problem_name=self.env_prop.name, problem_goal="general", agent=gc_agent
+            )
+        )
+
+    def generate_sequences_library(
+        self, goal: str, save_fig=False
+    ) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
+        problem_name = self.env_prop.goal_to_problem_str(goal)
+        kwargs = {
+            "domain_name": self.domain_name,
+            "problem_name": self.env_prop.name,
+            "env_prop": self.env_prop,
+        }  # problem name is env name in gc case
+        if self.original_train_configs[0][0] != None:
+            kwargs["algorithm"] = self.original_train_configs[0][0]
+        if self.original_train_configs[0][1] != None:
+            kwargs["num_timesteps"] = self.original_train_configs[0][1]
+        agent = self.rl_agent_type(**kwargs)
+        agent.learn()
+        agent_kwargs = {
+            "action_selection_method": metrics.stochastic_amplified_selection,
+            "random_optimalism": True,
+            "save_fig": save_fig,
+        }
+        if save_fig:
+            fig_path = get_and_create(
+                f"{os.path.abspath(os.path.join(get_policy_sequences_result_path(domain_name=self.env_prop.domain_name, env_name=self.env_prop.name, recognizer=self.__class__.__name__), problem_name))}_gc_sequence"
+            )
+            agent_kwargs["fig_path"] = fig_path
+        if self.env_prop.use_goal_directed_problem():
+            agent_kwargs["goal_directed_problem"] = problem_name
+        else:
+            agent_kwargs["goal_directed_goal"] = goal
+        obss = []
+        for _ in range(5):
+            obss.append(agent.generate_observation(**agent_kwargs))
+        return obss