gr-libs 0.1.8__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
- gr_libs/__init__.py +3 -1
- gr_libs/_evaluation/__init__.py +1 -0
- evaluation/analyze_results_cross_alg_cross_domain.py → gr_libs/_evaluation/_analyze_results_cross_alg_cross_domain.py +81 -88
- evaluation/generate_experiments_results.py → gr_libs/_evaluation/_generate_experiments_results.py +6 -6
- evaluation/generate_task_specific_statistics_plots.py → gr_libs/_evaluation/_generate_task_specific_statistics_plots.py +11 -14
- evaluation/get_plans_images.py → gr_libs/_evaluation/_get_plans_images.py +3 -4
- evaluation/increasing_and_decreasing_.py → gr_libs/_evaluation/_increasing_and_decreasing_.py +3 -1
- gr_libs/_version.py +2 -2
- gr_libs/all_experiments.py +294 -0
- gr_libs/environment/__init__.py +14 -1
- gr_libs/environment/{utils → _utils}/utils.py +1 -1
- gr_libs/environment/environment.py +257 -22
- gr_libs/metrics/__init__.py +2 -0
- gr_libs/metrics/metrics.py +166 -31
- gr_libs/ml/__init__.py +1 -6
- gr_libs/ml/base/__init__.py +3 -1
- gr_libs/ml/base/rl_agent.py +68 -3
- gr_libs/ml/neural/__init__.py +1 -3
- gr_libs/ml/neural/deep_rl_learner.py +227 -67
- gr_libs/ml/neural/utils/__init__.py +1 -2
- gr_libs/ml/planner/mcts/{utils → _utils}/tree.py +1 -1
- gr_libs/ml/planner/mcts/mcts_model.py +71 -34
- gr_libs/ml/sequential/__init__.py +0 -1
- gr_libs/ml/sequential/{lstm_model.py → _lstm_model.py} +11 -14
- gr_libs/ml/tabular/__init__.py +1 -3
- gr_libs/ml/tabular/tabular_q_learner.py +27 -9
- gr_libs/ml/tabular/tabular_rl_agent.py +22 -9
- gr_libs/ml/utils/__init__.py +2 -9
- gr_libs/ml/utils/format.py +13 -90
- gr_libs/ml/utils/math.py +3 -2
- gr_libs/ml/utils/other.py +2 -2
- gr_libs/ml/utils/storage.py +41 -94
- gr_libs/odgr_executor.py +268 -0
- gr_libs/problems/consts.py +2 -2
- gr_libs/recognizer/_utils/__init__.py +0 -0
- gr_libs/recognizer/{utils → _utils}/format.py +2 -2
- gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +116 -36
- gr_libs/recognizer/graml/{gr_dataset.py → _gr_dataset.py} +11 -11
- gr_libs/recognizer/graml/graml_recognizer.py +172 -29
- gr_libs/recognizer/recognizer.py +59 -10
- gr_libs/tutorials/draco_panda_tutorial.py +58 -0
- gr_libs/tutorials/draco_parking_tutorial.py +56 -0
- {tutorials → gr_libs/tutorials}/gcdraco_panda_tutorial.py +5 -9
- {tutorials → gr_libs/tutorials}/gcdraco_parking_tutorial.py +3 -7
- {tutorials → gr_libs/tutorials}/graml_minigrid_tutorial.py +2 -2
- {tutorials → gr_libs/tutorials}/graml_panda_tutorial.py +5 -10
- {tutorials → gr_libs/tutorials}/graml_parking_tutorial.py +5 -9
- {tutorials → gr_libs/tutorials}/graml_point_maze_tutorial.py +2 -1
- {tutorials → gr_libs/tutorials}/graql_minigrid_tutorial.py +2 -2
- {gr_libs-0.1.8.dist-info → gr_libs-0.2.2.dist-info}/METADATA +84 -29
- gr_libs-0.2.2.dist-info/RECORD +71 -0
- {gr_libs-0.1.8.dist-info → gr_libs-0.2.2.dist-info}/WHEEL +1 -1
- gr_libs-0.2.2.dist-info/top_level.txt +2 -0
- tests/test_draco.py +14 -0
- tests/test_gcdraco.py +2 -2
- tests/test_graml.py +4 -4
- tests/test_graql.py +1 -1
- evaluation/create_minigrid_map_image.py +0 -38
- evaluation/file_system.py +0 -53
- evaluation/generate_experiments_results_new_ver1.py +0 -238
- evaluation/generate_experiments_results_new_ver2.py +0 -331
- gr_libs/ml/neural/utils/penv.py +0 -60
- gr_libs/recognizer/utils/__init__.py +0 -1
- gr_libs-0.1.8.dist-info/RECORD +0 -70
- gr_libs-0.1.8.dist-info/top_level.txt +0 -4
- /gr_libs/environment/{utils → _utils}/__init__.py +0 -0
- /gr_libs/ml/planner/mcts/{utils → _utils}/__init__.py +0 -0
- /gr_libs/ml/planner/mcts/{utils → _utils}/node.py +0 -0
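Taken together, the moves above amount to a repackaging: the top-level `evaluation` and `tutorials` directories now live inside `gr_libs`, internal helper packages are renamed `utils` → `_utils`, and two new entry points appear (`gr_libs/odgr_executor.py`, `gr_libs/all_experiments.py`). A minimal sketch of what this means for downstream imports, assuming gr-libs 0.2.2 is installed; the imported symbols are illustrative assumptions, not taken from this diff:

```python
# gr-libs 0.1.8: tutorials shipped as a separate top-level package, and
# recognizer utilities were public. These imports break after upgrading:
#   from tutorials import graml_minigrid_tutorial
#   from gr_libs.recognizer.utils import format   # "utils" is now "_utils"

# gr-libs 0.2.2: everything is namespaced under gr_libs, and modules
# prefixed with an underscore (_evaluation, _utils, ...) are internal.
from gr_libs.tutorials import graml_minigrid_tutorial  # moved under gr_libs
from gr_libs import odgr_executor  # new module in 0.2.2
```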
gr_libs/environment/environment.py

```diff
@@ -1,14 +1,16 @@
+""" environment.py """
+
+import os
 from abc import abstractmethod
 from collections import namedtuple
-import os
 
 import gymnasium as gym
-from stable_baselines3.common.vec_env import DummyVecEnv
-from PIL import Image
 import numpy as np
 from gymnasium.envs.registration import register
-from minigrid.core.world_object import
-from minigrid.wrappers import
+from minigrid.core.world_object import Lava, Wall
+from minigrid.wrappers import ImgObsWrapper, RGBImgPartialObsWrapper
+from PIL import Image
+from stable_baselines3.common.vec_env import DummyVecEnv
 
 MINIGRID, PANDA, PARKING, POINT_MAZE = "minigrid", "panda", "parking", "point_maze"
 
@@ -22,111 +24,207 @@ LSTMProperties = namedtuple(
 
 
 class EnvProperty:
+    """
+    Base class for environment properties.
+    """
+
     def __init__(self, name):
+        """
+        Initializes a new instance of the Environment class.
+
+        Args:
+            name (str): The name of the environment.
+        """
         self.name = name
 
     def __str__(self):
+        """
+        Returns a string representation of the object.
+        """
         return f"{self.name}"
 
     def __repr__(self):
+        """
+        Returns a string representation of the object.
+        """
         return f"{self.name}"
 
     def __eq__(self, other):
+        """
+        Check if this object is equal to another object.
+
+        Args:
+            other: The other object to compare with.
+
+        Returns:
+            True if the objects are equal, False otherwise.
+        """
         return self.name == other.name
 
     def __ne__(self, other):
+        """
+        Check if the current object is not equal to the other object.
+
+        Args:
+            other: The object to compare with.
+
+        Returns:
+            bool: True if the objects are not equal, False otherwise.
+        """
         return not self.__eq__(other)
 
     @abstractmethod
     def str_to_goal(self):
-
+        """
+        Convert a problem name to a goal.
+        """
 
     @abstractmethod
     def gc_adaptable(self):
-
+        """
+        Check if the environment is goal-conditioned adaptable.
+        """
 
     @abstractmethod
     def problem_list_to_str_tuple(self, problems):
-
+        """
+        Convert a list of problems to a string tuple.
+        """
 
     @abstractmethod
     def goal_to_problem_str(self, goal):
-
+        """
+        Convert a goal to a problem string.
+        """
 
     @abstractmethod
     def is_action_discrete(self):
-
+        """
+        Check if the action space is discrete.
+        """
 
     @abstractmethod
     def is_state_discrete(self):
-
+        """
+        Check if the state space is discrete.
+        """
 
     @abstractmethod
     def get_lstm_props(self):
-
+        """
+        Get the LSTM properties for the environment.
+        """
 
     @abstractmethod
     def change_done_by_specific_desired(self, obs, desired, old_success_done):
-
+        """
+        Change the 'done' flag based on a specific desired goal.
+        """
 
     @abstractmethod
     def is_done(self, done):
-
+        """
+        Check if the episode is done.
+        """
 
     @abstractmethod
     def is_success(self, info):
-
+        """
+        Check if the episode is successful.
+        """
 
     def create_vec_env(self, kwargs):
+        """
+        Create a vectorized environment.
+        """
         env = gym.make(**kwargs)
         return DummyVecEnv([lambda: env])
 
     @abstractmethod
     def change_goal_to_specific_desired(self, obs, desired):
-
+        """
+        Change the goal to a specific desired goal.
+        """
 
 
 class GCEnvProperty(EnvProperty):
+    """
+    Base class for goal-conditioned environment properties.
+    """
+
     @abstractmethod
     def use_goal_directed_problem(self):
-
+        """
+        Check if the environment uses a goal-directed problem.
+        """
 
     def problem_list_to_str_tuple(self, problems):
+        """
+        Convert a list of problems to a string tuple.
+        """
         return "goal_conditioned"
 
 
 class MinigridProperty(EnvProperty):
+    """
+    Environment properties for the Minigrid domain.
+    """
+
     def __init__(self, name):
         super().__init__(name)
         self.domain_name = "minigrid"
 
     def goal_to_problem_str(self, goal):
+        """
+        Convert a goal to a problem string.
+        """
         return self.name + f"-DynamicGoal-{goal[0]}x{goal[1]}-v0"
 
     def str_to_goal(self, problem_name):
+        """
+        Convert a problem name to a goal.
+        """
         parts = problem_name.split("-")
         goal_part = [part for part in parts if "x" in part]
         width, height = goal_part[0].split("x")
         return (int(width), int(height))
 
     def gc_adaptable(self):
+        """
+        Check if the environment is goal-conditioned adaptable.
+        """
         return False
 
     def problem_list_to_str_tuple(self, problems):
+        """
+        Convert a list of problems to a string tuple.
+        """
         return "_".join([f"[{s.split('-')[-2]}]" for s in problems])
 
     def is_action_discrete(self):
+        """
+        Check if the action space is discrete.
+        """
         return True
 
     def is_state_discrete(self):
+        """
+        Check if the state space is discrete.
+        """
         return True
 
     def get_lstm_props(self):
+        """
+        Get the LSTM properties for the environment.
+        """
         return LSTMProperties(
             batch_size=16, input_size=4, hidden_size=8, num_samples=40000
         )
 
     def create_sequence_image(self, sequence, img_path, problem_name):
+        """
+        Create a sequence image for the environment.
+        """
         if not os.path.exists(os.path.dirname(img_path)):
             os.makedirs(os.path.dirname(img_path))
         env_id = (
@@ -134,7 +232,7 @@ class MinigridProperty(EnvProperty):
             + "-DynamicGoal-"
             + problem_name.split("-DynamicGoal-")[1]
         )
-
+        register(
             id=env_id,
             entry_point="gr_envs.minigrid_scripts.envs:CustomColorEnv",
             kwargs={
@@ -146,7 +244,6 @@ class MinigridProperty(EnvProperty):
                 "plan": sequence,
             },
         )
-        # print(result)
         env = gym.make(id=env_id)
         env = RGBImgPartialObsWrapper(env)  # Get pixel observations
         env = ImgObsWrapper(env)  # Get rid of the 'mission' field
@@ -156,34 +253,62 @@ class MinigridProperty(EnvProperty):
 
         ####### save image to file
         image_pil = Image.fromarray(np.uint8(img)).convert("RGB")
-        image_pil.save(r"{}.png".format(img_path))
+        image_pil.save(r"{}.png".format(os.path.join(img_path, "plan_image")))
 
     def change_done_by_specific_desired(self, obs, desired, old_success_done):
+        """
+        Change the 'done' flag based on a specific desired goal.
+        """
         assert (
             desired is None
         ), "In MinigridProperty, giving a specific 'desired' is not supported."
         return old_success_done
 
     def is_done(self, done):
+        """
+        Check if the episode is done.
+        """
         assert isinstance(done, np.ndarray)
         return done[0]
 
-    # Not used currently since TabularQLearner doesn't need is_success from the environment
     def is_success(self, info):
+        """
+        Check if the episode is successful.
+        """
         raise NotImplementedError("no other option for any of the environments.")
 
     def change_goal_to_specific_desired(self, obs, desired):
+        """
+        Change the goal to a specific desired goal.
+        """
         assert (
             desired is None
         ), "In MinigridProperty, giving a specific 'desired' is not supported."
 
 
 class PandaProperty(GCEnvProperty):
+    """
+    Environment properties for the Panda domain.
+    """
+
     def __init__(self, name):
+        """
+        Initialize a new instance of the Environment class.
+
+        Args:
+            name (str): The name of the environment.
+
+        Attributes:
+            domain_name (str): The domain name of the environment.
+
+        """
         super().__init__(name)
         self.domain_name = "panda"
 
     def str_to_goal(self, problem_name):
+        """
+        Convert a problem name to a goal.
+        """
         try:
             numeric_part = problem_name.split("PandaMyReachDenseX")[1]
             components = [
@@ -194,38 +319,62 @@ class PandaProperty(GCEnvProperty):
             for component in components:
                 floats.append(float(component))
             return np.array([floats], dtype=np.float32)
-        except Exception
+        except Exception:
             return "general"
 
     def goal_to_problem_str(self, goal):
+        """
+        Convert a goal to a problem string.
+        """
         goal_str = "X".join(
             [str(float(g)).replace(".", "y").replace("-", "M") for g in goal[0]]
         )
         return f"PandaMyReachDenseX{goal_str}-v3"
 
     def gc_adaptable(self):
+        """
+        Check if the environment is goal-conditioned adaptable.
+        """
         return True
 
     def use_goal_directed_problem(self):
+        """
+        Check if the environment uses a goal-directed problem.
+        """
         return False
 
     def is_action_discrete(self):
+        """
+        Check if the action space is discrete.
+        """
         return False
 
     def is_state_discrete(self):
+        """
+        Check if the state space is discrete.
+        """
         return False
 
     def get_lstm_props(self):
+        """
+        Get the LSTM properties for the environment.
+        """
         return LSTMProperties(
             batch_size=32, input_size=9, hidden_size=8, num_samples=20000
         )
 
     def sample_goal():
+        """
+        Sample a random goal.
+        """
        goal_range_low = np.array([-0.40, -0.40, 0.10])
        goal_range_high = np.array([0.2, 0.2, 0.10])
        return np.random.uniform(goal_range_low, goal_range_high)
 
     def change_done_by_specific_desired(self, obs, desired, old_success_done):
+        """
+        Change the 'done' flag based on a specific desired goal.
+        """
         if desired is None:
             return old_success_done
         assert isinstance(
@@ -241,70 +390,134 @@ class PandaProperty(GCEnvProperty):
         return old_success_done
 
     def is_done(self, done):
+        """
+        Check if the episode is done.
+        """
         assert isinstance(done, np.ndarray)
         return done[0]
 
     def is_success(self, info):
+        """
+        Check if the episode is successful.
+        """
         assert "is_success" in info[0].keys()
         return info[0]["is_success"]
 
     def change_goal_to_specific_desired(self, obs, desired):
+        """
+        Change the goal to a specific desired goal.
+        """
         if desired is not None:
             obs["desired_goal"] = desired
 
 
 class ParkingProperty(GCEnvProperty):
+    """
+    Environment properties for the Parking domain.
+    """
 
     def __init__(self, name):
+        """
+        Initialize a new environment object.
+
+        Args:
+            name (str): The name of the environment.
+
+        Attributes:
+            domain_name (str): The domain name of the environment.
+
+        """
         super().__init__(name)
         self.domain_name = "parking"
 
     def goal_to_problem_str(self, goal):
+        """
+        Convert a goal to a problem string.
+        """
         return self.name.split("-v0")[0] + f"-GI-{goal}-v0"
 
     def gc_adaptable(self):
+        """
+        Check if the environment is goal-conditioned adaptable.
+        """
         return True
 
     def is_action_discrete(self):
+        """
+        Check if the action space is discrete.
+        """
         return False
 
     def is_state_discrete(self):
+        """
+        Check if the state space is discrete.
+        """
         return False
 
     def use_goal_directed_problem(self):
+        """
+        Check if the environment uses a goal-directed problem.
+        """
         return True
 
     def get_lstm_props(self):
+        """
+        Get the LSTM properties for the environment.
+        """
         return LSTMProperties(
             batch_size=32, input_size=8, hidden_size=8, num_samples=20000
         )
 
     def change_done_by_specific_desired(self, obs, desired, old_success_done):
+        """
+        Change the 'done' flag based on a specific desired goal.
+        """
         assert (
             desired is None
         ), "In ParkingProperty, giving a specific 'desired' is not supported."
         return old_success_done
 
     def is_done(self, done):
+        """
+        Check if the episode is done.
+        """
         assert isinstance(done, np.ndarray)
         return done[0]
 
     def is_success(self, info):
+        """
+        Check if the episode is successful.
+        """
         assert "is_success" in info[0].keys()
         return info[0]["is_success"]
 
     def change_goal_to_specific_desired(self, obs, desired):
+        """
+        Change the goal to a specific desired goal.
+        """
         assert (
             desired is None
         ), "In ParkingProperty, giving a specific 'desired' is not supported."
 
 
 class PointMazeProperty(EnvProperty):
+    """Environment properties for the Point Maze domain."""
+
     def __init__(self, name):
+        """
+        Initializes a new instance of the Environment class.
+
+        Args:
+            name (str): The name of the environment.
+
+        Attributes:
+            domain_name (str): The domain name of the environment.
+        """
         super().__init__(name)
         self.domain_name = "point_maze"
 
     def str_to_goal(self):
+        """Convert a problem name to a goal."""
         parts = self.name.split("-")
         # Find the part containing the goal size (usually after "DynamicGoal")
         sizes_parts = [part for part in parts if "x" in part]
@@ -314,40 +527,62 @@ class PointMazeProperty(EnvProperty):
         return (int(width), int(height))
 
     def gc_adaptable(self):
+        """Check if the environment is goal-conditioned adaptable."""
         return False
 
     def problem_list_to_str_tuple(self, problems):
+        """Convert a list of problems to a string tuple."""
         return "_".join([f"[{s.split('-')[-1]}]" for s in problems])
 
     def is_action_discrete(self):
+        """Check if the action space is discrete."""
        return False
 
     def is_state_discrete(self):
+        """Check if the state space is discrete."""
         return False
 
     def get_lstm_props(self):
+        """
+        Get the LSTM properties for the environment.
+        """
         return LSTMProperties(
             batch_size=32, input_size=6, hidden_size=8, num_samples=20000
         )
 
     def goal_to_problem_str(self, goal):
+        """
+        Convert a goal to a problem string.
+        """
         return self.name + f"-Goal-{goal[0]}x{goal[1]}"
 
     def change_done_by_specific_desired(self, obs, desired, old_success_done):
+        """
+        Change the 'done' flag based on a specific desired goal.
+        """
         assert (
             desired is None
         ), "In PointMazeProperty, giving a specific 'desired' is not supported."
         return old_success_done
 
     def is_done(self, done):
+        """
+        Check if the episode is done.
+        """
         assert isinstance(done, np.ndarray)
         return done[0]
 
     def is_success(self, info):
+        """
+        Check if the episode is successful.
+        """
         assert "success" in info[0].keys()
         return info[0]["success"]
 
     def change_goal_to_specific_desired(self, obs, desired):
+        """
+        Change the goal to a specific desired goal.
+        """
         assert (
             desired is None
         ), "In ParkingProperty, giving a specific 'desired' is not supported."
```