gr-libs 0.2.2__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff shows the contents of publicly available package versions as released to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- gr_libs/_evaluation/_generate_experiments_results.py +0 -141
- gr_libs/_version.py +2 -2
- gr_libs/all_experiments.py +73 -107
- gr_libs/environment/environment.py +22 -2
- gr_libs/evaluation/generate_experiments_results.py +100 -0
- gr_libs/ml/neural/deep_rl_learner.py +17 -20
- gr_libs/odgr_executor.py +20 -25
- gr_libs/problems/consts.py +568 -290
- gr_libs/recognizer/_utils/__init__.py +1 -0
- gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +12 -1
- gr_libs/recognizer/graml/graml_recognizer.py +16 -8
- gr_libs/tutorials/gcdraco_panda_tutorial.py +6 -2
- gr_libs/tutorials/gcdraco_parking_tutorial.py +3 -1
- gr_libs/tutorials/graml_minigrid_tutorial.py +16 -12
- gr_libs/tutorials/graml_panda_tutorial.py +6 -2
- gr_libs/tutorials/graml_parking_tutorial.py +3 -1
- gr_libs/tutorials/graml_point_maze_tutorial.py +15 -2
- {gr_libs-0.2.2.dist-info → gr_libs-0.2.5.dist-info}/METADATA +27 -16
- {gr_libs-0.2.2.dist-info → gr_libs-0.2.5.dist-info}/RECORD +26 -25
- {gr_libs-0.2.2.dist-info → gr_libs-0.2.5.dist-info}/WHEEL +1 -1
- tests/test_odgr_executor_expertbasedgraml.py +14 -0
- tests/test_odgr_executor_gcdraco.py +14 -0
- tests/test_odgr_executor_gcgraml.py +14 -0
- tests/test_odgr_executor_graql.py +14 -0
- gr_libs/_evaluation/_analyze_results_cross_alg_cross_domain.py +0 -260
- gr_libs/_evaluation/_generate_task_specific_statistics_plots.py +0 -497
- gr_libs/_evaluation/_get_plans_images.py +0 -61
- gr_libs/_evaluation/_increasing_and_decreasing_.py +0 -106
- /gr_libs/{_evaluation → evaluation}/__init__.py +0 -0
- {gr_libs-0.2.2.dist-info → gr_libs-0.2.5.dist-info}/top_level.txt +0 -0
gr_libs/problems/consts.py
CHANGED
@@ -14,9 +14,15 @@ PROBLEMS = {
     PARKING: {
         "Parking-S-14-PC-": {
             "L1": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [i for i in range(1, 21)],
+                        "train_configs": [(PPO, 200000)],
+                    },
+                    "bg": {
+                        "goals": ["1", "4", "8", "14", "21"],
+                        "train_configs": [(SAC, 200000) for _ in range(5)],
+                    },
                 },
                 "G_0": {
                     "goals": ["1", "11", "21"],
@@ -26,9 +32,15 @@ PROBLEMS = {
                 },
             },
             "L2": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [i for i in range(1, 21)],
+                        "train_configs": [(PPO, 200000)],
+                    },
+                    "bg": {
+                        "goals": ["1", "4", "8", "14", "21"],
+                        "train_configs": [(SAC, 200000) for _ in range(5)],
+                    },
                 },
                 "G_0": {
                     "goals": ["1", "8", "14", "21"],
@@ -36,9 +48,15 @@ PROBLEMS = {
                 },
             },
             "L3": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [i for i in range(1, 21)],
+                        "train_configs": [(PPO, 200000)],
+                    },
+                    "bg": {
+                        "goals": ["1", "4", "8", "14", "21"],
+                        "train_configs": [(SAC, 200000) for _ in range(5)],
+                    },
                 },
                 "G_0": {
                     "goals": ["1", "8", "11", "18"],
@@ -46,9 +64,15 @@ PROBLEMS = {
                 },
             },
             "L4": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [i for i in range(1, 21)],
+                        "train_configs": [(PPO, 200000)],
+                    },
+                    "bg": {
+                        "goals": ["1", "4", "8", "14", "21"],
+                        "train_configs": [(SAC, 200000) for _ in range(5)],
+                    },
                 },
                 "G_0": {
                     "goals": ["4", "8", "11", "14", "18"],
@@ -56,13 +80,15 @@ PROBLEMS = {
                 },
             },
             "L5": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [i for i in range(1, 21)],
+                        "train_configs": [(PPO, 200000)],
+                    },
+                    "bg": {
+                        "goals": ["1", "4", "8", "11", "14", "18", "21"],
+                        "train_configs": [(SAC, 200000) for _ in range(7)],
+                    },
                 },
             },
         }
@@ -70,11 +96,26 @@ PROBLEMS = {
     PANDA: {
         "PandaMyReachDense": {
             "L1": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [
+                            np.array([PandaProperty.sample_goal()])
+                            for _ in range(1, 30)
+                        ],
+                        "train_configs": [(SAC, 800000)],
+                    },
+                    "bg": {
+                        "goals": [
+                            np.array([[-0.1, -0.1, 0.1]]),
+                            np.array([[-0.1, 0.1, 0.1]]),
+                            np.array([[0.2, 0.2, 0.1]]),
+                        ],
+                        "train_configs": [
+                            (PPO, 200000),
+                            (PPO, 200000),
+                            (PPO, 300000),
+                        ],
+                    },
                 },
                 "G_0": {
                     "goals": [
@@ -86,15 +127,32 @@ PROBLEMS = {
                         (SAC, 00000),
                         (SAC, 200000),
                         (SAC, 300000),
-                        ],
+                    ],
                 },
             },
             "L2": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [
+                            np.array([PandaProperty.sample_goal()])
+                            for _ in range(1, 30)
+                        ],
+                        "train_configs": [(SAC, 800000)],
+                    },
+                    "bg": {
+                        "goals": [
+                            np.array([[-0.5, -0.5, 0.1]]),
+                            np.array([[-0.5, 0.2, 0.1]]),
+                            np.array([[-0.1, 0.1, 0.1]]),
+                            np.array([[0.1, -0.1, 0.1]]),
+                        ],
+                        "train_configs": [
+                            (PPO, 400000),
+                            (PPO, 400000),
+                            (PPO, 400000),
+                            (PPO, 400000),
+                        ],
+                    },
                 },
                 "G_0": {
                     "goals": [
@@ -108,15 +166,29 @@ PROBLEMS = {
                         (SAC, 400000),
                         (SAC, 400000),
                         (SAC, 400000),
-                        ],
+                    ],
                 },
             },
             "L3": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [
+                            np.array([PandaProperty.sample_goal()])
+                            for _ in range(1, 30)
+                        ],
+                        "train_configs": [(SAC, 800000)],
+                    },
+                    "bg": {
+                        "goals": [
+                            np.array([[-0.5, -0.5, 0.1]]),
+                            np.array([[-0.1, -0.1, 0.1]]),
+                            np.array([[-0.5, 0.2, 0.1]]),
+                            np.array([[-0.1, 0.1, 0.1]]),
+                            np.array([[0.2, -0.2, 0.1]]),
+                            np.array([[0.2, 0.2, 0.1]]),
+                        ],
+                        "train_configs": [(PPO, 400000) for _ in range(6)],
+                    },
                 },
                 "G_0": {
                     "goals": [
@@ -127,17 +199,36 @@ PROBLEMS = {
                         np.array([[0.2, -0.2, 0.1]]),
                         np.array([[0.2, 0.2, 0.1]]),
                     ],
-                    "train_configs": [
-                        (SAC, 400000) for _ in range(6)
-                    ],  # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(SAC, 400000) for _ in range(6)],
                 },
             },
             "L4": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [
+                            np.array([PandaProperty.sample_goal()])
+                            for _ in range(1, 30)
+                        ],
+                        "train_configs": [(SAC, 800000)],
+                    },
+                    "bg": {
+                        "goals": [
+                            np.array([[-0.3, -0.3, 0.1]]),
+                            np.array([[-0.1, -0.1, 0.1]]),
+                            np.array([[-0.3, 0.2, 0.1]]),
+                            np.array([[-0.1, 0.1, 0.1]]),
+                            np.array([[0.1, -0.1, 0.1]]),
+                            np.array([[0.2, 0.2, 0.1]]),
+                        ],
+                        "train_configs": [
+                            (SAC, 400000),
+                            (PPO, 400000),
+                            (PPO, 400000),
+                            (PPO, 400000),
+                            (PPO, 400000),
+                            (PPO, 400000),
+                        ],
+                    },
                 },
                 "G_0": {
                     "goals": [
@@ -148,17 +239,42 @@ PROBLEMS = {
                         np.array([[0.1, -0.1, 0.1]]),
                         np.array([[0.2, 0.2, 0.1]]),
                     ],
-                    "train_configs": [
-                        (SAC, 400000) for _ in range(6)
-                    ],  # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(SAC, 400000) for _ in range(6)],
                 },
             },
             "L5": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [
+                            np.array([PandaProperty.sample_goal()])
+                            for _ in range(1, 30)
+                        ],
+                        "train_configs": [(SAC, 800000)],
+                    },
+                    "bg": {
+                        "goals": [
+                            np.array([[-0.5, -0.5, 0.1]]),
+                            np.array([[-0.3, -0.3, 0.1]]),
+                            np.array([[-0.1, -0.1, 0.1]]),
+                            np.array([[-0.5, 0.2, 0.1]]),
+                            np.array([[-0.3, 0.2, 0.1]]),
+                            np.array([[-0.1, 0.1, 0.1]]),
+                            np.array([[0.2, -0.2, 0.1]]),
+                            np.array([[0.1, -0.1, 0.1]]),
+                            np.array([[0.2, 0.2, 0.1]]),
+                        ],
+                        "train_configs": [
+                            (PPO, 400000),
+                            (SAC, 400000),
+                            (PPO, 400000),
+                            (PPO, 400000),
+                            (PPO, 400000),
+                            (PPO, 400000),
+                            (PPO, 400000),
+                            (PPO, 400000),
+                            (PPO, 400000),
+                        ],
+                    },
                 },
                 "G_0": {
                     "goals": [
@@ -172,9 +288,7 @@ PROBLEMS = {
                         np.array([[0.1, -0.1, 0.1]]),
                         np.array([[0.2, 0.2, 0.1]]),
                     ],
-                    "train_configs": [
-                        (SAC, 400000) for _ in range(9)
-                    ],  # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(SAC, 400000) for _ in range(9)],
                 },
             },
         }
@@ -182,170 +296,258 @@ PROBLEMS = {
     POINT_MAZE: {
         "PointMaze-FourRoomsEnvDense-11x11": {
             "L1": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (9, 1),
+                            (9, 9),
+                            (1, 9),
+                            (3, 3),
+                            (3, 4),
+                            (8, 2),
+                            (3, 7),
+                            (2, 8),
+                        ],
+                        "train_configs": [(SAC, 400000) for _ in range(8)],
+                    },
+                    "bg": {
+                        "goals": [(4, 4), (7, 3), (3, 7)],
+                        "train_configs": [(SAC, 400000) for _ in range(3)],
+                    },
                 },
                 "G_0": {
                     "goals": [(4, 4), (7, 3), (3, 7)],
-                    "train_configs": [
-                        (SAC, 400000) for _ in range(3)
-                    ],  # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(SAC, 400000) for _ in range(3)],
                 },
             },
             "L2": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (9, 1),
+                            (9, 9),
+                            (1, 9),
+                            (3, 3),
+                            (3, 4),
+                            (8, 2),
+                            (3, 7),
+                            (2, 8),
+                        ],
+                        "train_configs": [(SAC, 400000) for _ in range(8)],
+                    },
+                    "bg": {
+                        "goals": [(4, 4), (7, 3), (3, 7), (8, 2)],
+                        "train_configs": [(SAC, 400000) for _ in range(4)],
+                    },
                 },
                 "G_0": {
                     "goals": [(4, 4), (7, 3), (3, 7), (8, 2)],
-                    "train_configs": [
-                        (SAC, 400000) for _ in range(4)
-                    ],  # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(SAC, 400000) for _ in range(4)],
                 },
             },
             "L3": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (9, 1),
+                            (9, 9),
+                            (1, 9),
+                            (3, 3),
+                            (3, 4),
+                            (8, 2),
+                            (3, 7),
+                            (2, 8),
+                        ],
+                        "train_configs": [(SAC, 400000) for _ in range(8)],
+                    },
+                    "bg": {
+                        "goals": [(4, 4), (7, 3), (3, 7), (8, 2), (2, 8)],
+                        "train_configs": [(SAC, 400000) for _ in range(5)],
+                    },
                 },
                 "G_0": {
                     "goals": [(4, 4), (7, 3), (3, 7), (8, 2), (2, 8)],
-                    "train_configs": [
-                        (SAC, 400000) for _ in range(5)
-                    ],  # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(SAC, 400000) for _ in range(5)],
                 },
             },
             "L4": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (9, 1),
+                            (9, 9),
+                            (1, 9),
+                            (3, 3),
+                            (3, 4),
+                            (8, 2),
+                            (3, 7),
+                            (2, 8),
+                        ],
+                        "train_configs": [(SAC, 400000) for _ in range(8)],
+                    },
+                    "bg": {
+                        "goals": [(4, 4), (7, 3), (3, 7), (8, 2), (2, 8), (3, 4)],
+                        "train_configs": [(SAC, 400000) for _ in range(6)],
+                    },
                 },
                 "G_0": {
                     "goals": [(4, 4), (7, 3), (3, 7), (8, 2), (2, 8), (3, 4)],
-                    "train_configs": [
-                        (SAC, 400000) for _ in range(6)
-                    ],  # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(SAC, 400000) for _ in range(6)],
                 },
             },
             "L5": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (9, 1),
+                            (9, 9),
+                            (1, 9),
+                            (3, 3),
+                            (3, 4),
+                            (8, 2),
+                            (3, 7),
+                            (2, 8),
+                        ],
+                        "train_configs": [(SAC, 400000) for _ in range(8)],
+                    },
+                    "bg": {
+                        "goals": [
+                            (4, 4),
+                            (7, 3),
+                            (3, 7),
+                            (8, 2),
+                            (2, 8),
+                            (3, 4),
+                            (4, 3),
+                        ],
+                        "train_configs": [(SAC, 400000) for _ in range(7)],
+                    },
                 },
                 "G_0": {
                     "goals": [(4, 4), (7, 3), (3, 7), (8, 2), (2, 8), (3, 4), (4, 3)],
-                    "train_configs": [
-                        (SAC, 400000) for _ in range(7)
-                    ],  # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(SAC, 400000) for _ in range(7)],
                 },
             },
         },
         "PointMaze-ObstaclesEnvDense-11x11": {
             "L1": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (5, 1),
+                            (9, 9),
+                            (1, 5),
+                            (6, 4),
+                            (4, 6),
+                            (6, 6),
+                            (7, 7),
+                        ],
+                        "train_configs": [(SAC, 400000) for _ in range(7)],
+                    },
+                    "bg": {
+                        "goals": [(5, 5), (7, 4), (4, 7)],
+                        "train_configs": [(SAC, 400000) for _ in range(3)],
+                    },
                 },
                 "G_0": {
                     "goals": [(5, 5), (7, 4), (4, 7)],
-                    "train_configs": [
-                        (SAC, 400000) for _ in range(3)
-                    ],  # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(SAC, 400000) for _ in range(3)],
                 },
             },
             "L2": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (5, 1),
+                            (9, 9),
+                            (1, 5),
+                            (6, 4),
+                            (4, 6),
+                            (6, 6),
+                            (7, 7),
+                        ],
+                        "train_configs": [(SAC, 400000) for _ in range(7)],
+                    },
+                    "bg": {
+                        "goals": [(5, 5), (3, 6), (7, 4)],
+                        "train_configs": [(SAC, 400000) for _ in range(3)],
+                    },
                 },
                 "G_0": {
                     "goals": [(5, 5), (3, 6), (7, 4)],
-                    "train_configs": [
-                        (SAC, 400000) for _ in range(3)
-                    ],  # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(SAC, 400000) for _ in range(3)],
                 },
             },
             "L3": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (5, 1),
+                            (9, 9),
+                            (1, 5),
+                            (6, 4),
+                            (4, 6),
+                            (6, 6),
+                            (7, 7),
+                        ],
+                        "train_configs": [(SAC, 400000) for _ in range(7)],
+                    },
+                    "bg": {
+                        "goals": [(5, 5), (3, 6), (7, 4), (4, 7)],
+                        "train_configs": [(SAC, 400000) for _ in range(4)],
+                    },
                 },
                 "G_0": {
                     "goals": [(5, 5), (3, 6), (7, 4), (4, 7)],
-                    "train_configs": [
-                        (SAC, 400000) for _ in range(4)
-                    ],  # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(SAC, 400000) for _ in range(4)],
                 },
             },
             "L4": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (5, 1),
+                            (9, 9),
+                            (1, 5),
+                            (6, 4),
+                            (4, 6),
+                            (6, 6),
+                            (7, 7),
+                        ],
+                        "train_configs": [(SAC, 400000) for _ in range(7)],
+                    },
+                    "bg": {
+                        "goals": [(3, 6), (6, 3), (7, 4), (4, 7), (8, 8)],
+                        "train_configs": [(SAC, 400000) for _ in range(5)],
+                    },
                 },
                 "G_0": {
                     "goals": [(3, 6), (6, 3), (7, 4), (4, 7), (8, 8)],
-                    "train_configs": [
-                        (SAC, 400000) for _ in range(5)
-                    ],  # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(SAC, 400000) for _ in range(5)],
                 },
             },
             "L5": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (5, 1),
+                            (9, 9),
+                            (1, 5),
+                            (6, 4),
+                            (4, 6),
+                            (6, 6),
+                            (7, 7),
+                        ],
+                        "train_configs": [(SAC, 400000) for _ in range(7)],
+                    },
+                    "bg": {
+                        "goals": [(5, 5), (3, 6), (6, 3), (7, 4), (4, 7), (8, 8)],
+                        "train_configs": [(SAC, 400000) for _ in range(6)],
+                    },
                 },
                 "G_0": {
                     "goals": [(5, 5), (3, 6), (6, 3), (7, 4), (4, 7), (8, 8)],
-                    "train_configs": [
-                        (SAC, 400000) for _ in range(6)
-                    ],  # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(SAC, 400000) for _ in range(6)],
                 },
             },
         },
@@ -353,117 +555,137 @@ PROBLEMS = {
     MINIGRID: {
         "MiniGrid-SimpleCrossingS13N4": {
             "L1": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (11, 1),
+                            (11, 11),
+                            (1, 11),
+                            (7, 11),
+                            (8, 1),
+                            (10, 6),
+                            (6, 9),
+                            (11, 3),
+                            (11, 5),
+                        ],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(9)],
+                    },
+                    "bg": {
+                        "goals": [(11, 1), (11, 11), (1, 11)],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(3)],
+                    },
                 },
                 "G_0": {
                     "goals": [(11, 1), (11, 11), (1, 11)],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(3)
-                    ],  # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(QLEARNING, 100000) for _ in range(3)],
                 },
             },
             "L2": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (11, 1),
+                            (11, 11),
+                            (1, 11),
+                            (7, 11),
+                            (8, 1),
+                            (10, 6),
+                            (6, 9),
+                            (11, 3),
+                            (11, 5),
+                        ],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(9)],
+                    },
+                    "bg": {
+                        "goals": [(11, 1), (11, 11), (1, 11), (5, 9)],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(4)],
+                    },
                 },
                 "G_0": {
                     "goals": [(11, 1), (11, 11), (1, 11), (5, 9)],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(4)
-                    ],  # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(QLEARNING, 100000) for _ in range(4)],
                 },
             },
             "L3": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (11, 1),
+                            (11, 11),
+                            (1, 11),
+                            (7, 11),
+                            (8, 1),
+                            (10, 6),
+                            (6, 9),
+                            (11, 3),
+                            (11, 5),
+                        ],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(9)],
+                    },
+                    "bg": {
+                        "goals": [(11, 1), (11, 11), (1, 11), (5, 9), (6, 1)],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(5)],
+                    },
                 },
                 "G_0": {
                     "goals": [(11, 1), (11, 11), (1, 11), (5, 9), (6, 1)],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(5)
-                    ],  # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(QLEARNING, 100000) for _ in range(5)],
                 },
             },
             "L4": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (11, 1),
+                            (11, 11),
+                            (1, 11),
+                            (7, 11),
+                            (8, 1),
+                            (10, 6),
+                            (6, 9),
+                            (11, 3),
+                            (11, 5),
+                        ],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(9)],
+                    },
+                    "bg": {
+                        "goals": [(11, 1), (11, 11), (1, 11), (5, 9), (6, 1), (11, 3)],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(6)],
+                    },
                 },
                 "G_0": {
                     "goals": [(11, 1), (11, 11), (1, 11), (5, 9), (6, 1), (11, 3)],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(6)
-                    ],  # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(QLEARNING, 100000) for _ in range(6)],
                 },
             },
             "L5": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (11, 1),
+                            (11, 11),
+                            (1, 11),
+                            (7, 11),
+                            (8, 1),
+                            (10, 6),
+                            (6, 9),
+                            (11, 3),
+                            (11, 5),
+                        ],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(9)],
+                    },
+                    "bg": {
+                        "goals": [
+                            (11, 1),
+                            (11, 11),
+                            (1, 11),
+                            (5, 9),
+                            (6, 1),
+                            (11, 3),
+                            (11, 5),
+                        ],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(7)],
+                    },
                 },
                 "G_0": {
                     "goals": [
@@ -475,81 +697,137 @@ PROBLEMS = {
                         (11, 3),
                         (11, 5),
                     ],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(7)
-                    ],  # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(QLEARNING, 100000) for _ in range(7)],
                 },
             },
         },
         "MiniGrid-LavaCrossingS9N2": {
             "L1": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (7, 7),
+                            (1, 7),
+                            (7, 1),
+                            (1, 3),
+                            (2, 5),
+                            (5, 2),
+                            (6, 5),
+                        ],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(7)],
+                    },
+                    "bg": {
+                        "goals": [(1, 3), (6, 5), (4, 7)],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(3)],
+                    },
                 },
                 "G_0": {
                     "goals": [(1, 3), (6, 5), (4, 7)],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(3)
-                    ],  # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(QLEARNING, 100000) for _ in range(3)],
                 },
             },
             "L2": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (7, 7),
+                            (1, 7),
+                            (7, 1),
+                            (1, 3),
+                            (2, 5),
+                            (5, 2),
+                            (6, 5),
+                        ],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(7)],
+                    },
+                    "bg": {
+                        "goals": [(1, 3), (6, 5), (4, 7), (2, 5)],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(4)],
+                    },
                 },
                 "G_0": {
                     "goals": [(1, 3), (6, 5), (4, 7), (2, 5)],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(4)
-                    ],  # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(QLEARNING, 100000) for _ in range(4)],
                 },
             },
             "L3": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (7, 7),
+                            (1, 7),
+                            (7, 1),
+                            (1, 3),
+                            (2, 5),
+                            (5, 2),
+                            (6, 5),
+                        ],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(7)],
+                    },
+                    "bg": {
+                        "goals": [(1, 3), (6, 5), (4, 7), (2, 5), (5, 2)],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(5)],
+                    },
                 },
                 "G_0": {
                     "goals": [(1, 3), (6, 5), (4, 7), (2, 5), (5, 2)],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(5)
-                    ],  # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(QLEARNING, 100000) for _ in range(5)],
                 },
             },
             "L4": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (7, 7),
+                            (1, 7),
+                            (7, 1),
+                            (1, 3),
+                            (2, 5),
+                            (5, 2),
+                            (6, 5),
+                        ],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(7)],
+                    },
+                    "bg": {
+                        "goals": [(1, 3), (6, 5), (4, 7), (2, 5), (5, 2), (4, 5)],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(6)],
+                    },
                 },
                 "G_0": {
                     "goals": [(1, 3), (6, 5), (4, 7), (2, 5), (5, 2), (4, 5)],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(6)
-                    ],  # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(QLEARNING, 100000) for _ in range(6)],
                 },
             },
             "L5": {
-                "base": {
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (7, 7),
+                            (1, 7),
+                            (7, 1),
+                            (1, 3),
+                            (2, 5),
+                            (5, 2),
+                            (6, 5),
+                        ],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(7)],
+                    },
+                    "bg": {
+                        "goals": [
+                            (1, 3),
+                            (6, 5),
+                            (4, 7),
+                            (2, 5),
+                            (5, 2),
+                            (4, 5),
+                            (1, 1),
+                        ],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(7)],
+                    },
                 },
                 "G_0": {
                     "goals": [(1, 3), (6, 5), (4, 7), (2, 5), (5, 2), (4, 5), (1, 1)],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(7)
-                    ],  # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(QLEARNING, 100000) for _ in range(7)],
                 },
             },
         },
|