gr-libs 0.1.5__py3-none-any.whl → 0.1.7.post0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1244 @@
1
+ import numpy as np
2
+ from stable_baselines3 import PPO, SAC, TD3
3
+ from gr_libs.environment.environment import MINIGRID, PARKING, PANDA, POINT_MAZE, QLEARNING, PandaProperty
4
+
5
+
6
+ PROBLEMS = {
7
+ PARKING: {
8
+ "Parking-S-14-PC-": {
9
+ "L1": {
10
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
11
+ "goals": [i for i in range(1,21)],
12
+ "train_configs": [(PPO, 200000)]
13
+ },
14
+ "G_0": {
15
+ "goals": ["1", "11", "21"],
16
+ "train_configs": [(SAC, 200000) for _ in range(3)] # algorithms that use GC agent to generate sequences don't use this
17
+ }
18
+ },
19
+ "L2": {
20
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
21
+ "goals": [i for i in range(1,21)],
22
+ "train_configs": [(PPO, 200000)]
23
+ },
24
+ "G_0": {
25
+ "goals": ["1", "8", "14", "21"],
26
+ "train_configs": [(SAC, 200000) for _ in range(4)]
27
+ }
28
+ },
29
+ "L3": {
30
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
31
+ "goals": [i for i in range(1,21)],
32
+ "train_configs": [(PPO, 200000)]
33
+ },
34
+ "G_0": {
35
+ "goals": ["1", "8", "11", "18"],
36
+ "train_configs": [(SAC, 200000) for _ in range(4)]
37
+ }
38
+ },
39
+ "L4": {
40
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
41
+ "goals": [i for i in range(1,21)],
42
+ "train_configs": [(PPO, 200000)]
43
+ },
44
+ "G_0": {
45
+ "goals": ["4", "8", "11", "14", "18"],
46
+ "train_configs": [(SAC, 200000) for _ in range(5)]
47
+ }
48
+ },
49
+ "L5": {
50
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
51
+ "goals": [i for i in range(1,21)],
52
+ "train_configs": [(PPO, 200000)]
53
+ },
54
+ "G_0": {
55
+ "goals": ["1", "4", "8", "11", "14", "18", "21"],
56
+ "train_configs": [(SAC, 200000) for _ in range(7)]
57
+ }
58
+ }
59
+ }
60
+ }, # PARKING
61
+ PANDA: {
62
+ "PandaMyReachDense": {
63
+ "L1": {
64
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
65
+ "goals": [np.array([PandaProperty.sample_goal()]) for _ in range(1,30)],
66
+ "train_configs": [(SAC, 800000)]
67
+ },
68
+ "G_0": {
69
+ "goals": [np.array([[-0.1, -0.1, 0.1]]), np.array([[-0.1, 0.1, 0.1]]), np.array([[0.2, 0.2, 0.1]])],
70
+                 "train_configs": [(SAC, 100000), (SAC, 200000), (SAC, 300000)] # algorithms that use GC agent to generate sequences don't use this; first budget assumed (the released file reads 00000, i.e. 0 timesteps)
71
+ }
72
+ },
73
+ "L2": {
74
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
75
+ "goals": [np.array([PandaProperty.sample_goal()]) for _ in range(1,30)],
76
+ "train_configs": [(SAC, 800000)]
77
+ },
78
+ "G_0": {
79
+ "goals": [np.array([[-0.5, -0.5, 0.1]]), np.array([[-0.5, 0.2, 0.1]]), np.array([[-0.1, 0.1, 0.1]]), np.array([[0.1, -0.1, 0.1]])],
80
+ "train_configs": [(SAC, 400000), (SAC, 400000), (SAC, 400000), (SAC, 400000)] # algorithms that use GC agent to generate sequences don't use this
81
+ }
82
+ },
83
+ "L3": {
84
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
85
+ "goals": [np.array([PandaProperty.sample_goal()]) for _ in range(1,30)],
86
+ "train_configs": [(SAC, 800000)]
87
+ },
88
+ "G_0": {
89
+ "goals": [np.array([[-0.5, -0.5, 0.1]]), np.array([[-0.1, -0.1, 0.1]]), np.array([[-0.5, 0.2, 0.1]]), np.array([[-0.1, 0.1, 0.1]]), np.array([[0.2, -0.2, 0.1]]), np.array([[0.2, 0.2, 0.1]])],
90
+ "train_configs": [(SAC, 400000) for _ in range(6)] # algorithms that use GC agent to generate sequences don't use this
91
+ }
92
+ },
93
+ "L4": {
94
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
95
+ "goals": [np.array([PandaProperty.sample_goal()]) for _ in range(1,30)],
96
+ "train_configs": [(SAC, 800000)]
97
+ },
98
+ "G_0": {
99
+ "goals": [np.array([[-0.3, -0.3, 0.1]]), np.array([[-0.1, -0.1, 0.1]]), np.array([[-0.3, 0.2, 0.1]]), np.array([[-0.1, 0.1, 0.1]]), np.array([[0.1, -0.1, 0.1]]), np.array([[0.2, 0.2, 0.1]])],
100
+ "train_configs": [(SAC, 400000) for _ in range(6)] # algorithms that use GC agent to generate sequences don't use this
101
+ }
102
+ },
103
+ "L5": {
104
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
105
+ "goals": [np.array([PandaProperty.sample_goal()]) for _ in range(1,30)],
106
+ "train_configs": [(SAC, 800000)]
107
+ },
108
+ "G_0": {
109
+ "goals": [np.array([[-0.5, -0.5, 0.1]]), np.array([[-0.3, -0.3, 0.1]]), np.array([[-0.1, -0.1, 0.1]]), np.array([[-0.5, 0.2, 0.1]]), np.array([[-0.3, 0.2, 0.1]]), np.array([[-0.1, 0.1, 0.1]]), np.array([[0.2, -0.2, 0.1]]), np.array([[0.1, -0.1, 0.1]]), np.array([[0.2, 0.2, 0.1]])],
110
+ "train_configs": [(SAC, 400000) for _ in range(9)] # algorithms that use GC agent to generate sequences don't use this
111
+ }
112
+ }
113
+ }
114
+ }, # PANDA
115
+ POINT_MAZE: {
116
+ "PointMaze-FourRoomsEnvDense-11x11": {
117
+ "L1": {
118
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
119
+ "goals": [(9,1), (9,9), (1,9), (3,3), (3,4), (8,2), (3,7), (2,8)],
120
+ "train_configs": [(SAC, 400000) for _ in range(8)]
121
+ },
122
+ "G_0": {
123
+ "goals": [(4,4), (7,3), (3,7)],
124
+ "train_configs": [(SAC, 400000) for _ in range(3)] # algorithms that use GC agent to generate sequences don't use this
125
+ }
126
+ },
127
+ "L2": {
128
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
129
+ "goals": [(9,1), (9,9), (1,9), (3,3), (3,4), (8,2), (3,7), (2,8)],
130
+ "train_configs": [(SAC, 400000) for _ in range(8)]
131
+ },
132
+ "G_0": {
133
+ "goals": [(4,4), (7,3), (3,7), (8,2)],
134
+ "train_configs": [(SAC, 400000) for _ in range(4)] # algorithms that use GC agent to generate sequences don't use this
135
+ }
136
+ },
137
+ "L3": {
138
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
139
+ "goals": [(9,1), (9,9), (1,9), (3,3), (3,4), (8,2), (3,7), (2,8)],
140
+ "train_configs": [(SAC, 400000) for _ in range(8)]
141
+ },
142
+ "G_0": {
143
+ "goals": [(4,4), (7,3), (3,7), (8,2), (2,8)],
144
+ "train_configs": [(SAC, 400000) for _ in range(5)] # algorithms that use GC agent to generate sequences don't use this
145
+ }
146
+ },
147
+ "L4": {
148
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
149
+ "goals": [(9,1), (9,9), (1,9), (3,3), (3,4), (8,2), (3,7), (2,8)],
150
+ "train_configs": [(SAC, 400000) for _ in range(8)]
151
+ },
152
+ "G_0": {
153
+ "goals": [(4,4), (7,3), (3,7), (8,2), (2,8), (3,4)],
154
+ "train_configs": [(SAC, 400000) for _ in range(6)] # algorithms that use GC agent to generate sequences don't use this
155
+ }
156
+ },
157
+ "L5": {
158
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
159
+ "goals": [(9,1), (9,9), (1,9), (3,3), (3,4), (8,2), (3,7), (2,8)],
160
+ "train_configs": [(SAC, 400000) for _ in range(8)]
161
+ },
162
+ "G_0": {
163
+ "goals": [(4,4), (7,3), (3,7), (8,2), (2,8), (3,4), (4,3)],
164
+ "train_configs": [(SAC, 400000) for _ in range(7)] # algorithms that use GC agent to generate sequences don't use this
165
+ }
166
+ }
167
+ },
168
+ "PointMaze-ObstaclesEnvDense-11x11": {
169
+ "L1": {
170
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
171
+ "goals": [(5,1), (9,9), (1,5), (6,4), (4,6), (6,6), (7,7)],
172
+ "train_configs": [(SAC, 400000) for _ in range(7)]
173
+ },
174
+ "G_0": {
175
+ "goals": [(5,5), (7,4), (4,7)],
176
+ "train_configs": [(SAC, 400000) for _ in range(3)] # algorithms that use GC agent to generate sequences don't use this
177
+ }
178
+ },
179
+ "L2": {
180
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
181
+ "goals": [(5,1), (9,9), (1,5), (6,4), (4,6), (6,6), (7,7)],
182
+ "train_configs": [(SAC, 400000) for _ in range(7)]
183
+ },
184
+ "G_0": {
185
+ "goals": [(5,5), (3,6), (7,4)],
186
+ "train_configs": [(SAC, 400000) for _ in range(3)] # algorithms that use GC agent to generate sequences don't use this
187
+ }
188
+ },
189
+ "L3": {
190
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
191
+ "goals": [(5,1), (9,9), (1,5), (6,4), (4,6), (6,6), (7,7)],
192
+ "train_configs": [(SAC, 400000) for _ in range(7)]
193
+ },
194
+ "G_0": {
195
+ "goals": [(5,5), (3,6), (7,4), (4,7)],
196
+ "train_configs": [(SAC, 400000) for _ in range(4)] # algorithms that use GC agent to generate sequences don't use this
197
+ }
198
+ },
199
+ "L4": {
200
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
201
+ "goals": [(5,1), (9,9), (1,5), (6,4), (4,6), (6,6), (7,7)],
202
+ "train_configs": [(SAC, 400000) for _ in range(7)]
203
+ },
204
+ "G_0": {
205
+ "goals": [(3,6), (6,3), (7,4), (4,7), (8,8)],
206
+ "train_configs": [(SAC, 400000) for _ in range(5)] # algorithms that use GC agent to generate sequences don't use this
207
+ }
208
+ },
209
+ "L5": {
210
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
211
+ "goals": [(5,1), (9,9), (1,5), (6,4), (4,6), (6,6), (7,7)],
212
+ "train_configs": [(SAC, 400000) for _ in range(7)]
213
+ },
214
+ "G_0": {
215
+ "goals": [(5,5), (3,6), (6,3), (7,4), (4,7), (8,8)],
216
+ "train_configs": [(SAC, 400000) for _ in range(6)] # algorithms that use GC agent to generate sequences don't use this
217
+ }
218
+ }
219
+ }
220
+ }, # POINT_MAZE
221
+ MINIGRID: {
222
+ "MiniGrid-SimpleCrossingS13N4": {
223
+ "L1": {
224
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
225
+ "goals": [(11,1), (11,11), (1,11), (7,11), (8,1), (10,6), (6,9), (11,3), (11,5)],
226
+ "train_configs": [(QLEARNING, 100000) for _ in range(9)] # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
227
+ },
228
+ "G_0": {
229
+ "goals": [(11,1), (11,11), (1,11)],
230
+ "train_configs": [(QLEARNING, 100000) for _ in range(3)] # algorithms that use GC agent to generate sequences don't use this
231
+ }
232
+ },
233
+ "L2": {
234
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
235
+ "goals": [(11,1), (11,11), (1,11), (7,11), (8,1), (10,6), (6,9), (11,3), (11,5)],
236
+ "train_configs": [(QLEARNING, 100000) for _ in range(9)] # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
237
+ },
238
+ "G_0": {
239
+ "goals": [(11,1), (11,11), (1,11), (5,9)],
240
+ "train_configs": [(QLEARNING, 100000) for _ in range(4)] # algorithms that use GC agent to generate sequences don't use this
241
+ }
242
+ },
243
+ "L3": {
244
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
245
+ "goals": [(11,1), (11,11), (1,11), (7,11), (8,1), (10,6), (6,9), (11,3), (11,5)],
246
+ "train_configs": [(QLEARNING, 100000) for _ in range(9)] # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
247
+ },
248
+ "G_0": {
249
+ "goals": [(11,1), (11,11), (1,11), (5,9), (6,1)],
250
+ "train_configs": [(QLEARNING, 100000) for _ in range(5)] # algorithms that use GC agent to generate sequences don't use this
251
+ }
252
+ },
253
+ "L4": {
254
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
255
+ "goals": [(11,1), (11,11), (1,11), (7,11), (8,1), (10,6), (6,9), (11,3), (11,5)],
256
+ "train_configs": [(QLEARNING, 100000) for _ in range(9)] # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
257
+ },
258
+ "G_0": {
259
+ "goals": [(11,1), (11,11), (1,11), (5,9), (6,1), (11,3)],
260
+ "train_configs": [(QLEARNING, 100000) for _ in range(6)] # algorithms that use GC agent to generate sequences don't use this
261
+ }
262
+ },
263
+ "L5": {
264
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
265
+ "goals": [(11,1), (11,11), (1,11), (7,11), (8,1), (10,6), (6,9), (11,3), (11,5)],
266
+ "train_configs": [(QLEARNING, 100000) for _ in range(9)] # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
267
+ },
268
+ "G_0": {
269
+ "goals": [(11,1), (11,11), (1,11), (5,9), (6,1), (11,3), (11,5)],
270
+ "train_configs": [(QLEARNING, 100000) for _ in range(7)] # algorithms that use GC agent to generate sequences don't use this
271
+ }
272
+ }
273
+ },
274
+ "MiniGrid-LavaCrossingS9N2": {
275
+ "L1": {
276
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
277
+ "goals": [(7,7), (1,7), (7,1), (1,3), (2,5), (5,2), (6,5)],
278
+ "train_configs": [(QLEARNING, 100000) for _ in range(7)] # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
279
+ },
280
+ "G_0": {
281
+ "goals": [(1,3), (6,5), (4,7)],
282
+ "train_configs": [(QLEARNING, 100000) for _ in range(3)] # algorithms that use GC agent to generate sequences don't use this
283
+ }
284
+ },
285
+ "L2": {
286
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
287
+ "goals": [(7,7), (1,7), (7,1), (1,3), (2,5), (5,2), (6,5)],
288
+ "train_configs": [(QLEARNING, 100000) for _ in range(7)] # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
289
+ },
290
+ "G_0": {
291
+ "goals": [(1,3), (6,5), (4,7), (2,5)],
292
+ "train_configs": [(QLEARNING, 100000) for _ in range(4)] # algorithms that use GC agent to generate sequences don't use this
293
+ }
294
+ },
295
+ "L3": {
296
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
297
+ "goals": [(7,7), (1,7), (7,1), (1,3), (2,5), (5,2), (6,5)],
298
+ "train_configs": [(QLEARNING, 100000) for _ in range(7)] # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
299
+ },
300
+ "G_0": {
301
+ "goals": [(1,3), (6,5), (4,7), (2,5), (5,2)],
302
+ "train_configs": [(QLEARNING, 100000) for _ in range(5)] # algorithms that use GC agent to generate sequences don't use this
303
+ }
304
+ },
305
+ "L4": {
306
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
307
+ "goals": [(7,7), (1,7), (7,1), (1,3), (2,5), (5,2), (6,5)],
308
+ "train_configs": [(QLEARNING, 100000) for _ in range(7)] # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
309
+ },
310
+ "G_0": {
311
+ "goals": [(1,3), (6,5), (4,7), (2,5), (5,2), (4,5)],
312
+ "train_configs": [(QLEARNING, 100000) for _ in range(6)] # algorithms that use GC agent to generate sequences don't use this
313
+ }
314
+ },
315
+ "L5": {
316
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
317
+ "goals": [(7,7), (1,7), (7,1), (1,3), (2,5), (5,2), (6,5)],
318
+ "train_configs": [(QLEARNING, 100000) for _ in range(7)] # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
319
+ },
320
+ "G_0": {
321
+ "goals": [(1,3), (6,5), (4,7), (2,5), (5,2), (4,5), (1,1)],
322
+ "train_configs": [(QLEARNING, 100000) for _ in range(7)] # algorithms that use GC agent to generate sequences don't use this
323
+ }
324
+ }
325
+ }
326
+ }, # MINIGRID
327
+ } # PROBLEMS
328
+
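For reference, every entry in the PROBLEMS dict above follows the same three-level layout: environment constant → problem name → difficulty level ("L1" through "L5"). Each level holds a "base" block (sampled goals plus the train config(s) for the GC agent) and a "G_0" block (the fixed goal set, whose per-goal train configs are not used by algorithms that generate sequences with the GC agent). A minimal sketch of walking that layout, assuming only that the PROBLEMS dict defined above is in scope (the helper name is illustrative, not part of the package):

def summarize_problems(problems):
    # Walk environment -> problem -> level and report goal/config counts.
    for env, env_problems in problems.items():
        for problem_name, levels in env_problems.items():
            for level, specs in levels.items():
                base, g0 = specs["base"], specs["G_0"]
                print(f"{env}/{problem_name}/{level}: "
                      f"{len(base['goals'])} base goals, "
                      f"{len(g0['goals'])} G_0 goals, "
                      f"{len(g0['train_configs'])} G_0 train configs")

summarize_problems(PROBLEMS)  # e.g. Parking-S-14-PC- L1 reports 20 base goals, 3 G_0 goals, 3 G_0 train configs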
329
+ for i, perc in enumerate([0.3, 0.5, 0.7, 0.9, 1]):
330
+ for j, cons in enumerate([True, False]):
331
+
332
+ ### PARKING ###
333
+
334
+ PROBLEMS[PARKING]["Parking-S-14-PC-"]["L1"].update(
335
+ {f"I_0_{i*6+j*3}": {
336
+ "goal": "1",
337
+ "train_config": (TD3, 200000),
338
+ "consecutive": cons,
339
+ "percentage": perc
340
+ },
341
+ f"I_0_{i*6+j*3+1}": {
342
+ "goal": "11",
343
+ "train_config": (TD3, 200000),
344
+ "consecutive": cons,
345
+ "percentage": perc
346
+ },
347
+ f"I_0_{i*6+j*3+2}": {
348
+ "goal": "21",
349
+ "train_config": (TD3, 300000),
350
+ "consecutive": cons,
351
+ "percentage": perc
352
+ }})
353
+ PROBLEMS[PARKING]["Parking-S-14-PC-"]["L2"].update(
354
+ {f"I_0_{i*8+j*4}": {
355
+ "goal": "1",
356
+ "train_config": (TD3, 200000),
357
+ "consecutive": cons,
358
+ "percentage": perc
359
+ },
360
+ f"I_0_{i*8+j*4+1}": {
361
+ "goal": "8",
362
+ "train_config": (TD3, 200000),
363
+ "consecutive": cons,
364
+ "percentage": perc
365
+ },
366
+ f"I_0_{i*8+j*4+2}": {
367
+ "goal": "14",
368
+ "train_config": (TD3, 400000),
369
+ "consecutive": cons,
370
+ "percentage": perc
371
+ },
372
+ f"I_0_{i*8+j*4+3}": {
373
+ "goal": "21",
374
+ "train_config": (TD3, 300000),
375
+ "consecutive": cons,
376
+ "percentage": perc
377
+ }})
378
+ PROBLEMS[PARKING]["Parking-S-14-PC-"]["L3"].update(
379
+ {f"I_0_{i*8+j*4}": {
380
+ "goal": "1",
381
+ "train_config": (TD3, 200000),
382
+ "consecutive": cons,
383
+ "percentage": perc
384
+ },
385
+ f"I_0_{i*8+j*4+1}": {
386
+ "goal": "8",
387
+ "train_config": (TD3, 200000),
388
+ "consecutive": cons,
389
+ "percentage": perc
390
+ },
391
+ f"I_0_{i*8+j*4+2}": {
392
+ "goal": "11",
393
+ "train_config": (TD3, 400000),
394
+ "consecutive": cons,
395
+ "percentage": perc
396
+ },
397
+ f"I_0_{i*8+j*4+3}": {
398
+ "goal": "18",
399
+ "train_config": (TD3, 300000),
400
+ "consecutive": cons,
401
+ "percentage": perc
402
+ }})
403
+ PROBLEMS[PARKING]["Parking-S-14-PC-"]["L4"].update(
404
+ {f"I_0_{i*10+j*5}": {
405
+ "goal": "4",
406
+ "train_config": (TD3, 200000),
407
+ "consecutive": cons,
408
+ "percentage": perc
409
+ },
410
+ f"I_0_{i*10+j*5+1}": {
411
+ "goal": "8",
412
+ "train_config": (TD3, 200000),
413
+ "consecutive": cons,
414
+ "percentage": perc
415
+ },
416
+ f"I_0_{i*10+j*5+2}": {
417
+ "goal": "11",
418
+ "train_config": (TD3, 400000),
419
+ "consecutive": cons,
420
+ "percentage": perc
421
+ },
422
+ f"I_0_{i*10+j*5+3}": {
423
+ "goal": "14",
424
+ "train_config": (TD3, 300000),
425
+ "consecutive": cons,
426
+ "percentage": perc
427
+ },
428
+ f"I_0_{i*10+j*5+4}": {
429
+ "goal": "18",
430
+ "train_config": (TD3, 300000),
431
+ "consecutive": cons,
432
+ "percentage": perc
433
+ }})
434
+ PROBLEMS[PARKING]["Parking-S-14-PC-"]["L5"].update(
435
+ {f"I_0_{i*14+j*7}": {
436
+ "goal": "1",
437
+ "train_config": (TD3, 200000),
438
+ "consecutive": cons,
439
+ "percentage": perc
440
+ },
441
+ f"I_0_{i*14+j*7+1}": {
442
+ "goal": "4",
443
+ "train_config": (TD3, 200000),
444
+ "consecutive": cons,
445
+ "percentage": perc
446
+ },
447
+ f"I_0_{i*14+j*7+2}": {
448
+ "goal": "8",
449
+ "train_config": (TD3, 400000),
450
+ "consecutive": cons,
451
+ "percentage": perc
452
+ },
453
+ f"I_0_{i*14+j*7+3}": {
454
+ "goal": "11",
455
+ "train_config": (TD3, 300000),
456
+ "consecutive": cons,
457
+ "percentage": perc
458
+ },
459
+ f"I_0_{i*14+j*7+4}": {
460
+ "goal": "14",
461
+ "train_config": (TD3, 300000),
462
+ "consecutive": cons,
463
+ "percentage": perc
464
+ },
465
+ f"I_0_{i*14+j*7+5}": {
466
+ "goal": "18",
467
+ "train_config": (TD3, 300000),
468
+ "consecutive": cons,
469
+ "percentage": perc
470
+ },
471
+ f"I_0_{i*14+j*7+6}": {
472
+ "goal": "21",
473
+ "train_config": (TD3, 300000),
474
+ "consecutive": cons,
475
+ "percentage": perc
476
+ }})
477
+
478
+ ### PANDA ###
479
+
480
+ PROBLEMS[PANDA]["PandaMyReachDense"]["L1"].update(
481
+ {f"I_0_{i*6+j*3}": {
482
+ "goal": np.array([[-0.1, -0.1, 0.1]]),
483
+ "train_config": (PPO, 200000),
484
+ "consecutive": cons,
485
+ "percentage": perc
486
+ },
487
+ f"I_0_{i*6+j*3+1}": {
488
+ "goal": np.array([[-0.1, 0.1, 0.1]]),
489
+ "train_config": (PPO, 200000),
490
+ "consecutive": cons,
491
+ "percentage": perc
492
+ },
493
+ f"I_0_{i*6+j*3+2}": {
494
+ "goal": np.array([[0.2, 0.2, 0.1]]),
495
+ "train_config": (PPO, 200000),
496
+ "consecutive": cons,
497
+ "percentage": perc
498
+ }})
499
+ PROBLEMS[PANDA]["PandaMyReachDense"]["L2"].update(
500
+ {f"I_0_{i*8+j*4}": {
501
+ "goal": np.array([[-0.5, -0.5, 0.1]]),
502
+ "train_config": (PPO, 200000),
503
+ "consecutive": cons,
504
+ "percentage": perc
505
+ },
506
+ f"I_0_{i*8+j*4+1}": {
507
+ "goal": np.array([[-0.5, 0.2, 0.1]]),
508
+ "train_config": (PPO, 200000),
509
+ "consecutive": cons,
510
+ "percentage": perc
511
+ },
512
+ f"I_0_{i*8+j*4+2}": {
513
+ "goal": np.array([[-0.1, 0.1, 0.1]]),
514
+ "train_config": (PPO, 200000),
515
+ "consecutive": cons,
516
+ "percentage": perc
517
+ },
518
+ f"I_0_{i*8+j*4+3}": {
519
+ "goal": np.array([[0.1, -0.1, 0.1]]),
520
+ "train_config": (PPO, 200000),
521
+ "consecutive": cons,
522
+ "percentage": perc
523
+ }})
524
+ PROBLEMS[PANDA]["PandaMyReachDense"]["L3"].update(
525
+ {f"I_0_{i*12+j*6}": {
526
+ "goal": np.array([[-0.5, -0.5, 0.1]]),
527
+ "train_config": (PPO, 200000),
528
+ "consecutive": cons,
529
+ "percentage": perc
530
+ },
531
+ f"I_0_{i*12+j*6+1}": {
532
+ "goal": np.array([[-0.1, -0.1, 0.1]]),
533
+ "train_config": (PPO, 200000),
534
+ "consecutive": cons,
535
+ "percentage": perc
536
+ },
537
+ f"I_0_{i*12+j*6+2}": {
538
+ "goal": np.array([[-0.5, 0.2, 0.1]]),
539
+ "train_config": (PPO, 200000),
540
+ "consecutive": cons,
541
+ "percentage": perc
542
+ },
543
+ f"I_0_{i*12+j*6+3}": {
544
+ "goal": np.array([[-0.1, 0.1, 0.1]]),
545
+ "train_config": (PPO, 200000),
546
+ "consecutive": cons,
547
+ "percentage": perc
548
+ },
549
+ f"I_0_{i*12+j*6+4}": {
550
+ "goal": np.array([[0.2, -0.2, 0.1]]),
551
+ "train_config": (PPO, 200000),
552
+ "consecutive": cons,
553
+ "percentage": perc
554
+ },
555
+ f"I_0_{i*12+j*6+5}": {
556
+ "goal": np.array([[0.2, 0.2, 0.1]]),
557
+ "train_config": (PPO, 200000),
558
+ "consecutive": cons,
559
+ "percentage": perc
560
+ }})
561
+ PROBLEMS[PANDA]["PandaMyReachDense"]["L4"].update(
562
+ {f"I_0_{i*12+j*6}": {
563
+ "goal": np.array([[-0.3, -0.3, 0.1]]),
564
+ "train_config": (SAC, 200000),
565
+ "consecutive": cons,
566
+ "percentage": perc
567
+ },
568
+ f"I_0_{i*12+j*6+1}": {
569
+ "goal": np.array([[-0.1, -0.1, 0.1]]),
570
+ "train_config": (PPO, 200000),
571
+ "consecutive": cons,
572
+ "percentage": perc
573
+ },
574
+ f"I_0_{i*12+j*6+2}": {
575
+ "goal": np.array([[-0.3, 0.2, 0.1]]),
576
+ "train_config": (PPO, 200000),
577
+ "consecutive": cons,
578
+ "percentage": perc
579
+ },
580
+ f"I_0_{i*12+j*6+3}": {
581
+ "goal": np.array([[-0.1, 0.1, 0.1]]),
582
+ "train_config": (PPO, 200000),
583
+ "consecutive": cons,
584
+ "percentage": perc
585
+ },
586
+ f"I_0_{i*12+j*6+4}": {
587
+ "goal": np.array([[0.1, -0.1, 0.1]]),
588
+ "train_config": (PPO, 200000),
589
+ "consecutive": cons,
590
+ "percentage": perc
591
+ },
592
+ f"I_0_{i*12+j*6+5}": {
593
+ "goal": np.array([[0.2, 0.2, 0.1]]),
594
+ "train_config": (PPO, 200000),
595
+ "consecutive": cons,
596
+ "percentage": perc
597
+ }})
598
+ PROBLEMS[PANDA]["PandaMyReachDense"]["L5"].update(
599
+ {f"I_0_{i*18+j*9}": {
600
+ "goal": np.array([[-0.5, -0.5, 0.1]]),
601
+ "train_config": (PPO, 200000),
602
+ "consecutive": cons,
603
+ "percentage": perc
604
+ },
605
+ f"I_0_{i*18+j*9+1}": {
606
+ "goal": np.array([[-0.3, -0.3, 0.1]]),
607
+ "train_config": (SAC, 200000),
608
+ "consecutive": cons,
609
+ "percentage": perc
610
+ },
611
+ f"I_0_{i*18+j*9+2}": {
612
+ "goal": np.array([[-0.1, -0.1, 0.1]]),
613
+ "train_config": (PPO, 200000),
614
+ "consecutive": cons,
615
+ "percentage": perc
616
+ },
617
+ f"I_0_{i*18+j*9+3}": {
618
+ "goal": np.array([[-0.5, 0.2, 0.1]]),
619
+ "train_config": (PPO, 200000),
620
+ "consecutive": cons,
621
+ "percentage": perc
622
+ },
623
+ f"I_0_{i*18+j*9+4}": {
624
+ "goal": np.array([[-0.3, 0.2, 0.1]]),
625
+ "train_config": (PPO, 200000),
626
+ "consecutive": cons,
627
+ "percentage": perc
628
+ },
629
+ f"I_0_{i*18+j*9+5}": {
630
+ "goal": np.array([[-0.1, 0.1, 0.1]]),
631
+ "train_config": (PPO, 200000),
632
+ "consecutive": cons,
633
+ "percentage": perc
634
+ },
635
+ f"I_0_{i*18+j*9+6}": {
636
+ "goal": np.array([[0.2, -0.2, 0.1]]),
637
+ "train_config": (PPO, 200000),
638
+ "consecutive": cons,
639
+ "percentage": perc
640
+ },
641
+ f"I_0_{i*18+j*9+7}": {
642
+ "goal": np.array([[0.1, -0.1, 0.1]]),
643
+ "train_config": (PPO, 200000),
644
+ "consecutive": cons,
645
+ "percentage": perc
646
+ },
647
+ f"I_0_{i*18+j*9+8}": {
648
+ "goal": np.array([[0.2, 0.2, 0.1]]),
649
+ "train_config": (PPO, 200000),
650
+ "consecutive": cons,
651
+ "percentage": perc
652
+ }})
653
+
654
+ ### POINT_MAZE ###
655
+
656
+ PROBLEMS[POINT_MAZE]["PointMaze-FourRoomsEnvDense-11x11"]["L1"].update( # TODO the existing working 9x9 is not Dense. need to duplicate it for the dense one
657
+ {f"I_0_{i*6+j*3}": {
658
+ "goal": (4,4),
659
+ "train_config": (TD3, 400000),
660
+ "consecutive": cons,
661
+ "percentage": perc
662
+ },
663
+ f"I_0_{i*6+j*3+1}": {
664
+ "goal": (7,3),
665
+ "train_config": (TD3, 400000),
666
+ "consecutive": cons,
667
+ "percentage": perc
668
+ },
669
+ f"I_0_{i*6+j*3+2}": {
670
+ "goal": (3,7),
671
+ "train_config": (TD3, 400000),
672
+ "consecutive": cons,
673
+ "percentage": perc
674
+ }})
675
+ PROBLEMS[POINT_MAZE]["PointMaze-FourRoomsEnvDense-11x11"]["L2"].update(
676
+ {f"I_0_{i*8+j*4}": {
677
+ "goal": (4,4),
678
+ "train_config": (TD3, 400000),
679
+ "consecutive": cons,
680
+ "percentage": perc
681
+ },
682
+ f"I_0_{i*8+j*4+1}": {
683
+ "goal": (7,3),
684
+ "train_config": (TD3, 400000),
685
+ "consecutive": cons,
686
+ "percentage": perc
687
+ },
688
+ f"I_0_{i*8+j*4+2}": {
689
+ "goal": (3,7),
690
+ "train_config": (TD3, 400000),
691
+ "consecutive": cons,
692
+ "percentage": perc
693
+ },
694
+ f"I_0_{i*8+j*4+3}": {
695
+ "goal": (8,2),
696
+ "train_config": (TD3, 400000),
697
+ "consecutive": cons,
698
+ "percentage": perc
699
+ }})
700
+ PROBLEMS[POINT_MAZE]["PointMaze-FourRoomsEnvDense-11x11"]["L3"].update(
701
+ {f"I_0_{i*10+j*5}": {
702
+ "goal": (4,4),
703
+ "train_config": (TD3, 400000),
704
+ "consecutive": cons,
705
+ "percentage": perc
706
+ },
707
+ f"I_0_{i*10+j*5+1}": {
708
+ "goal": (7,3),
709
+ "train_config": (TD3, 400000),
710
+ "consecutive": cons,
711
+ "percentage": perc
712
+ },
713
+ f"I_0_{i*10+j*5+2}": {
714
+ "goal": (3,7),
715
+ "train_config": (TD3, 400000),
716
+ "consecutive": cons,
717
+ "percentage": perc
718
+ },
719
+ f"I_0_{i*10+j*5+3}": {
720
+ "goal": (8,2),
721
+ "train_config": (TD3, 400000),
722
+ "consecutive": cons,
723
+ "percentage": perc
724
+ },
725
+ f"I_0_{i*10+j*5+4}": {
726
+ "goal": (2,8),
727
+ "train_config": (TD3, 400000),
728
+ "consecutive": cons,
729
+ "percentage": perc
730
+ }})
731
+ PROBLEMS[POINT_MAZE]["PointMaze-FourRoomsEnvDense-11x11"]["L4"].update(
732
+ {f"I_0_{i*12+j*6}": {
733
+ "goal": (4,4),
734
+ "train_config": (TD3, 400000),
735
+ "consecutive": cons,
736
+ "percentage": perc
737
+ },
738
+ f"I_0_{i*12+j*6+1}": {
739
+ "goal": (7,3),
740
+ "train_config": (TD3, 400000),
741
+ "consecutive": cons,
742
+ "percentage": perc
743
+ },
744
+ f"I_0_{i*12+j*6+2}": {
745
+ "goal": (3,7),
746
+ "train_config": (TD3, 400000),
747
+ "consecutive": cons,
748
+ "percentage": perc
749
+ },
750
+ f"I_0_{i*12+j*6+3}": {
751
+ "goal": (8,2),
752
+ "train_config": (TD3, 400000),
753
+ "consecutive": cons,
754
+ "percentage": perc
755
+ },
756
+ f"I_0_{i*12+j*6+4}": {
757
+ "goal": (2,8),
758
+ "train_config": (TD3, 400000),
759
+ "consecutive": cons,
760
+ "percentage": perc
761
+ },
762
+ f"I_0_{i*12+j*6+5}": {
763
+ "goal": (3,4),
764
+ "train_config": (TD3, 400000),
765
+ "consecutive": cons,
766
+ "percentage": perc
767
+ }})
768
+ PROBLEMS[POINT_MAZE]["PointMaze-FourRoomsEnvDense-11x11"]["L5"].update(
769
+ {f"I_0_{i*14+j*7}": {
770
+ "goal": (4,4),
771
+ "train_config": (TD3, 400000),
772
+ "consecutive": cons,
773
+ "percentage": perc
774
+ },
775
+ f"I_0_{i*14+j*7+1}": {
776
+ "goal": (7,3),
777
+ "train_config": (TD3, 400000),
778
+ "consecutive": cons,
779
+ "percentage": perc
780
+ },
781
+ f"I_0_{i*14+j*7+2}": {
782
+ "goal": (3,7),
783
+ "train_config": (TD3, 400000),
784
+ "consecutive": cons,
785
+ "percentage": perc
786
+ },
787
+ f"I_0_{i*14+j*7+3}": {
788
+ "goal": (8,2),
789
+ "train_config": (TD3, 400000),
790
+ "consecutive": cons,
791
+ "percentage": perc
792
+ },
793
+ f"I_0_{i*14+j*7+4}": {
794
+ "goal": (2,8),
795
+ "train_config": (TD3, 400000),
796
+ "consecutive": cons,
797
+ "percentage": perc
798
+ },
799
+ f"I_0_{i*14+j*7+5}": {
800
+ "goal": (3,4),
801
+ "train_config": (TD3, 400000),
802
+ "consecutive": cons,
803
+ "percentage": perc
804
+ },
805
+ f"I_0_{i*14+j*7+6}": {
806
+ "goal": (4,3),
807
+ "train_config": (TD3, 400000),
808
+ "consecutive": cons,
809
+ "percentage": perc
810
+ }})
811
+
812
+ PROBLEMS[POINT_MAZE]["PointMaze-ObstaclesEnvDense-11x11"]["L1"].update(
813
+ {f"I_0_{i*6+j*3}": {
814
+ "goal": (5,5),
815
+ "train_config": (TD3, 400000),
816
+ "consecutive": cons,
817
+ "percentage": perc
818
+ },
819
+ f"I_0_{i*6+j*3+1}": {
820
+ "goal": (7,4),
821
+ "train_config": (TD3, 400000),
822
+ "consecutive": cons,
823
+ "percentage": perc
824
+ },
825
+ f"I_0_{i*6+j*3+2}": {
826
+ "goal": (4,7),
827
+ "train_config": (TD3, 400000),
828
+ "consecutive": cons,
829
+ "percentage": perc
830
+ }})
831
+ PROBLEMS[POINT_MAZE]["PointMaze-ObstaclesEnvDense-11x11"]["L2"].update(
832
+ {f"I_0_{i*6+j*3}": {
833
+ "goal": (5,5),
834
+ "train_config": (TD3, 400000),
835
+ "consecutive": cons,
836
+ "percentage": perc
837
+ },
838
+ f"I_0_{i*6+j*3+1}": {
839
+ "goal": (3,6),
840
+ "train_config": (TD3, 400000),
841
+ "consecutive": cons,
842
+ "percentage": perc
843
+ },
844
+ f"I_0_{i*6+j*3+2}": {
845
+ "goal": (7,4),
846
+ "train_config": (TD3, 400000),
847
+ "consecutive": cons,
848
+ "percentage": perc
849
+ }})
850
+ PROBLEMS[POINT_MAZE]["PointMaze-ObstaclesEnvDense-11x11"]["L3"].update(
851
+ {f"I_0_{i*8+j*4}": {
852
+ "goal": (5,5),
853
+ "train_config": (TD3, 400000),
854
+ "consecutive": cons,
855
+ "percentage": perc
856
+ },
857
+ f"I_0_{i*8+j*4+1}": {
858
+ "goal": (3,6),
859
+ "train_config": (TD3, 400000),
860
+ "consecutive": cons,
861
+ "percentage": perc
862
+ },
863
+ f"I_0_{i*8+j*4+2}": {
864
+ "goal": (7,4),
865
+ "train_config": (TD3, 400000),
866
+ "consecutive": cons,
867
+ "percentage": perc
868
+ },
869
+ f"I_0_{i*8+j*4+3}": {
870
+ "goal": (4,7),
871
+ "train_config": (TD3, 400000),
872
+ "consecutive": cons,
873
+ "percentage": perc
874
+ }})
875
+ PROBLEMS[POINT_MAZE]["PointMaze-ObstaclesEnvDense-11x11"]["L4"].update(
876
+ {f"I_0_{i*10+j*5}": {
877
+ "goal": (5,5),
878
+ "train_config": (TD3, 400000),
879
+ "consecutive": cons,
880
+ "percentage": perc
881
+ },
882
+ f"I_0_{i*10+j*5+1}": {
883
+ "goal": (3,6),
884
+ "train_config": (TD3, 400000),
885
+ "consecutive": cons,
886
+ "percentage": perc
887
+ },
888
+ f"I_0_{i*10+j*5+2}": {
889
+ "goal": (7,4),
890
+ "train_config": (TD3, 400000),
891
+ "consecutive": cons,
892
+ "percentage": perc
893
+ },
894
+ f"I_0_{i*10+j*5+3}": {
895
+ "goal": (4,7),
896
+ "train_config": (TD3, 400000),
897
+ "consecutive": cons,
898
+ "percentage": perc
899
+ },
900
+ f"I_0_{i*10+j*5+4}": {
901
+ "goal": (8,8),
902
+ "train_config": (TD3, 400000),
903
+ "consecutive": cons,
904
+ "percentage": perc
905
+ }})
906
+ PROBLEMS[POINT_MAZE]["PointMaze-ObstaclesEnvDense-11x11"]["L5"].update(
907
+ {f"I_0_{i*12+j*6}": {
908
+ "goal": (5,5),
909
+ "train_config": (TD3, 400000),
910
+ "consecutive": cons,
911
+ "percentage": perc
912
+ },
913
+ f"I_0_{i*12+j*6+1}": {
914
+ "goal": (3,6),
915
+ "train_config": (TD3, 400000),
916
+ "consecutive": cons,
917
+ "percentage": perc
918
+ },
919
+ f"I_0_{i*12+j*6+2}": {
920
+ "goal": (6,3),
921
+ "train_config": (TD3, 400000),
922
+ "consecutive": cons,
923
+ "percentage": perc
924
+ },
925
+ f"I_0_{i*12+j*6+3}": {
926
+ "goal": (7,4),
927
+ "train_config": (TD3, 400000),
928
+ "consecutive": cons,
929
+ "percentage": perc
930
+ },
931
+ f"I_0_{i*12+j*6+4}": {
932
+ "goal": (4,7),
933
+ "train_config": (TD3, 400000),
934
+ "consecutive": cons,
935
+ "percentage": perc
936
+ },
937
+ f"I_0_{i*12+j*6+5}": {
938
+ "goal": (8,8),
939
+ "train_config": (TD3, 400000),
940
+ "consecutive": cons,
941
+ "percentage": perc
942
+ }})
943
+
944
+ ### MINIGRID ###
945
+
946
+ PROBLEMS[MINIGRID]["MiniGrid-SimpleCrossingS13N4"]["L1"].update(
947
+ {f"I_0_{i*6+j*3}": {
948
+ "goal": (11,1),
949
+ "train_config": (QLEARNING, 100000),
950
+ "consecutive": cons,
951
+ "percentage": perc
952
+ },
953
+ f"I_0_{i*6+j*3+1}": {
954
+ "goal": (1,11),
955
+ "train_config": (QLEARNING, 100000),
956
+ "consecutive": cons,
957
+ "percentage": perc
958
+ },
959
+ f"I_0_{i*6+j*3+2}": {
960
+ "goal": (11,11),
961
+ "train_config": (QLEARNING, 100000),
962
+ "consecutive": cons,
963
+ "percentage": perc
964
+ }})
965
+ PROBLEMS[MINIGRID]["MiniGrid-SimpleCrossingS13N4"]["L2"].update(
966
+ {f"I_0_{i*8+j*4}": {
967
+ "goal": (11,1),
968
+ "train_config": (QLEARNING, 100000),
969
+ "consecutive": cons,
970
+ "percentage": perc
971
+ },
972
+ f"I_0_{i*8+j*4+1}": {
973
+ "goal": (1,11),
974
+ "train_config": (QLEARNING, 100000),
975
+ "consecutive": cons,
976
+ "percentage": perc
977
+ },
978
+ f"I_0_{i*8+j*4+2}": {
979
+ "goal": (11,11),
980
+ "train_config": (QLEARNING, 100000),
981
+ "consecutive": cons,
982
+ "percentage": perc
983
+ },
984
+ f"I_0_{i*8+j*4+3}": {
985
+ "goal": (5,9),
986
+ "train_config": (QLEARNING, 100000),
987
+ "consecutive": cons,
988
+ "percentage": perc
989
+ }})
990
+ PROBLEMS[MINIGRID]["MiniGrid-SimpleCrossingS13N4"]["L3"].update(
991
+ {f"I_0_{i*10+j*5}": {
992
+ "goal": (11,1),
993
+ "train_config": (QLEARNING, 100000),
994
+ "consecutive": cons,
995
+ "percentage": perc
996
+ },
997
+ f"I_0_{i*10+j*5+1}": {
998
+ "goal": (1,11),
999
+ "train_config": (QLEARNING, 100000),
1000
+ "consecutive": cons,
1001
+ "percentage": perc
1002
+ },
1003
+ f"I_0_{i*10+j*5+2}": {
1004
+ "goal": (11,11),
1005
+ "train_config": (QLEARNING, 100000),
1006
+ "consecutive": cons,
1007
+ "percentage": perc
1008
+ },
1009
+ f"I_0_{i*10+j*5+3}": {
1010
+ "goal": (5,9),
1011
+ "train_config": (QLEARNING, 100000),
1012
+ "consecutive": cons,
1013
+ "percentage": perc
1014
+ },
1015
+ f"I_0_{i*10+j*5+4}": {
1016
+ "goal": (6,1),
1017
+ "train_config": (QLEARNING, 100000),
1018
+ "consecutive": cons,
1019
+ "percentage": perc
1020
+ }})
1021
+ PROBLEMS[MINIGRID]["MiniGrid-SimpleCrossingS13N4"]["L4"].update(
1022
+ {f"I_0_{i*12+j*6}": {
1023
+ "goal": (11,1),
1024
+ "train_config": (QLEARNING, 100000),
1025
+ "consecutive": cons,
1026
+ "percentage": perc
1027
+ },
1028
+ f"I_0_{i*12+j*6+1}": {
1029
+ "goal": (1,11),
1030
+ "train_config": (QLEARNING, 100000),
1031
+ "consecutive": cons,
1032
+ "percentage": perc
1033
+ },
1034
+ f"I_0_{i*12+j*6+2}": {
1035
+ "goal": (11,11),
1036
+ "train_config": (QLEARNING, 100000),
1037
+ "consecutive": cons,
1038
+ "percentage": perc
1039
+ },
1040
+ f"I_0_{i*12+j*6+3}": {
1041
+ "goal": (5,9),
1042
+ "train_config": (QLEARNING, 100000),
1043
+ "consecutive": cons,
1044
+ "percentage": perc
1045
+ },
1046
+ f"I_0_{i*12+j*6+5}": {
1047
+ "goal": (11,3),
1048
+ "train_config": (QLEARNING, 100000),
1049
+ "consecutive": cons,
1050
+ "percentage": perc
1051
+ }})
1052
+ PROBLEMS[MINIGRID]["MiniGrid-SimpleCrossingS13N4"]["L5"].update(
1053
+ {f"I_0_{i*14+j*7}": {
1054
+ "goal": (11,1),
1055
+ "train_config": (QLEARNING, 100000),
1056
+ "consecutive": cons,
1057
+ "percentage": perc
1058
+ },
1059
+ f"I_0_{i*14+j*7+1}": {
1060
+ "goal": (1,11),
1061
+ "train_config": (QLEARNING, 100000),
1062
+ "consecutive": cons,
1063
+ "percentage": perc
1064
+ },
1065
+ f"I_0_{i*14+j*7+2}": {
1066
+ "goal": (11,11),
1067
+ "train_config": (QLEARNING, 100000),
1068
+ "consecutive": cons,
1069
+ "percentage": perc
1070
+ },
1071
+ f"I_0_{i*14+j*7+3}": {
1072
+ "goal": (5,9),
1073
+ "train_config": (QLEARNING, 100000),
1074
+ "consecutive": cons,
1075
+ "percentage": perc
1076
+ },
1077
+ f"I_0_{i*14+j*7+5}": {
1078
+ "goal": (11,3),
1079
+ "train_config": (QLEARNING, 100000),
1080
+ "consecutive": cons,
1081
+ "percentage": perc
1082
+ },
1083
+ f"I_0_{i*14+j*7+6}": {
1084
+ "goal": (11,5),
1085
+ "train_config": (QLEARNING, 100000),
1086
+ "consecutive": cons,
1087
+ "percentage": perc
1088
+ }})
1089
+
1090
+ PROBLEMS[MINIGRID]["MiniGrid-LavaCrossingS9N2"]["L1"].update(
1091
+ {f"I_0_{i*6+j*3}": {
1092
+ "goal": (1,3),
1093
+ "train_config": (QLEARNING, 100000),
1094
+ "consecutive": cons,
1095
+ "percentage": perc
1096
+ },
1097
+ f"I_0_{i*6+j*3+1}": {
1098
+ "goal": (6,5),
1099
+ "train_config": (QLEARNING, 100000),
1100
+ "consecutive": cons,
1101
+ "percentage": perc
1102
+ },
1103
+ f"I_0_{i*6+j*3+2}": {
1104
+ "goal": (4,7),
1105
+ "train_config": (QLEARNING, 100000),
1106
+ "consecutive": cons,
1107
+ "percentage": perc
1108
+ }})
1109
+ PROBLEMS[MINIGRID]["MiniGrid-LavaCrossingS9N2"]["L2"].update(
1110
+ {f"I_0_{i*8+j*4}": {
1111
+ "goal": (1,3),
1112
+ "train_config": (QLEARNING, 100000),
1113
+ "consecutive": cons,
1114
+ "percentage": perc
1115
+ },
1116
+ f"I_0_{i*8+j*4+1}": {
1117
+ "goal": (6,5),
1118
+ "train_config": (QLEARNING, 100000),
1119
+ "consecutive": cons,
1120
+ "percentage": perc
1121
+ },
1122
+ f"I_0_{i*8+j*4+2}": {
1123
+ "goal": (4,7),
1124
+ "train_config": (QLEARNING, 100000),
1125
+ "consecutive": cons,
1126
+ "percentage": perc
1127
+ },
1128
+ f"I_0_{i*8+j*4+3}": {
1129
+ "goal": (2,5),
1130
+ "train_config": (QLEARNING, 100000),
1131
+ "consecutive": cons,
1132
+ "percentage": perc
1133
+ }})
1134
+ PROBLEMS[MINIGRID]["MiniGrid-LavaCrossingS9N2"]["L3"].update(
1135
+ {f"I_0_{i*10+j*5}": {
1136
+ "goal": (1,3),
1137
+ "train_config": (QLEARNING, 100000),
1138
+ "consecutive": cons,
1139
+ "percentage": perc
1140
+ },
1141
+ f"I_0_{i*10+j*5+1}": {
1142
+ "goal": (6,5),
1143
+ "train_config": (QLEARNING, 100000),
1144
+ "consecutive": cons,
1145
+ "percentage": perc
1146
+ },
1147
+ f"I_0_{i*10+j*5+2}": {
1148
+ "goal": (4,7),
1149
+ "train_config": (QLEARNING, 100000),
1150
+ "consecutive": cons,
1151
+ "percentage": perc
1152
+ },
1153
+ f"I_0_{i*10+j*5+3}": {
1154
+ "goal": (2,5),
1155
+ "train_config": (QLEARNING, 100000),
1156
+ "consecutive": cons,
1157
+ "percentage": perc
1158
+ },
1159
+ f"I_0_{i*10+j*5+4}": {
1160
+ "goal": (5,2),
1161
+ "train_config": (QLEARNING, 100000),
1162
+ "consecutive": cons,
1163
+ "percentage": perc
1164
+ }})
1165
+ PROBLEMS[MINIGRID]["MiniGrid-LavaCrossingS9N2"]["L4"].update(
1166
+ {f"I_0_{i*12+j*6}": {
1167
+ "goal": (1,3),
1168
+ "train_config": (QLEARNING, 100000),
1169
+ "consecutive": cons,
1170
+ "percentage": perc
1171
+ },
1172
+ f"I_0_{i*12+j*6+1}": {
1173
+ "goal": (6,5),
1174
+ "train_config": (QLEARNING, 100000),
1175
+ "consecutive": cons,
1176
+ "percentage": perc
1177
+ },
1178
+ f"I_0_{i*12+j*6+2}": {
1179
+ "goal": (4,7),
1180
+ "train_config": (QLEARNING, 100000),
1181
+ "consecutive": cons,
1182
+ "percentage": perc
1183
+ },
1184
+ f"I_0_{i*12+j*6+3}": {
1185
+ "goal": (2,5),
1186
+ "train_config": (QLEARNING, 100000),
1187
+ "consecutive": cons,
1188
+ "percentage": perc
1189
+ },
1190
+ f"I_0_{i*12+j*6+4}": {
1191
+ "goal": (5,2),
1192
+ "train_config": (QLEARNING, 100000),
1193
+ "consecutive": cons,
1194
+ "percentage": perc
1195
+ },
1196
+ f"I_0_{i*12+j*6+5}": {
1197
+ "goal": (4,5),
1198
+ "train_config": (QLEARNING, 100000),
1199
+ "consecutive": cons,
1200
+ "percentage": perc
1201
+ }})
1202
+ PROBLEMS[MINIGRID]["MiniGrid-LavaCrossingS9N2"]["L5"].update(
1203
+ {f"I_0_{i*14+j*7}": {
1204
+ "goal": (1,3),
1205
+ "train_config": (QLEARNING, 100000),
1206
+ "consecutive": cons,
1207
+ "percentage": perc
1208
+ },
1209
+ f"I_0_{i*14+j*7+1}": {
1210
+ "goal": (6,5),
1211
+ "train_config": (QLEARNING, 100000),
1212
+ "consecutive": cons,
1213
+ "percentage": perc
1214
+ },
1215
+ f"I_0_{i*14+j*7+2}": {
1216
+ "goal": (4,7),
1217
+ "train_config": (QLEARNING, 100000),
1218
+ "consecutive": cons,
1219
+ "percentage": perc
1220
+ },
1221
+ f"I_0_{i*14+j*7+3}": {
1222
+ "goal": (2,5),
1223
+ "train_config": (QLEARNING, 100000),
1224
+ "consecutive": cons,
1225
+ "percentage": perc
1226
+ },
1227
+ f"I_0_{i*14+j*7+4}": {
1228
+ "goal": (5,2),
1229
+ "train_config": (QLEARNING, 100000),
1230
+ "consecutive": cons,
1231
+ "percentage": perc
1232
+ },
1233
+ f"I_0_{i*14+j*7+5}": {
1234
+ "goal": (4,5),
1235
+ "train_config": (QLEARNING, 100000),
1236
+ "consecutive": cons,
1237
+ "percentage": perc
1238
+ },
1239
+ f"I_0_{i*14+j*7+6}": {
1240
+ "goal": (1,1),
1241
+ "train_config": (QLEARNING, 100000),
1242
+ "consecutive": cons,
1243
+ "percentage": perc
1244
+ }})
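The loops above add, for every difficulty level, one entry keyed f"I_0_{index}" per combination of observability percentage ([0.3, 0.5, 0.7, 0.9, 1]), consecutive flag (True/False) and goal, where index = i*2*G + j*G + k, G is the number of goals listed for that level, and k is the goal's position in the block (the MiniGrid-SimpleCrossingS13N4 "L4" and "L5" blocks skip one slot, so not every index is present there). A small sketch of decoding such a key back into its combination; the helper below is illustrative only and not part of the package:

PERCENTAGES = [0.3, 0.5, 0.7, 0.9, 1]
CONSECUTIVE = [True, False]

def decode_i0_index(index, num_goals):
    # Invert index = i*2*num_goals + j*num_goals + k from the loops above.
    i, rest = divmod(index, 2 * num_goals)  # which percentage
    j, k = divmod(rest, num_goals)          # which consecutive flag, which goal slot
    return PERCENTAGES[i], CONSECUTIVE[j], k

print(decode_i0_index(16, 3))  # (0.7, False, 1): in the 3-goal Parking-S-14-PC- "L1" block this is goal "11"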