gr-libs 0.1.8__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff compares the contents of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
Files changed (73)
  1. gr_libs/__init__.py +3 -1
  2. gr_libs/_version.py +2 -2
  3. gr_libs/all_experiments.py +260 -0
  4. gr_libs/environment/__init__.py +14 -1
  5. gr_libs/environment/_utils/__init__.py +0 -0
  6. gr_libs/environment/{utils → _utils}/utils.py +1 -1
  7. gr_libs/environment/environment.py +278 -23
  8. gr_libs/evaluation/__init__.py +1 -0
  9. gr_libs/evaluation/generate_experiments_results.py +100 -0
  10. gr_libs/metrics/__init__.py +2 -0
  11. gr_libs/metrics/metrics.py +166 -31
  12. gr_libs/ml/__init__.py +1 -6
  13. gr_libs/ml/base/__init__.py +3 -1
  14. gr_libs/ml/base/rl_agent.py +68 -3
  15. gr_libs/ml/neural/__init__.py +1 -3
  16. gr_libs/ml/neural/deep_rl_learner.py +241 -84
  17. gr_libs/ml/neural/utils/__init__.py +1 -2
  18. gr_libs/ml/planner/mcts/{utils → _utils}/tree.py +1 -1
  19. gr_libs/ml/planner/mcts/mcts_model.py +71 -34
  20. gr_libs/ml/sequential/__init__.py +0 -1
  21. gr_libs/ml/sequential/{lstm_model.py → _lstm_model.py} +11 -14
  22. gr_libs/ml/tabular/__init__.py +1 -3
  23. gr_libs/ml/tabular/tabular_q_learner.py +27 -9
  24. gr_libs/ml/tabular/tabular_rl_agent.py +22 -9
  25. gr_libs/ml/utils/__init__.py +2 -9
  26. gr_libs/ml/utils/format.py +13 -90
  27. gr_libs/ml/utils/math.py +3 -2
  28. gr_libs/ml/utils/other.py +2 -2
  29. gr_libs/ml/utils/storage.py +41 -94
  30. gr_libs/odgr_executor.py +263 -0
  31. gr_libs/problems/consts.py +570 -292
  32. gr_libs/recognizer/{utils → _utils}/format.py +2 -2
  33. gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +127 -36
  34. gr_libs/recognizer/graml/{gr_dataset.py → _gr_dataset.py} +11 -11
  35. gr_libs/recognizer/graml/graml_recognizer.py +186 -35
  36. gr_libs/recognizer/recognizer.py +59 -10
  37. gr_libs/tutorials/draco_panda_tutorial.py +58 -0
  38. gr_libs/tutorials/draco_parking_tutorial.py +56 -0
  39. {tutorials → gr_libs/tutorials}/gcdraco_panda_tutorial.py +11 -11
  40. {tutorials → gr_libs/tutorials}/gcdraco_parking_tutorial.py +6 -8
  41. {tutorials → gr_libs/tutorials}/graml_minigrid_tutorial.py +18 -14
  42. {tutorials → gr_libs/tutorials}/graml_panda_tutorial.py +11 -12
  43. {tutorials → gr_libs/tutorials}/graml_parking_tutorial.py +8 -10
  44. {tutorials → gr_libs/tutorials}/graml_point_maze_tutorial.py +17 -3
  45. {tutorials → gr_libs/tutorials}/graql_minigrid_tutorial.py +2 -2
  46. {gr_libs-0.1.8.dist-info → gr_libs-0.2.5.dist-info}/METADATA +95 -29
  47. gr_libs-0.2.5.dist-info/RECORD +72 -0
  48. {gr_libs-0.1.8.dist-info → gr_libs-0.2.5.dist-info}/WHEEL +1 -1
  49. gr_libs-0.2.5.dist-info/top_level.txt +2 -0
  50. tests/test_draco.py +14 -0
  51. tests/test_gcdraco.py +2 -2
  52. tests/test_graml.py +4 -4
  53. tests/test_graql.py +1 -1
  54. tests/test_odgr_executor_expertbasedgraml.py +14 -0
  55. tests/test_odgr_executor_gcdraco.py +14 -0
  56. tests/test_odgr_executor_gcgraml.py +14 -0
  57. tests/test_odgr_executor_graql.py +14 -0
  58. evaluation/analyze_results_cross_alg_cross_domain.py +0 -267
  59. evaluation/create_minigrid_map_image.py +0 -38
  60. evaluation/file_system.py +0 -53
  61. evaluation/generate_experiments_results.py +0 -141
  62. evaluation/generate_experiments_results_new_ver1.py +0 -238
  63. evaluation/generate_experiments_results_new_ver2.py +0 -331
  64. evaluation/generate_task_specific_statistics_plots.py +0 -500
  65. evaluation/get_plans_images.py +0 -62
  66. evaluation/increasing_and_decreasing_.py +0 -104
  67. gr_libs/ml/neural/utils/penv.py +0 -60
  68. gr_libs-0.1.8.dist-info/RECORD +0 -70
  69. gr_libs-0.1.8.dist-info/top_level.txt +0 -4
  70. /gr_libs/{environment/utils/__init__.py → _evaluation/_generate_experiments_results.py} +0 -0
  71. /gr_libs/ml/planner/mcts/{utils → _utils}/__init__.py +0 -0
  72. /gr_libs/ml/planner/mcts/{utils → _utils}/node.py +0 -0
  73. /gr_libs/recognizer/{utils → _utils}/__init__.py +0 -0
@@ -1,22 +1,28 @@
 import numpy as np
 from stable_baselines3 import PPO, SAC, TD3
+
 from gr_libs.environment.environment import (
     MINIGRID,
-    PARKING,
     PANDA,
+    PARKING,
     POINT_MAZE,
     QLEARNING,
     PandaProperty,
 )
 
-
 PROBLEMS = {
     PARKING: {
         "Parking-S-14-PC-": {
             "L1": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [i for i in range(1, 21)],
-                    "train_configs": [(PPO, 200000)],
+                "base": {
+                    "gc": {
+                        "goals": [i for i in range(1, 21)],
+                        "train_configs": [(PPO, 200000)],
+                    },
+                    "bg": {
+                        "goals": ["1", "4", "8", "14", "21"],
+                        "train_configs": [(SAC, 200000) for _ in range(5)],
+                    },
                 },
                 "G_0": {
                     "goals": ["1", "11", "21"],
@@ -26,9 +32,15 @@ PROBLEMS = {
                 },
             },
             "L2": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [i for i in range(1, 21)],
-                    "train_configs": [(PPO, 200000)],
+                "base": {
+                    "gc": {
+                        "goals": [i for i in range(1, 21)],
+                        "train_configs": [(PPO, 200000)],
+                    },
+                    "bg": {
+                        "goals": ["1", "4", "8", "14", "21"],
+                        "train_configs": [(SAC, 200000) for _ in range(5)],
+                    },
                 },
                 "G_0": {
                     "goals": ["1", "8", "14", "21"],
@@ -36,9 +48,15 @@ PROBLEMS = {
                 },
             },
             "L3": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [i for i in range(1, 21)],
-                    "train_configs": [(PPO, 200000)],
+                "base": {
+                    "gc": {
+                        "goals": [i for i in range(1, 21)],
+                        "train_configs": [(PPO, 200000)],
+                    },
+                    "bg": {
+                        "goals": ["1", "4", "8", "14", "21"],
+                        "train_configs": [(SAC, 200000) for _ in range(5)],
+                    },
                 },
                 "G_0": {
                     "goals": ["1", "8", "11", "18"],
@@ -46,9 +64,15 @@ PROBLEMS = {
                 },
             },
             "L4": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [i for i in range(1, 21)],
-                    "train_configs": [(PPO, 200000)],
+                "base": {
+                    "gc": {
+                        "goals": [i for i in range(1, 21)],
+                        "train_configs": [(PPO, 200000)],
+                    },
+                    "bg": {
+                        "goals": ["1", "4", "8", "14", "21"],
+                        "train_configs": [(SAC, 200000) for _ in range(5)],
+                    },
                 },
                 "G_0": {
                     "goals": ["4", "8", "11", "14", "18"],
@@ -56,13 +80,15 @@ PROBLEMS = {
                 },
             },
             "L5": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [i for i in range(1, 21)],
-                    "train_configs": [(PPO, 200000)],
-                },
-                "G_0": {
-                    "goals": ["1", "4", "8", "11", "14", "18", "21"],
-                    "train_configs": [(SAC, 200000) for _ in range(7)],
+                "base": {
+                    "gc": {
+                        "goals": [i for i in range(1, 21)],
+                        "train_configs": [(PPO, 200000)],
+                    },
+                    "bg": {
+                        "goals": ["1", "4", "8", "11", "14", "18", "21"],
+                        "train_configs": [(SAC, 200000) for _ in range(7)],
+                    },
                 },
             },
         }
@@ -70,11 +96,26 @@ PROBLEMS = {
     PANDA: {
         "PandaMyReachDense": {
             "L1": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [
-                        np.array([PandaProperty.sample_goal()]) for _ in range(1, 30)
-                    ],
-                    "train_configs": [(SAC, 800000)],
+                "base": {
+                    "gc": {
+                        "goals": [
+                            np.array([PandaProperty.sample_goal()])
+                            for _ in range(1, 30)
+                        ],
+                        "train_configs": [(SAC, 800000)],
+                    },
+                    "bg": {
+                        "goals": [
+                            np.array([[-0.1, -0.1, 0.1]]),
+                            np.array([[-0.1, 0.1, 0.1]]),
+                            np.array([[0.2, 0.2, 0.1]]),
+                        ],
+                        "train_configs": [
+                            (PPO, 200000),
+                            (PPO, 200000),
+                            (PPO, 300000),
+                        ],
+                    },
                 },
                 "G_0": {
                     "goals": [
@@ -86,15 +127,32 @@ PROBLEMS = {
                         (SAC, 00000),
                         (SAC, 200000),
                         (SAC, 300000),
-                    ], # algorithms that use GC agent to generate sequences don't use this
+                    ],
                 },
             },
             "L2": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [
-                        np.array([PandaProperty.sample_goal()]) for _ in range(1, 30)
-                    ],
-                    "train_configs": [(SAC, 800000)],
+                "base": {
+                    "gc": {
+                        "goals": [
+                            np.array([PandaProperty.sample_goal()])
+                            for _ in range(1, 30)
+                        ],
+                        "train_configs": [(SAC, 800000)],
+                    },
+                    "bg": {
+                        "goals": [
+                            np.array([[-0.5, -0.5, 0.1]]),
+                            np.array([[-0.5, 0.2, 0.1]]),
+                            np.array([[-0.1, 0.1, 0.1]]),
+                            np.array([[0.1, -0.1, 0.1]]),
+                        ],
+                        "train_configs": [
+                            (PPO, 400000),
+                            (PPO, 400000),
+                            (PPO, 400000),
+                            (PPO, 400000),
+                        ],
+                    },
                 },
                 "G_0": {
                     "goals": [
@@ -108,15 +166,29 @@ PROBLEMS = {
                         (SAC, 400000),
                         (SAC, 400000),
                         (SAC, 400000),
-                    ], # algorithms that use GC agent to generate sequences don't use this
+                    ],
                 },
             },
             "L3": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [
-                        np.array([PandaProperty.sample_goal()]) for _ in range(1, 30)
-                    ],
-                    "train_configs": [(SAC, 800000)],
+                "base": {
+                    "gc": {
+                        "goals": [
+                            np.array([PandaProperty.sample_goal()])
+                            for _ in range(1, 30)
+                        ],
+                        "train_configs": [(SAC, 800000)],
+                    },
+                    "bg": {
+                        "goals": [
+                            np.array([[-0.5, -0.5, 0.1]]),
+                            np.array([[-0.1, -0.1, 0.1]]),
+                            np.array([[-0.5, 0.2, 0.1]]),
+                            np.array([[-0.1, 0.1, 0.1]]),
+                            np.array([[0.2, -0.2, 0.1]]),
+                            np.array([[0.2, 0.2, 0.1]]),
+                        ],
+                        "train_configs": [(PPO, 400000) for _ in range(6)],
+                    },
                 },
                 "G_0": {
                     "goals": [
@@ -127,17 +199,36 @@ PROBLEMS = {
                         np.array([[0.2, -0.2, 0.1]]),
                         np.array([[0.2, 0.2, 0.1]]),
                     ],
-                    "train_configs": [
-                        (SAC, 400000) for _ in range(6)
-                    ], # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(SAC, 400000) for _ in range(6)],
                 },
             },
             "L4": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [
-                        np.array([PandaProperty.sample_goal()]) for _ in range(1, 30)
-                    ],
-                    "train_configs": [(SAC, 800000)],
+                "base": {
+                    "gc": {
+                        "goals": [
+                            np.array([PandaProperty.sample_goal()])
+                            for _ in range(1, 30)
+                        ],
+                        "train_configs": [(SAC, 800000)],
+                    },
+                    "bg": {
+                        "goals": [
+                            np.array([[-0.3, -0.3, 0.1]]),
+                            np.array([[-0.1, -0.1, 0.1]]),
+                            np.array([[-0.3, 0.2, 0.1]]),
+                            np.array([[-0.1, 0.1, 0.1]]),
+                            np.array([[0.1, -0.1, 0.1]]),
+                            np.array([[0.2, 0.2, 0.1]]),
+                        ],
+                        "train_configs": [
+                            (SAC, 400000),
+                            (PPO, 400000),
+                            (PPO, 400000),
+                            (PPO, 400000),
+                            (PPO, 400000),
+                            (PPO, 400000),
+                        ],
+                    },
                 },
                 "G_0": {
                     "goals": [
@@ -148,17 +239,42 @@ PROBLEMS = {
                         np.array([[0.1, -0.1, 0.1]]),
                         np.array([[0.2, 0.2, 0.1]]),
                     ],
-                    "train_configs": [
-                        (SAC, 400000) for _ in range(6)
-                    ], # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(SAC, 400000) for _ in range(6)],
                 },
             },
             "L5": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [
-                        np.array([PandaProperty.sample_goal()]) for _ in range(1, 30)
-                    ],
-                    "train_configs": [(SAC, 800000)],
+                "base": {
+                    "gc": {
+                        "goals": [
+                            np.array([PandaProperty.sample_goal()])
+                            for _ in range(1, 30)
+                        ],
+                        "train_configs": [(SAC, 800000)],
+                    },
+                    "bg": {
+                        "goals": [
+                            np.array([[-0.5, -0.5, 0.1]]),
+                            np.array([[-0.3, -0.3, 0.1]]),
+                            np.array([[-0.1, -0.1, 0.1]]),
+                            np.array([[-0.5, 0.2, 0.1]]),
+                            np.array([[-0.3, 0.2, 0.1]]),
+                            np.array([[-0.1, 0.1, 0.1]]),
+                            np.array([[0.2, -0.2, 0.1]]),
+                            np.array([[0.1, -0.1, 0.1]]),
+                            np.array([[0.2, 0.2, 0.1]]),
+                        ],
+                        "train_configs": [
+                            (PPO, 400000),
+                            (SAC, 400000),
+                            (PPO, 400000),
+                            (PPO, 400000),
+                            (PPO, 400000),
+                            (PPO, 400000),
+                            (PPO, 400000),
+                            (PPO, 400000),
+                            (PPO, 400000),
+                        ],
+                    },
                 },
                 "G_0": {
                     "goals": [
@@ -172,9 +288,7 @@ PROBLEMS = {
                         np.array([[0.1, -0.1, 0.1]]),
                         np.array([[0.2, 0.2, 0.1]]),
                     ],
-                    "train_configs": [
-                        (SAC, 400000) for _ in range(9)
-                    ], # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(SAC, 400000) for _ in range(9)],
                 },
             },
         }
@@ -182,170 +296,258 @@ PROBLEMS = {
     POINT_MAZE: {
         "PointMaze-FourRoomsEnvDense-11x11": {
            "L1": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [
-                        (9, 1),
-                        (9, 9),
-                        (1, 9),
-                        (3, 3),
-                        (3, 4),
-                        (8, 2),
-                        (3, 7),
-                        (2, 8),
-                    ],
-                    "train_configs": [(SAC, 400000) for _ in range(8)],
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (9, 1),
+                            (9, 9),
+                            (1, 9),
+                            (3, 3),
+                            (3, 4),
+                            (8, 2),
+                            (3, 7),
+                            (2, 8),
+                        ],
+                        "train_configs": [(SAC, 400000) for _ in range(8)],
+                    },
+                    "bg": {
+                        "goals": [(4, 4), (7, 3), (3, 7)],
+                        "train_configs": [(SAC, 400000) for _ in range(3)],
+                    },
                 },
                 "G_0": {
                     "goals": [(4, 4), (7, 3), (3, 7)],
-                    "train_configs": [
-                        (SAC, 400000) for _ in range(3)
-                    ], # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(SAC, 400000) for _ in range(3)],
                 },
             },
             "L2": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [
-                        (9, 1),
-                        (9, 9),
-                        (1, 9),
-                        (3, 3),
-                        (3, 4),
-                        (8, 2),
-                        (3, 7),
-                        (2, 8),
-                    ],
-                    "train_configs": [(SAC, 400000) for _ in range(8)],
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (9, 1),
+                            (9, 9),
+                            (1, 9),
+                            (3, 3),
+                            (3, 4),
+                            (8, 2),
+                            (3, 7),
+                            (2, 8),
+                        ],
+                        "train_configs": [(SAC, 400000) for _ in range(8)],
+                    },
+                    "bg": {
+                        "goals": [(4, 4), (7, 3), (3, 7), (8, 2)],
+                        "train_configs": [(SAC, 400000) for _ in range(4)],
+                    },
                 },
                 "G_0": {
                     "goals": [(4, 4), (7, 3), (3, 7), (8, 2)],
-                    "train_configs": [
-                        (SAC, 400000) for _ in range(4)
-                    ], # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(SAC, 400000) for _ in range(4)],
                 },
             },
             "L3": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [
-                        (9, 1),
-                        (9, 9),
-                        (1, 9),
-                        (3, 3),
-                        (3, 4),
-                        (8, 2),
-                        (3, 7),
-                        (2, 8),
-                    ],
-                    "train_configs": [(SAC, 400000) for _ in range(8)],
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (9, 1),
+                            (9, 9),
+                            (1, 9),
+                            (3, 3),
+                            (3, 4),
+                            (8, 2),
+                            (3, 7),
+                            (2, 8),
+                        ],
+                        "train_configs": [(SAC, 400000) for _ in range(8)],
+                    },
+                    "bg": {
+                        "goals": [(4, 4), (7, 3), (3, 7), (8, 2), (2, 8)],
+                        "train_configs": [(SAC, 400000) for _ in range(5)],
+                    },
                 },
                 "G_0": {
                     "goals": [(4, 4), (7, 3), (3, 7), (8, 2), (2, 8)],
-                    "train_configs": [
-                        (SAC, 400000) for _ in range(5)
-                    ], # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(SAC, 400000) for _ in range(5)],
                 },
             },
             "L4": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [
-                        (9, 1),
-                        (9, 9),
-                        (1, 9),
-                        (3, 3),
-                        (3, 4),
-                        (8, 2),
-                        (3, 7),
-                        (2, 8),
-                    ],
-                    "train_configs": [(SAC, 400000) for _ in range(8)],
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (9, 1),
+                            (9, 9),
+                            (1, 9),
+                            (3, 3),
+                            (3, 4),
+                            (8, 2),
+                            (3, 7),
+                            (2, 8),
+                        ],
+                        "train_configs": [(SAC, 400000) for _ in range(8)],
+                    },
+                    "bg": {
+                        "goals": [(4, 4), (7, 3), (3, 7), (8, 2), (2, 8), (3, 4)],
+                        "train_configs": [(SAC, 400000) for _ in range(6)],
+                    },
                 },
                 "G_0": {
                     "goals": [(4, 4), (7, 3), (3, 7), (8, 2), (2, 8), (3, 4)],
-                    "train_configs": [
-                        (SAC, 400000) for _ in range(6)
-                    ], # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(SAC, 400000) for _ in range(6)],
                 },
             },
             "L5": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [
-                        (9, 1),
-                        (9, 9),
-                        (1, 9),
-                        (3, 3),
-                        (3, 4),
-                        (8, 2),
-                        (3, 7),
-                        (2, 8),
-                    ],
-                    "train_configs": [(SAC, 400000) for _ in range(8)],
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (9, 1),
+                            (9, 9),
+                            (1, 9),
+                            (3, 3),
+                            (3, 4),
+                            (8, 2),
+                            (3, 7),
+                            (2, 8),
+                        ],
+                        "train_configs": [(SAC, 400000) for _ in range(8)],
+                    },
+                    "bg": {
+                        "goals": [
+                            (4, 4),
+                            (7, 3),
+                            (3, 7),
+                            (8, 2),
+                            (2, 8),
+                            (3, 4),
+                            (4, 3),
+                        ],
+                        "train_configs": [(SAC, 400000) for _ in range(7)],
+                    },
                 },
                 "G_0": {
                     "goals": [(4, 4), (7, 3), (3, 7), (8, 2), (2, 8), (3, 4), (4, 3)],
-                    "train_configs": [
-                        (SAC, 400000) for _ in range(7)
-                    ], # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(SAC, 400000) for _ in range(7)],
                 },
             },
         },
         "PointMaze-ObstaclesEnvDense-11x11": {
             "L1": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [(5, 1), (9, 9), (1, 5), (6, 4), (4, 6), (6, 6), (7, 7)],
-                    "train_configs": [(SAC, 400000) for _ in range(7)],
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (5, 1),
+                            (9, 9),
+                            (1, 5),
+                            (6, 4),
+                            (4, 6),
+                            (6, 6),
+                            (7, 7),
+                        ],
+                        "train_configs": [(SAC, 400000) for _ in range(7)],
+                    },
+                    "bg": {
+                        "goals": [(5, 5), (7, 4), (4, 7)],
+                        "train_configs": [(SAC, 400000) for _ in range(3)],
+                    },
                 },
                 "G_0": {
                     "goals": [(5, 5), (7, 4), (4, 7)],
-                    "train_configs": [
-                        (SAC, 400000) for _ in range(3)
-                    ], # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(SAC, 400000) for _ in range(3)],
                 },
             },
             "L2": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [(5, 1), (9, 9), (1, 5), (6, 4), (4, 6), (6, 6), (7, 7)],
-                    "train_configs": [(SAC, 400000) for _ in range(7)],
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (5, 1),
+                            (9, 9),
+                            (1, 5),
+                            (6, 4),
+                            (4, 6),
+                            (6, 6),
+                            (7, 7),
+                        ],
+                        "train_configs": [(SAC, 400000) for _ in range(7)],
+                    },
+                    "bg": {
+                        "goals": [(5, 5), (3, 6), (7, 4)],
+                        "train_configs": [(SAC, 400000) for _ in range(3)],
+                    },
                 },
                 "G_0": {
                     "goals": [(5, 5), (3, 6), (7, 4)],
-                    "train_configs": [
-                        (SAC, 400000) for _ in range(3)
-                    ], # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(SAC, 400000) for _ in range(3)],
                 },
             },
             "L3": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [(5, 1), (9, 9), (1, 5), (6, 4), (4, 6), (6, 6), (7, 7)],
-                    "train_configs": [(SAC, 400000) for _ in range(7)],
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (5, 1),
+                            (9, 9),
+                            (1, 5),
+                            (6, 4),
+                            (4, 6),
+                            (6, 6),
+                            (7, 7),
+                        ],
+                        "train_configs": [(SAC, 400000) for _ in range(7)],
+                    },
+                    "bg": {
+                        "goals": [(5, 5), (3, 6), (7, 4), (4, 7)],
+                        "train_configs": [(SAC, 400000) for _ in range(4)],
+                    },
                 },
                 "G_0": {
                     "goals": [(5, 5), (3, 6), (7, 4), (4, 7)],
-                    "train_configs": [
-                        (SAC, 400000) for _ in range(4)
-                    ], # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(SAC, 400000) for _ in range(4)],
                 },
             },
             "L4": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [(5, 1), (9, 9), (1, 5), (6, 4), (4, 6), (6, 6), (7, 7)],
-                    "train_configs": [(SAC, 400000) for _ in range(7)],
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (5, 1),
+                            (9, 9),
+                            (1, 5),
+                            (6, 4),
+                            (4, 6),
+                            (6, 6),
+                            (7, 7),
+                        ],
+                        "train_configs": [(SAC, 400000) for _ in range(7)],
+                    },
+                    "bg": {
+                        "goals": [(3, 6), (6, 3), (7, 4), (4, 7), (8, 8)],
+                        "train_configs": [(SAC, 400000) for _ in range(5)],
+                    },
                 },
                 "G_0": {
                     "goals": [(3, 6), (6, 3), (7, 4), (4, 7), (8, 8)],
-                    "train_configs": [
-                        (SAC, 400000) for _ in range(5)
-                    ], # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(SAC, 400000) for _ in range(5)],
                 },
             },
             "L5": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [(5, 1), (9, 9), (1, 5), (6, 4), (4, 6), (6, 6), (7, 7)],
-                    "train_configs": [(SAC, 400000) for _ in range(7)],
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (5, 1),
+                            (9, 9),
+                            (1, 5),
+                            (6, 4),
+                            (4, 6),
+                            (6, 6),
+                            (7, 7),
+                        ],
+                        "train_configs": [(SAC, 400000) for _ in range(7)],
+                    },
+                    "bg": {
+                        "goals": [(5, 5), (3, 6), (6, 3), (7, 4), (4, 7), (8, 8)],
+                        "train_configs": [(SAC, 400000) for _ in range(6)],
+                    },
                 },
                 "G_0": {
                     "goals": [(5, 5), (3, 6), (6, 3), (7, 4), (4, 7), (8, 8)],
-                    "train_configs": [
-                        (SAC, 400000) for _ in range(6)
-                    ], # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(SAC, 400000) for _ in range(6)],
                 },
             },
         },
@@ -353,117 +555,137 @@ PROBLEMS = {
     MINIGRID: {
         "MiniGrid-SimpleCrossingS13N4": {
             "L1": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [
-                        (11, 1),
-                        (11, 11),
-                        (1, 11),
-                        (7, 11),
-                        (8, 1),
-                        (10, 6),
-                        (6, 9),
-                        (11, 3),
-                        (11, 5),
-                    ],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(9)
-                    ], # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (11, 1),
+                            (11, 11),
+                            (1, 11),
+                            (7, 11),
+                            (8, 1),
+                            (10, 6),
+                            (6, 9),
+                            (11, 3),
+                            (11, 5),
+                        ],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(9)],
+                    },
+                    "bg": {
+                        "goals": [(11, 1), (11, 11), (1, 11)],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(3)],
+                    },
                 },
                 "G_0": {
                     "goals": [(11, 1), (11, 11), (1, 11)],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(3)
-                    ], # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(QLEARNING, 100000) for _ in range(3)],
                 },
             },
             "L2": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [
-                        (11, 1),
-                        (11, 11),
-                        (1, 11),
-                        (7, 11),
-                        (8, 1),
-                        (10, 6),
-                        (6, 9),
-                        (11, 3),
-                        (11, 5),
-                    ],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(9)
-                    ], # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (11, 1),
+                            (11, 11),
+                            (1, 11),
+                            (7, 11),
+                            (8, 1),
+                            (10, 6),
+                            (6, 9),
+                            (11, 3),
+                            (11, 5),
+                        ],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(9)],
+                    },
+                    "bg": {
+                        "goals": [(11, 1), (11, 11), (1, 11), (5, 9)],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(4)],
+                    },
                 },
                 "G_0": {
                     "goals": [(11, 1), (11, 11), (1, 11), (5, 9)],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(4)
-                    ], # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(QLEARNING, 100000) for _ in range(4)],
                 },
             },
             "L3": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [
-                        (11, 1),
-                        (11, 11),
-                        (1, 11),
-                        (7, 11),
-                        (8, 1),
-                        (10, 6),
-                        (6, 9),
-                        (11, 3),
-                        (11, 5),
-                    ],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(9)
-                    ], # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (11, 1),
+                            (11, 11),
+                            (1, 11),
+                            (7, 11),
+                            (8, 1),
+                            (10, 6),
+                            (6, 9),
+                            (11, 3),
+                            (11, 5),
+                        ],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(9)],
+                    },
+                    "bg": {
+                        "goals": [(11, 1), (11, 11), (1, 11), (5, 9), (6, 1)],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(5)],
+                    },
                 },
                 "G_0": {
                     "goals": [(11, 1), (11, 11), (1, 11), (5, 9), (6, 1)],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(5)
-                    ], # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(QLEARNING, 100000) for _ in range(5)],
                 },
             },
             "L4": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [
-                        (11, 1),
-                        (11, 11),
-                        (1, 11),
-                        (7, 11),
-                        (8, 1),
-                        (10, 6),
-                        (6, 9),
-                        (11, 3),
-                        (11, 5),
-                    ],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(9)
-                    ], # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (11, 1),
+                            (11, 11),
+                            (1, 11),
+                            (7, 11),
+                            (8, 1),
+                            (10, 6),
+                            (6, 9),
+                            (11, 3),
+                            (11, 5),
+                        ],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(9)],
+                    },
+                    "bg": {
+                        "goals": [(11, 1), (11, 11), (1, 11), (5, 9), (6, 1), (11, 3)],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(6)],
+                    },
                 },
                 "G_0": {
                     "goals": [(11, 1), (11, 11), (1, 11), (5, 9), (6, 1), (11, 3)],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(6)
-                    ], # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(QLEARNING, 100000) for _ in range(6)],
                 },
             },
             "L5": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [
-                        (11, 1),
-                        (11, 11),
-                        (1, 11),
-                        (7, 11),
-                        (8, 1),
-                        (10, 6),
-                        (6, 9),
-                        (11, 3),
-                        (11, 5),
-                    ],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(9)
-                    ], # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (11, 1),
+                            (11, 11),
+                            (1, 11),
+                            (7, 11),
+                            (8, 1),
+                            (10, 6),
+                            (6, 9),
+                            (11, 3),
+                            (11, 5),
+                        ],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(9)],
+                    },
+                    "bg": {
+                        "goals": [
+                            (11, 1),
+                            (11, 11),
+                            (1, 11),
+                            (5, 9),
+                            (6, 1),
+                            (11, 3),
+                            (11, 5),
+                        ],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(7)],
+                    },
                 },
                 "G_0": {
                     "goals": [
@@ -475,81 +697,137 @@ PROBLEMS = {
                         (11, 3),
                         (11, 5),
                     ],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(7)
-                    ], # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(QLEARNING, 100000) for _ in range(7)],
                 },
             },
         },
         "MiniGrid-LavaCrossingS9N2": {
             "L1": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [(7, 7), (1, 7), (7, 1), (1, 3), (2, 5), (5, 2), (6, 5)],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(7)
-                    ], # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (7, 7),
+                            (1, 7),
+                            (7, 1),
+                            (1, 3),
+                            (2, 5),
+                            (5, 2),
+                            (6, 5),
+                        ],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(7)],
+                    },
+                    "bg": {
+                        "goals": [(1, 3), (6, 5), (4, 7)],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(3)],
+                    },
                 },
                 "G_0": {
                     "goals": [(1, 3), (6, 5), (4, 7)],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(3)
-                    ], # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(QLEARNING, 100000) for _ in range(3)],
                 },
             },
             "L2": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [(7, 7), (1, 7), (7, 1), (1, 3), (2, 5), (5, 2), (6, 5)],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(7)
-                    ], # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (7, 7),
+                            (1, 7),
+                            (7, 1),
+                            (1, 3),
+                            (2, 5),
+                            (5, 2),
+                            (6, 5),
+                        ],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(7)],
+                    },
+                    "bg": {
+                        "goals": [(1, 3), (6, 5), (4, 7), (2, 5)],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(4)],
+                    },
                 },
                 "G_0": {
                     "goals": [(1, 3), (6, 5), (4, 7), (2, 5)],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(4)
-                    ], # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(QLEARNING, 100000) for _ in range(4)],
                 },
             },
             "L3": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [(7, 7), (1, 7), (7, 1), (1, 3), (2, 5), (5, 2), (6, 5)],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(7)
-                    ], # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (7, 7),
+                            (1, 7),
+                            (7, 1),
+                            (1, 3),
+                            (2, 5),
+                            (5, 2),
+                            (6, 5),
+                        ],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(7)],
+                    },
+                    "bg": {
+                        "goals": [(1, 3), (6, 5), (4, 7), (2, 5), (5, 2)],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(5)],
+                    },
                 },
                 "G_0": {
                     "goals": [(1, 3), (6, 5), (4, 7), (2, 5), (5, 2)],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(5)
-                    ], # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(QLEARNING, 100000) for _ in range(5)],
                 },
             },
             "L4": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [(7, 7), (1, 7), (7, 1), (1, 3), (2, 5), (5, 2), (6, 5)],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(7)
-                    ], # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (7, 7),
+                            (1, 7),
+                            (7, 1),
+                            (1, 3),
+                            (2, 5),
+                            (5, 2),
+                            (6, 5),
+                        ],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(7)],
+                    },
+                    "bg": {
+                        "goals": [(1, 3), (6, 5), (4, 7), (2, 5), (5, 2), (4, 5)],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(6)],
+                    },
                 },
                 "G_0": {
                     "goals": [(1, 3), (6, 5), (4, 7), (2, 5), (5, 2), (4, 5)],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(6)
-                    ], # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(QLEARNING, 100000) for _ in range(6)],
                 },
             },
             "L5": {
-                "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
-                    "goals": [(7, 7), (1, 7), (7, 1), (1, 3), (2, 5), (5, 2), (6, 5)],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(7)
-                    ], # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
+                "base": {
+                    "gc": {
+                        "goals": [
+                            (7, 7),
+                            (1, 7),
+                            (7, 1),
+                            (1, 3),
+                            (2, 5),
+                            (5, 2),
+                            (6, 5),
+                        ],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(7)],
+                    },
+                    "bg": {
+                        "goals": [
+                            (1, 3),
+                            (6, 5),
+                            (4, 7),
+                            (2, 5),
+                            (5, 2),
+                            (4, 5),
+                            (1, 1),
+                        ],
+                        "train_configs": [(QLEARNING, 100000) for _ in range(7)],
+                    },
                 },
                 "G_0": {
                     "goals": [(1, 3), (6, 5), (4, 7), (2, 5), (5, 2), (4, 5), (1, 1)],
-                    "train_configs": [
-                        (QLEARNING, 100000) for _ in range(7)
-                    ], # algorithms that use GC agent to generate sequences don't use this
+                    "train_configs": [(QLEARNING, 100000) for _ in range(7)],
                 },
             },
         },
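Every consts.py hunk above applies the same mechanical restructuring: each per-level "base" entry, which previously held a single goals/train_configs pair for the goal-conditioned (GC) agent (per the deleted inline comments), now nests two sub-configs, "gc" and "bg", each with its own goals and train_configs. In the "bg" blocks the number of train_configs always matches the number of goals, which suggests one base agent per goal. Below is a minimal sketch of reading the restructured dict; it assumes only the module paths visible in this diff (gr_libs.problems.consts and gr_libs.environment.environment), and the variable names are illustrative, not part of the gr_libs API.

    # Minimal sketch, assuming only what this diff shows.
    from gr_libs.environment.environment import PARKING
    from gr_libs.problems.consts import PROBLEMS

    level = PROBLEMS[PARKING]["Parking-S-14-PC-"]["L1"]

    # New in 0.2.5: "base" splits into "gc" and "bg" sub-configs.
    gc = level["base"]["gc"]  # one GC agent; goals are samples of possible goals
    bg = level["base"]["bg"]  # appears to pair one (algo, timesteps) per goal

    algo, timesteps = gc["train_configs"][0]  # (PPO, 200000) per the first hunk
    assert len(bg["goals"]) == len(bg["train_configs"])  # 5 and 5 for L1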