gr-libs 0.1.7.post0__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. evaluation/analyze_results_cross_alg_cross_domain.py +236 -246
  2. evaluation/create_minigrid_map_image.py +10 -6
  3. evaluation/file_system.py +16 -5
  4. evaluation/generate_experiments_results.py +123 -74
  5. evaluation/generate_experiments_results_new_ver1.py +227 -243
  6. evaluation/generate_experiments_results_new_ver2.py +317 -317
  7. evaluation/generate_task_specific_statistics_plots.py +481 -253
  8. evaluation/get_plans_images.py +41 -26
  9. evaluation/increasing_and_decreasing_.py +97 -56
  10. gr_libs/__init__.py +2 -1
  11. gr_libs/_version.py +2 -2
  12. gr_libs/environment/__init__.py +16 -8
  13. gr_libs/environment/environment.py +167 -39
  14. gr_libs/environment/utils/utils.py +22 -12
  15. gr_libs/metrics/__init__.py +5 -0
  16. gr_libs/metrics/metrics.py +76 -34
  17. gr_libs/ml/__init__.py +2 -0
  18. gr_libs/ml/agent.py +21 -6
  19. gr_libs/ml/base/__init__.py +1 -1
  20. gr_libs/ml/base/rl_agent.py +13 -10
  21. gr_libs/ml/consts.py +1 -1
  22. gr_libs/ml/neural/deep_rl_learner.py +433 -352
  23. gr_libs/ml/neural/utils/__init__.py +1 -1
  24. gr_libs/ml/neural/utils/dictlist.py +3 -3
  25. gr_libs/ml/neural/utils/penv.py +5 -2
  26. gr_libs/ml/planner/mcts/mcts_model.py +524 -302
  27. gr_libs/ml/planner/mcts/utils/__init__.py +1 -1
  28. gr_libs/ml/planner/mcts/utils/node.py +11 -7
  29. gr_libs/ml/planner/mcts/utils/tree.py +14 -10
  30. gr_libs/ml/sequential/__init__.py +1 -1
  31. gr_libs/ml/sequential/lstm_model.py +256 -175
  32. gr_libs/ml/tabular/state.py +7 -7
  33. gr_libs/ml/tabular/tabular_q_learner.py +123 -73
  34. gr_libs/ml/tabular/tabular_rl_agent.py +20 -19
  35. gr_libs/ml/utils/__init__.py +8 -2
  36. gr_libs/ml/utils/format.py +78 -70
  37. gr_libs/ml/utils/math.py +2 -1
  38. gr_libs/ml/utils/other.py +1 -1
  39. gr_libs/ml/utils/storage.py +88 -28
  40. gr_libs/problems/consts.py +1549 -1227
  41. gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +145 -80
  42. gr_libs/recognizer/graml/gr_dataset.py +209 -110
  43. gr_libs/recognizer/graml/graml_recognizer.py +431 -240
  44. gr_libs/recognizer/recognizer.py +38 -27
  45. gr_libs/recognizer/utils/__init__.py +1 -1
  46. gr_libs/recognizer/utils/format.py +8 -3
  47. {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/METADATA +1 -1
  48. gr_libs-0.1.8.dist-info/RECORD +70 -0
  49. {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/WHEEL +1 -1
  50. tests/test_gcdraco.py +10 -0
  51. tests/test_graml.py +8 -4
  52. tests/test_graql.py +2 -1
  53. tutorials/gcdraco_panda_tutorial.py +66 -0
  54. tutorials/gcdraco_parking_tutorial.py +61 -0
  55. tutorials/graml_minigrid_tutorial.py +42 -12
  56. tutorials/graml_panda_tutorial.py +35 -14
  57. tutorials/graml_parking_tutorial.py +37 -20
  58. tutorials/graml_point_maze_tutorial.py +33 -13
  59. tutorials/graql_minigrid_tutorial.py +31 -15
  60. gr_libs-0.1.7.post0.dist-info/RECORD +0 -67
  61. {gr_libs-0.1.7.post0.dist-info → gr_libs-0.1.8.dist-info}/top_level.txt +0 -0
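The largest single change below is gr_libs/problems/consts.py, which reformats the PROBLEMS registry: a nested dict mapping domain → environment → level (L1–L5) → goal/train-config specs ("base", "G_0", and generated "I_0_*" entries). As a rough orientation only, the sketch below shows how a registry of this shape can be read; the get_goal_set helper is a hypothetical illustration for this page, not part of the gr_libs public API.

# Hypothetical illustration of the registry shape visible in the diff below;
# `get_goal_set` is an assumed helper for this example, not a gr_libs API.
from gr_libs.environment.environment import POINT_MAZE
from gr_libs.problems.consts import PROBLEMS

def get_goal_set(domain, env_name, level, key="G_0"):
    """Return the goals and per-goal train configs for one experiment level."""
    spec = PROBLEMS[domain][env_name][level][key]
    return spec["goals"], spec["train_configs"]

# Per the registry below this resolves to goals [(4, 4), (7, 3), (3, 7)]
# and three (SAC, 400000) train configs.
goals, configs = get_goal_set(POINT_MAZE, "PointMaze-FourRoomsEnvDense-11x11", "L1")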
@@ -1,1244 +1,1566 @@
  import numpy as np
  from stable_baselines3 import PPO, SAC, TD3
- from gr_libs.environment.environment import MINIGRID, PARKING, PANDA, POINT_MAZE, QLEARNING, PandaProperty
+ from gr_libs.environment.environment import (
+ MINIGRID,
+ PARKING,
+ PANDA,
+ POINT_MAZE,
+ QLEARNING,
+ PandaProperty,
+ )
 
 
  PROBLEMS = {
- PARKING: {
+ PARKING: {
  "Parking-S-14-PC-": {
- "L1": {
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
- "goals": [i for i in range(1,21)],
- "train_configs": [(PPO, 200000)]
- },
- "G_0": {
- "goals": ["1", "11", "21"],
- "train_configs": [(SAC, 200000) for _ in range(3)] # algorithms that use GC agent to generate sequences don't use this
- }
- },
- "L2": {
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
- "goals": [i for i in range(1,21)],
- "train_configs": [(PPO, 200000)]
- },
- "G_0": {
- "goals": ["1", "8", "14", "21"],
- "train_configs": [(SAC, 200000) for _ in range(4)]
- }
- },
- "L3": {
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
- "goals": [i for i in range(1,21)],
- "train_configs": [(PPO, 200000)]
- },
- "G_0": {
- "goals": ["1", "8", "11", "18"],
- "train_configs": [(SAC, 200000) for _ in range(4)]
- }
- },
- "L4": {
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
- "goals": [i for i in range(1,21)],
- "train_configs": [(PPO, 200000)]
- },
- "G_0": {
- "goals": ["4", "8", "11", "14", "18"],
- "train_configs": [(SAC, 200000) for _ in range(5)]
- }
- },
- "L5": {
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
- "goals": [i for i in range(1,21)],
- "train_configs": [(PPO, 200000)]
- },
- "G_0": {
- "goals": ["1", "4", "8", "11", "14", "18", "21"],
- "train_configs": [(SAC, 200000) for _ in range(7)]
- }
- }
- }
- }, # PARKING
- PANDA: {
- "PandaMyReachDense": {
- "L1": {
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
- "goals": [np.array([PandaProperty.sample_goal()]) for _ in range(1,30)],
- "train_configs": [(SAC, 800000)]
- },
- "G_0": {
- "goals": [np.array([[-0.1, -0.1, 0.1]]), np.array([[-0.1, 0.1, 0.1]]), np.array([[0.2, 0.2, 0.1]])],
- "train_configs": [(SAC, 00000), (SAC, 200000), (SAC, 300000)] # algorithms that use GC agent to generate sequences don't use this
- }
- },
- "L2": {
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
- "goals": [np.array([PandaProperty.sample_goal()]) for _ in range(1,30)],
- "train_configs": [(SAC, 800000)]
- },
- "G_0": {
- "goals": [np.array([[-0.5, -0.5, 0.1]]), np.array([[-0.5, 0.2, 0.1]]), np.array([[-0.1, 0.1, 0.1]]), np.array([[0.1, -0.1, 0.1]])],
- "train_configs": [(SAC, 400000), (SAC, 400000), (SAC, 400000), (SAC, 400000)] # algorithms that use GC agent to generate sequences don't use this
- }
- },
- "L3": {
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
- "goals": [np.array([PandaProperty.sample_goal()]) for _ in range(1,30)],
- "train_configs": [(SAC, 800000)]
- },
- "G_0": {
- "goals": [np.array([[-0.5, -0.5, 0.1]]), np.array([[-0.1, -0.1, 0.1]]), np.array([[-0.5, 0.2, 0.1]]), np.array([[-0.1, 0.1, 0.1]]), np.array([[0.2, -0.2, 0.1]]), np.array([[0.2, 0.2, 0.1]])],
- "train_configs": [(SAC, 400000) for _ in range(6)] # algorithms that use GC agent to generate sequences don't use this
- }
- },
- "L4": {
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
- "goals": [np.array([PandaProperty.sample_goal()]) for _ in range(1,30)],
- "train_configs": [(SAC, 800000)]
- },
- "G_0": {
- "goals": [np.array([[-0.3, -0.3, 0.1]]), np.array([[-0.1, -0.1, 0.1]]), np.array([[-0.3, 0.2, 0.1]]), np.array([[-0.1, 0.1, 0.1]]), np.array([[0.1, -0.1, 0.1]]), np.array([[0.2, 0.2, 0.1]])],
- "train_configs": [(SAC, 400000) for _ in range(6)] # algorithms that use GC agent to generate sequences don't use this
- }
- },
- "L5": {
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
- "goals": [np.array([PandaProperty.sample_goal()]) for _ in range(1,30)],
- "train_configs": [(SAC, 800000)]
- },
- "G_0": {
- "goals": [np.array([[-0.5, -0.5, 0.1]]), np.array([[-0.3, -0.3, 0.1]]), np.array([[-0.1, -0.1, 0.1]]), np.array([[-0.5, 0.2, 0.1]]), np.array([[-0.3, 0.2, 0.1]]), np.array([[-0.1, 0.1, 0.1]]), np.array([[0.2, -0.2, 0.1]]), np.array([[0.1, -0.1, 0.1]]), np.array([[0.2, 0.2, 0.1]])],
- "train_configs": [(SAC, 400000) for _ in range(9)] # algorithms that use GC agent to generate sequences don't use this
- }
- }
- }
- }, # PANDA
- POINT_MAZE: {
+ "L1": {
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
+ "goals": [i for i in range(1, 21)],
+ "train_configs": [(PPO, 200000)],
+ },
+ "G_0": {
+ "goals": ["1", "11", "21"],
+ "train_configs": [
+ (SAC, 200000) for _ in range(3)
+ ], # algorithms that use GC agent to generate sequences don't use this
+ },
+ },
+ "L2": {
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
+ "goals": [i for i in range(1, 21)],
+ "train_configs": [(PPO, 200000)],
+ },
+ "G_0": {
+ "goals": ["1", "8", "14", "21"],
+ "train_configs": [(SAC, 200000) for _ in range(4)],
+ },
+ },
+ "L3": {
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
+ "goals": [i for i in range(1, 21)],
+ "train_configs": [(PPO, 200000)],
+ },
+ "G_0": {
+ "goals": ["1", "8", "11", "18"],
+ "train_configs": [(SAC, 200000) for _ in range(4)],
+ },
+ },
+ "L4": {
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
+ "goals": [i for i in range(1, 21)],
+ "train_configs": [(PPO, 200000)],
+ },
+ "G_0": {
+ "goals": ["4", "8", "11", "14", "18"],
+ "train_configs": [(SAC, 200000) for _ in range(5)],
+ },
+ },
+ "L5": {
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
+ "goals": [i for i in range(1, 21)],
+ "train_configs": [(PPO, 200000)],
+ },
+ "G_0": {
+ "goals": ["1", "4", "8", "11", "14", "18", "21"],
+ "train_configs": [(SAC, 200000) for _ in range(7)],
+ },
+ },
+ }
+ }, # PARKING
+ PANDA: {
+ "PandaMyReachDense": {
+ "L1": {
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
+ "goals": [
+ np.array([PandaProperty.sample_goal()]) for _ in range(1, 30)
+ ],
+ "train_configs": [(SAC, 800000)],
+ },
+ "G_0": {
+ "goals": [
+ np.array([[-0.1, -0.1, 0.1]]),
+ np.array([[-0.1, 0.1, 0.1]]),
+ np.array([[0.2, 0.2, 0.1]]),
+ ],
+ "train_configs": [
+ (SAC, 00000),
+ (SAC, 200000),
+ (SAC, 300000),
+ ], # algorithms that use GC agent to generate sequences don't use this
+ },
+ },
+ "L2": {
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
+ "goals": [
+ np.array([PandaProperty.sample_goal()]) for _ in range(1, 30)
+ ],
+ "train_configs": [(SAC, 800000)],
+ },
+ "G_0": {
+ "goals": [
+ np.array([[-0.5, -0.5, 0.1]]),
+ np.array([[-0.5, 0.2, 0.1]]),
+ np.array([[-0.1, 0.1, 0.1]]),
+ np.array([[0.1, -0.1, 0.1]]),
+ ],
+ "train_configs": [
+ (SAC, 400000),
+ (SAC, 400000),
+ (SAC, 400000),
+ (SAC, 400000),
+ ], # algorithms that use GC agent to generate sequences don't use this
+ },
+ },
+ "L3": {
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
+ "goals": [
+ np.array([PandaProperty.sample_goal()]) for _ in range(1, 30)
+ ],
+ "train_configs": [(SAC, 800000)],
+ },
+ "G_0": {
+ "goals": [
+ np.array([[-0.5, -0.5, 0.1]]),
+ np.array([[-0.1, -0.1, 0.1]]),
+ np.array([[-0.5, 0.2, 0.1]]),
+ np.array([[-0.1, 0.1, 0.1]]),
+ np.array([[0.2, -0.2, 0.1]]),
+ np.array([[0.2, 0.2, 0.1]]),
+ ],
+ "train_configs": [
+ (SAC, 400000) for _ in range(6)
+ ], # algorithms that use GC agent to generate sequences don't use this
+ },
+ },
+ "L4": {
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
+ "goals": [
+ np.array([PandaProperty.sample_goal()]) for _ in range(1, 30)
+ ],
+ "train_configs": [(SAC, 800000)],
+ },
+ "G_0": {
+ "goals": [
+ np.array([[-0.3, -0.3, 0.1]]),
+ np.array([[-0.1, -0.1, 0.1]]),
+ np.array([[-0.3, 0.2, 0.1]]),
+ np.array([[-0.1, 0.1, 0.1]]),
+ np.array([[0.1, -0.1, 0.1]]),
+ np.array([[0.2, 0.2, 0.1]]),
+ ],
+ "train_configs": [
+ (SAC, 400000) for _ in range(6)
+ ], # algorithms that use GC agent to generate sequences don't use this
+ },
+ },
+ "L5": {
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
+ "goals": [
+ np.array([PandaProperty.sample_goal()]) for _ in range(1, 30)
+ ],
+ "train_configs": [(SAC, 800000)],
+ },
+ "G_0": {
+ "goals": [
+ np.array([[-0.5, -0.5, 0.1]]),
+ np.array([[-0.3, -0.3, 0.1]]),
+ np.array([[-0.1, -0.1, 0.1]]),
+ np.array([[-0.5, 0.2, 0.1]]),
+ np.array([[-0.3, 0.2, 0.1]]),
+ np.array([[-0.1, 0.1, 0.1]]),
+ np.array([[0.2, -0.2, 0.1]]),
+ np.array([[0.1, -0.1, 0.1]]),
+ np.array([[0.2, 0.2, 0.1]]),
+ ],
+ "train_configs": [
+ (SAC, 400000) for _ in range(9)
+ ], # algorithms that use GC agent to generate sequences don't use this
+ },
+ },
+ }
+ }, # PANDA
+ POINT_MAZE: {
116
183
  "PointMaze-FourRoomsEnvDense-11x11": {
117
- "L1": {
118
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
119
- "goals": [(9,1), (9,9), (1,9), (3,3), (3,4), (8,2), (3,7), (2,8)],
120
- "train_configs": [(SAC, 400000) for _ in range(8)]
121
- },
122
- "G_0": {
123
- "goals": [(4,4), (7,3), (3,7)],
124
- "train_configs": [(SAC, 400000) for _ in range(3)] # algorithms that use GC agent to generate sequences don't use this
125
- }
126
- },
127
- "L2": {
128
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
129
- "goals": [(9,1), (9,9), (1,9), (3,3), (3,4), (8,2), (3,7), (2,8)],
130
- "train_configs": [(SAC, 400000) for _ in range(8)]
131
- },
132
- "G_0": {
133
- "goals": [(4,4), (7,3), (3,7), (8,2)],
134
- "train_configs": [(SAC, 400000) for _ in range(4)] # algorithms that use GC agent to generate sequences don't use this
135
- }
136
- },
137
- "L3": {
138
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
139
- "goals": [(9,1), (9,9), (1,9), (3,3), (3,4), (8,2), (3,7), (2,8)],
140
- "train_configs": [(SAC, 400000) for _ in range(8)]
141
- },
142
- "G_0": {
143
- "goals": [(4,4), (7,3), (3,7), (8,2), (2,8)],
144
- "train_configs": [(SAC, 400000) for _ in range(5)] # algorithms that use GC agent to generate sequences don't use this
145
- }
146
- },
147
- "L4": {
148
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
149
- "goals": [(9,1), (9,9), (1,9), (3,3), (3,4), (8,2), (3,7), (2,8)],
150
- "train_configs": [(SAC, 400000) for _ in range(8)]
151
- },
152
- "G_0": {
153
- "goals": [(4,4), (7,3), (3,7), (8,2), (2,8), (3,4)],
154
- "train_configs": [(SAC, 400000) for _ in range(6)] # algorithms that use GC agent to generate sequences don't use this
155
- }
156
- },
157
- "L5": {
158
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
159
- "goals": [(9,1), (9,9), (1,9), (3,3), (3,4), (8,2), (3,7), (2,8)],
160
- "train_configs": [(SAC, 400000) for _ in range(8)]
161
- },
162
- "G_0": {
163
- "goals": [(4,4), (7,3), (3,7), (8,2), (2,8), (3,4), (4,3)],
164
- "train_configs": [(SAC, 400000) for _ in range(7)] # algorithms that use GC agent to generate sequences don't use this
165
- }
166
- }
167
- },
168
- "PointMaze-ObstaclesEnvDense-11x11": {
169
- "L1": {
170
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
171
- "goals": [(5,1), (9,9), (1,5), (6,4), (4,6), (6,6), (7,7)],
172
- "train_configs": [(SAC, 400000) for _ in range(7)]
173
- },
174
- "G_0": {
175
- "goals": [(5,5), (7,4), (4,7)],
176
- "train_configs": [(SAC, 400000) for _ in range(3)] # algorithms that use GC agent to generate sequences don't use this
177
- }
178
- },
179
- "L2": {
180
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
181
- "goals": [(5,1), (9,9), (1,5), (6,4), (4,6), (6,6), (7,7)],
182
- "train_configs": [(SAC, 400000) for _ in range(7)]
183
- },
184
- "G_0": {
185
- "goals": [(5,5), (3,6), (7,4)],
186
- "train_configs": [(SAC, 400000) for _ in range(3)] # algorithms that use GC agent to generate sequences don't use this
187
- }
188
- },
189
- "L3": {
190
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
191
- "goals": [(5,1), (9,9), (1,5), (6,4), (4,6), (6,6), (7,7)],
192
- "train_configs": [(SAC, 400000) for _ in range(7)]
193
- },
194
- "G_0": {
195
- "goals": [(5,5), (3,6), (7,4), (4,7)],
196
- "train_configs": [(SAC, 400000) for _ in range(4)] # algorithms that use GC agent to generate sequences don't use this
197
- }
198
- },
199
- "L4": {
200
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
201
- "goals": [(5,1), (9,9), (1,5), (6,4), (4,6), (6,6), (7,7)],
202
- "train_configs": [(SAC, 400000) for _ in range(7)]
203
- },
204
- "G_0": {
205
- "goals": [(3,6), (6,3), (7,4), (4,7), (8,8)],
206
- "train_configs": [(SAC, 400000) for _ in range(5)] # algorithms that use GC agent to generate sequences don't use this
207
- }
208
- },
209
- "L5": {
210
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
211
- "goals": [(5,1), (9,9), (1,5), (6,4), (4,6), (6,6), (7,7)],
212
- "train_configs": [(SAC, 400000) for _ in range(7)]
213
- },
214
- "G_0": {
215
- "goals": [(5,5), (3,6), (6,3), (7,4), (4,7), (8,8)],
216
- "train_configs": [(SAC, 400000) for _ in range(6)] # algorithms that use GC agent to generate sequences don't use this
217
- }
218
- }
219
- }
220
- }, # POINT_MAZE
221
- MINIGRID: {
184
+ "L1": {
185
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
186
+ "goals": [
187
+ (9, 1),
188
+ (9, 9),
189
+ (1, 9),
190
+ (3, 3),
191
+ (3, 4),
192
+ (8, 2),
193
+ (3, 7),
194
+ (2, 8),
195
+ ],
196
+ "train_configs": [(SAC, 400000) for _ in range(8)],
197
+ },
198
+ "G_0": {
199
+ "goals": [(4, 4), (7, 3), (3, 7)],
200
+ "train_configs": [
201
+ (SAC, 400000) for _ in range(3)
202
+ ], # algorithms that use GC agent to generate sequences don't use this
203
+ },
204
+ },
205
+ "L2": {
206
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
207
+ "goals": [
208
+ (9, 1),
209
+ (9, 9),
210
+ (1, 9),
211
+ (3, 3),
212
+ (3, 4),
213
+ (8, 2),
214
+ (3, 7),
215
+ (2, 8),
216
+ ],
217
+ "train_configs": [(SAC, 400000) for _ in range(8)],
218
+ },
219
+ "G_0": {
220
+ "goals": [(4, 4), (7, 3), (3, 7), (8, 2)],
221
+ "train_configs": [
222
+ (SAC, 400000) for _ in range(4)
223
+ ], # algorithms that use GC agent to generate sequences don't use this
224
+ },
225
+ },
226
+ "L3": {
227
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
228
+ "goals": [
229
+ (9, 1),
230
+ (9, 9),
231
+ (1, 9),
232
+ (3, 3),
233
+ (3, 4),
234
+ (8, 2),
235
+ (3, 7),
236
+ (2, 8),
237
+ ],
238
+ "train_configs": [(SAC, 400000) for _ in range(8)],
239
+ },
240
+ "G_0": {
241
+ "goals": [(4, 4), (7, 3), (3, 7), (8, 2), (2, 8)],
242
+ "train_configs": [
243
+ (SAC, 400000) for _ in range(5)
244
+ ], # algorithms that use GC agent to generate sequences don't use this
245
+ },
246
+ },
247
+ "L4": {
248
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
249
+ "goals": [
250
+ (9, 1),
251
+ (9, 9),
252
+ (1, 9),
253
+ (3, 3),
254
+ (3, 4),
255
+ (8, 2),
256
+ (3, 7),
257
+ (2, 8),
258
+ ],
259
+ "train_configs": [(SAC, 400000) for _ in range(8)],
260
+ },
261
+ "G_0": {
262
+ "goals": [(4, 4), (7, 3), (3, 7), (8, 2), (2, 8), (3, 4)],
263
+ "train_configs": [
264
+ (SAC, 400000) for _ in range(6)
265
+ ], # algorithms that use GC agent to generate sequences don't use this
266
+ },
267
+ },
268
+ "L5": {
269
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
270
+ "goals": [
271
+ (9, 1),
272
+ (9, 9),
273
+ (1, 9),
274
+ (3, 3),
275
+ (3, 4),
276
+ (8, 2),
277
+ (3, 7),
278
+ (2, 8),
279
+ ],
280
+ "train_configs": [(SAC, 400000) for _ in range(8)],
281
+ },
282
+ "G_0": {
283
+ "goals": [(4, 4), (7, 3), (3, 7), (8, 2), (2, 8), (3, 4), (4, 3)],
284
+ "train_configs": [
285
+ (SAC, 400000) for _ in range(7)
286
+ ], # algorithms that use GC agent to generate sequences don't use this
287
+ },
288
+ },
289
+ },
290
+ "PointMaze-ObstaclesEnvDense-11x11": {
291
+ "L1": {
292
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
293
+ "goals": [(5, 1), (9, 9), (1, 5), (6, 4), (4, 6), (6, 6), (7, 7)],
294
+ "train_configs": [(SAC, 400000) for _ in range(7)],
295
+ },
296
+ "G_0": {
297
+ "goals": [(5, 5), (7, 4), (4, 7)],
298
+ "train_configs": [
299
+ (SAC, 400000) for _ in range(3)
300
+ ], # algorithms that use GC agent to generate sequences don't use this
301
+ },
302
+ },
303
+ "L2": {
304
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
305
+ "goals": [(5, 1), (9, 9), (1, 5), (6, 4), (4, 6), (6, 6), (7, 7)],
306
+ "train_configs": [(SAC, 400000) for _ in range(7)],
307
+ },
308
+ "G_0": {
309
+ "goals": [(5, 5), (3, 6), (7, 4)],
310
+ "train_configs": [
311
+ (SAC, 400000) for _ in range(3)
312
+ ], # algorithms that use GC agent to generate sequences don't use this
313
+ },
314
+ },
315
+ "L3": {
316
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
317
+ "goals": [(5, 1), (9, 9), (1, 5), (6, 4), (4, 6), (6, 6), (7, 7)],
318
+ "train_configs": [(SAC, 400000) for _ in range(7)],
319
+ },
320
+ "G_0": {
321
+ "goals": [(5, 5), (3, 6), (7, 4), (4, 7)],
322
+ "train_configs": [
323
+ (SAC, 400000) for _ in range(4)
324
+ ], # algorithms that use GC agent to generate sequences don't use this
325
+ },
326
+ },
327
+ "L4": {
328
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
329
+ "goals": [(5, 1), (9, 9), (1, 5), (6, 4), (4, 6), (6, 6), (7, 7)],
330
+ "train_configs": [(SAC, 400000) for _ in range(7)],
331
+ },
332
+ "G_0": {
333
+ "goals": [(3, 6), (6, 3), (7, 4), (4, 7), (8, 8)],
334
+ "train_configs": [
335
+ (SAC, 400000) for _ in range(5)
336
+ ], # algorithms that use GC agent to generate sequences don't use this
337
+ },
338
+ },
339
+ "L5": {
340
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
341
+ "goals": [(5, 1), (9, 9), (1, 5), (6, 4), (4, 6), (6, 6), (7, 7)],
342
+ "train_configs": [(SAC, 400000) for _ in range(7)],
343
+ },
344
+ "G_0": {
345
+ "goals": [(5, 5), (3, 6), (6, 3), (7, 4), (4, 7), (8, 8)],
346
+ "train_configs": [
347
+ (SAC, 400000) for _ in range(6)
348
+ ], # algorithms that use GC agent to generate sequences don't use this
349
+ },
350
+ },
351
+ },
352
+ }, # POINT_MAZE
353
+ MINIGRID: {
222
354
  "MiniGrid-SimpleCrossingS13N4": {
223
- "L1": {
224
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
225
- "goals": [(11,1), (11,11), (1,11), (7,11), (8,1), (10,6), (6,9), (11,3), (11,5)],
226
- "train_configs": [(QLEARNING, 100000) for _ in range(9)] # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
227
- },
228
- "G_0": {
229
- "goals": [(11,1), (11,11), (1,11)],
230
- "train_configs": [(QLEARNING, 100000) for _ in range(3)] # algorithms that use GC agent to generate sequences don't use this
231
- }
232
- },
233
- "L2": {
234
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
235
- "goals": [(11,1), (11,11), (1,11), (7,11), (8,1), (10,6), (6,9), (11,3), (11,5)],
236
- "train_configs": [(QLEARNING, 100000) for _ in range(9)] # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
237
- },
238
- "G_0": {
239
- "goals": [(11,1), (11,11), (1,11), (5,9)],
240
- "train_configs": [(QLEARNING, 100000) for _ in range(4)] # algorithms that use GC agent to generate sequences don't use this
241
- }
242
- },
243
- "L3": {
244
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
245
- "goals": [(11,1), (11,11), (1,11), (7,11), (8,1), (10,6), (6,9), (11,3), (11,5)],
246
- "train_configs": [(QLEARNING, 100000) for _ in range(9)] # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
247
- },
248
- "G_0": {
249
- "goals": [(11,1), (11,11), (1,11), (5,9), (6,1)],
250
- "train_configs": [(QLEARNING, 100000) for _ in range(5)] # algorithms that use GC agent to generate sequences don't use this
251
- }
252
- },
253
- "L4": {
254
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
255
- "goals": [(11,1), (11,11), (1,11), (7,11), (8,1), (10,6), (6,9), (11,3), (11,5)],
256
- "train_configs": [(QLEARNING, 100000) for _ in range(9)] # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
257
- },
258
- "G_0": {
259
- "goals": [(11,1), (11,11), (1,11), (5,9), (6,1), (11,3)],
260
- "train_configs": [(QLEARNING, 100000) for _ in range(6)] # algorithms that use GC agent to generate sequences don't use this
261
- }
262
- },
263
- "L5": {
264
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
265
- "goals": [(11,1), (11,11), (1,11), (7,11), (8,1), (10,6), (6,9), (11,3), (11,5)],
266
- "train_configs": [(QLEARNING, 100000) for _ in range(9)] # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
267
- },
268
- "G_0": {
269
- "goals": [(11,1), (11,11), (1,11), (5,9), (6,1), (11,3), (11,5)],
270
- "train_configs": [(QLEARNING, 100000) for _ in range(7)] # algorithms that use GC agent to generate sequences don't use this
271
- }
272
- }
273
- },
274
- "MiniGrid-LavaCrossingS9N2": {
275
- "L1": {
276
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
277
- "goals": [(7,7), (1,7), (7,1), (1,3), (2,5), (5,2), (6,5)],
278
- "train_configs": [(QLEARNING, 100000) for _ in range(7)] # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
279
- },
280
- "G_0": {
281
- "goals": [(1,3), (6,5), (4,7)],
282
- "train_configs": [(QLEARNING, 100000) for _ in range(3)] # algorithms that use GC agent to generate sequences don't use this
283
- }
284
- },
285
- "L2": {
286
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
287
- "goals": [(7,7), (1,7), (7,1), (1,3), (2,5), (5,2), (6,5)],
288
- "train_configs": [(QLEARNING, 100000) for _ in range(7)] # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
289
- },
290
- "G_0": {
291
- "goals": [(1,3), (6,5), (4,7), (2,5)],
292
- "train_configs": [(QLEARNING, 100000) for _ in range(4)] # algorithms that use GC agent to generate sequences don't use this
293
- }
294
- },
295
- "L3": {
296
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
297
- "goals": [(7,7), (1,7), (7,1), (1,3), (2,5), (5,2), (6,5)],
298
- "train_configs": [(QLEARNING, 100000) for _ in range(7)] # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
299
- },
300
- "G_0": {
301
- "goals": [(1,3), (6,5), (4,7), (2,5), (5,2)],
302
- "train_configs": [(QLEARNING, 100000) for _ in range(5)] # algorithms that use GC agent to generate sequences don't use this
303
- }
304
- },
305
- "L4": {
306
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
307
- "goals": [(7,7), (1,7), (7,1), (1,3), (2,5), (5,2), (6,5)],
308
- "train_configs": [(QLEARNING, 100000) for _ in range(7)] # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
309
- },
310
- "G_0": {
311
- "goals": [(1,3), (6,5), (4,7), (2,5), (5,2), (4,5)],
312
- "train_configs": [(QLEARNING, 100000) for _ in range(6)] # algorithms that use GC agent to generate sequences don't use this
313
- }
314
- },
315
- "L5": {
316
- "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
317
- "goals": [(7,7), (1,7), (7,1), (1,3), (2,5), (5,2), (6,5)],
318
- "train_configs": [(QLEARNING, 100000) for _ in range(7)] # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
319
- },
320
- "G_0": {
321
- "goals": [(1,3), (6,5), (4,7), (2,5), (5,2), (4,5), (1,1)],
322
- "train_configs": [(QLEARNING, 100000) for _ in range(7)] # algorithms that use GC agent to generate sequences don't use this
323
- }
324
- }
325
- }
326
- }, # MINIGRID
327
- } # PROBLEMS
355
+ "L1": {
356
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
357
+ "goals": [
358
+ (11, 1),
359
+ (11, 11),
360
+ (1, 11),
361
+ (7, 11),
362
+ (8, 1),
363
+ (10, 6),
364
+ (6, 9),
365
+ (11, 3),
366
+ (11, 5),
367
+ ],
368
+ "train_configs": [
369
+ (QLEARNING, 100000) for _ in range(9)
370
+ ], # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
371
+ },
372
+ "G_0": {
373
+ "goals": [(11, 1), (11, 11), (1, 11)],
374
+ "train_configs": [
375
+ (QLEARNING, 100000) for _ in range(3)
376
+ ], # algorithms that use GC agent to generate sequences don't use this
377
+ },
378
+ },
379
+ "L2": {
380
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
381
+ "goals": [
382
+ (11, 1),
383
+ (11, 11),
384
+ (1, 11),
385
+ (7, 11),
386
+ (8, 1),
387
+ (10, 6),
388
+ (6, 9),
389
+ (11, 3),
390
+ (11, 5),
391
+ ],
392
+ "train_configs": [
393
+ (QLEARNING, 100000) for _ in range(9)
394
+ ], # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
395
+ },
396
+ "G_0": {
397
+ "goals": [(11, 1), (11, 11), (1, 11), (5, 9)],
398
+ "train_configs": [
399
+ (QLEARNING, 100000) for _ in range(4)
400
+ ], # algorithms that use GC agent to generate sequences don't use this
401
+ },
402
+ },
403
+ "L3": {
404
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
405
+ "goals": [
406
+ (11, 1),
407
+ (11, 11),
408
+ (1, 11),
409
+ (7, 11),
410
+ (8, 1),
411
+ (10, 6),
412
+ (6, 9),
413
+ (11, 3),
414
+ (11, 5),
415
+ ],
416
+ "train_configs": [
417
+ (QLEARNING, 100000) for _ in range(9)
418
+ ], # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
419
+ },
420
+ "G_0": {
421
+ "goals": [(11, 1), (11, 11), (1, 11), (5, 9), (6, 1)],
422
+ "train_configs": [
423
+ (QLEARNING, 100000) for _ in range(5)
424
+ ], # algorithms that use GC agent to generate sequences don't use this
425
+ },
426
+ },
427
+ "L4": {
428
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
429
+ "goals": [
430
+ (11, 1),
431
+ (11, 11),
432
+ (1, 11),
433
+ (7, 11),
434
+ (8, 1),
435
+ (10, 6),
436
+ (6, 9),
437
+ (11, 3),
438
+ (11, 5),
439
+ ],
440
+ "train_configs": [
441
+ (QLEARNING, 100000) for _ in range(9)
442
+ ], # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
443
+ },
444
+ "G_0": {
445
+ "goals": [(11, 1), (11, 11), (1, 11), (5, 9), (6, 1), (11, 3)],
446
+ "train_configs": [
447
+ (QLEARNING, 100000) for _ in range(6)
448
+ ], # algorithms that use GC agent to generate sequences don't use this
449
+ },
450
+ },
451
+ "L5": {
452
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
453
+ "goals": [
454
+ (11, 1),
455
+ (11, 11),
456
+ (1, 11),
457
+ (7, 11),
458
+ (8, 1),
459
+ (10, 6),
460
+ (6, 9),
461
+ (11, 3),
462
+ (11, 5),
463
+ ],
464
+ "train_configs": [
465
+ (QLEARNING, 100000) for _ in range(9)
466
+ ], # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
467
+ },
468
+ "G_0": {
469
+ "goals": [
470
+ (11, 1),
471
+ (11, 11),
472
+ (1, 11),
473
+ (5, 9),
474
+ (6, 1),
475
+ (11, 3),
476
+ (11, 5),
477
+ ],
478
+ "train_configs": [
479
+ (QLEARNING, 100000) for _ in range(7)
480
+ ], # algorithms that use GC agent to generate sequences don't use this
481
+ },
482
+ },
483
+ },
484
+ "MiniGrid-LavaCrossingS9N2": {
485
+ "L1": {
486
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
487
+ "goals": [(7, 7), (1, 7), (7, 1), (1, 3), (2, 5), (5, 2), (6, 5)],
488
+ "train_configs": [
489
+ (QLEARNING, 100000) for _ in range(7)
490
+ ], # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
491
+ },
492
+ "G_0": {
493
+ "goals": [(1, 3), (6, 5), (4, 7)],
494
+ "train_configs": [
495
+ (QLEARNING, 100000) for _ in range(3)
496
+ ], # algorithms that use GC agent to generate sequences don't use this
497
+ },
498
+ },
499
+ "L2": {
500
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
501
+ "goals": [(7, 7), (1, 7), (7, 1), (1, 3), (2, 5), (5, 2), (6, 5)],
502
+ "train_configs": [
503
+ (QLEARNING, 100000) for _ in range(7)
504
+ ], # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
505
+ },
506
+ "G_0": {
507
+ "goals": [(1, 3), (6, 5), (4, 7), (2, 5)],
508
+ "train_configs": [
509
+ (QLEARNING, 100000) for _ in range(4)
510
+ ], # algorithms that use GC agent to generate sequences don't use this
511
+ },
512
+ },
513
+ "L3": {
514
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
515
+ "goals": [(7, 7), (1, 7), (7, 1), (1, 3), (2, 5), (5, 2), (6, 5)],
516
+ "train_configs": [
517
+ (QLEARNING, 100000) for _ in range(7)
518
+ ], # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
519
+ },
520
+ "G_0": {
521
+ "goals": [(1, 3), (6, 5), (4, 7), (2, 5), (5, 2)],
522
+ "train_configs": [
523
+ (QLEARNING, 100000) for _ in range(5)
524
+ ], # algorithms that use GC agent to generate sequences don't use this
525
+ },
526
+ },
527
+ "L4": {
528
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
529
+ "goals": [(7, 7), (1, 7), (7, 1), (1, 3), (2, 5), (5, 2), (6, 5)],
530
+ "train_configs": [
531
+ (QLEARNING, 100000) for _ in range(7)
532
+ ], # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
533
+ },
534
+ "G_0": {
535
+ "goals": [(1, 3), (6, 5), (4, 7), (2, 5), (5, 2), (4, 5)],
536
+ "train_configs": [
537
+ (QLEARNING, 100000) for _ in range(6)
538
+ ], # algorithms that use GC agent to generate sequences don't use this
539
+ },
540
+ },
541
+ "L5": {
542
+ "base": { # only 1 train_config for the GC agent. the goals are samples of possible goals in the env.
543
+ "goals": [(7, 7), (1, 7), (7, 1), (1, 3), (2, 5), (5, 2), (6, 5)],
544
+ "train_configs": [
545
+ (QLEARNING, 100000) for _ in range(7)
546
+ ], # TODO Find a way to prevent the 'none' or simply accept only a Q-learning algorithm and assert everything else
547
+ },
548
+ "G_0": {
549
+ "goals": [(1, 3), (6, 5), (4, 7), (2, 5), (5, 2), (4, 5), (1, 1)],
550
+ "train_configs": [
551
+ (QLEARNING, 100000) for _ in range(7)
552
+ ], # algorithms that use GC agent to generate sequences don't use this
553
+ },
554
+ },
555
+ },
556
+ }, # MINIGRID
557
+ } # PROBLEMS
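The loop that follows regenerates the I_0_* inference entries for every combination of observation percentage and consecutive flag. A worked example of the key arithmetic (illustrative stand-alone code only, not part of consts.py; the stride of 6 = 2 flags × 3 goals matches the three-goal levels such as PARKING L1 below):

# Illustrative loop showing how the I_0_* keys below are numbered; not library code.
# With 3 observed goals per level, key index = i*6 + j*3 + k.
percentages = [0.3, 0.5, 0.7, 0.9, 1]
consecutive_flags = [True, False]
for i, perc in enumerate(percentages):
    for j, cons in enumerate(consecutive_flags):
        for k in range(3):  # one entry per observed goal
            key = f"I_0_{i * 6 + j * 3 + k}"
            # e.g. i=1 (perc=0.5), j=1 (cons=False), k=2 -> "I_0_11"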
328
558
 
329
- for i,perc in enumerate([0.3, 0.5, 0.7, 0.9, 1]):
330
- for j, cons in enumerate([True, False]):
331
-
332
- ### PARKING ###
559
+ for i, perc in enumerate([0.3, 0.5, 0.7, 0.9, 1]):
560
+ for j, cons in enumerate([True, False]):
333
561
 
334
- PROBLEMS[PARKING]["Parking-S-14-PC-"]["L1"].update(
335
- {f"I_0_{i*6+j*3}": {
336
- "goal": "1",
337
- "train_config": (TD3, 200000),
338
- "consecutive": cons,
339
- "percentage": perc
340
- },
341
- f"I_0_{i*6+j*3+1}": {
342
- "goal": "11",
343
- "train_config": (TD3, 200000),
344
- "consecutive": cons,
345
- "percentage": perc
346
- },
347
- f"I_0_{i*6+j*3+2}": {
348
- "goal": "21",
349
- "train_config": (TD3, 300000),
350
- "consecutive": cons,
351
- "percentage": perc
352
- }})
353
- PROBLEMS[PARKING]["Parking-S-14-PC-"]["L2"].update(
354
- {f"I_0_{i*8+j*4}": {
355
- "goal": "1",
356
- "train_config": (TD3, 200000),
357
- "consecutive": cons,
358
- "percentage": perc
359
- },
360
- f"I_0_{i*8+j*4+1}": {
361
- "goal": "8",
362
- "train_config": (TD3, 200000),
363
- "consecutive": cons,
364
- "percentage": perc
365
- },
366
- f"I_0_{i*8+j*4+2}": {
367
- "goal": "14",
368
- "train_config": (TD3, 400000),
369
- "consecutive": cons,
370
- "percentage": perc
371
- },
372
- f"I_0_{i*8+j*4+3}": {
373
- "goal": "21",
374
- "train_config": (TD3, 300000),
375
- "consecutive": cons,
376
- "percentage": perc
377
- }})
378
- PROBLEMS[PARKING]["Parking-S-14-PC-"]["L3"].update(
379
- {f"I_0_{i*8+j*4}": {
380
- "goal": "1",
381
- "train_config": (TD3, 200000),
382
- "consecutive": cons,
383
- "percentage": perc
384
- },
385
- f"I_0_{i*8+j*4+1}": {
386
- "goal": "8",
387
- "train_config": (TD3, 200000),
388
- "consecutive": cons,
389
- "percentage": perc
390
- },
391
- f"I_0_{i*8+j*4+2}": {
392
- "goal": "11",
393
- "train_config": (TD3, 400000),
394
- "consecutive": cons,
395
- "percentage": perc
396
- },
397
- f"I_0_{i*8+j*4+3}": {
398
- "goal": "18",
399
- "train_config": (TD3, 300000),
400
- "consecutive": cons,
401
- "percentage": perc
402
- }})
403
- PROBLEMS[PARKING]["Parking-S-14-PC-"]["L4"].update(
404
- {f"I_0_{i*10+j*5}": {
405
- "goal": "4",
406
- "train_config": (TD3, 200000),
407
- "consecutive": cons,
408
- "percentage": perc
409
- },
410
- f"I_0_{i*10+j*5+1}": {
411
- "goal": "8",
412
- "train_config": (TD3, 200000),
413
- "consecutive": cons,
414
- "percentage": perc
415
- },
416
- f"I_0_{i*10+j*5+2}": {
417
- "goal": "11",
418
- "train_config": (TD3, 400000),
419
- "consecutive": cons,
420
- "percentage": perc
421
- },
422
- f"I_0_{i*10+j*5+3}": {
423
- "goal": "14",
424
- "train_config": (TD3, 300000),
425
- "consecutive": cons,
426
- "percentage": perc
427
- },
428
- f"I_0_{i*10+j*5+4}": {
429
- "goal": "18",
430
- "train_config": (TD3, 300000),
431
- "consecutive": cons,
432
- "percentage": perc
433
- }})
434
- PROBLEMS[PARKING]["Parking-S-14-PC-"]["L5"].update(
435
- {f"I_0_{i*14+j*7}": {
436
- "goal": "1",
437
- "train_config": (TD3, 200000),
438
- "consecutive": cons,
439
- "percentage": perc
440
- },
441
- f"I_0_{i*14+j*7+1}": {
442
- "goal": "4",
443
- "train_config": (TD3, 200000),
444
- "consecutive": cons,
445
- "percentage": perc
446
- },
447
- f"I_0_{i*14+j*7+2}": {
448
- "goal": "8",
449
- "train_config": (TD3, 400000),
450
- "consecutive": cons,
451
- "percentage": perc
452
- },
453
- f"I_0_{i*14+j*7+3}": {
454
- "goal": "11",
455
- "train_config": (TD3, 300000),
456
- "consecutive": cons,
457
- "percentage": perc
458
- },
459
- f"I_0_{i*14+j*7+4}": {
460
- "goal": "14",
461
- "train_config": (TD3, 300000),
462
- "consecutive": cons,
463
- "percentage": perc
464
- },
465
- f"I_0_{i*14+j*7+5}": {
466
- "goal": "18",
467
- "train_config": (TD3, 300000),
468
- "consecutive": cons,
469
- "percentage": perc
470
- },
471
- f"I_0_{i*14+j*7+6}": {
472
- "goal": "21",
473
- "train_config": (TD3, 300000),
474
- "consecutive": cons,
475
- "percentage": perc
476
- }})
562
+ ### PARKING ###
477
563
 
478
- ### PANDA ###
564
+ PROBLEMS[PARKING]["Parking-S-14-PC-"]["L1"].update(
565
+ {
566
+ f"I_0_{i*6+j*3}": {
567
+ "goal": "1",
568
+ "train_config": (TD3, 200000),
569
+ "consecutive": cons,
570
+ "percentage": perc,
571
+ },
572
+ f"I_0_{i*6+j*3+1}": {
573
+ "goal": "11",
574
+ "train_config": (TD3, 200000),
575
+ "consecutive": cons,
576
+ "percentage": perc,
577
+ },
578
+ f"I_0_{i*6+j*3+2}": {
579
+ "goal": "21",
580
+ "train_config": (TD3, 300000),
581
+ "consecutive": cons,
582
+ "percentage": perc,
583
+ },
584
+ }
585
+ )
586
+ PROBLEMS[PARKING]["Parking-S-14-PC-"]["L2"].update(
587
+ {
588
+ f"I_0_{i*8+j*4}": {
589
+ "goal": "1",
590
+ "train_config": (TD3, 200000),
591
+ "consecutive": cons,
592
+ "percentage": perc,
593
+ },
594
+ f"I_0_{i*8+j*4+1}": {
595
+ "goal": "8",
596
+ "train_config": (TD3, 200000),
597
+ "consecutive": cons,
598
+ "percentage": perc,
599
+ },
600
+ f"I_0_{i*8+j*4+2}": {
601
+ "goal": "14",
602
+ "train_config": (TD3, 400000),
603
+ "consecutive": cons,
604
+ "percentage": perc,
605
+ },
606
+ f"I_0_{i*8+j*4+3}": {
607
+ "goal": "21",
608
+ "train_config": (TD3, 300000),
609
+ "consecutive": cons,
610
+ "percentage": perc,
611
+ },
612
+ }
613
+ )
614
+ PROBLEMS[PARKING]["Parking-S-14-PC-"]["L3"].update(
615
+ {
616
+ f"I_0_{i*8+j*4}": {
617
+ "goal": "1",
618
+ "train_config": (TD3, 200000),
619
+ "consecutive": cons,
620
+ "percentage": perc,
621
+ },
622
+ f"I_0_{i*8+j*4+1}": {
623
+ "goal": "8",
624
+ "train_config": (TD3, 200000),
625
+ "consecutive": cons,
626
+ "percentage": perc,
627
+ },
628
+ f"I_0_{i*8+j*4+2}": {
629
+ "goal": "11",
630
+ "train_config": (TD3, 400000),
631
+ "consecutive": cons,
632
+ "percentage": perc,
633
+ },
634
+ f"I_0_{i*8+j*4+3}": {
635
+ "goal": "18",
636
+ "train_config": (TD3, 300000),
637
+ "consecutive": cons,
638
+ "percentage": perc,
639
+ },
640
+ }
641
+ )
642
+ PROBLEMS[PARKING]["Parking-S-14-PC-"]["L4"].update(
643
+ {
644
+ f"I_0_{i*10+j*5}": {
645
+ "goal": "4",
646
+ "train_config": (TD3, 200000),
647
+ "consecutive": cons,
648
+ "percentage": perc,
649
+ },
650
+ f"I_0_{i*10+j*5+1}": {
651
+ "goal": "8",
652
+ "train_config": (TD3, 200000),
653
+ "consecutive": cons,
654
+ "percentage": perc,
655
+ },
656
+ f"I_0_{i*10+j*5+2}": {
657
+ "goal": "11",
658
+ "train_config": (TD3, 400000),
659
+ "consecutive": cons,
660
+ "percentage": perc,
661
+ },
662
+ f"I_0_{i*10+j*5+3}": {
663
+ "goal": "14",
664
+ "train_config": (TD3, 300000),
665
+ "consecutive": cons,
666
+ "percentage": perc,
667
+ },
668
+ f"I_0_{i*10+j*5+4}": {
669
+ "goal": "18",
670
+ "train_config": (TD3, 300000),
671
+ "consecutive": cons,
672
+ "percentage": perc,
673
+ },
674
+ }
675
+ )
676
+ PROBLEMS[PARKING]["Parking-S-14-PC-"]["L5"].update(
677
+ {
678
+ f"I_0_{i*14+j*7}": {
679
+ "goal": "1",
680
+ "train_config": (TD3, 200000),
681
+ "consecutive": cons,
682
+ "percentage": perc,
683
+ },
684
+ f"I_0_{i*14+j*7+1}": {
685
+ "goal": "4",
686
+ "train_config": (TD3, 200000),
687
+ "consecutive": cons,
688
+ "percentage": perc,
689
+ },
690
+ f"I_0_{i*14+j*7+2}": {
691
+ "goal": "8",
692
+ "train_config": (TD3, 400000),
693
+ "consecutive": cons,
694
+ "percentage": perc,
695
+ },
696
+ f"I_0_{i*14+j*7+3}": {
697
+ "goal": "11",
698
+ "train_config": (TD3, 300000),
699
+ "consecutive": cons,
700
+ "percentage": perc,
701
+ },
702
+ f"I_0_{i*14+j*7+4}": {
703
+ "goal": "14",
704
+ "train_config": (TD3, 300000),
705
+ "consecutive": cons,
706
+ "percentage": perc,
707
+ },
708
+ f"I_0_{i*14+j*7+5}": {
709
+ "goal": "18",
710
+ "train_config": (TD3, 300000),
711
+ "consecutive": cons,
712
+ "percentage": perc,
713
+ },
714
+ f"I_0_{i*14+j*7+6}": {
715
+ "goal": "21",
716
+ "train_config": (TD3, 300000),
717
+ "consecutive": cons,
718
+ "percentage": perc,
719
+ },
720
+ }
721
+ )
479
722
 
480
- PROBLEMS[PANDA]["PandaMyReachDense"]["L1"].update(
481
- {f"I_0_{i*6+j*3}": {
482
- "goal": np.array([[-0.1, -0.1, 0.1]]),
483
- "train_config": (PPO, 200000),
484
- "consecutive": cons,
485
- "percentage": perc
486
- },
487
- f"I_0_{i*6+j*3+1}": {
488
- "goal": np.array([[-0.1, 0.1, 0.1]]),
489
- "train_config": (PPO, 200000),
490
- "consecutive": cons,
491
- "percentage": perc
492
- },
493
- f"I_0_{i*6+j*3+2}": {
494
- "goal": np.array([[0.2, 0.2, 0.1]]),
495
- "train_config": (PPO, 200000),
496
- "consecutive": cons,
497
- "percentage": perc
498
- }})
499
- PROBLEMS[PANDA]["PandaMyReachDense"]["L2"].update(
500
- {f"I_0_{i*8+j*4}": {
501
- "goal": np.array([[-0.5, -0.5, 0.1]]),
502
- "train_config": (PPO, 200000),
503
- "consecutive": cons,
504
- "percentage": perc
505
- },
506
- f"I_0_{i*8+j*4+1}": {
507
- "goal": np.array([[-0.5, 0.2, 0.1]]),
508
- "train_config": (PPO, 200000),
509
- "consecutive": cons,
510
- "percentage": perc
511
- },
512
- f"I_0_{i*8+j*4+2}": {
513
- "goal": np.array([[-0.1, 0.1, 0.1]]),
514
- "train_config": (PPO, 200000),
515
- "consecutive": cons,
516
- "percentage": perc
517
- },
518
- f"I_0_{i*8+j*4+3}": {
519
- "goal": np.array([[0.1, -0.1, 0.1]]),
520
- "train_config": (PPO, 200000),
521
- "consecutive": cons,
522
- "percentage": perc
523
- }})
524
- PROBLEMS[PANDA]["PandaMyReachDense"]["L3"].update(
525
- {f"I_0_{i*12+j*6}": {
526
- "goal": np.array([[-0.5, -0.5, 0.1]]),
527
- "train_config": (PPO, 200000),
528
- "consecutive": cons,
529
- "percentage": perc
530
- },
531
- f"I_0_{i*12+j*6+1}": {
532
- "goal": np.array([[-0.1, -0.1, 0.1]]),
533
- "train_config": (PPO, 200000),
534
- "consecutive": cons,
535
- "percentage": perc
536
- },
537
- f"I_0_{i*12+j*6+2}": {
538
- "goal": np.array([[-0.5, 0.2, 0.1]]),
539
- "train_config": (PPO, 200000),
540
- "consecutive": cons,
541
- "percentage": perc
542
- },
543
- f"I_0_{i*12+j*6+3}": {
544
- "goal": np.array([[-0.1, 0.1, 0.1]]),
545
- "train_config": (PPO, 200000),
546
- "consecutive": cons,
547
- "percentage": perc
548
- },
549
- f"I_0_{i*12+j*6+4}": {
550
- "goal": np.array([[0.2, -0.2, 0.1]]),
551
- "train_config": (PPO, 200000),
552
- "consecutive": cons,
553
- "percentage": perc
554
- },
555
- f"I_0_{i*12+j*6+5}": {
556
- "goal": np.array([[0.2, 0.2, 0.1]]),
557
- "train_config": (PPO, 200000),
558
- "consecutive": cons,
559
- "percentage": perc
560
- }})
561
- PROBLEMS[PANDA]["PandaMyReachDense"]["L4"].update(
562
- {f"I_0_{i*12+j*6}": {
563
- "goal": np.array([[-0.3, -0.3, 0.1]]),
564
- "train_config": (SAC, 200000),
565
- "consecutive": cons,
566
- "percentage": perc
567
- },
568
- f"I_0_{i*12+j*6+1}": {
569
- "goal": np.array([[-0.1, -0.1, 0.1]]),
570
- "train_config": (PPO, 200000),
571
- "consecutive": cons,
572
- "percentage": perc
573
- },
574
- f"I_0_{i*12+j*6+2}": {
575
- "goal": np.array([[-0.3, 0.2, 0.1]]),
576
- "train_config": (PPO, 200000),
577
- "consecutive": cons,
578
- "percentage": perc
579
- },
580
- f"I_0_{i*12+j*6+3}": {
581
- "goal": np.array([[-0.1, 0.1, 0.1]]),
582
- "train_config": (PPO, 200000),
583
- "consecutive": cons,
584
- "percentage": perc
585
- },
586
- f"I_0_{i*12+j*6+4}": {
587
- "goal": np.array([[0.1, -0.1, 0.1]]),
588
- "train_config": (PPO, 200000),
589
- "consecutive": cons,
590
- "percentage": perc
591
- },
592
- f"I_0_{i*12+j*6+5}": {
593
- "goal": np.array([[0.2, 0.2, 0.1]]),
594
- "train_config": (PPO, 200000),
595
- "consecutive": cons,
596
- "percentage": perc
597
- }})
598
- PROBLEMS[PANDA]["PandaMyReachDense"]["L5"].update(
599
- {f"I_0_{i*18+j*9}": {
600
- "goal": np.array([[-0.5, -0.5, 0.1]]),
601
- "train_config": (PPO, 200000),
602
- "consecutive": cons,
603
- "percentage": perc
604
- },
605
- f"I_0_{i*18+j*9+1}": {
606
- "goal": np.array([[-0.3, -0.3, 0.1]]),
607
- "train_config": (SAC, 200000),
608
- "consecutive": cons,
609
- "percentage": perc
610
- },
611
- f"I_0_{i*18+j*9+2}": {
612
- "goal": np.array([[-0.1, -0.1, 0.1]]),
613
- "train_config": (PPO, 200000),
614
- "consecutive": cons,
615
- "percentage": perc
616
- },
617
- f"I_0_{i*18+j*9+3}": {
618
- "goal": np.array([[-0.5, 0.2, 0.1]]),
619
- "train_config": (PPO, 200000),
620
- "consecutive": cons,
621
- "percentage": perc
622
- },
623
- f"I_0_{i*18+j*9+4}": {
624
- "goal": np.array([[-0.3, 0.2, 0.1]]),
625
- "train_config": (PPO, 200000),
626
- "consecutive": cons,
627
- "percentage": perc
628
- },
629
- f"I_0_{i*18+j*9+5}": {
630
- "goal": np.array([[-0.1, 0.1, 0.1]]),
631
- "train_config": (PPO, 200000),
632
- "consecutive": cons,
633
- "percentage": perc
634
- },
635
- f"I_0_{i*18+j*9+6}": {
636
- "goal": np.array([[0.2, -0.2, 0.1]]),
637
- "train_config": (PPO, 200000),
638
- "consecutive": cons,
639
- "percentage": perc
640
- },
641
- f"I_0_{i*18+j*9+7}": {
642
- "goal": np.array([[0.1, -0.1, 0.1]]),
643
- "train_config": (PPO, 200000),
644
- "consecutive": cons,
645
- "percentage": perc
646
- },
647
- f"I_0_{i*18+j*9+8}": {
648
- "goal": np.array([[0.2, 0.2, 0.1]]),
649
- "train_config": (PPO, 200000),
650
- "consecutive": cons,
651
- "percentage": perc
652
- }})
653
-
654
- ### POINT_MAZE ###
723
+ ### PANDA ###
655
724
 
656
- PROBLEMS[POINT_MAZE]["PointMaze-FourRoomsEnvDense-11x11"]["L1"].update( # TODO the existing working 9x9 is not Dense. need to duplicate it for the dense one
657
- {f"I_0_{i*6+j*3}": {
658
- "goal": (4,4),
659
- "train_config": (TD3, 400000),
660
- "consecutive": cons,
661
- "percentage": perc
662
- },
663
- f"I_0_{i*6+j*3+1}": {
664
- "goal": (7,3),
665
- "train_config": (TD3, 400000),
666
- "consecutive": cons,
667
- "percentage": perc
668
- },
669
- f"I_0_{i*6+j*3+2}": {
670
- "goal": (3,7),
671
- "train_config": (TD3, 400000),
672
- "consecutive": cons,
673
- "percentage": perc
674
- }})
675
- PROBLEMS[POINT_MAZE]["PointMaze-FourRoomsEnvDense-11x11"]["L2"].update(
676
- {f"I_0_{i*8+j*4}": {
677
- "goal": (4,4),
678
- "train_config": (TD3, 400000),
679
- "consecutive": cons,
680
- "percentage": perc
681
- },
682
- f"I_0_{i*8+j*4+1}": {
683
- "goal": (7,3),
684
- "train_config": (TD3, 400000),
685
- "consecutive": cons,
686
- "percentage": perc
687
- },
688
- f"I_0_{i*8+j*4+2}": {
689
- "goal": (3,7),
690
- "train_config": (TD3, 400000),
691
- "consecutive": cons,
692
- "percentage": perc
693
- },
694
- f"I_0_{i*8+j*4+3}": {
695
- "goal": (8,2),
696
- "train_config": (TD3, 400000),
697
- "consecutive": cons,
698
- "percentage": perc
699
- }})
700
- PROBLEMS[POINT_MAZE]["PointMaze-FourRoomsEnvDense-11x11"]["L3"].update(
701
- {f"I_0_{i*10+j*5}": {
702
- "goal": (4,4),
703
- "train_config": (TD3, 400000),
704
- "consecutive": cons,
705
- "percentage": perc
706
- },
707
- f"I_0_{i*10+j*5+1}": {
708
- "goal": (7,3),
709
- "train_config": (TD3, 400000),
710
- "consecutive": cons,
711
- "percentage": perc
712
- },
713
- f"I_0_{i*10+j*5+2}": {
714
- "goal": (3,7),
715
- "train_config": (TD3, 400000),
716
- "consecutive": cons,
717
- "percentage": perc
718
- },
719
- f"I_0_{i*10+j*5+3}": {
720
- "goal": (8,2),
721
- "train_config": (TD3, 400000),
722
- "consecutive": cons,
723
- "percentage": perc
724
- },
725
- f"I_0_{i*10+j*5+4}": {
726
- "goal": (2,8),
727
- "train_config": (TD3, 400000),
728
- "consecutive": cons,
729
- "percentage": perc
730
- }})
731
- PROBLEMS[POINT_MAZE]["PointMaze-FourRoomsEnvDense-11x11"]["L4"].update(
732
- {f"I_0_{i*12+j*6}": {
733
- "goal": (4,4),
734
- "train_config": (TD3, 400000),
735
- "consecutive": cons,
736
- "percentage": perc
737
- },
738
- f"I_0_{i*12+j*6+1}": {
739
- "goal": (7,3),
740
- "train_config": (TD3, 400000),
741
- "consecutive": cons,
742
- "percentage": perc
743
- },
744
- f"I_0_{i*12+j*6+2}": {
745
- "goal": (3,7),
746
- "train_config": (TD3, 400000),
747
- "consecutive": cons,
748
- "percentage": perc
749
- },
750
- f"I_0_{i*12+j*6+3}": {
751
- "goal": (8,2),
752
- "train_config": (TD3, 400000),
753
- "consecutive": cons,
754
- "percentage": perc
755
- },
756
- f"I_0_{i*12+j*6+4}": {
757
- "goal": (2,8),
758
- "train_config": (TD3, 400000),
759
- "consecutive": cons,
760
- "percentage": perc
761
- },
762
- f"I_0_{i*12+j*6+5}": {
763
- "goal": (3,4),
764
- "train_config": (TD3, 400000),
765
- "consecutive": cons,
766
- "percentage": perc
767
- }})
768
- PROBLEMS[POINT_MAZE]["PointMaze-FourRoomsEnvDense-11x11"]["L5"].update(
769
- {f"I_0_{i*14+j*7}": {
770
- "goal": (4,4),
771
- "train_config": (TD3, 400000),
772
- "consecutive": cons,
773
- "percentage": perc
774
- },
775
- f"I_0_{i*14+j*7+1}": {
776
- "goal": (7,3),
777
- "train_config": (TD3, 400000),
778
- "consecutive": cons,
779
- "percentage": perc
780
- },
781
- f"I_0_{i*14+j*7+2}": {
782
- "goal": (3,7),
783
- "train_config": (TD3, 400000),
784
- "consecutive": cons,
785
- "percentage": perc
786
- },
787
- f"I_0_{i*14+j*7+3}": {
788
- "goal": (8,2),
789
- "train_config": (TD3, 400000),
790
- "consecutive": cons,
791
- "percentage": perc
792
- },
793
- f"I_0_{i*14+j*7+4}": {
794
- "goal": (2,8),
795
- "train_config": (TD3, 400000),
796
- "consecutive": cons,
797
- "percentage": perc
798
- },
799
- f"I_0_{i*14+j*7+5}": {
800
- "goal": (3,4),
801
- "train_config": (TD3, 400000),
802
- "consecutive": cons,
803
- "percentage": perc
804
- },
805
- f"I_0_{i*14+j*7+6}": {
806
- "goal": (4,3),
807
- "train_config": (TD3, 400000),
808
- "consecutive": cons,
809
- "percentage": perc
810
- }})
725
+ PROBLEMS[PANDA]["PandaMyReachDense"]["L1"].update(
726
+ {
727
+ f"I_0_{i*6+j*3}": {
728
+ "goal": np.array([[-0.1, -0.1, 0.1]]),
729
+ "train_config": (PPO, 200000),
730
+ "consecutive": cons,
731
+ "percentage": perc,
732
+ },
733
+ f"I_0_{i*6+j*3+1}": {
734
+ "goal": np.array([[-0.1, 0.1, 0.1]]),
735
+ "train_config": (PPO, 200000),
736
+ "consecutive": cons,
737
+ "percentage": perc,
738
+ },
739
+ f"I_0_{i*6+j*3+2}": {
740
+ "goal": np.array([[0.2, 0.2, 0.1]]),
741
+ "train_config": (PPO, 200000),
742
+ "consecutive": cons,
743
+ "percentage": perc,
744
+ },
745
+ }
746
+ )
747
+ PROBLEMS[PANDA]["PandaMyReachDense"]["L2"].update(
748
+ {
749
+ f"I_0_{i*8+j*4}": {
750
+ "goal": np.array([[-0.5, -0.5, 0.1]]),
751
+ "train_config": (PPO, 200000),
752
+ "consecutive": cons,
753
+ "percentage": perc,
754
+ },
755
+ f"I_0_{i*8+j*4+1}": {
756
+ "goal": np.array([[-0.5, 0.2, 0.1]]),
757
+ "train_config": (PPO, 200000),
758
+ "consecutive": cons,
759
+ "percentage": perc,
760
+ },
761
+ f"I_0_{i*8+j*4+2}": {
762
+ "goal": np.array([[-0.1, 0.1, 0.1]]),
763
+ "train_config": (PPO, 200000),
764
+ "consecutive": cons,
765
+ "percentage": perc,
766
+ },
767
+ f"I_0_{i*8+j*4+3}": {
768
+ "goal": np.array([[0.1, -0.1, 0.1]]),
769
+ "train_config": (PPO, 200000),
770
+ "consecutive": cons,
771
+ "percentage": perc,
772
+ },
773
+ }
774
+ )
775
+ PROBLEMS[PANDA]["PandaMyReachDense"]["L3"].update(
776
+ {
777
+ f"I_0_{i*12+j*6}": {
778
+ "goal": np.array([[-0.5, -0.5, 0.1]]),
779
+ "train_config": (PPO, 200000),
780
+ "consecutive": cons,
781
+ "percentage": perc,
782
+ },
783
+ f"I_0_{i*12+j*6+1}": {
784
+ "goal": np.array([[-0.1, -0.1, 0.1]]),
785
+ "train_config": (PPO, 200000),
786
+ "consecutive": cons,
787
+ "percentage": perc,
788
+ },
789
+ f"I_0_{i*12+j*6+2}": {
790
+ "goal": np.array([[-0.5, 0.2, 0.1]]),
791
+ "train_config": (PPO, 200000),
792
+ "consecutive": cons,
793
+ "percentage": perc,
794
+ },
795
+ f"I_0_{i*12+j*6+3}": {
796
+ "goal": np.array([[-0.1, 0.1, 0.1]]),
797
+ "train_config": (PPO, 200000),
798
+ "consecutive": cons,
799
+ "percentage": perc,
800
+ },
801
+ f"I_0_{i*12+j*6+4}": {
802
+ "goal": np.array([[0.2, -0.2, 0.1]]),
803
+ "train_config": (PPO, 200000),
804
+ "consecutive": cons,
805
+ "percentage": perc,
806
+ },
807
+ f"I_0_{i*12+j*6+5}": {
808
+ "goal": np.array([[0.2, 0.2, 0.1]]),
809
+ "train_config": (PPO, 200000),
810
+ "consecutive": cons,
811
+ "percentage": perc,
812
+ },
813
+ }
814
+ )
815
+ PROBLEMS[PANDA]["PandaMyReachDense"]["L4"].update(
816
+ {
817
+ f"I_0_{i*12+j*6}": {
818
+ "goal": np.array([[-0.3, -0.3, 0.1]]),
819
+ "train_config": (SAC, 200000),
820
+ "consecutive": cons,
821
+ "percentage": perc,
822
+ },
823
+ f"I_0_{i*12+j*6+1}": {
824
+ "goal": np.array([[-0.1, -0.1, 0.1]]),
825
+ "train_config": (PPO, 200000),
826
+ "consecutive": cons,
827
+ "percentage": perc,
828
+ },
829
+ f"I_0_{i*12+j*6+2}": {
830
+ "goal": np.array([[-0.3, 0.2, 0.1]]),
831
+ "train_config": (PPO, 200000),
832
+ "consecutive": cons,
833
+ "percentage": perc,
834
+ },
835
+ f"I_0_{i*12+j*6+3}": {
836
+ "goal": np.array([[-0.1, 0.1, 0.1]]),
837
+ "train_config": (PPO, 200000),
838
+ "consecutive": cons,
839
+ "percentage": perc,
840
+ },
841
+ f"I_0_{i*12+j*6+4}": {
842
+ "goal": np.array([[0.1, -0.1, 0.1]]),
843
+ "train_config": (PPO, 200000),
844
+ "consecutive": cons,
845
+ "percentage": perc,
846
+ },
847
+ f"I_0_{i*12+j*6+5}": {
848
+ "goal": np.array([[0.2, 0.2, 0.1]]),
849
+ "train_config": (PPO, 200000),
850
+ "consecutive": cons,
851
+ "percentage": perc,
852
+ },
853
+ }
854
+ )
855
+ PROBLEMS[PANDA]["PandaMyReachDense"]["L5"].update(
856
+ {
857
+ f"I_0_{i*18+j*9}": {
858
+ "goal": np.array([[-0.5, -0.5, 0.1]]),
859
+ "train_config": (PPO, 200000),
860
+ "consecutive": cons,
861
+ "percentage": perc,
862
+ },
863
+ f"I_0_{i*18+j*9+1}": {
864
+ "goal": np.array([[-0.3, -0.3, 0.1]]),
865
+ "train_config": (SAC, 200000),
866
+ "consecutive": cons,
867
+ "percentage": perc,
868
+ },
869
+ f"I_0_{i*18+j*9+2}": {
870
+ "goal": np.array([[-0.1, -0.1, 0.1]]),
871
+ "train_config": (PPO, 200000),
872
+ "consecutive": cons,
873
+ "percentage": perc,
874
+ },
875
+ f"I_0_{i*18+j*9+3}": {
876
+ "goal": np.array([[-0.5, 0.2, 0.1]]),
877
+ "train_config": (PPO, 200000),
878
+ "consecutive": cons,
879
+ "percentage": perc,
880
+ },
881
+ f"I_0_{i*18+j*9+4}": {
882
+ "goal": np.array([[-0.3, 0.2, 0.1]]),
883
+ "train_config": (PPO, 200000),
884
+ "consecutive": cons,
885
+ "percentage": perc,
886
+ },
887
+ f"I_0_{i*18+j*9+5}": {
888
+ "goal": np.array([[-0.1, 0.1, 0.1]]),
889
+ "train_config": (PPO, 200000),
890
+ "consecutive": cons,
891
+ "percentage": perc,
892
+ },
893
+ f"I_0_{i*18+j*9+6}": {
894
+ "goal": np.array([[0.2, -0.2, 0.1]]),
895
+ "train_config": (PPO, 200000),
896
+ "consecutive": cons,
897
+ "percentage": perc,
898
+ },
899
+ f"I_0_{i*18+j*9+7}": {
900
+ "goal": np.array([[0.1, -0.1, 0.1]]),
901
+ "train_config": (PPO, 200000),
902
+ "consecutive": cons,
903
+ "percentage": perc,
904
+ },
905
+ f"I_0_{i*18+j*9+8}": {
906
+ "goal": np.array([[0.2, 0.2, 0.1]]),
907
+ "train_config": (PPO, 200000),
908
+ "consecutive": cons,
909
+ "percentage": perc,
910
+ },
911
+ }
912
+ )
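Every PandaMyReachDense entry added above shares one shape: a goal given as a (1, 3) numpy array for the end-effector target, an (algorithm, timesteps) train_config, and the cons/perc values from the enclosing loop. Below is a minimal sketch of a generator for such entries; the helper name, the base offset argument, and the example cons/perc values are assumptions for illustration, not gr_libs code:

import numpy as np
from stable_baselines3 import PPO

def make_panda_entries(base, goals, cons, perc, algo=PPO, timesteps=200_000):
    # One dict entry per goal, mirroring the fields used in the blocks above.
    return {
        f"I_0_{base + k}": {
            "goal": np.array([goal]),           # (1, 3) array, e.g. [[-0.1, -0.1, 0.1]]
            "train_config": (algo, timesteps),  # algorithm class plus training budget
            "consecutive": cons,
            "percentage": perc,
        }
        for k, goal in enumerate(goals)
    }

# Example with the three L1 goals and placeholder cons/perc values.
entries = make_panda_entries(0, [(-0.1, -0.1, 0.1), (-0.1, 0.1, 0.1), (0.2, 0.2, 0.1)], True, 0.5)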
811
913
 
812
- PROBLEMS[POINT_MAZE]["PointMaze-ObstaclesEnvDense-11x11"]["L1"].update(
813
- {f"I_0_{i*6+j*3}": {
814
- "goal": (5,5),
815
- "train_config": (TD3, 400000),
816
- "consecutive": cons,
817
- "percentage": perc
818
- },
819
- f"I_0_{i*6+j*3+1}": {
820
- "goal": (7,4),
821
- "train_config": (TD3, 400000),
822
- "consecutive": cons,
823
- "percentage": perc
824
- },
825
- f"I_0_{i*6+j*3+2}": {
826
- "goal": (4,7),
827
- "train_config": (TD3, 400000),
828
- "consecutive": cons,
829
- "percentage": perc
830
- }})
831
- PROBLEMS[POINT_MAZE]["PointMaze-ObstaclesEnvDense-11x11"]["L2"].update(
832
- {f"I_0_{i*6+j*3}": {
833
- "goal": (5,5),
834
- "train_config": (TD3, 400000),
835
- "consecutive": cons,
836
- "percentage": perc
837
- },
838
- f"I_0_{i*6+j*3+1}": {
839
- "goal": (3,6),
840
- "train_config": (TD3, 400000),
841
- "consecutive": cons,
842
- "percentage": perc
843
- },
844
- f"I_0_{i*6+j*3+2}": {
845
- "goal": (7,4),
846
- "train_config": (TD3, 400000),
847
- "consecutive": cons,
848
- "percentage": perc
849
- }})
850
- PROBLEMS[POINT_MAZE]["PointMaze-ObstaclesEnvDense-11x11"]["L3"].update(
851
- {f"I_0_{i*8+j*4}": {
852
- "goal": (5,5),
853
- "train_config": (TD3, 400000),
854
- "consecutive": cons,
855
- "percentage": perc
856
- },
857
- f"I_0_{i*8+j*4+1}": {
858
- "goal": (3,6),
859
- "train_config": (TD3, 400000),
860
- "consecutive": cons,
861
- "percentage": perc
862
- },
863
- f"I_0_{i*8+j*4+2}": {
864
- "goal": (7,4),
865
- "train_config": (TD3, 400000),
866
- "consecutive": cons,
867
- "percentage": perc
868
- },
869
- f"I_0_{i*8+j*4+3}": {
870
- "goal": (4,7),
871
- "train_config": (TD3, 400000),
872
- "consecutive": cons,
873
- "percentage": perc
874
- }})
875
- PROBLEMS[POINT_MAZE]["PointMaze-ObstaclesEnvDense-11x11"]["L4"].update(
876
- {f"I_0_{i*10+j*5}": {
877
- "goal": (5,5),
878
- "train_config": (TD3, 400000),
879
- "consecutive": cons,
880
- "percentage": perc
881
- },
882
- f"I_0_{i*10+j*5+1}": {
883
- "goal": (3,6),
884
- "train_config": (TD3, 400000),
885
- "consecutive": cons,
886
- "percentage": perc
887
- },
888
- f"I_0_{i*10+j*5+2}": {
889
- "goal": (7,4),
890
- "train_config": (TD3, 400000),
891
- "consecutive": cons,
892
- "percentage": perc
893
- },
894
- f"I_0_{i*10+j*5+3}": {
895
- "goal": (4,7),
896
- "train_config": (TD3, 400000),
897
- "consecutive": cons,
898
- "percentage": perc
899
- },
900
- f"I_0_{i*10+j*5+4}": {
901
- "goal": (8,8),
902
- "train_config": (TD3, 400000),
903
- "consecutive": cons,
904
- "percentage": perc
905
- }})
906
- PROBLEMS[POINT_MAZE]["PointMaze-ObstaclesEnvDense-11x11"]["L5"].update(
907
- {f"I_0_{i*12+j*6}": {
908
- "goal": (5,5),
909
- "train_config": (TD3, 400000),
910
- "consecutive": cons,
911
- "percentage": perc
912
- },
913
- f"I_0_{i*12+j*6+1}": {
914
- "goal": (3,6),
915
- "train_config": (TD3, 400000),
916
- "consecutive": cons,
917
- "percentage": perc
918
- },
919
- f"I_0_{i*12+j*6+2}": {
920
- "goal": (6,3),
921
- "train_config": (TD3, 400000),
922
- "consecutive": cons,
923
- "percentage": perc
924
- },
925
- f"I_0_{i*12+j*6+3}": {
926
- "goal": (7,4),
927
- "train_config": (TD3, 400000),
928
- "consecutive": cons,
929
- "percentage": perc
930
- },
931
- f"I_0_{i*12+j*6+4}": {
932
- "goal": (4,7),
933
- "train_config": (TD3, 400000),
934
- "consecutive": cons,
935
- "percentage": perc
936
- },
937
- f"I_0_{i*12+j*6+5}": {
938
- "goal": (8,8),
939
- "train_config": (TD3, 400000),
940
- "consecutive": cons,
941
- "percentage": perc
942
- }})
914
+ ### POINT_MAZE ###
943
915
 
944
- ### MINIGRID ###
916
+ PROBLEMS[POINT_MAZE]["PointMaze-FourRoomsEnvDense-11x11"][
917
+ "L1"
918
+ ].update( # TODO the existing working 9x9 is not Dense. need to duplicate it for the dense one
919
+ {
920
+ f"I_0_{i*6+j*3}": {
921
+ "goal": (4, 4),
922
+ "train_config": (TD3, 400000),
923
+ "consecutive": cons,
924
+ "percentage": perc,
925
+ },
926
+ f"I_0_{i*6+j*3+1}": {
927
+ "goal": (7, 3),
928
+ "train_config": (TD3, 400000),
929
+ "consecutive": cons,
930
+ "percentage": perc,
931
+ },
932
+ f"I_0_{i*6+j*3+2}": {
933
+ "goal": (3, 7),
934
+ "train_config": (TD3, 400000),
935
+ "consecutive": cons,
936
+ "percentage": perc,
937
+ },
938
+ }
939
+ )
940
+ PROBLEMS[POINT_MAZE]["PointMaze-FourRoomsEnvDense-11x11"]["L2"].update(
941
+ {
942
+ f"I_0_{i*8+j*4}": {
943
+ "goal": (4, 4),
944
+ "train_config": (TD3, 400000),
945
+ "consecutive": cons,
946
+ "percentage": perc,
947
+ },
948
+ f"I_0_{i*8+j*4+1}": {
949
+ "goal": (7, 3),
950
+ "train_config": (TD3, 400000),
951
+ "consecutive": cons,
952
+ "percentage": perc,
953
+ },
954
+ f"I_0_{i*8+j*4+2}": {
955
+ "goal": (3, 7),
956
+ "train_config": (TD3, 400000),
957
+ "consecutive": cons,
958
+ "percentage": perc,
959
+ },
960
+ f"I_0_{i*8+j*4+3}": {
961
+ "goal": (8, 2),
962
+ "train_config": (TD3, 400000),
963
+ "consecutive": cons,
964
+ "percentage": perc,
965
+ },
966
+ }
967
+ )
968
+ PROBLEMS[POINT_MAZE]["PointMaze-FourRoomsEnvDense-11x11"]["L3"].update(
969
+ {
970
+ f"I_0_{i*10+j*5}": {
971
+ "goal": (4, 4),
972
+ "train_config": (TD3, 400000),
973
+ "consecutive": cons,
974
+ "percentage": perc,
975
+ },
976
+ f"I_0_{i*10+j*5+1}": {
977
+ "goal": (7, 3),
978
+ "train_config": (TD3, 400000),
979
+ "consecutive": cons,
980
+ "percentage": perc,
981
+ },
982
+ f"I_0_{i*10+j*5+2}": {
983
+ "goal": (3, 7),
984
+ "train_config": (TD3, 400000),
985
+ "consecutive": cons,
986
+ "percentage": perc,
987
+ },
988
+ f"I_0_{i*10+j*5+3}": {
989
+ "goal": (8, 2),
990
+ "train_config": (TD3, 400000),
991
+ "consecutive": cons,
992
+ "percentage": perc,
993
+ },
994
+ f"I_0_{i*10+j*5+4}": {
995
+ "goal": (2, 8),
996
+ "train_config": (TD3, 400000),
997
+ "consecutive": cons,
998
+ "percentage": perc,
999
+ },
1000
+ }
1001
+ )
1002
+ PROBLEMS[POINT_MAZE]["PointMaze-FourRoomsEnvDense-11x11"]["L4"].update(
1003
+ {
1004
+ f"I_0_{i*12+j*6}": {
1005
+ "goal": (4, 4),
1006
+ "train_config": (TD3, 400000),
1007
+ "consecutive": cons,
1008
+ "percentage": perc,
1009
+ },
1010
+ f"I_0_{i*12+j*6+1}": {
1011
+ "goal": (7, 3),
1012
+ "train_config": (TD3, 400000),
1013
+ "consecutive": cons,
1014
+ "percentage": perc,
1015
+ },
1016
+ f"I_0_{i*12+j*6+2}": {
1017
+ "goal": (3, 7),
1018
+ "train_config": (TD3, 400000),
1019
+ "consecutive": cons,
1020
+ "percentage": perc,
1021
+ },
1022
+ f"I_0_{i*12+j*6+3}": {
1023
+ "goal": (8, 2),
1024
+ "train_config": (TD3, 400000),
1025
+ "consecutive": cons,
1026
+ "percentage": perc,
1027
+ },
1028
+ f"I_0_{i*12+j*6+4}": {
1029
+ "goal": (2, 8),
1030
+ "train_config": (TD3, 400000),
1031
+ "consecutive": cons,
1032
+ "percentage": perc,
1033
+ },
1034
+ f"I_0_{i*12+j*6+5}": {
1035
+ "goal": (3, 4),
1036
+ "train_config": (TD3, 400000),
1037
+ "consecutive": cons,
1038
+ "percentage": perc,
1039
+ },
1040
+ }
1041
+ )
1042
+ PROBLEMS[POINT_MAZE]["PointMaze-FourRoomsEnvDense-11x11"]["L5"].update(
1043
+ {
1044
+ f"I_0_{i*14+j*7}": {
1045
+ "goal": (4, 4),
1046
+ "train_config": (TD3, 400000),
1047
+ "consecutive": cons,
1048
+ "percentage": perc,
1049
+ },
1050
+ f"I_0_{i*14+j*7+1}": {
1051
+ "goal": (7, 3),
1052
+ "train_config": (TD3, 400000),
1053
+ "consecutive": cons,
1054
+ "percentage": perc,
1055
+ },
1056
+ f"I_0_{i*14+j*7+2}": {
1057
+ "goal": (3, 7),
1058
+ "train_config": (TD3, 400000),
1059
+ "consecutive": cons,
1060
+ "percentage": perc,
1061
+ },
1062
+ f"I_0_{i*14+j*7+3}": {
1063
+ "goal": (8, 2),
1064
+ "train_config": (TD3, 400000),
1065
+ "consecutive": cons,
1066
+ "percentage": perc,
1067
+ },
1068
+ f"I_0_{i*14+j*7+4}": {
1069
+ "goal": (2, 8),
1070
+ "train_config": (TD3, 400000),
1071
+ "consecutive": cons,
1072
+ "percentage": perc,
1073
+ },
1074
+ f"I_0_{i*14+j*7+5}": {
1075
+ "goal": (3, 4),
1076
+ "train_config": (TD3, 400000),
1077
+ "consecutive": cons,
1078
+ "percentage": perc,
1079
+ },
1080
+ f"I_0_{i*14+j*7+6}": {
1081
+ "goal": (4, 3),
1082
+ "train_config": (TD3, 400000),
1083
+ "consecutive": cons,
1084
+ "percentage": perc,
1085
+ },
1086
+ }
1087
+ )
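The key offsets in these blocks (i*6+j*3+k for L1 up to i*14+j*7+k for L5) stay collision-free because the stride is always twice the number of goals in the level, so each (i, j, k) combination maps to a distinct index as long as j stays below 2. The actual ranges of i and j come from the enclosing loops over the cons/perc settings, which sit outside this hunk; range(2) below is only an assumption for illustration:

def instance_keys(n_goals, i_range=2, j_range=2):
    # Mirrors the arithmetic above: stride = 2 * n_goals (L1 has 3 goals, stride 6).
    stride = 2 * n_goals
    return [f"I_0_{i * stride + j * n_goals + k}"
            for i in range(i_range)
            for j in range(j_range)
            for k in range(n_goals)]

keys = instance_keys(3)              # L1-style keys: I_0_0 ... I_0_11
assert len(keys) == len(set(keys))   # no two (i, j, k) combinations collide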
945
1088
 
946
- PROBLEMS[MINIGRID]["MiniGrid-SimpleCrossingS13N4"]["L1"].update(
947
- {f"I_0_{i*6+j*3}": {
948
- "goal": (11,1),
949
- "train_config": (QLEARNING, 100000),
950
- "consecutive": cons,
951
- "percentage": perc
952
- },
953
- f"I_0_{i*6+j*3+1}": {
954
- "goal": (1,11),
955
- "train_config": (QLEARNING, 100000),
956
- "consecutive": cons,
957
- "percentage": perc
958
- },
959
- f"I_0_{i*6+j*3+2}": {
960
- "goal": (11,11),
961
- "train_config": (QLEARNING, 100000),
962
- "consecutive": cons,
963
- "percentage": perc
964
- }})
965
- PROBLEMS[MINIGRID]["MiniGrid-SimpleCrossingS13N4"]["L2"].update(
966
- {f"I_0_{i*8+j*4}": {
967
- "goal": (11,1),
968
- "train_config": (QLEARNING, 100000),
969
- "consecutive": cons,
970
- "percentage": perc
971
- },
972
- f"I_0_{i*8+j*4+1}": {
973
- "goal": (1,11),
974
- "train_config": (QLEARNING, 100000),
975
- "consecutive": cons,
976
- "percentage": perc
977
- },
978
- f"I_0_{i*8+j*4+2}": {
979
- "goal": (11,11),
980
- "train_config": (QLEARNING, 100000),
981
- "consecutive": cons,
982
- "percentage": perc
983
- },
984
- f"I_0_{i*8+j*4+3}": {
985
- "goal": (5,9),
986
- "train_config": (QLEARNING, 100000),
987
- "consecutive": cons,
988
- "percentage": perc
989
- }})
990
- PROBLEMS[MINIGRID]["MiniGrid-SimpleCrossingS13N4"]["L3"].update(
991
- {f"I_0_{i*10+j*5}": {
992
- "goal": (11,1),
993
- "train_config": (QLEARNING, 100000),
994
- "consecutive": cons,
995
- "percentage": perc
996
- },
997
- f"I_0_{i*10+j*5+1}": {
998
- "goal": (1,11),
999
- "train_config": (QLEARNING, 100000),
1000
- "consecutive": cons,
1001
- "percentage": perc
1002
- },
1003
- f"I_0_{i*10+j*5+2}": {
1004
- "goal": (11,11),
1005
- "train_config": (QLEARNING, 100000),
1006
- "consecutive": cons,
1007
- "percentage": perc
1008
- },
1009
- f"I_0_{i*10+j*5+3}": {
1010
- "goal": (5,9),
1011
- "train_config": (QLEARNING, 100000),
1012
- "consecutive": cons,
1013
- "percentage": perc
1014
- },
1015
- f"I_0_{i*10+j*5+4}": {
1016
- "goal": (6,1),
1017
- "train_config": (QLEARNING, 100000),
1018
- "consecutive": cons,
1019
- "percentage": perc
1020
- }})
1021
- PROBLEMS[MINIGRID]["MiniGrid-SimpleCrossingS13N4"]["L4"].update(
1022
- {f"I_0_{i*12+j*6}": {
1023
- "goal": (11,1),
1024
- "train_config": (QLEARNING, 100000),
1025
- "consecutive": cons,
1026
- "percentage": perc
1027
- },
1028
- f"I_0_{i*12+j*6+1}": {
1029
- "goal": (1,11),
1030
- "train_config": (QLEARNING, 100000),
1031
- "consecutive": cons,
1032
- "percentage": perc
1033
- },
1034
- f"I_0_{i*12+j*6+2}": {
1035
- "goal": (11,11),
1036
- "train_config": (QLEARNING, 100000),
1037
- "consecutive": cons,
1038
- "percentage": perc
1039
- },
1040
- f"I_0_{i*12+j*6+3}": {
1041
- "goal": (5,9),
1042
- "train_config": (QLEARNING, 100000),
1043
- "consecutive": cons,
1044
- "percentage": perc
1045
- },
1046
- f"I_0_{i*12+j*6+5}": {
1047
- "goal": (11,3),
1048
- "train_config": (QLEARNING, 100000),
1049
- "consecutive": cons,
1050
- "percentage": perc
1051
- }})
1052
- PROBLEMS[MINIGRID]["MiniGrid-SimpleCrossingS13N4"]["L5"].update(
1053
- {f"I_0_{i*14+j*7}": {
1054
- "goal": (11,1),
1055
- "train_config": (QLEARNING, 100000),
1056
- "consecutive": cons,
1057
- "percentage": perc
1058
- },
1059
- f"I_0_{i*14+j*7+1}": {
1060
- "goal": (1,11),
1061
- "train_config": (QLEARNING, 100000),
1062
- "consecutive": cons,
1063
- "percentage": perc
1064
- },
1065
- f"I_0_{i*14+j*7+2}": {
1066
- "goal": (11,11),
1067
- "train_config": (QLEARNING, 100000),
1068
- "consecutive": cons,
1069
- "percentage": perc
1070
- },
1071
- f"I_0_{i*14+j*7+3}": {
1072
- "goal": (5,9),
1073
- "train_config": (QLEARNING, 100000),
1074
- "consecutive": cons,
1075
- "percentage": perc
1076
- },
1077
- f"I_0_{i*14+j*7+5}": {
1078
- "goal": (11,3),
1079
- "train_config": (QLEARNING, 100000),
1080
- "consecutive": cons,
1081
- "percentage": perc
1082
- },
1083
- f"I_0_{i*14+j*7+6}": {
1084
- "goal": (11,5),
1085
- "train_config": (QLEARNING, 100000),
1086
- "consecutive": cons,
1087
- "percentage": perc
1088
- }})
1089
+ PROBLEMS[POINT_MAZE]["PointMaze-ObstaclesEnvDense-11x11"]["L1"].update(
1090
+ {
1091
+ f"I_0_{i*6+j*3}": {
1092
+ "goal": (5, 5),
1093
+ "train_config": (TD3, 400000),
1094
+ "consecutive": cons,
1095
+ "percentage": perc,
1096
+ },
1097
+ f"I_0_{i*6+j*3+1}": {
1098
+ "goal": (7, 4),
1099
+ "train_config": (TD3, 400000),
1100
+ "consecutive": cons,
1101
+ "percentage": perc,
1102
+ },
1103
+ f"I_0_{i*6+j*3+2}": {
1104
+ "goal": (4, 7),
1105
+ "train_config": (TD3, 400000),
1106
+ "consecutive": cons,
1107
+ "percentage": perc,
1108
+ },
1109
+ }
1110
+ )
1111
+ PROBLEMS[POINT_MAZE]["PointMaze-ObstaclesEnvDense-11x11"]["L2"].update(
1112
+ {
1113
+ f"I_0_{i*6+j*3}": {
1114
+ "goal": (5, 5),
1115
+ "train_config": (TD3, 400000),
1116
+ "consecutive": cons,
1117
+ "percentage": perc,
1118
+ },
1119
+ f"I_0_{i*6+j*3+1}": {
1120
+ "goal": (3, 6),
1121
+ "train_config": (TD3, 400000),
1122
+ "consecutive": cons,
1123
+ "percentage": perc,
1124
+ },
1125
+ f"I_0_{i*6+j*3+2}": {
1126
+ "goal": (7, 4),
1127
+ "train_config": (TD3, 400000),
1128
+ "consecutive": cons,
1129
+ "percentage": perc,
1130
+ },
1131
+ }
1132
+ )
1133
+ PROBLEMS[POINT_MAZE]["PointMaze-ObstaclesEnvDense-11x11"]["L3"].update(
1134
+ {
1135
+ f"I_0_{i*8+j*4}": {
1136
+ "goal": (5, 5),
1137
+ "train_config": (TD3, 400000),
1138
+ "consecutive": cons,
1139
+ "percentage": perc,
1140
+ },
1141
+ f"I_0_{i*8+j*4+1}": {
1142
+ "goal": (3, 6),
1143
+ "train_config": (TD3, 400000),
1144
+ "consecutive": cons,
1145
+ "percentage": perc,
1146
+ },
1147
+ f"I_0_{i*8+j*4+2}": {
1148
+ "goal": (7, 4),
1149
+ "train_config": (TD3, 400000),
1150
+ "consecutive": cons,
1151
+ "percentage": perc,
1152
+ },
1153
+ f"I_0_{i*8+j*4+3}": {
1154
+ "goal": (4, 7),
1155
+ "train_config": (TD3, 400000),
1156
+ "consecutive": cons,
1157
+ "percentage": perc,
1158
+ },
1159
+ }
1160
+ )
1161
+ PROBLEMS[POINT_MAZE]["PointMaze-ObstaclesEnvDense-11x11"]["L4"].update(
1162
+ {
1163
+ f"I_0_{i*10+j*5}": {
1164
+ "goal": (5, 5),
1165
+ "train_config": (TD3, 400000),
1166
+ "consecutive": cons,
1167
+ "percentage": perc,
1168
+ },
1169
+ f"I_0_{i*10+j*5+1}": {
1170
+ "goal": (3, 6),
1171
+ "train_config": (TD3, 400000),
1172
+ "consecutive": cons,
1173
+ "percentage": perc,
1174
+ },
1175
+ f"I_0_{i*10+j*5+2}": {
1176
+ "goal": (7, 4),
1177
+ "train_config": (TD3, 400000),
1178
+ "consecutive": cons,
1179
+ "percentage": perc,
1180
+ },
1181
+ f"I_0_{i*10+j*5+3}": {
1182
+ "goal": (4, 7),
1183
+ "train_config": (TD3, 400000),
1184
+ "consecutive": cons,
1185
+ "percentage": perc,
1186
+ },
1187
+ f"I_0_{i*10+j*5+4}": {
1188
+ "goal": (8, 8),
1189
+ "train_config": (TD3, 400000),
1190
+ "consecutive": cons,
1191
+ "percentage": perc,
1192
+ },
1193
+ }
1194
+ )
1195
+ PROBLEMS[POINT_MAZE]["PointMaze-ObstaclesEnvDense-11x11"]["L5"].update(
1196
+ {
1197
+ f"I_0_{i*12+j*6}": {
1198
+ "goal": (5, 5),
1199
+ "train_config": (TD3, 400000),
1200
+ "consecutive": cons,
1201
+ "percentage": perc,
1202
+ },
1203
+ f"I_0_{i*12+j*6+1}": {
1204
+ "goal": (3, 6),
1205
+ "train_config": (TD3, 400000),
1206
+ "consecutive": cons,
1207
+ "percentage": perc,
1208
+ },
1209
+ f"I_0_{i*12+j*6+2}": {
1210
+ "goal": (6, 3),
1211
+ "train_config": (TD3, 400000),
1212
+ "consecutive": cons,
1213
+ "percentage": perc,
1214
+ },
1215
+ f"I_0_{i*12+j*6+3}": {
1216
+ "goal": (7, 4),
1217
+ "train_config": (TD3, 400000),
1218
+ "consecutive": cons,
1219
+ "percentage": perc,
1220
+ },
1221
+ f"I_0_{i*12+j*6+4}": {
1222
+ "goal": (4, 7),
1223
+ "train_config": (TD3, 400000),
1224
+ "consecutive": cons,
1225
+ "percentage": perc,
1226
+ },
1227
+ f"I_0_{i*12+j*6+5}": {
1228
+ "goal": (8, 8),
1229
+ "train_config": (TD3, 400000),
1230
+ "consecutive": cons,
1231
+ "percentage": perc,
1232
+ },
1233
+ }
1234
+ )
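Each added entry is ultimately reachable through the nested PROBLEMS mapping as domain, then environment id, then level, then instance key. Here is a minimal sketch of unpacking one spec, using a toy dictionary with the same shape rather than the real PROBLEMS constant (the "point_maze" string key and the cons/perc values are placeholders; the actual code keys the outer dict by the POINT_MAZE constant):

from stable_baselines3 import TD3

toy_problems = {
    "point_maze": {
        "PointMaze-ObstaclesEnvDense-11x11": {
            "L1": {
                "I_0_0": {
                    "goal": (5, 5),
                    "train_config": (TD3, 400000),
                    "consecutive": True,   # placeholder value
                    "percentage": 0.5,     # placeholder value
                },
            },
        },
    },
}

spec = toy_problems["point_maze"]["PointMaze-ObstaclesEnvDense-11x11"]["L1"]["I_0_0"]
algo_cls, timesteps = spec["train_config"]   # e.g. TD3 trained for 400k steps
goal_cell = spec["goal"]                     # maze cell the agent should reach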
1089
1235
 
1090
- PROBLEMS[MINIGRID]["MiniGrid-LavaCrossingS9N2"]["L1"].update(
1091
- {f"I_0_{i*6+j*3}": {
1092
- "goal": (1,3),
1093
- "train_config": (QLEARNING, 100000),
1094
- "consecutive": cons,
1095
- "percentage": perc
1096
- },
1097
- f"I_0_{i*6+j*3+1}": {
1098
- "goal": (6,5),
1099
- "train_config": (QLEARNING, 100000),
1100
- "consecutive": cons,
1101
- "percentage": perc
1102
- },
1103
- f"I_0_{i*6+j*3+2}": {
1104
- "goal": (4,7),
1105
- "train_config": (QLEARNING, 100000),
1106
- "consecutive": cons,
1107
- "percentage": perc
1108
- }})
1109
- PROBLEMS[MINIGRID]["MiniGrid-LavaCrossingS9N2"]["L2"].update(
1110
- {f"I_0_{i*8+j*4}": {
1111
- "goal": (1,3),
1112
- "train_config": (QLEARNING, 100000),
1113
- "consecutive": cons,
1114
- "percentage": perc
1115
- },
1116
- f"I_0_{i*8+j*4+1}": {
1117
- "goal": (6,5),
1118
- "train_config": (QLEARNING, 100000),
1119
- "consecutive": cons,
1120
- "percentage": perc
1121
- },
1122
- f"I_0_{i*8+j*4+2}": {
1123
- "goal": (4,7),
1124
- "train_config": (QLEARNING, 100000),
1125
- "consecutive": cons,
1126
- "percentage": perc
1127
- },
1128
- f"I_0_{i*8+j*4+3}": {
1129
- "goal": (2,5),
1130
- "train_config": (QLEARNING, 100000),
1131
- "consecutive": cons,
1132
- "percentage": perc
1133
- }})
1134
- PROBLEMS[MINIGRID]["MiniGrid-LavaCrossingS9N2"]["L3"].update(
1135
- {f"I_0_{i*10+j*5}": {
1136
- "goal": (1,3),
1137
- "train_config": (QLEARNING, 100000),
1138
- "consecutive": cons,
1139
- "percentage": perc
1140
- },
1141
- f"I_0_{i*10+j*5+1}": {
1142
- "goal": (6,5),
1143
- "train_config": (QLEARNING, 100000),
1144
- "consecutive": cons,
1145
- "percentage": perc
1146
- },
1147
- f"I_0_{i*10+j*5+2}": {
1148
- "goal": (4,7),
1149
- "train_config": (QLEARNING, 100000),
1150
- "consecutive": cons,
1151
- "percentage": perc
1152
- },
1153
- f"I_0_{i*10+j*5+3}": {
1154
- "goal": (2,5),
1155
- "train_config": (QLEARNING, 100000),
1156
- "consecutive": cons,
1157
- "percentage": perc
1158
- },
1159
- f"I_0_{i*10+j*5+4}": {
1160
- "goal": (5,2),
1161
- "train_config": (QLEARNING, 100000),
1162
- "consecutive": cons,
1163
- "percentage": perc
1164
- }})
1165
- PROBLEMS[MINIGRID]["MiniGrid-LavaCrossingS9N2"]["L4"].update(
1166
- {f"I_0_{i*12+j*6}": {
1167
- "goal": (1,3),
1168
- "train_config": (QLEARNING, 100000),
1169
- "consecutive": cons,
1170
- "percentage": perc
1171
- },
1172
- f"I_0_{i*12+j*6+1}": {
1173
- "goal": (6,5),
1174
- "train_config": (QLEARNING, 100000),
1175
- "consecutive": cons,
1176
- "percentage": perc
1177
- },
1178
- f"I_0_{i*12+j*6+2}": {
1179
- "goal": (4,7),
1180
- "train_config": (QLEARNING, 100000),
1181
- "consecutive": cons,
1182
- "percentage": perc
1183
- },
1184
- f"I_0_{i*12+j*6+3}": {
1185
- "goal": (2,5),
1186
- "train_config": (QLEARNING, 100000),
1187
- "consecutive": cons,
1188
- "percentage": perc
1189
- },
1190
- f"I_0_{i*12+j*6+4}": {
1191
- "goal": (5,2),
1192
- "train_config": (QLEARNING, 100000),
1193
- "consecutive": cons,
1194
- "percentage": perc
1195
- },
1196
- f"I_0_{i*12+j*6+5}": {
1197
- "goal": (4,5),
1198
- "train_config": (QLEARNING, 100000),
1199
- "consecutive": cons,
1200
- "percentage": perc
1201
- }})
1202
- PROBLEMS[MINIGRID]["MiniGrid-LavaCrossingS9N2"]["L5"].update(
1203
- {f"I_0_{i*14+j*7}": {
1204
- "goal": (1,3),
1205
- "train_config": (QLEARNING, 100000),
1206
- "consecutive": cons,
1207
- "percentage": perc
1208
- },
1209
- f"I_0_{i*14+j*7+1}": {
1210
- "goal": (6,5),
1211
- "train_config": (QLEARNING, 100000),
1212
- "consecutive": cons,
1213
- "percentage": perc
1214
- },
1215
- f"I_0_{i*14+j*7+2}": {
1216
- "goal": (4,7),
1217
- "train_config": (QLEARNING, 100000),
1218
- "consecutive": cons,
1219
- "percentage": perc
1220
- },
1221
- f"I_0_{i*14+j*7+3}": {
1222
- "goal": (2,5),
1223
- "train_config": (QLEARNING, 100000),
1224
- "consecutive": cons,
1225
- "percentage": perc
1226
- },
1227
- f"I_0_{i*14+j*7+4}": {
1228
- "goal": (5,2),
1229
- "train_config": (QLEARNING, 100000),
1230
- "consecutive": cons,
1231
- "percentage": perc
1232
- },
1233
- f"I_0_{i*14+j*7+5}": {
1234
- "goal": (4,5),
1235
- "train_config": (QLEARNING, 100000),
1236
- "consecutive": cons,
1237
- "percentage": perc
1238
- },
1239
- f"I_0_{i*14+j*7+6}": {
1240
- "goal": (1,1),
1241
- "train_config": (QLEARNING, 100000),
1242
- "consecutive": cons,
1243
- "percentage": perc
1244
- }})
1236
+ ### MINIGRID ###
1237
+
1238
+ PROBLEMS[MINIGRID]["MiniGrid-SimpleCrossingS13N4"]["L1"].update(
1239
+ {
1240
+ f"I_0_{i*6+j*3}": {
1241
+ "goal": (11, 1),
1242
+ "train_config": (QLEARNING, 100000),
1243
+ "consecutive": cons,
1244
+ "percentage": perc,
1245
+ },
1246
+ f"I_0_{i*6+j*3+1}": {
1247
+ "goal": (1, 11),
1248
+ "train_config": (QLEARNING, 100000),
1249
+ "consecutive": cons,
1250
+ "percentage": perc,
1251
+ },
1252
+ f"I_0_{i*6+j*3+2}": {
1253
+ "goal": (11, 11),
1254
+ "train_config": (QLEARNING, 100000),
1255
+ "consecutive": cons,
1256
+ "percentage": perc,
1257
+ },
1258
+ }
1259
+ )
1260
+ PROBLEMS[MINIGRID]["MiniGrid-SimpleCrossingS13N4"]["L2"].update(
1261
+ {
1262
+ f"I_0_{i*8+j*4}": {
1263
+ "goal": (11, 1),
1264
+ "train_config": (QLEARNING, 100000),
1265
+ "consecutive": cons,
1266
+ "percentage": perc,
1267
+ },
1268
+ f"I_0_{i*8+j*4+1}": {
1269
+ "goal": (1, 11),
1270
+ "train_config": (QLEARNING, 100000),
1271
+ "consecutive": cons,
1272
+ "percentage": perc,
1273
+ },
1274
+ f"I_0_{i*8+j*4+2}": {
1275
+ "goal": (11, 11),
1276
+ "train_config": (QLEARNING, 100000),
1277
+ "consecutive": cons,
1278
+ "percentage": perc,
1279
+ },
1280
+ f"I_0_{i*8+j*4+3}": {
1281
+ "goal": (5, 9),
1282
+ "train_config": (QLEARNING, 100000),
1283
+ "consecutive": cons,
1284
+ "percentage": perc,
1285
+ },
1286
+ }
1287
+ )
1288
+ PROBLEMS[MINIGRID]["MiniGrid-SimpleCrossingS13N4"]["L3"].update(
1289
+ {
1290
+ f"I_0_{i*10+j*5}": {
1291
+ "goal": (11, 1),
1292
+ "train_config": (QLEARNING, 100000),
1293
+ "consecutive": cons,
1294
+ "percentage": perc,
1295
+ },
1296
+ f"I_0_{i*10+j*5+1}": {
1297
+ "goal": (1, 11),
1298
+ "train_config": (QLEARNING, 100000),
1299
+ "consecutive": cons,
1300
+ "percentage": perc,
1301
+ },
1302
+ f"I_0_{i*10+j*5+2}": {
1303
+ "goal": (11, 11),
1304
+ "train_config": (QLEARNING, 100000),
1305
+ "consecutive": cons,
1306
+ "percentage": perc,
1307
+ },
1308
+ f"I_0_{i*10+j*5+3}": {
1309
+ "goal": (5, 9),
1310
+ "train_config": (QLEARNING, 100000),
1311
+ "consecutive": cons,
1312
+ "percentage": perc,
1313
+ },
1314
+ f"I_0_{i*10+j*5+4}": {
1315
+ "goal": (6, 1),
1316
+ "train_config": (QLEARNING, 100000),
1317
+ "consecutive": cons,
1318
+ "percentage": perc,
1319
+ },
1320
+ }
1321
+ )
1322
+ PROBLEMS[MINIGRID]["MiniGrid-SimpleCrossingS13N4"]["L4"].update(
1323
+ {
1324
+ f"I_0_{i*12+j*6}": {
1325
+ "goal": (11, 1),
1326
+ "train_config": (QLEARNING, 100000),
1327
+ "consecutive": cons,
1328
+ "percentage": perc,
1329
+ },
1330
+ f"I_0_{i*12+j*6+1}": {
1331
+ "goal": (1, 11),
1332
+ "train_config": (QLEARNING, 100000),
1333
+ "consecutive": cons,
1334
+ "percentage": perc,
1335
+ },
1336
+ f"I_0_{i*12+j*6+2}": {
1337
+ "goal": (11, 11),
1338
+ "train_config": (QLEARNING, 100000),
1339
+ "consecutive": cons,
1340
+ "percentage": perc,
1341
+ },
1342
+ f"I_0_{i*12+j*6+3}": {
1343
+ "goal": (5, 9),
1344
+ "train_config": (QLEARNING, 100000),
1345
+ "consecutive": cons,
1346
+ "percentage": perc,
1347
+ },
1348
+ f"I_0_{i*12+j*6+5}": {
1349
+ "goal": (11, 3),
1350
+ "train_config": (QLEARNING, 100000),
1351
+ "consecutive": cons,
1352
+ "percentage": perc,
1353
+ },
1354
+ }
1355
+ )
1356
+ PROBLEMS[MINIGRID]["MiniGrid-SimpleCrossingS13N4"]["L5"].update(
1357
+ {
1358
+ f"I_0_{i*14+j*7}": {
1359
+ "goal": (11, 1),
1360
+ "train_config": (QLEARNING, 100000),
1361
+ "consecutive": cons,
1362
+ "percentage": perc,
1363
+ },
1364
+ f"I_0_{i*14+j*7+1}": {
1365
+ "goal": (1, 11),
1366
+ "train_config": (QLEARNING, 100000),
1367
+ "consecutive": cons,
1368
+ "percentage": perc,
1369
+ },
1370
+ f"I_0_{i*14+j*7+2}": {
1371
+ "goal": (11, 11),
1372
+ "train_config": (QLEARNING, 100000),
1373
+ "consecutive": cons,
1374
+ "percentage": perc,
1375
+ },
1376
+ f"I_0_{i*14+j*7+3}": {
1377
+ "goal": (5, 9),
1378
+ "train_config": (QLEARNING, 100000),
1379
+ "consecutive": cons,
1380
+ "percentage": perc,
1381
+ },
1382
+ f"I_0_{i*14+j*7+5}": {
1383
+ "goal": (11, 3),
1384
+ "train_config": (QLEARNING, 100000),
1385
+ "consecutive": cons,
1386
+ "percentage": perc,
1387
+ },
1388
+ f"I_0_{i*14+j*7+6}": {
1389
+ "goal": (11, 5),
1390
+ "train_config": (QLEARNING, 100000),
1391
+ "consecutive": cons,
1392
+ "percentage": perc,
1393
+ },
1394
+ }
1395
+ )
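The MiniGrid goals above are (x, y) cells inside the SimpleCrossingS13N4 grid. Assuming the usual MiniGrid convention that S13 means a 13x13 grid whose outer ring is wall, every goal cell should lie in the interior range 1..11, which a quick check confirms for the coordinates used here:

# Sanity check under the 13x13-with-wall-border assumption; not gr_libs code.
GRID_SIZE = 13
goals = [(11, 1), (1, 11), (11, 11), (5, 9), (6, 1), (11, 3), (11, 5)]

for x, y in goals:
    assert 1 <= x <= GRID_SIZE - 2 and 1 <= y <= GRID_SIZE - 2, f"goal {(x, y)} is on or outside the wall border"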
1396
+
1397
+ PROBLEMS[MINIGRID]["MiniGrid-LavaCrossingS9N2"]["L1"].update(
1398
+ {
1399
+ f"I_0_{i*6+j*3}": {
1400
+ "goal": (1, 3),
1401
+ "train_config": (QLEARNING, 100000),
1402
+ "consecutive": cons,
1403
+ "percentage": perc,
1404
+ },
1405
+ f"I_0_{i*6+j*3+1}": {
1406
+ "goal": (6, 5),
1407
+ "train_config": (QLEARNING, 100000),
1408
+ "consecutive": cons,
1409
+ "percentage": perc,
1410
+ },
1411
+ f"I_0_{i*6+j*3+2}": {
1412
+ "goal": (4, 7),
1413
+ "train_config": (QLEARNING, 100000),
1414
+ "consecutive": cons,
1415
+ "percentage": perc,
1416
+ },
1417
+ }
1418
+ )
1419
+ PROBLEMS[MINIGRID]["MiniGrid-LavaCrossingS9N2"]["L2"].update(
1420
+ {
1421
+ f"I_0_{i*8+j*4}": {
1422
+ "goal": (1, 3),
1423
+ "train_config": (QLEARNING, 100000),
1424
+ "consecutive": cons,
1425
+ "percentage": perc,
1426
+ },
1427
+ f"I_0_{i*8+j*4+1}": {
1428
+ "goal": (6, 5),
1429
+ "train_config": (QLEARNING, 100000),
1430
+ "consecutive": cons,
1431
+ "percentage": perc,
1432
+ },
1433
+ f"I_0_{i*8+j*4+2}": {
1434
+ "goal": (4, 7),
1435
+ "train_config": (QLEARNING, 100000),
1436
+ "consecutive": cons,
1437
+ "percentage": perc,
1438
+ },
1439
+ f"I_0_{i*8+j*4+3}": {
1440
+ "goal": (2, 5),
1441
+ "train_config": (QLEARNING, 100000),
1442
+ "consecutive": cons,
1443
+ "percentage": perc,
1444
+ },
1445
+ }
1446
+ )
1447
+ PROBLEMS[MINIGRID]["MiniGrid-LavaCrossingS9N2"]["L3"].update(
1448
+ {
1449
+ f"I_0_{i*10+j*5}": {
1450
+ "goal": (1, 3),
1451
+ "train_config": (QLEARNING, 100000),
1452
+ "consecutive": cons,
1453
+ "percentage": perc,
1454
+ },
1455
+ f"I_0_{i*10+j*5+1}": {
1456
+ "goal": (6, 5),
1457
+ "train_config": (QLEARNING, 100000),
1458
+ "consecutive": cons,
1459
+ "percentage": perc,
1460
+ },
1461
+ f"I_0_{i*10+j*5+2}": {
1462
+ "goal": (4, 7),
1463
+ "train_config": (QLEARNING, 100000),
1464
+ "consecutive": cons,
1465
+ "percentage": perc,
1466
+ },
1467
+ f"I_0_{i*10+j*5+3}": {
1468
+ "goal": (2, 5),
1469
+ "train_config": (QLEARNING, 100000),
1470
+ "consecutive": cons,
1471
+ "percentage": perc,
1472
+ },
1473
+ f"I_0_{i*10+j*5+4}": {
1474
+ "goal": (5, 2),
1475
+ "train_config": (QLEARNING, 100000),
1476
+ "consecutive": cons,
1477
+ "percentage": perc,
1478
+ },
1479
+ }
1480
+ )
1481
+ PROBLEMS[MINIGRID]["MiniGrid-LavaCrossingS9N2"]["L4"].update(
1482
+ {
1483
+ f"I_0_{i*12+j*6}": {
1484
+ "goal": (1, 3),
1485
+ "train_config": (QLEARNING, 100000),
1486
+ "consecutive": cons,
1487
+ "percentage": perc,
1488
+ },
1489
+ f"I_0_{i*12+j*6+1}": {
1490
+ "goal": (6, 5),
1491
+ "train_config": (QLEARNING, 100000),
1492
+ "consecutive": cons,
1493
+ "percentage": perc,
1494
+ },
1495
+ f"I_0_{i*12+j*6+2}": {
1496
+ "goal": (4, 7),
1497
+ "train_config": (QLEARNING, 100000),
1498
+ "consecutive": cons,
1499
+ "percentage": perc,
1500
+ },
1501
+ f"I_0_{i*12+j*6+3}": {
1502
+ "goal": (2, 5),
1503
+ "train_config": (QLEARNING, 100000),
1504
+ "consecutive": cons,
1505
+ "percentage": perc,
1506
+ },
1507
+ f"I_0_{i*12+j*6+4}": {
1508
+ "goal": (5, 2),
1509
+ "train_config": (QLEARNING, 100000),
1510
+ "consecutive": cons,
1511
+ "percentage": perc,
1512
+ },
1513
+ f"I_0_{i*12+j*6+5}": {
1514
+ "goal": (4, 5),
1515
+ "train_config": (QLEARNING, 100000),
1516
+ "consecutive": cons,
1517
+ "percentage": perc,
1518
+ },
1519
+ }
1520
+ )
1521
+ PROBLEMS[MINIGRID]["MiniGrid-LavaCrossingS9N2"]["L5"].update(
1522
+ {
1523
+ f"I_0_{i*14+j*7}": {
1524
+ "goal": (1, 3),
1525
+ "train_config": (QLEARNING, 100000),
1526
+ "consecutive": cons,
1527
+ "percentage": perc,
1528
+ },
1529
+ f"I_0_{i*14+j*7+1}": {
1530
+ "goal": (6, 5),
1531
+ "train_config": (QLEARNING, 100000),
1532
+ "consecutive": cons,
1533
+ "percentage": perc,
1534
+ },
1535
+ f"I_0_{i*14+j*7+2}": {
1536
+ "goal": (4, 7),
1537
+ "train_config": (QLEARNING, 100000),
1538
+ "consecutive": cons,
1539
+ "percentage": perc,
1540
+ },
1541
+ f"I_0_{i*14+j*7+3}": {
1542
+ "goal": (2, 5),
1543
+ "train_config": (QLEARNING, 100000),
1544
+ "consecutive": cons,
1545
+ "percentage": perc,
1546
+ },
1547
+ f"I_0_{i*14+j*7+4}": {
1548
+ "goal": (5, 2),
1549
+ "train_config": (QLEARNING, 100000),
1550
+ "consecutive": cons,
1551
+ "percentage": perc,
1552
+ },
1553
+ f"I_0_{i*14+j*7+5}": {
1554
+ "goal": (4, 5),
1555
+ "train_config": (QLEARNING, 100000),
1556
+ "consecutive": cons,
1557
+ "percentage": perc,
1558
+ },
1559
+ f"I_0_{i*14+j*7+6}": {
1560
+ "goal": (1, 1),
1561
+ "train_config": (QLEARNING, 100000),
1562
+ "consecutive": cons,
1563
+ "percentage": perc,
1564
+ },
1565
+ }
1566
+ )
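Across the three domains in this section the goal field changes representation: Panda goals are (1, 3) numpy arrays of end-effector coordinates, while PointMaze and MiniGrid goals are 2-D (x, y) tuples. A tiny illustrative-only helper (the function name is an assumption, not gr_libs API) can flatten either form into a plain vector, e.g. for distance computations:

import numpy as np

def goal_as_vector(goal):
    # Accepts either a (1, 3) array like the Panda goals or an (x, y) tuple.
    return np.asarray(goal, dtype=float).reshape(-1)

assert goal_as_vector(np.array([[-0.1, 0.1, 0.1]])).shape == (3,)
assert goal_as_vector((4, 7)).shape == (2,)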