gr-libs 0.2.5__tar.gz → 0.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. {gr_libs-0.2.5 → gr_libs-0.2.6}/PKG-INFO +16 -11
  2. {gr_libs-0.2.5 → gr_libs-0.2.6}/README.md +15 -10
  3. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/__init__.py +6 -1
  4. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/_version.py +2 -2
  5. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/environment/environment.py +104 -15
  6. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/ml/consts.py +1 -0
  7. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/ml/neural/deep_rl_learner.py +101 -14
  8. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/odgr_executor.py +7 -2
  9. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/recognizer/_utils/format.py +7 -1
  10. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +146 -1
  11. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/recognizer/graml/graml_recognizer.py +4 -4
  12. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/recognizer/recognizer.py +4 -4
  13. gr_libs-0.2.6/gr_libs/tutorials/gcaura_panda_tutorial.py +168 -0
  14. gr_libs-0.2.6/gr_libs/tutorials/gcaura_parking_tutorial.py +167 -0
  15. gr_libs-0.2.6/gr_libs/tutorials/gcaura_point_maze_tutorial.py +169 -0
  16. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs.egg-info/PKG-INFO +16 -11
  17. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs.egg-info/SOURCES.txt +5 -0
  18. gr_libs-0.2.6/tests/test_gcaura.py +15 -0
  19. gr_libs-0.2.6/tests/test_odgr_executor_gcaura.py +14 -0
  20. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/_evaluation/_generate_experiments_results.py +0 -0
  21. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/all_experiments.py +0 -0
  22. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/environment/__init__.py +0 -0
  23. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/environment/_utils/__init__.py +0 -0
  24. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/environment/_utils/utils.py +0 -0
  25. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/evaluation/__init__.py +0 -0
  26. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/evaluation/generate_experiments_results.py +0 -0
  27. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/metrics/__init__.py +0 -0
  28. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/metrics/metrics.py +0 -0
  29. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/ml/__init__.py +0 -0
  30. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/ml/agent.py +0 -0
  31. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/ml/base/__init__.py +0 -0
  32. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/ml/base/rl_agent.py +0 -0
  33. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/ml/neural/__init__.py +0 -0
  34. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/ml/neural/utils/__init__.py +0 -0
  35. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/ml/neural/utils/dictlist.py +0 -0
  36. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/ml/planner/__init__.py +0 -0
  37. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/ml/planner/mcts/__init__.py +0 -0
  38. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/ml/planner/mcts/_utils/__init__.py +0 -0
  39. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/ml/planner/mcts/_utils/node.py +0 -0
  40. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/ml/planner/mcts/_utils/tree.py +0 -0
  41. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/ml/planner/mcts/mcts_model.py +0 -0
  42. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/ml/sequential/__init__.py +0 -0
  43. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/ml/sequential/_lstm_model.py +0 -0
  44. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/ml/tabular/__init__.py +0 -0
  45. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/ml/tabular/state.py +0 -0
  46. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/ml/tabular/tabular_q_learner.py +0 -0
  47. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/ml/tabular/tabular_rl_agent.py +0 -0
  48. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/ml/utils/__init__.py +0 -0
  49. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/ml/utils/env.py +0 -0
  50. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/ml/utils/format.py +0 -0
  51. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/ml/utils/math.py +0 -0
  52. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/ml/utils/other.py +0 -0
  53. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/ml/utils/storage.py +0 -0
  54. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/problems/__init__.py +0 -0
  55. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/problems/consts.py +0 -0
  56. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/recognizer/__init__.py +0 -0
  57. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/recognizer/_utils/__init__.py +0 -0
  58. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/recognizer/gr_as_rl/__init__.py +0 -0
  59. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/recognizer/graml/__init__.py +0 -0
  60. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/recognizer/graml/_gr_dataset.py +0 -0
  61. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/tutorials/draco_panda_tutorial.py +0 -0
  62. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/tutorials/draco_parking_tutorial.py +0 -0
  63. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/tutorials/gcdraco_panda_tutorial.py +0 -0
  64. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/tutorials/gcdraco_parking_tutorial.py +0 -0
  65. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/tutorials/graml_minigrid_tutorial.py +0 -0
  66. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/tutorials/graml_panda_tutorial.py +0 -0
  67. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/tutorials/graml_parking_tutorial.py +0 -0
  68. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/tutorials/graml_point_maze_tutorial.py +0 -0
  69. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs/tutorials/graql_minigrid_tutorial.py +0 -0
  70. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs.egg-info/dependency_links.txt +0 -0
  71. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs.egg-info/requires.txt +0 -0
  72. {gr_libs-0.2.5 → gr_libs-0.2.6}/gr_libs.egg-info/top_level.txt +0 -0
  73. {gr_libs-0.2.5 → gr_libs-0.2.6}/pyproject.toml +0 -0
  74. {gr_libs-0.2.5 → gr_libs-0.2.6}/setup.cfg +0 -0
  75. {gr_libs-0.2.5 → gr_libs-0.2.6}/tests/test_draco.py +0 -0
  76. {gr_libs-0.2.5 → gr_libs-0.2.6}/tests/test_gcdraco.py +0 -0
  77. {gr_libs-0.2.5 → gr_libs-0.2.6}/tests/test_graml.py +0 -0
  78. {gr_libs-0.2.5 → gr_libs-0.2.6}/tests/test_graql.py +0 -0
  79. {gr_libs-0.2.5 → gr_libs-0.2.6}/tests/test_odgr_executor_expertbasedgraml.py +0 -0
  80. {gr_libs-0.2.5 → gr_libs-0.2.6}/tests/test_odgr_executor_gcdraco.py +0 -0
  81. {gr_libs-0.2.5 → gr_libs-0.2.6}/tests/test_odgr_executor_gcgraml.py +0 -0
  82. {gr_libs-0.2.5 → gr_libs-0.2.6}/tests/test_odgr_executor_graql.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: gr_libs
- Version: 0.2.5
+ Version: 0.2.6
  Summary: Package with goal recognition frameworks baselines
  Author: Ben Nageris
  Author-email: Matan Shamir <matan.shamir@live.biu.ac.il>, Osher Elhadad <osher.elhadad@live.biu.ac.il>
@@ -108,16 +108,16 @@ For any issues or troubleshooting, please refer to the repository's issue tracke

  ## Supported Algorithms

- Successors of algorithms that don't differ in their specifics are added in parentheses after the algorithm name. For example, since GC-DRACO and DRACO share the same column values, they're written on one line as DRACO (GC).
+ | **Algorithm** | **Supervised** | **Reinforcement Learning** | **Discrete States** | **Continuous States** | **Discrete Actions** | **Continuous Actions** | **Model-Based** | **Model-Free** | **Action-Only** | **Goal Conditioned** | **Fine-Tuning** | **Supported Environments** |
+ |---------------------|----------------|---------------------------|---------------------|----------------------|----------------------|-----------------------|------------------|----------------|----------------|---------------------|-----------------|-------------------------------------------|
+ | Graql | ❌ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | Minigrid |
+ | Draco | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | PointMaze, Panda Reach, Parking |
+ | GCDraco | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ❌ | Panda Reach, Parking |
+ | GCAura | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ✅ | PointMaze, Panda Reach, Parking |
+ | ExpertBasedGraml | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | Panda Reach, Parking |
+ | BGGraml | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | Minigrid, PointMaze |
+ | GCGraml | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | Panda Reach, Parking |

- | **Algorithm** | **Supervised** | **Reinforcement Learning** | **Discrete States** | **Continuous States** | **Discrete Actions** | **Continuous Actions** | **Model-Based** | **Model-Free** | **Action-Only** | **Supported Environments** |
- |---------------------|----------------|---------------------------|---------------------|----------------------|----------------------|-----------------------|------------------|----------------|----------------|--------------------------------------------|
- | Graql | ❌ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | Minigrid |
- | Draco | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | PointMaze, Panda Reach, Parking |
- | GCDraco | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | Panda Reach, Parking |
- | ExpertBasedGraml | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | Panda Reach, Parking |
- | BGGraml | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | Minigrid, PointMaze |
- | GCGraml | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | Panda Reach, Parking |

  ## Supported Domains

@@ -283,7 +283,12 @@ python gr_libs/all_experiments.py \

  This script uses multiprocessing to simultaneously execute many `odgr_executor.py` runs as child processes. It logs failures and successful executions for debugability.

- After execution, summary files are generated in `outputs/summaries/` for further analysis and plotting.
+ After execution summary files are generated in `outputs/summaries/` for further analysis and plotting.
+
+ another execution example:
+ ```sh
+ python gr_libs/all_experiments.py --domains parking --envs Parking-S-14-PC- --tasks L1 L2 L3 L4 L5 --recognizers GCAura GCGraml GCDraco BGGraml Draco --n 5
+ ```

  ### Using analysis scripts

  The repository provides benchmark domains and scripts for analyzing experimental results. The `evaluation` directory contains tools for processing and visualizing the results from odgr_executor.py and all_experiments.py.
@@ -79,16 +79,16 @@ For any issues or troubleshooting, please refer to the repository's issue tracke

  ## Supported Algorithms

- Successors of algorithms that don't differ in their specifics are added in parentheses after the algorithm name. For example, since GC-DRACO and DRACO share the same column values, they're written on one line as DRACO (GC).
+ | **Algorithm** | **Supervised** | **Reinforcement Learning** | **Discrete States** | **Continuous States** | **Discrete Actions** | **Continuous Actions** | **Model-Based** | **Model-Free** | **Action-Only** | **Goal Conditioned** | **Fine-Tuning** | **Supported Environments** |
+ |---------------------|----------------|---------------------------|---------------------|----------------------|----------------------|-----------------------|------------------|----------------|----------------|---------------------|-----------------|-------------------------------------------|
+ | Graql | ❌ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | Minigrid |
+ | Draco | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | PointMaze, Panda Reach, Parking |
+ | GCDraco | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ❌ | Panda Reach, Parking |
+ | GCAura | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ✅ | PointMaze, Panda Reach, Parking |
+ | ExpertBasedGraml | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | Panda Reach, Parking |
+ | BGGraml | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | Minigrid, PointMaze |
+ | GCGraml | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | Panda Reach, Parking |

- | **Algorithm** | **Supervised** | **Reinforcement Learning** | **Discrete States** | **Continuous States** | **Discrete Actions** | **Continuous Actions** | **Model-Based** | **Model-Free** | **Action-Only** | **Supported Environments** |
- |---------------------|----------------|---------------------------|---------------------|----------------------|----------------------|-----------------------|------------------|----------------|----------------|--------------------------------------------|
- | Graql | ❌ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | Minigrid |
- | Draco | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | PointMaze, Panda Reach, Parking |
- | GCDraco | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | Panda Reach, Parking |
- | ExpertBasedGraml | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | Panda Reach, Parking |
- | BGGraml | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | Minigrid, PointMaze |
- | GCGraml | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | Panda Reach, Parking |

  ## Supported Domains

@@ -254,7 +254,12 @@ python gr_libs/all_experiments.py \

  This script uses multiprocessing to simultaneously execute many `odgr_executor.py` runs as child processes. It logs failures and successful executions for debugability.

- After execution, summary files are generated in `outputs/summaries/` for further analysis and plotting.
+ After execution summary files are generated in `outputs/summaries/` for further analysis and plotting.
+
+ another execution example:
+ ```sh
+ python gr_libs/all_experiments.py --domains parking --envs Parking-S-14-PC- --tasks L1 L2 L3 L4 L5 --recognizers GCAura GCGraml GCDraco BGGraml Draco --n 5
+ ```

  ### Using analysis scripts

  The repository provides benchmark domains and scripts for analyzing experimental results. The `evaluation` directory contains tools for processing and visualizing the results from odgr_executor.py and all_experiments.py.
@@ -1,6 +1,11 @@
  """gr_libs: Baselines for goal recognition executions on gym environments."""

- from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Draco, GCDraco, Graql
+ from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import (
+ Draco,
+ GCDraco,
+ Graql,
+ GCAura,
+ )
  from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml, GCGraml

  try:
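
The practical effect of this hunk is that `GCAura` joins the recognizers importable straight from the package root. A minimal sketch of the 0.2.6 import surface (the version check assumes the package's usual `_version` lookup succeeds):

```python
# Sketch of the 0.2.6 top-level API; GCAura is the newly exported recognizer.
import gr_libs
from gr_libs import Draco, GCDraco, Graql, GCAura, ExpertBasedGraml, GCGraml

print(gr_libs.__version__)  # should report "0.2.6"
```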
@@ -17,5 +17,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE

- __version__ = version = '0.2.5'
- __version_tuple__ = version_tuple = (0, 2, 5)
+ __version__ = version = '0.2.6'
+ __version_tuple__ = version_tuple = (0, 2, 6)
@@ -1,4 +1,4 @@
- """ environment.py """
+ """environment.py"""

  import os
  import sys
@@ -14,6 +14,8 @@ from minigrid.wrappers import ImgObsWrapper, RGBImgPartialObsWrapper
  from PIL import Image
  from stable_baselines3.common.vec_env import DummyVecEnv

+ from gr_envs.wrappers.goal_wrapper import GoalRecognitionWrapper
+
  MINIGRID, PANDA, PARKING, POINT_MAZE = "minigrid", "panda", "parking", "point_maze"

  QLEARNING = "QLEARNING"
@@ -110,6 +112,12 @@ class EnvProperty:
  Convert a list of problems to a string tuple.
  """

+ @abstractmethod
+ def goal_to_str(self, goal):
+ """
+ Convert a goal to a string representation.
+ """
+
  @abstractmethod
  def goal_to_problem_str(self, goal):
  """
@@ -166,6 +174,29 @@ class EnvProperty:
  Change the goal to a specific desired goal.
  """

+ def is_goal_in_subspace(self, goal):
+ """
+ Check if a goal is within the specified goal subspace.
+
+ Args:
+ goal: The goal to check
+ goal_subspace: The goal subspace to check against
+
+ Returns:
+ bool: True if the goal is within the subspace, False otherwise
+ """
+ env = gym.make(id=self.name)
+ while env is not None and hasattr(env, "env"):
+ if isinstance(env, GoalRecognitionWrapper) and hasattr(
+ env, "is_goal_in_subspace"
+ ):
+ # If the environment has a goal recognition wrapper, use its method
+ return env.is_goal_in_subspace(goal)
+ # Traverse through wrappers to find the base environment
+ env = env.env
+
+ return True
+

  class GCEnvProperty(EnvProperty):
  """
@@ -194,16 +225,25 @@ class MinigridProperty(EnvProperty):
  super().__init__(name)
  self.domain_name = "minigrid"

+ def goal_to_str(self, goal):
+ """
+ Convert a goal to a string representation.
+ """
+ return f"{goal[0]}x{goal[1]}"
+

  def goal_to_problem_str(self, goal):
  """
  Convert a goal to a problem string.
- return self.name + f"-DynamicGoal-{goal[0]}x{goal[1]}-v0"
+ return self.name + f"-DynamicGoal-{self.goal_to_str(goal)}-v0"

- def str_to_goal(self, problem_name):
+ def str_to_goal(self, problem_name=None):
  """
  Convert a problem name to a goal.
  """
+ if problem_name is None:
+ problem_name = self.name
+
  parts = problem_name.split("-")
  goal_part = [part for part in parts if "x" in part]
  width, height = goal_part[0].split("x")
@@ -325,30 +365,36 @@ class PandaProperty(GCEnvProperty):
  super().__init__(name)
  self.domain_name = "panda"

- def str_to_goal(self, problem_name):
+ def str_to_goal(self, problem_name=None):
  """
  Convert a problem name to a goal.
  """
+ if problem_name is None:
+ return "general"
  try:
  numeric_part = problem_name.split("PandaMyReachDenseX")[1]
  components = [
  component.replace("-v3", "").replace("y", ".").replace("M", "-")
  for component in numeric_part.split("X")
  ]
- floats = []
- for component in components:
- floats.append(float(component))
- return np.array([floats], dtype=np.float32)
+ floats = [float(component) for component in components]
+ return np.array([floats])
  except Exception:
  return "general"

- def goal_to_problem_str(self, goal):
+ def goal_to_str(self, goal):
  """
- Convert a goal to a problem string.
+ Convert a goal to a string representation.
  """
- goal_str = "X".join(
+ return "X".join(
  [str(float(g)).replace(".", "y").replace("-", "M") for g in goal[0]]
  )
+
+ def goal_to_problem_str(self, goal):
+ """
+ Convert a goal to a problem string.
+ """
+ goal_str = self.goal_to_str(goal)
  return f"PandaMyReachDenseX{goal_str}-v3"

  def gc_adaptable(self):
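
The refactor isolates the goal-to-filename encoding in `goal_to_str`: each coordinate is stringified with `.` mapped to `y` and `-` mapped to `M`, and coordinates are joined by `X`. A small worked round trip under those rules (the goal values and the constructor argument are illustrative only):

```python
import numpy as np
from gr_libs.environment.environment import PandaProperty

prop = PandaProperty("PandaMyReachDense")  # name shown only for illustration

goal = np.array([[-0.1, 0.2, 0.1]])
prop.goal_to_str(goal)          # "M0y1X0y2X0y1"
prop.goal_to_problem_str(goal)  # "PandaMyReachDenseXM0y1X0y2X0y1-v3"
prop.str_to_goal("PandaMyReachDenseXM0y1X0y2X0y1-v3")  # array([[-0.1, 0.2, 0.1]])
```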
@@ -450,10 +496,34 @@ class ParkingProperty(GCEnvProperty):
  super().__init__(name)
  self.domain_name = "parking"

+ def str_to_goal(self, problem_name=None):
+ """
+ Convert a problem name to a goal.
+ """
+ if not problem_name:
+ problem_name = self.name
+ # Extract the goal from the part
+ return int(problem_name.split("GI-")[1].split("-v0")[0])
+
+ def goal_to_str(self, goal):
+ """
+ Convert a goal to a string representation.
+ """
+ if isinstance(goal, int):
+ return str(goal)
+ elif isinstance(goal, str):
+ return goal
+ else:
+ raise ValueError(
+ f"Unsupported goal type: {type(goal)}. Expected int or str."
+ )
+
  def goal_to_problem_str(self, goal):
  """
  Convert a goal to a problem string.
  """
+ if "-GI-" in self.name:
+ return self.name.split("-GI-")[0] + f"-GI-{goal}-v0"
  return self.name.split("-v0")[0] + f"-GI-{goal}-v0"

  def gc_adaptable(self):
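
The extra `-GI-` branch keeps parking problem ids well formed when the property was itself built from a goal-specific id: an existing `-GI-<goal>-v0` tail is replaced instead of being appended to. A short sketch of both cases (the ids are illustrative):

```python
from gr_libs.environment.environment import ParkingProperty

# Base id: the goal index is appended as before.
ParkingProperty("Parking-S-14-PC--v0").goal_to_problem_str(7)
# -> "Parking-S-14-PC--GI-7-v0"

# Id that already carries a goal: the old "-GI-3" part is swapped out, not duplicated.
ParkingProperty("Parking-S-14-PC--GI-3-v0").goal_to_problem_str(7)
# -> "Parking-S-14-PC--GI-7-v0"
```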
@@ -536,9 +606,11 @@ class PointMazeProperty(EnvProperty):
  super().__init__(name)
  self.domain_name = "point_maze"

- def str_to_goal(self):
+ def str_to_goal(self, problem_name=None):
  """Convert a problem name to a goal."""
- parts = self.name.split("-")
+ if not problem_name:
+ problem_name = self.name
+ parts = problem_name.split("-")
  # Find the part containing the goal size (usually after "DynamicGoal")
  sizes_parts = [part for part in parts if "x" in part]
  goal_part = sizes_parts[1]
@@ -546,9 +618,15 @@ class PointMazeProperty(EnvProperty):
  width, height = goal_part.split("x")
  return (int(width), int(height))

+ def goal_to_str(self, goal):
+ """
+ Convert a goal to a string representation.
+ """
+ return f"{goal[0]}x{goal[1]}"
+
  def gc_adaptable(self):
  """Check if the environment is goal-conditioned adaptable."""
- return False
+ return True

  def problem_list_to_str_tuple(self, problems):
  """Convert a list of problems to a string tuple."""
@@ -574,7 +652,12 @@ class PointMazeProperty(EnvProperty):
  """
  Convert a goal to a problem string.
  """
- return self.name + f"-Goal-{goal[0]}x{goal[1]}"
+ possible_suffixes = ["-Goals-", "-Goal-", "-MultiGoals-", "-GoalConditioned-"]
+ for suffix in possible_suffixes:
+ if suffix in self.name:
+ return self.name.split(suffix)[0] + f"-Goal-{self.goal_to_str(goal)}"
+
+ return self.name + f"-Goal-{self.goal_to_str(goal)}"

  def change_done_by_specific_desired(self, obs, desired, old_success_done):
  """
@@ -592,6 +675,12 @@ class PointMazeProperty(EnvProperty):
  assert isinstance(done, np.ndarray)
  return done[0]

+ def use_goal_directed_problem(self):
+ """
+ Check if the environment uses a goal-directed problem.
+ """
+ return True
+
  def is_success(self, info):
  """
  Check if the episode is successful.
@@ -20,3 +20,4 @@ OPTIM_ALPHA = 0.99
  CLIP_EPS = 0.2
  RECURRENCE = 1
  TEXT = False
+ FINETUNE_TIMESTEPS = 100000 # for GCAura fine-tuning
@@ -1,6 +1,7 @@
  import gc
  from collections import OrderedDict
  from types import MethodType
+ from typing import Any

  import cv2
  import numpy as np
@@ -22,6 +23,10 @@ from stable_baselines3.common.base_class import BaseAlgorithm

  from gr_libs.ml.utils import device

+ from gr_libs.ml.consts import (
+ FINETUNE_TIMESTEPS,
+ )
+
  # TODO do we need this?
  NETWORK_SETUP = {
  SAC: OrderedDict(
@@ -236,27 +241,46 @@ class DeepRLAgent:
  self._model_file_path, env=self.env, device=device, **self.model_kwargs
  )

- def learn(self):
+ def learn(self, goal=None, total_timesteps=None):
  """Train the agent."""
- if os.path.exists(self._model_file_path):
- print(f"Loading pre-existing model in {self._model_file_path}")
+ model_file_path = self._model_file_path
+ old_model_file_path = model_file_path
+ if goal is not None:
+ model_file_path = self._model_file_path.replace(
+ ".pth", f"_{goal}.pth"
+ ).replace(".zip", f"_{goal}.zip")
+ if total_timesteps is not None:
+ model_file_path = model_file_path.replace(
+ ".pth", f"_{total_timesteps}.pth"
+ ).replace(".zip", f"_{total_timesteps}.zip")
+
+ self._model_file_path = model_file_path
+
+ if os.path.exists(model_file_path):
+ print(f"Loading pre-existing model in {model_file_path}")
  self.load_model()
  else:
- print(f"No existing model in {self._model_file_path}, starting learning")
- if self.exploration_rate is not None:
- self._model = self.algorithm(
- "MultiInputPolicy",
- self.env,
- ent_coef=self.exploration_rate,
- verbose=1,
- )
- else:
- self._model = self.algorithm("MultiInputPolicy", self.env, verbose=1)
+ print(f"No existing model in {model_file_path}, starting learning")
+ if total_timesteps is None:
+ total_timesteps = self.num_timesteps
+ if self.exploration_rate is not None:
+ self._model = self.algorithm(
+ "MultiInputPolicy",
+ self.env,
+ ent_coef=self.exploration_rate,
+ verbose=1,
+ )
+ else:
+ self._model = self.algorithm(
+ "MultiInputPolicy", self.env, verbose=1
+ )
  self._model.learn(
- total_timesteps=self.num_timesteps, progress_bar=True
+ total_timesteps=total_timesteps, progress_bar=True
  ) # comment this in a normal env
  self.save_model()

+ self._model_file_path = old_model_file_path
+
  def safe_env_reset(self):
  """
  Reset the environment safely, suppressing output.
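
The path rewriting at the top of `learn` gives each goal-specific (and timestep-specific) training run its own checkpoint file instead of overwriting the general goal-conditioned model; the original path is restored afterwards. A trace of just the string logic, assuming an illustrative base checkpoint name:

```python
# Illustrative trace of the suffixing in learn(); not a call into the library.
model_file_path = "ppo_agent.zip"   # assumed base checkpoint name
goal, total_timesteps = "3x4", 100000

path = model_file_path.replace(".pth", f"_{goal}.pth").replace(".zip", f"_{goal}.zip")
path = path.replace(".pth", f"_{total_timesteps}.pth").replace(".zip", f"_{total_timesteps}.zip")
print(path)  # ppo_agent_3x4_100000.zip
```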
@@ -503,6 +527,69 @@ class DeepRLAgent:
  self.env.close()
  return observations

+ def fine_tune(
+ self,
+ goal: Any,
+ num_timesteps: int = FINETUNE_TIMESTEPS,
+ ) -> None:
+ """
+ Fine-tune this goal-conditioned agent on a single specified goal.
+ Overrides optimizer LR if provided, resets the env to the goal, and continues training.
+
+ Args:
+ goal: The specific goal to fine-tune on. Type depends on the environment.
+ num_timesteps: Number of timesteps for fine-tuning. Defaults to FINETUNE_TIMESTEPS.
+ learning_rate: Learning rate for fine-tuning. Defaults to FINETUNE_LR.
+ """
+ # Store original environment and problem
+ original_env = self.env
+ original_problem = self.problem_name
+ created_new_env = False
+
+ try:
+ # Try to create a goal-specific environment
+ if hasattr(self.env_prop, "goal_to_problem_str") and callable(
+ self.env_prop.goal_to_problem_str
+ ):
+ try:
+ goal_problem = self.env_prop.goal_to_problem_str(goal)
+
+ # Create the goal-specific environment
+ env_kwargs = {"id": goal_problem, "render_mode": "rgb_array"}
+ new_env = self.env_prop.create_vec_env(env_kwargs)
+
+ # Update the model's environment
+ self._model.set_env(new_env)
+ self.env = new_env
+ self.problem_name = goal_problem
+ created_new_env = True
+ print(f"Created a new environment for fine-tuning: {goal_problem}")
+ except Exception as e:
+ print(f"Warning: Could not create goal-specific environment: {e}")
+
+ if not created_new_env:
+ print(
+ (
+ "Fine-tuning requires a goal-specific environment."
+ "Please ensure that the environment with the specified goal exists."
+ )
+ )
+
+ print(f"Fine-tuning for {num_timesteps} timesteps...")
+ self.learn(
+ goal=self.env_prop.goal_to_str(goal), total_timesteps=num_timesteps
+ )
+ print("Fine-tuning complete. Model saved.")
+
+ finally:
+ # Restore original environment if needed
+ if created_new_env:
+ self.env.close()
+ self._model.set_env(original_env)
+ self.env = original_env
+ self.problem_name = original_problem
+ print("Restored original environment.")
+

  class GCDeepRLAgent(DeepRLAgent):
  """
@@ -15,7 +15,7 @@ from gr_libs.ml.utils.storage import (
  )
  from gr_libs.problems.consts import PROBLEMS
  from gr_libs.recognizer._utils import recognizer_str_to_obj
- from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Draco, GCDraco
+ from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Draco, GCDraco, GCAura
  from gr_libs.recognizer.graml.graml_recognizer import Graml
  from gr_libs.recognizer.recognizer import GaAgentTrainerRecognizer, LearningRecognizer

@@ -102,7 +102,11 @@ def run_odgr_problem(args):
  }

  # need to dump the whole plan for draco because it needs it for inference phase for checking likelihood.
- if (recognizer_type == Draco or recognizer_type == GCDraco) and issubclass(
+ if (
+ recognizer_type == Draco
+ or recognizer_type == GCDraco
+ or recognizer_type == GCAura
+ ) and issubclass(
  rl_agent_type, DeepRLAgent
  ): # TODO remove this condition, remove the assumption.
  generate_obs_kwargs["with_dict"] = True
@@ -224,6 +228,7 @@ def parse_args():
  "Graql",
  "Draco",
  "GCDraco",
+ "GCAura",
  ],
  required=True,
  help="Recognizer type. Follow readme.md and recognizer folder for more information and rules.",
@@ -1,4 +1,9 @@
- from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Draco, GCDraco, Graql
+ from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import (
+ Draco,
+ GCDraco,
+ Graql,
+ GCAura,
+ )
  from gr_libs.recognizer.graml.graml_recognizer import (
  ExpertBasedGraml,
  GCGraml,
@@ -14,5 +19,6 @@ def recognizer_str_to_obj(recognizer_str: str):
  "Graql": Graql,
  "Draco": Draco,
  "GCDraco": GCDraco,
+ "GCAura": GCAura,
  }
  return recognizer_map.get(recognizer_str)
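
With the added mapping entry, string-based recognizer selection (the path used by `odgr_executor.py` and `all_experiments.py`) resolves "GCAura" to the class; unknown names still fall through to `None` via `dict.get`:

```python
from gr_libs.recognizer._utils import recognizer_str_to_obj
from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import GCAura

assert recognizer_str_to_obj("GCAura") is GCAura
assert recognizer_str_to_obj("NotARecognizer") is None  # unmapped names return None
```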