gr-libs 0.2.2__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
Files changed (39)
  1. gr_libs/__init__.py +6 -1
  2. gr_libs/_evaluation/_generate_experiments_results.py +0 -141
  3. gr_libs/_version.py +2 -2
  4. gr_libs/all_experiments.py +73 -107
  5. gr_libs/environment/environment.py +126 -17
  6. gr_libs/evaluation/generate_experiments_results.py +100 -0
  7. gr_libs/ml/consts.py +1 -0
  8. gr_libs/ml/neural/deep_rl_learner.py +118 -34
  9. gr_libs/odgr_executor.py +27 -27
  10. gr_libs/problems/consts.py +568 -290
  11. gr_libs/recognizer/_utils/__init__.py +1 -0
  12. gr_libs/recognizer/_utils/format.py +7 -1
  13. gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +158 -2
  14. gr_libs/recognizer/graml/graml_recognizer.py +18 -10
  15. gr_libs/recognizer/recognizer.py +4 -4
  16. gr_libs/tutorials/gcaura_panda_tutorial.py +168 -0
  17. gr_libs/tutorials/gcaura_parking_tutorial.py +167 -0
  18. gr_libs/tutorials/gcaura_point_maze_tutorial.py +169 -0
  19. gr_libs/tutorials/gcdraco_panda_tutorial.py +6 -2
  20. gr_libs/tutorials/gcdraco_parking_tutorial.py +3 -1
  21. gr_libs/tutorials/graml_minigrid_tutorial.py +16 -12
  22. gr_libs/tutorials/graml_panda_tutorial.py +6 -2
  23. gr_libs/tutorials/graml_parking_tutorial.py +3 -1
  24. gr_libs/tutorials/graml_point_maze_tutorial.py +15 -2
  25. {gr_libs-0.2.2.dist-info → gr_libs-0.2.6.dist-info}/METADATA +31 -15
  26. {gr_libs-0.2.2.dist-info → gr_libs-0.2.6.dist-info}/RECORD +35 -29
  27. {gr_libs-0.2.2.dist-info → gr_libs-0.2.6.dist-info}/WHEEL +1 -1
  28. tests/test_gcaura.py +15 -0
  29. tests/test_odgr_executor_expertbasedgraml.py +14 -0
  30. tests/test_odgr_executor_gcaura.py +14 -0
  31. tests/test_odgr_executor_gcdraco.py +14 -0
  32. tests/test_odgr_executor_gcgraml.py +14 -0
  33. tests/test_odgr_executor_graql.py +14 -0
  34. gr_libs/_evaluation/_analyze_results_cross_alg_cross_domain.py +0 -260
  35. gr_libs/_evaluation/_generate_task_specific_statistics_plots.py +0 -497
  36. gr_libs/_evaluation/_get_plans_images.py +0 -61
  37. gr_libs/_evaluation/_increasing_and_decreasing_.py +0 -106
  38. /gr_libs/{_evaluation → evaluation}/__init__.py +0 -0
  39. {gr_libs-0.2.2.dist-info → gr_libs-0.2.6.dist-info}/top_level.txt +0 -0
gr_libs/recognizer/_utils/__init__.py
@@ -0,0 +1 @@
+from .format import recognizer_str_to_obj
gr_libs/recognizer/_utils/format.py
@@ -1,4 +1,9 @@
-from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Draco, GCDraco, Graql
+from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import (
+    Draco,
+    GCDraco,
+    Graql,
+    GCAura,
+)
 from gr_libs.recognizer.graml.graml_recognizer import (
     ExpertBasedGraml,
     GCGraml,
@@ -14,5 +19,6 @@ def recognizer_str_to_obj(recognizer_str: str):
         "Graql": Graql,
         "Draco": Draco,
         "GCDraco": GCDraco,
+        "GCAura": GCAura,
     }
     return recognizer_map.get(recognizer_str)
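
With this change, recognizer_str_to_obj resolves the new GCAura recognizer alongside the existing ones. A minimal sketch of the lookup behavior (the import path follows the new _utils/__init__.py export above; unknown names fall through to None via dict.get):

from gr_libs.recognizer._utils import recognizer_str_to_obj

recognizer_cls = recognizer_str_to_obj("GCAura")  # -> the GCAura class
assert recognizer_str_to_obj("NotARecognizer") is None  # dict.get default
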
gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py
@@ -8,12 +8,14 @@ from gr_libs.ml.base import RLAgent
 from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
 from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
 from gr_libs.ml.utils.storage import get_gr_as_rl_experiment_confidence_path
+from gymnasium.envs.registration import register, registry
 from gr_libs.recognizer.recognizer import (
     GaAdaptingRecognizer,
     GaAgentTrainerRecognizer,
     LearningRecognizer,
     Recognizer,
 )
+from gr_libs.ml.consts import FINETUNE_TIMESTEPS


 class GRAsRL(Recognizer):
@@ -193,6 +195,10 @@ class Draco(GRAsRL, GaAgentTrainerRecognizer):
         if self.rl_agent_type == None:
             self.rl_agent_type = DeepRLAgent
         self.evaluation_function = kwargs.get("evaluation_function")
+        if self.evaluation_function is None:
+            from gr_libs.metrics.metrics import mean_wasserstein_distance
+
+            self.evaluation_function = mean_wasserstein_distance
         assert callable(
             self.evaluation_function
         ), "Evaluation function must be a callable function."
@@ -218,12 +224,19 @@ class GCDraco(GRAsRL, LearningRecognizer, GaAdaptingRecognizer):
         if self.rl_agent_type == None:
             self.rl_agent_type = GCDeepRLAgent
         self.evaluation_function = kwargs.get("evaluation_function")
+        if self.evaluation_function is None:
+            from gr_libs.metrics.metrics import mean_wasserstein_distance
+
+            self.evaluation_function = mean_wasserstein_distance
         assert callable(
             self.evaluation_function
         ), "Evaluation function must be a callable function."

-    def domain_learning_phase(self, base_goals: list[str], train_configs):
-        super().domain_learning_phase(base_goals, train_configs)
+    def domain_learning_phase(self, problems):
+        base = problems["gc"]
+        base_goals = base["goals"]
+        train_configs = base["train_configs"]
+        super().domain_learning_phase(train_configs, base_goals)
         agent_kwargs = {
             "domain_name": self.env_prop.domain_name,
             "problem_name": self.env_prop.name,
@@ -245,3 +258,146 @@ class GCDraco(GRAsRL, LearningRecognizer, GaAdaptingRecognizer):

     def choose_agent(self, problem_name: str) -> RLAgent:
         return next(iter(self.agents.values()))
+
+
+class GCAura(GRAsRL, LearningRecognizer, GaAdaptingRecognizer):
+    """
+    GCAura uses goal-conditioned reinforcement learning with adaptive fine-tuning.
+
+    It trains a base goal-conditioned policy over a goal subspace in the domain learning phase.
+    During the goal adaptation phase, it checks if new goals are within the original goal subspace:
+    - If a goal is within the subspace, it uses the original trained model
+    - If a goal is outside the subspace, it fine-tunes the model for that specific goal
+
+    This approach combines the efficiency of goal-conditioned RL with the precision of
+    goal-specific fine-tuning when needed.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        assert (
+            self.env_prop.gc_adaptable()
+            and not self.env_prop.is_state_discrete()
+            and not self.env_prop.is_action_discrete()
+        )
+        if self.rl_agent_type is None:
+            self.rl_agent_type = GCDeepRLAgent
+        self.evaluation_function = kwargs.get("evaluation_function")
+        if self.evaluation_function is None:
+            from gr_libs.metrics.metrics import mean_wasserstein_distance
+
+            self.evaluation_function = mean_wasserstein_distance
+        assert callable(
+            self.evaluation_function
+        ), "Evaluation function must be a callable function."
+
+        # Store fine-tuning parameters
+        self.finetune_timesteps = kwargs.get("finetune_timesteps", FINETUNE_TIMESTEPS)
+
+        # Dictionary to store fine-tuned agents for specific goals
+        self.fine_tuned_agents = {}
+
+    def domain_learning_phase(self, problems):
+        base = problems["gc"]
+        train_configs = base["train_configs"]
+
+        # Store the goal subspace for later checks
+        self.original_train_configs = train_configs
+
+        super().domain_learning_phase(train_configs)
+
+        agent_kwargs = {
+            "domain_name": self.env_prop.domain_name,
+            "problem_name": self.env_prop.name,
+            "algorithm": train_configs[0][0],
+            "num_timesteps": train_configs[0][1],
+            "env_prop": self.env_prop,
+        }
+
+        agent = self.rl_agent_type(**agent_kwargs)
+        agent.learn()
+        self.agents[self.env_prop.name] = agent
+        self.action_space = agent.env.action_space
+
+    def _is_goal_in_subspace(self, goal):
+        """
+        Check if a goal is within the original training subspace.
+
+        Delegates to the environment property's implementation.
+
+        Args:
+            goal: The goal to check
+
+        Returns:
+            bool: True if the goal is within the training subspace
+        """
+        # Use the environment property's implementation
+        return self.env_prop.is_goal_in_subspace(goal)
+
+    def goals_adaptation_phase(self, dynamic_goals):
+        """
+        Adapt to new goals, fine-tuning if necessary.
+
+        For goals outside the original training subspace, fine-tune the model.
+
+        Args:
+            dynamic_goals: List of goals to adapt to
+        """
+        self.active_goals = dynamic_goals
+        self.active_problems = [
+            self.env_prop.goal_to_problem_str(goal) for goal in dynamic_goals
+        ]
+
+        # Check each goal and fine-tune if needed
+        for goal in dynamic_goals:
+            if not self._is_goal_in_subspace(goal):
+                print(f"Goal {goal} is outside the training subspace. Fine-tuning...")
+
+                # Create a new agent for this goal
+                agent_kwargs = {
+                    "domain_name": self.env_prop.domain_name,
+                    "problem_name": self.env_prop.name,
+                    "algorithm": self.original_train_configs[0][0],
+                    "num_timesteps": self.original_train_configs[0][1],
+                    "env_prop": self.env_prop,
+                }
+
+                # Create new agent with base model
+                fine_tuned_agent = self.rl_agent_type(**agent_kwargs)
+                fine_tuned_agent.learn()  # This loads the existing model
+
+                # Fine-tune for this specific goal
+                fine_tuned_agent.fine_tune(
+                    goal=goal,
+                    num_timesteps=self.finetune_timesteps,
+                )
+
+                # Store the fine-tuned agent
+                self.fine_tuned_agents[
+                    f"{self.env_prop.goal_to_str(goal)}_{self.finetune_timesteps}"
+                ] = fine_tuned_agent
+            else:
+                print(f"Goal {goal} is within the training subspace. Using base agent.")
+
+    def choose_agent(self, problem_name: str) -> RLAgent:
+        """
+        Return the appropriate agent for the given problem.
+
+        If the goal has a fine-tuned agent, return that; otherwise return the base agent.
+
+        Args:
+            problem_name: The problem name to get agent for
+
+        Returns:
+            The appropriate agent (base or fine-tuned)
+        """
+        # Extract the goal from the problem name
+        goal = self.env_prop.str_to_goal(problem_name)
+        agent_name = f"{self.env_prop.goal_to_str(goal)}_{self.finetune_timesteps}"
+
+        # Check if we have a fine-tuned agent for this goal
+        if agent_name in self.fine_tuned_agents:
+            return self.fine_tuned_agents[agent_name]
+
+        # Otherwise return the base agent
+        return self.agents[self.env_prop.name]
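
Condensed from the new class and the tutorials below, the end-to-end GCAura flow looks roughly like this (a sketch; the env_name, goals, and timestep values are copied from the Panda tutorial, not required by the API):

import numpy as np
from stable_baselines3 import SAC

from gr_libs import GCAura
from gr_libs.environment.environment import PANDA

recognizer = GCAura(
    domain_name=PANDA,
    env_name="PandaMyReachDenseSubspaceCenterOnly-v3",
    finetune_timesteps=50000,  # used only for goals outside the trained subspace
)
# New problems-dict contract: GCAura reads the "gc" entry (goals are implicit
# in the goal-conditioned environment, so only train_configs is required).
recognizer.domain_learning_phase({"gc": {"train_configs": [(SAC, 500000)]}})
# In-subspace goals reuse the base agent; out-of-subspace goals get a
# fine-tuned copy keyed by goal string and finetune_timesteps.
recognizer.goals_adaptation_phase(
    dynamic_goals=[np.array([[0.0, 0.0, 0.1]]), np.array([[-0.3, -0.3, 0.1]])]
)
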
gr_libs/recognizer/graml/graml_recognizer.py
@@ -1,4 +1,4 @@
-""" Collection of recognizers that use GRAML methods: metric learning for ODGR. """
+"""Collection of recognizers that use GRAML methods: metric learning for ODGR."""

 import os
 from abc import abstractmethod
@@ -124,7 +124,7 @@ class Graml(LearningRecognizer):
         pass

     def domain_learning_phase(self, base_goals: list[str], train_configs: list):
-        super().domain_learning_phase(base_goals, train_configs)
+        super().domain_learning_phase(train_configs, base_goals)
         self.train_agents_on_base_goals(base_goals, train_configs)
         # train the network so it will find a metric for the observations of the base agents such that traces of agents to different goals are far from one another
         self.model_directory = get_lstm_model_dir(
@@ -335,11 +335,15 @@ class BGGraml(Graml):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)

-    def domain_learning_phase(self, base_goals: list[str], train_configs: list):
-        assert len(train_configs) == len(
-            base_goals
-        ), "There should be train configs for every goal in BGGraml."
-        return super().domain_learning_phase(base_goals, train_configs)
+    def domain_learning_phase(self, problems):
+        # Always use 'bg' for BGGraml
+        base = problems["bg"]
+        base_goals = base["goals"]
+        train_configs = base["train_configs"]
+        assert len(base_goals) == len(
+            train_configs
+        ), "base_goals and train_configs should have the same length"
+        super().domain_learning_phase(train_configs=train_configs, base_goals=base_goals)

     # In case we need goal-directed agent for every goal
     def train_agents_on_base_goals(self, base_goals: list[str], train_configs: list):
@@ -544,11 +548,15 @@ class GCGraml(Graml, GaAdaptingRecognizer):
             and not self.env_prop.is_action_discrete()
         )

-    def domain_learning_phase(self, base_goals: list[str], train_configs: list):
+    def domain_learning_phase(self, problems):
+        # Always use 'gc' for GCGraml
+        base = problems["gc"]
+        base_goals = base["goals"]
+        train_configs = base["train_configs"]
         assert (
             len(train_configs) == 1
-        ), "There should be one train config for the sole gc agent in GCGraml."
-        return super().domain_learning_phase(base_goals, train_configs)
+        ), "GCGraml should only have one train config for the base goals, it uses a single agent"
+        super().domain_learning_phase(train_configs=train_configs, base_goals=base_goals)

     # In case we need goal-directed agent for every goal
     def train_agents_on_base_goals(self, base_goals: list[str], train_configs: list):
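
BGGraml and GCGraml now consume the same problems dict but read different keys, as the hunks above show. A sketch of the two expected shapes (the goal strings and training configs are illustrative, not values from this release):

from stable_baselines3 import SAC

# BGGraml reads "bg": one (algorithm, num_timesteps) pair per base goal.
bg_problems = {
    "bg": {
        "goals": ["0", "4", "8"],
        "train_configs": [(SAC, 400000), (SAC, 400000), (SAC, 400000)],
    }
}

# GCGraml reads "gc": exactly one config for the single goal-conditioned agent.
gc_problems = {"gc": {"goals": ["0", "4", "8"], "train_configs": [(SAC, 500000)]}}
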
gr_libs/recognizer/recognizer.py
@@ -36,7 +36,7 @@ class LearningRecognizer(Recognizer):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)

-    def domain_learning_phase(self, base_goals: list[str], train_configs: list):
+    def domain_learning_phase(self, train_configs: list, base_goals: list[str] = None):
         """
         Perform the domain learning phase.

@@ -70,18 +70,18 @@ class GaAgentTrainerRecognizer(Recognizer):
            None
        """

-    def domain_learning_phase(self, base_goals: list[str], train_configs: list):
+    def domain_learning_phase(self, train_configs: list, base_goals: list[str] = None):
         """
         Perform the domain learning phase.

         Args:
-            base_goals (List[str]): List of base goals.
             train_configs (List): List of training configurations.
+            base_goals (List[str]): List of base goals for the learning phase.

         Returns:
             None
         """
-        super().domain_learning_phase(base_goals, train_configs)
+        super().domain_learning_phase(train_configs, base_goals)


 class GaAdaptingRecognizer(Recognizer):
gr_libs/tutorials/gcaura_panda_tutorial.py
@@ -0,0 +1,168 @@
+import numpy as np
+from stable_baselines3 import PPO, SAC
+
+from gr_libs import GCAura
+from gr_libs.environment._utils.utils import domain_to_env_property
+from gr_libs.environment.environment import PANDA
+from gr_libs.metrics import mean_wasserstein_distance, stochastic_amplified_selection
+from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
+from gr_libs.ml.utils.format import random_subset_with_order
+
+
+def run_gcaura_panda_tutorial():
+    """
+    Tutorial for GCAura on the Panda environment.
+
+    This tutorial demonstrates:
+    1. Training a goal-conditioned model on a registered goal subspace (center area)
+    2. Adapting to goals both inside and outside this subspace
+    3. Testing inference on multiple goal types
+    """
+    print("Starting GCAura tutorial with Panda environment...")
+
+    print(f"Using training subspace with center-area goals")
+
+    # Initialize the recognizer with the center subspace environment
+    recognizer = GCAura(
+        domain_name=PANDA,
+        env_name="PandaMyReachDenseSubspaceCenterOnly-v3",  # Use the subspace environment
+        evaluation_function=mean_wasserstein_distance,
+        finetune_timesteps=50000,
+    )
+
+    # Domain learning phase - train on the center goal subspace
+    print("\nStarting domain learning phase - training on registered goal subspace...")
+    recognizer.domain_learning_phase(
+        {
+            "gc": {
+                "train_configs": [(SAC, 500000)],
+            }
+        }
+    )
+
+    # Define adaptation goals - mix of in-subspace and out-of-subspace goals
+    # Use predefined goals from our environment registration
+    in_subspace_goal = np.array([[0.0, 0.0, 0.1]])  # Center goal (in subspace)
+    out_subspace_goal1 = np.array([[-0.3, -0.3, 0.1]])  # Far corner (out of subspace)
+    out_subspace_goal2 = np.array([[0.2, 0.2, 0.1]])  # Far corner (out of subspace)
+
+    print(
+        "\nStarting goal adaptation phase with both in-subspace and out-of-subspace goals..."
+    )
+
+    # Goals adaptation phase with mixed goals
+    recognizer.goals_adaptation_phase(
+        dynamic_goals=[
+            in_subspace_goal,
+            out_subspace_goal1,
+            out_subspace_goal2,
+        ],
+    )
+
+    # Setup for testing
+    property_type = domain_to_env_property(PANDA)
+    env_property = property_type("PandaMyReachDense")
+
+    # Create test actor for in-subspace goal
+    print("\nCreating test actor for in-subspace goal...")
+    problem_name_in = env_property.goal_to_problem_str(in_subspace_goal)
+    actor_in = DeepRLAgent(
+        domain_name=PANDA,
+        problem_name=problem_name_in,
+        env_prop=env_property,
+        algorithm=PPO,
+        num_timesteps=250000,
+    )
+    actor_in.learn()
+
+    # Create test actor for out-of-subspace goal
+    print("\nCreating test actor for out-of-subspace goal...")
+    problem_name_out = env_property.goal_to_problem_str(out_subspace_goal1)
+    actor_out = DeepRLAgent(
+        domain_name=PANDA,
+        problem_name=problem_name_out,
+        env_prop=env_property,
+        algorithm=PPO,
+        num_timesteps=250000,
+    )
+    actor_out.learn()
+
+    # Test inference with in-subspace goal
+    print("\nTesting inference with in-subspace goal (should use base model)...")
+    full_sequence_in = actor_in.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True,
+        with_dict=True,
+    )
+    partial_sequence_in = random_subset_with_order(
+        full_sequence_in, (int)(0.5 * len(full_sequence_in)), is_consecutive=False
+    )
+    recognized_goal_in = recognizer.inference_phase(
+        partial_sequence_in, in_subspace_goal, 0.5
+    )
+    print(f"Goal recognized for in-subspace sequence: {recognized_goal_in}")
+    print(f"Actual goal: {in_subspace_goal}")
+
+    assert str(recognized_goal_in) == str(
+        in_subspace_goal
+    ), f"In-subspace goal recognition failed. Expected goal does not match recognized goal {recognized_goal_in}."
+
+    # Test inference with out-of-subspace goal
+    print(
+        "\nTesting inference with out-of-subspace goal (should use fine-tuned model)..."
+    )
+    full_sequence_out = actor_out.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True,
+        with_dict=True,
+    )
+    partial_sequence_out = random_subset_with_order(
+        full_sequence_out, (int)(0.5 * len(full_sequence_out)), is_consecutive=False
+    )
+    recognized_goal_out = recognizer.inference_phase(
+        partial_sequence_out, out_subspace_goal1, 0.5
+    )
+    print(f"Goal recognized for out-of-subspace sequence: {recognized_goal_out}")
+    print(f"Actual goal: {out_subspace_goal1}")
+
+    assert str(recognized_goal_out) == str(
+        out_subspace_goal1
+    ), f"Out-of-subspace goal recognition failed. Expected goal does not match recognized goal {recognized_goal_out}."
+
+    # Try another out-of-subspace goal
+    print("\nTesting inference with second out-of-subspace goal...")
+    problem_name_out2 = env_property.goal_to_problem_str(out_subspace_goal2)
+    actor_out2 = DeepRLAgent(
+        domain_name=PANDA,
+        problem_name=problem_name_out2,
+        env_prop=env_property,
+        algorithm=PPO,
+        num_timesteps=250000,
+    )
+    actor_out2.learn()
+
+    full_sequence_out2 = actor_out2.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True,
+        with_dict=True,
+    )
+    partial_sequence_out2 = random_subset_with_order(
+        full_sequence_out2, (int)(0.5 * len(full_sequence_out2)), is_consecutive=False
+    )
+    recognized_goal_out2 = recognizer.inference_phase(
+        partial_sequence_out2, out_subspace_goal2, 0.5
+    )
+    print(
+        f"Goal recognized for second out-of-subspace sequence: {recognized_goal_out2}"
+    )
+    print(f"Actual goal: {out_subspace_goal2}")
+
+    assert str(recognized_goal_out2) == str(
+        out_subspace_goal2
+    ), f"Out-of-subspace goal recognition failed. Expected goal does not match recognized goal {recognized_goal_out2}."
+
+    print("\nGCAura tutorial completed successfully!")
+
+
+if __name__ == "__main__":
+    run_gcaura_panda_tutorial()
gr_libs/tutorials/gcaura_parking_tutorial.py
@@ -0,0 +1,167 @@
+from stable_baselines3 import SAC, TD3
+
+from gr_libs import GCAura
+from gr_libs.environment._utils.utils import domain_to_env_property
+from gr_libs.environment.environment import PARKING
+from gr_libs.metrics import mean_wasserstein_distance, stochastic_amplified_selection
+from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
+from gr_libs.ml.utils.format import random_subset_with_order
+
+
+def run_gcaura_parking_tutorial():
+    """
+    Tutorial for GCAura on the Parking environment.
+
+    This tutorial demonstrates:
+    1. Training a goal-conditioned model on a goal subspace (parking spots 1-10)
+    2. Adapting to goals both inside and outside this subspace
+    3. Testing inference on multiple goal types
+    """
+    print("Starting GCAura tutorial with Parking environment...")
+
+    print(f"Using training subspace with parking spots (1-10)")
+
+    # Initialize the recognizer with the standard parking environment
+    # We'll explicitly define the goal subspace in domain_learning_phase
+    recognizer = GCAura(
+        domain_name=PARKING,
+        env_name="Parking-S-14-PC--GI-8Y10Y13-v0",
+        evaluation_function=mean_wasserstein_distance,
+        finetune_timesteps=40000,  # Fine-tuning timesteps for out-of-subspace goals
+    )
+
+    # Domain learning phase - train on the goal subspace
+    print("\nStarting domain learning phase - training on goal subspace...")
+    recognizer.domain_learning_phase(
+        {
+            "gc": {
+                "train_configs": [(SAC, 500000)],
+            }
+        }
+    )
+
+    # Define adaptation goals - mix of in-subspace and out-of-subspace goals
+    in_subspace_goal = "8"  # Parking spot 8 (in subspace)
+    out_subspace_goal1 = "1"  # Parking spot 1 (out of subspace)
+    out_subspace_goal2 = "18"  # Parking spot 18 (out of subspace)
+
+    print(
+        "\nStarting goal adaptation phase with both in-subspace and out-of-subspace goals..."
+    )
+
+    # Goals adaptation phase with mixed goals
+    recognizer.goals_adaptation_phase(
+        dynamic_goals=[
+            in_subspace_goal,  # In subspace - will use base model
+            out_subspace_goal1,  # Out of subspace - will be fine-tuned
+            out_subspace_goal2,  # Out of subspace - will be fine-tuned
+        ],
+    )
+
+    # Setup for testing
+    property_type = domain_to_env_property(PARKING)
+    env_property = property_type("Parking-S-14-PC--v0")
+
+    # Create test actor for in-subspace goal
+    print("\nCreating test actor for in-subspace goal...")
+    problem_name_in = env_property.goal_to_problem_str(in_subspace_goal)
+    actor_in = DeepRLAgent(
+        domain_name=PARKING,
+        problem_name=problem_name_in,
+        env_prop=env_property,
+        algorithm=TD3,
+        num_timesteps=400000,
+    )
+    actor_in.learn()
+
+    # Create test actor for out-of-subspace goal
+    print("\nCreating test actor for out-of-subspace goal...")
+    problem_name_out = env_property.goal_to_problem_str(out_subspace_goal1)
+    actor_out = DeepRLAgent(
+        domain_name=PARKING,
+        problem_name=problem_name_out,
+        env_prop=env_property,
+        algorithm=TD3,
+        num_timesteps=400000,
+    )
+    actor_out.learn()
+
+    # Test inference with in-subspace goal
+    print("\nTesting inference with in-subspace goal (should use base model)...")
+    full_sequence_in = actor_in.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True,
+        with_dict=True,
+    )
+    partial_sequence_in = random_subset_with_order(
+        full_sequence_in, (int)(0.5 * len(full_sequence_in)), is_consecutive=False
+    )
+    recognized_goal_in = recognizer.inference_phase(
+        partial_sequence_in, in_subspace_goal, 0.5
+    )
+    print(f"Goal recognized for in-subspace sequence: {recognized_goal_in}")
+    print(f"Actual goal: {in_subspace_goal}")
+
+    assert (
+        recognized_goal_in == in_subspace_goal
+    ), f"In-subspace goal recognition failed, expected to recognize the parking spot {in_subspace_goal}."
+
+    # Test inference with out-of-subspace goal
+    print(
+        "\nTesting inference with out-of-subspace goal (should use fine-tuned model)..."
+    )
+    full_sequence_out = actor_out.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True,
+        with_dict=True,
+    )
+    partial_sequence_out = random_subset_with_order(
+        full_sequence_out, (int)(0.5 * len(full_sequence_out)), is_consecutive=False
+    )
+    recognized_goal_out = recognizer.inference_phase(
+        partial_sequence_out, out_subspace_goal1, 0.5
+    )
+    print(f"Goal recognized for out-of-subspace sequence: {recognized_goal_out}")
+    print(f"Actual goal: {out_subspace_goal1}")
+
+    assert (
+        recognized_goal_out == out_subspace_goal1
+    ), f"Out-of-subspace goal recognition failed, expected to recognize the parking spot {out_subspace_goal1}."
+
+    # Try another out-of-subspace goal
+    print("\nTesting inference with second out-of-subspace goal...")
+    problem_name_out2 = env_property.goal_to_problem_str(out_subspace_goal2)
+    actor_out2 = DeepRLAgent(
+        domain_name=PARKING,
+        problem_name=problem_name_out2,
+        env_prop=env_property,
+        algorithm=TD3,
+        num_timesteps=400000,
+    )
+    actor_out2.learn()
+
+    full_sequence_out2 = actor_out2.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True,
+        with_dict=True,
+    )
+    partial_sequence_out2 = random_subset_with_order(
+        full_sequence_out2, (int)(0.5 * len(full_sequence_out2)), is_consecutive=False
+    )
+    recognized_goal_out2 = recognizer.inference_phase(
+        partial_sequence_out2, out_subspace_goal2, 0.5
+    )
+    print(
+        f"Goal recognized for second out-of-subspace sequence: {recognized_goal_out2}"
+    )
+    print(f"Actual goal: {out_subspace_goal2}")
+
+    assert (
+        recognized_goal_out2 == out_subspace_goal2
+    ), f"Second out-of-subspace goal recognition failed, expected to recognize the parking spot {out_subspace_goal2}."
+
+    print("\nGCAura Parking tutorial completed successfully!")
+
+
+if __name__ == "__main__":
+    run_gcaura_parking_tutorial()