gr-libs 0.2.2__py3-none-any.whl → 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gr_libs/__init__.py +6 -1
- gr_libs/_evaluation/_generate_experiments_results.py +0 -141
- gr_libs/_version.py +2 -2
- gr_libs/all_experiments.py +73 -107
- gr_libs/environment/environment.py +126 -17
- gr_libs/evaluation/generate_experiments_results.py +100 -0
- gr_libs/ml/consts.py +1 -0
- gr_libs/ml/neural/deep_rl_learner.py +118 -34
- gr_libs/odgr_executor.py +27 -27
- gr_libs/problems/consts.py +568 -290
- gr_libs/recognizer/_utils/__init__.py +1 -0
- gr_libs/recognizer/_utils/format.py +7 -1
- gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +158 -2
- gr_libs/recognizer/graml/graml_recognizer.py +18 -10
- gr_libs/recognizer/recognizer.py +4 -4
- gr_libs/tutorials/gcaura_panda_tutorial.py +168 -0
- gr_libs/tutorials/gcaura_parking_tutorial.py +167 -0
- gr_libs/tutorials/gcaura_point_maze_tutorial.py +169 -0
- gr_libs/tutorials/gcdraco_panda_tutorial.py +6 -2
- gr_libs/tutorials/gcdraco_parking_tutorial.py +3 -1
- gr_libs/tutorials/graml_minigrid_tutorial.py +16 -12
- gr_libs/tutorials/graml_panda_tutorial.py +6 -2
- gr_libs/tutorials/graml_parking_tutorial.py +3 -1
- gr_libs/tutorials/graml_point_maze_tutorial.py +15 -2
- {gr_libs-0.2.2.dist-info → gr_libs-0.2.6.dist-info}/METADATA +31 -15
- {gr_libs-0.2.2.dist-info → gr_libs-0.2.6.dist-info}/RECORD +35 -29
- {gr_libs-0.2.2.dist-info → gr_libs-0.2.6.dist-info}/WHEEL +1 -1
- tests/test_gcaura.py +15 -0
- tests/test_odgr_executor_expertbasedgraml.py +14 -0
- tests/test_odgr_executor_gcaura.py +14 -0
- tests/test_odgr_executor_gcdraco.py +14 -0
- tests/test_odgr_executor_gcgraml.py +14 -0
- tests/test_odgr_executor_graql.py +14 -0
- gr_libs/_evaluation/_analyze_results_cross_alg_cross_domain.py +0 -260
- gr_libs/_evaluation/_generate_task_specific_statistics_plots.py +0 -497
- gr_libs/_evaluation/_get_plans_images.py +0 -61
- gr_libs/_evaluation/_increasing_and_decreasing_.py +0 -106
- /gr_libs/{_evaluation → evaluation}/__init__.py +0 -0
- {gr_libs-0.2.2.dist-info → gr_libs-0.2.6.dist-info}/top_level.txt +0 -0
gr_libs/recognizer/_utils/__init__.py
CHANGED

@@ -0,0 +1 @@
+from .format import recognizer_str_to_obj
gr_libs/recognizer/_utils/format.py
CHANGED

@@ -1,4 +1,9 @@
-from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import
+from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import (
+    Draco,
+    GCDraco,
+    Graql,
+    GCAura,
+)
 from gr_libs.recognizer.graml.graml_recognizer import (
     ExpertBasedGraml,
     GCGraml,
@@ -14,5 +19,6 @@ def recognizer_str_to_obj(recognizer_str: str):
         "Graql": Graql,
         "Draco": Draco,
         "GCDraco": GCDraco,
+        "GCAura": GCAura,
     }
     return recognizer_map.get(recognizer_str)
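With `GCAura` added to the name-to-class map, the string form used in experiment configs now resolves to the new recognizer. A minimal usage sketch, assuming gr_libs 0.2.6 with the `_utils` package shown above:

```python
# Minimal sketch of the updated mapping (assumes gr_libs 0.2.6 is installed).
from gr_libs.recognizer._utils import recognizer_str_to_obj

recognizer_cls = recognizer_str_to_obj("GCAura")   # -> the GCAura class
missing = recognizer_str_to_obj("NotARecognizer")  # -> None (dict .get fallback)
```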
gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py
CHANGED

@@ -8,12 +8,14 @@ from gr_libs.ml.base import RLAgent
 from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
 from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
 from gr_libs.ml.utils.storage import get_gr_as_rl_experiment_confidence_path
+from gymnasium.envs.registration import register, registry
 from gr_libs.recognizer.recognizer import (
     GaAdaptingRecognizer,
     GaAgentTrainerRecognizer,
     LearningRecognizer,
     Recognizer,
 )
+from gr_libs.ml.consts import FINETUNE_TIMESTEPS


 class GRAsRL(Recognizer):
@@ -193,6 +195,10 @@ class Draco(GRAsRL, GaAgentTrainerRecognizer):
         if self.rl_agent_type == None:
             self.rl_agent_type = DeepRLAgent
         self.evaluation_function = kwargs.get("evaluation_function")
+        if self.evaluation_function is None:
+            from gr_libs.metrics.metrics import mean_wasserstein_distance
+
+            self.evaluation_function = mean_wasserstein_distance
         assert callable(
             self.evaluation_function
         ), "Evaluation function must be a callable function."
@@ -218,12 +224,19 @@ class GCDraco(GRAsRL, LearningRecognizer, GaAdaptingRecognizer):
         if self.rl_agent_type == None:
             self.rl_agent_type = GCDeepRLAgent
         self.evaluation_function = kwargs.get("evaluation_function")
+        if self.evaluation_function is None:
+            from gr_libs.metrics.metrics import mean_wasserstein_distance
+
+            self.evaluation_function = mean_wasserstein_distance
         assert callable(
             self.evaluation_function
         ), "Evaluation function must be a callable function."

-    def domain_learning_phase(self,
-
+    def domain_learning_phase(self, problems):
+        base = problems["gc"]
+        base_goals = base["goals"]
+        train_configs = base["train_configs"]
+        super().domain_learning_phase(train_configs, base_goals)
         agent_kwargs = {
             "domain_name": self.env_prop.domain_name,
             "problem_name": self.env_prop.name,
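`GCDraco.domain_learning_phase` now takes a single `problems` dictionary keyed by recognizer family ("gc" here) instead of separate goal and config arguments. A hedged sketch of the expected shape; SAC, the goal strings, and the timestep count are illustrative placeholders rather than values from this diff:

```python
# Sketch of the `problems` dict the reworked domain_learning_phase expects.
# SAC, the goal strings, and 500000 timesteps are placeholder values.
from stable_baselines3 import SAC

problems = {
    "gc": {
        "goals": ["1", "8"],               # base goals (placeholders)
        "train_configs": [(SAC, 500000)],  # (algorithm, num_timesteps) pairs
    }
}
# recognizer.domain_learning_phase(problems)
```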
@@ -245,3 +258,146 @@ class GCDraco(GRAsRL, LearningRecognizer, GaAdaptingRecognizer):

     def choose_agent(self, problem_name: str) -> RLAgent:
         return next(iter(self.agents.values()))
+
+
+class GCAura(GRAsRL, LearningRecognizer, GaAdaptingRecognizer):
+    """
+    GCAura uses goal-conditioned reinforcement learning with adaptive fine-tuning.
+
+    It trains a base goal-conditioned policy over a goal subspace in the domain learning phase.
+    During the goal adaptation phase, it checks if new goals are within the original goal subspace:
+    - If a goal is within the subspace, it uses the original trained model
+    - If a goal is outside the subspace, it fine-tunes the model for that specific goal
+
+    This approach combines the efficiency of goal-conditioned RL with the precision of
+    goal-specific fine-tuning when needed.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        assert (
+            self.env_prop.gc_adaptable()
+            and not self.env_prop.is_state_discrete()
+            and not self.env_prop.is_action_discrete()
+        )
+        if self.rl_agent_type is None:
+            self.rl_agent_type = GCDeepRLAgent
+        self.evaluation_function = kwargs.get("evaluation_function")
+        if self.evaluation_function is None:
+            from gr_libs.metrics.metrics import mean_wasserstein_distance
+
+            self.evaluation_function = mean_wasserstein_distance
+        assert callable(
+            self.evaluation_function
+        ), "Evaluation function must be a callable function."
+
+        # Store fine-tuning parameters
+        self.finetune_timesteps = kwargs.get("finetune_timesteps", FINETUNE_TIMESTEPS)
+
+        # Dictionary to store fine-tuned agents for specific goals
+        self.fine_tuned_agents = {}
+
+    def domain_learning_phase(self, problems):
+        base = problems["gc"]
+        train_configs = base["train_configs"]
+
+        # Store the goal subspace for later checks
+        self.original_train_configs = train_configs
+
+        super().domain_learning_phase(train_configs)
+
+        agent_kwargs = {
+            "domain_name": self.env_prop.domain_name,
+            "problem_name": self.env_prop.name,
+            "algorithm": train_configs[0][0],
+            "num_timesteps": train_configs[0][1],
+            "env_prop": self.env_prop,
+        }
+
+        agent = self.rl_agent_type(**agent_kwargs)
+        agent.learn()
+        self.agents[self.env_prop.name] = agent
+        self.action_space = agent.env.action_space
+
+    def _is_goal_in_subspace(self, goal):
+        """
+        Check if a goal is within the original training subspace.
+
+        Delegates to the environment property's implementation.
+
+        Args:
+            goal: The goal to check
+
+        Returns:
+            bool: True if the goal is within the training subspace
+        """
+        # Use the environment property's implementation
+        return self.env_prop.is_goal_in_subspace(goal)
+
+    def goals_adaptation_phase(self, dynamic_goals):
+        """
+        Adapt to new goals, fine-tuning if necessary.
+
+        For goals outside the original training subspace, fine-tune the model.
+
+        Args:
+            dynamic_goals: List of goals to adapt to
+        """
+        self.active_goals = dynamic_goals
+        self.active_problems = [
+            self.env_prop.goal_to_problem_str(goal) for goal in dynamic_goals
+        ]
+
+        # Check each goal and fine-tune if needed
+        for goal in dynamic_goals:
+            if not self._is_goal_in_subspace(goal):
+                print(f"Goal {goal} is outside the training subspace. Fine-tuning...")
+
+                # Create a new agent for this goal
+                agent_kwargs = {
+                    "domain_name": self.env_prop.domain_name,
+                    "problem_name": self.env_prop.name,
+                    "algorithm": self.original_train_configs[0][0],
+                    "num_timesteps": self.original_train_configs[0][1],
+                    "env_prop": self.env_prop,
+                }
+
+                # Create new agent with base model
+                fine_tuned_agent = self.rl_agent_type(**agent_kwargs)
+                fine_tuned_agent.learn()  # This loads the existing model
+
+                # Fine-tune for this specific goal
+                fine_tuned_agent.fine_tune(
+                    goal=goal,
+                    num_timesteps=self.finetune_timesteps,
+                )
+
+                # Store the fine-tuned agent
+                self.fine_tuned_agents[
+                    f"{self.env_prop.goal_to_str(goal)}_{self.finetune_timesteps}"
+                ] = fine_tuned_agent
+            else:
+                print(f"Goal {goal} is within the training subspace. Using base agent.")
+
+    def choose_agent(self, problem_name: str) -> RLAgent:
+        """
+        Return the appropriate agent for the given problem.
+
+        If the goal has a fine-tuned agent, return that; otherwise return the base agent.
+
+        Args:
+            problem_name: The problem name to get agent for
+
+        Returns:
+            The appropriate agent (base or fine-tuned)
+        """
+        # Extract the goal from the problem name
+        goal = self.env_prop.str_to_goal(problem_name)
+        agent_name = f"{self.env_prop.goal_to_str(goal)}_{self.finetune_timesteps}"
+
+        # Check if we have a fine-tuned agent for this goal
+        if agent_name in self.fine_tuned_agents:
+            return self.fine_tuned_agents[agent_name]
+
+        # Otherwise return the base agent
+        return self.agents[self.env_prop.name]
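In short, fine-tuned agents are keyed by goal string plus fine-tuning budget, and `choose_agent` falls back to the single base goal-conditioned agent when no fine-tuned copy exists. A compressed, standalone sketch of that selection rule (plain dicts and strings stand in for the recognizer state; the names and values are placeholders, not library code):

```python
# Compressed sketch of GCAura's agent-selection rule; not the library code itself.
finetune_timesteps = 50000
fine_tuned_agents = {"(0.2, 0.2, 0.1)_50000": "fine_tuned_agent"}  # goal_str + budget
base_agents = {"PandaMyReachDense": "base_gc_agent"}               # env name -> base agent


def choose_agent(goal_str: str, env_name: str) -> str:
    key = f"{goal_str}_{finetune_timesteps}"
    # A goal fine-tuned during goals_adaptation_phase gets its dedicated agent...
    if key in fine_tuned_agents:
        return fine_tuned_agents[key]
    # ...otherwise the base goal-conditioned agent trained on the subspace is reused.
    return base_agents[env_name]


print(choose_agent("(0.2, 0.2, 0.1)", "PandaMyReachDense"))  # -> fine_tuned_agent
print(choose_agent("(0.0, 0.0, 0.1)", "PandaMyReachDense"))  # -> base_gc_agent
```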
gr_libs/recognizer/graml/graml_recognizer.py
CHANGED

@@ -1,4 +1,4 @@
-"""
+"""Collection of recognizers that use GRAML methods: metric learning for ODGR."""

 import os
 from abc import abstractmethod
@@ -124,7 +124,7 @@ class Graml(LearningRecognizer):
         pass

     def domain_learning_phase(self, base_goals: list[str], train_configs: list):
-        super().domain_learning_phase(
+        super().domain_learning_phase(train_configs, base_goals)
         self.train_agents_on_base_goals(base_goals, train_configs)
         # train the network so it will find a metric for the observations of the base agents such that traces of agents to different goals are far from one another
         self.model_directory = get_lstm_model_dir(
@@ -335,11 +335,15 @@ class BGGraml(Graml):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)

-    def domain_learning_phase(self,
-
-
-
-
+    def domain_learning_phase(self, problems):
+        # Always use 'bg' for BGGraml
+        base = problems["bg"]
+        base_goals = base["goals"]
+        train_configs = base["train_configs"]
+        assert len(base_goals) == len(
+            train_configs
+        ), "base_goals and train_configs should have the same length"
+        super().domain_learning_phase(train_configs=train_configs, base_goals=base_goals)

     # In case we need goal-directed agent for every goal
     def train_agents_on_base_goals(self, base_goals: list[str], train_configs: list):
@@ -544,11 +548,15 @@ class GCGraml(Graml, GaAdaptingRecognizer):
             and not self.env_prop.is_action_discrete()
         )

-    def domain_learning_phase(self,
+    def domain_learning_phase(self, problems):
+        # Always use 'gc' for GCGraml
+        base = problems["gc"]
+        base_goals = base["goals"]
+        train_configs = base["train_configs"]
         assert (
             len(train_configs) == 1
-        ), "
-
+        ), "GCGraml should only have one train config for the base goals, it uses a single agent"
+        super().domain_learning_phase(train_configs=train_configs, base_goals=base_goals)

     # In case we need goal-directed agent for every goal
     def train_agents_on_base_goals(self, base_goals: list[str], train_configs: list):
gr_libs/recognizer/recognizer.py
CHANGED
@@ -36,7 +36,7 @@ class LearningRecognizer(Recognizer):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)

-    def domain_learning_phase(self, base_goals: list[str]
+    def domain_learning_phase(self, train_configs: list, base_goals: list[str] = None):
         """
         Perform the domain learning phase.

@@ -70,18 +70,18 @@ class GaAgentTrainerRecognizer(Recognizer):
             None
         """

-    def domain_learning_phase(self, base_goals: list[str]
+    def domain_learning_phase(self, train_configs: list, base_goals: list[str] = None):
         """
         Perform the domain learning phase.

         Args:
-            base_goals (List[str]): List of base goals.
             train_configs (List): List of training configurations.
+            base_goals (List[str]): List of base goals for the learning phase.

         Returns:
             None
         """
-        super().domain_learning_phase(
+        super().domain_learning_phase(train_configs, base_goals)


 class GaAdaptingRecognizer(Recognizer):
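The base-class change above reverses the argument order: `train_configs` now comes first and `base_goals` is optional. A hedged sketch of how a subclass forwards the call under the new signature, mirroring the GCDraco/BGGraml/GCGraml hunks in this diff (`MyRecognizer` is a hypothetical example class, not part of the package):

```python
# Hypothetical subclass showing the new forwarding order; mirrors the pattern in this diff.
from gr_libs.recognizer.recognizer import LearningRecognizer


class MyRecognizer(LearningRecognizer):
    def domain_learning_phase(self, problems):
        base = problems["gc"]  # "bg" for base-goal recognizers such as BGGraml
        # New order: train_configs first, then the (now optional) base_goals.
        super().domain_learning_phase(base["train_configs"], base.get("goals"))
```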
gr_libs/tutorials/gcaura_panda_tutorial.py
ADDED

@@ -0,0 +1,168 @@
+import numpy as np
+from stable_baselines3 import PPO, SAC
+
+from gr_libs import GCAura
+from gr_libs.environment._utils.utils import domain_to_env_property
+from gr_libs.environment.environment import PANDA
+from gr_libs.metrics import mean_wasserstein_distance, stochastic_amplified_selection
+from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
+from gr_libs.ml.utils.format import random_subset_with_order
+
+
+def run_gcaura_panda_tutorial():
+    """
+    Tutorial for GCAura on the Panda environment.
+
+    This tutorial demonstrates:
+    1. Training a goal-conditioned model on a registered goal subspace (center area)
+    2. Adapting to goals both inside and outside this subspace
+    3. Testing inference on multiple goal types
+    """
+    print("Starting GCAura tutorial with Panda environment...")
+
+    print(f"Using training subspace with center-area goals")
+
+    # Initialize the recognizer with the center subspace environment
+    recognizer = GCAura(
+        domain_name=PANDA,
+        env_name="PandaMyReachDenseSubspaceCenterOnly-v3",  # Use the subspace environment
+        evaluation_function=mean_wasserstein_distance,
+        finetune_timesteps=50000,
+    )
+
+    # Domain learning phase - train on the center goal subspace
+    print("\nStarting domain learning phase - training on registered goal subspace...")
+    recognizer.domain_learning_phase(
+        {
+            "gc": {
+                "train_configs": [(SAC, 500000)],
+            }
+        }
+    )
+
+    # Define adaptation goals - mix of in-subspace and out-of-subspace goals
+    # Use predefined goals from our environment registration
+    in_subspace_goal = np.array([[0.0, 0.0, 0.1]])  # Center goal (in subspace)
+    out_subspace_goal1 = np.array([[-0.3, -0.3, 0.1]])  # Far corner (out of subspace)
+    out_subspace_goal2 = np.array([[0.2, 0.2, 0.1]])  # Far corner (out of subspace)
+
+    print(
+        "\nStarting goal adaptation phase with both in-subspace and out-of-subspace goals..."
+    )
+
+    # Goals adaptation phase with mixed goals
+    recognizer.goals_adaptation_phase(
+        dynamic_goals=[
+            in_subspace_goal,
+            out_subspace_goal1,
+            out_subspace_goal2,
+        ],
+    )
+
+    # Setup for testing
+    property_type = domain_to_env_property(PANDA)
+    env_property = property_type("PandaMyReachDense")
+
+    # Create test actor for in-subspace goal
+    print("\nCreating test actor for in-subspace goal...")
+    problem_name_in = env_property.goal_to_problem_str(in_subspace_goal)
+    actor_in = DeepRLAgent(
+        domain_name=PANDA,
+        problem_name=problem_name_in,
+        env_prop=env_property,
+        algorithm=PPO,
+        num_timesteps=250000,
+    )
+    actor_in.learn()
+
+    # Create test actor for out-of-subspace goal
+    print("\nCreating test actor for out-of-subspace goal...")
+    problem_name_out = env_property.goal_to_problem_str(out_subspace_goal1)
+    actor_out = DeepRLAgent(
+        domain_name=PANDA,
+        problem_name=problem_name_out,
+        env_prop=env_property,
+        algorithm=PPO,
+        num_timesteps=250000,
+    )
+    actor_out.learn()
+
+    # Test inference with in-subspace goal
+    print("\nTesting inference with in-subspace goal (should use base model)...")
+    full_sequence_in = actor_in.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True,
+        with_dict=True,
+    )
+    partial_sequence_in = random_subset_with_order(
+        full_sequence_in, (int)(0.5 * len(full_sequence_in)), is_consecutive=False
+    )
+    recognized_goal_in = recognizer.inference_phase(
+        partial_sequence_in, in_subspace_goal, 0.5
+    )
+    print(f"Goal recognized for in-subspace sequence: {recognized_goal_in}")
+    print(f"Actual goal: {in_subspace_goal}")
+
+    assert str(recognized_goal_in) == str(
+        in_subspace_goal
+    ), f"In-subspace goal recognition failed. Expected goal does not match recognized goal {recognized_goal_in}."
+
+    # Test inference with out-of-subspace goal
+    print(
+        "\nTesting inference with out-of-subspace goal (should use fine-tuned model)..."
+    )
+    full_sequence_out = actor_out.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True,
+        with_dict=True,
+    )
+    partial_sequence_out = random_subset_with_order(
+        full_sequence_out, (int)(0.5 * len(full_sequence_out)), is_consecutive=False
+    )
+    recognized_goal_out = recognizer.inference_phase(
+        partial_sequence_out, out_subspace_goal1, 0.5
+    )
+    print(f"Goal recognized for out-of-subspace sequence: {recognized_goal_out}")
+    print(f"Actual goal: {out_subspace_goal1}")
+
+    assert str(recognized_goal_out) == str(
+        out_subspace_goal1
+    ), f"Out-of-subspace goal recognition failed. Expected goal does not match recognized goal {recognized_goal_out}."
+
+    # Try another out-of-subspace goal
+    print("\nTesting inference with second out-of-subspace goal...")
+    problem_name_out2 = env_property.goal_to_problem_str(out_subspace_goal2)
+    actor_out2 = DeepRLAgent(
+        domain_name=PANDA,
+        problem_name=problem_name_out2,
+        env_prop=env_property,
+        algorithm=PPO,
+        num_timesteps=250000,
+    )
+    actor_out2.learn()
+
+    full_sequence_out2 = actor_out2.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True,
+        with_dict=True,
+    )
+    partial_sequence_out2 = random_subset_with_order(
+        full_sequence_out2, (int)(0.5 * len(full_sequence_out2)), is_consecutive=False
+    )
+    recognized_goal_out2 = recognizer.inference_phase(
+        partial_sequence_out2, out_subspace_goal2, 0.5
+    )
+    print(
+        f"Goal recognized for second out-of-subspace sequence: {recognized_goal_out2}"
+    )
+    print(f"Actual goal: {out_subspace_goal2}")
+
+    assert str(recognized_goal_out2) == str(
+        out_subspace_goal2
+    ), f"Out-of-subspace goal recognition failed. Expected goal does not match recognized goal {recognized_goal_out2}."
+
+    print("\nGCAura tutorial completed successfully!")
+
+
+if __name__ == "__main__":
+    run_gcaura_panda_tutorial()
gr_libs/tutorials/gcaura_parking_tutorial.py
ADDED

@@ -0,0 +1,167 @@
+from stable_baselines3 import SAC, TD3
+
+from gr_libs import GCAura
+from gr_libs.environment._utils.utils import domain_to_env_property
+from gr_libs.environment.environment import PARKING
+from gr_libs.metrics import mean_wasserstein_distance, stochastic_amplified_selection
+from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
+from gr_libs.ml.utils.format import random_subset_with_order
+
+
+def run_gcaura_parking_tutorial():
+    """
+    Tutorial for GCAura on the Parking environment.
+
+    This tutorial demonstrates:
+    1. Training a goal-conditioned model on a goal subspace (parking spots 1-10)
+    2. Adapting to goals both inside and outside this subspace
+    3. Testing inference on multiple goal types
+    """
+    print("Starting GCAura tutorial with Parking environment...")
+
+    print(f"Using training subspace with parking spots (1-10)")
+
+    # Initialize the recognizer with the standard parking environment
+    # We'll explicitly define the goal subspace in domain_learning_phase
+    recognizer = GCAura(
+        domain_name=PARKING,
+        env_name="Parking-S-14-PC--GI-8Y10Y13-v0",
+        evaluation_function=mean_wasserstein_distance,
+        finetune_timesteps=40000,  # Fine-tuning timesteps for out-of-subspace goals
+    )
+
+    # Domain learning phase - train on the goal subspace
+    print("\nStarting domain learning phase - training on goal subspace...")
+    recognizer.domain_learning_phase(
+        {
+            "gc": {
+                "train_configs": [(SAC, 500000)],
+            }
+        }
+    )
+
+    # Define adaptation goals - mix of in-subspace and out-of-subspace goals
+    in_subspace_goal = "8"  # Parking spot 8 (in subspace)
+    out_subspace_goal1 = "1"  # Parking spot 1 (out of subspace)
+    out_subspace_goal2 = "18"  # Parking spot 18 (out of subspace)
+
+    print(
+        "\nStarting goal adaptation phase with both in-subspace and out-of-subspace goals..."
+    )
+
+    # Goals adaptation phase with mixed goals
+    recognizer.goals_adaptation_phase(
+        dynamic_goals=[
+            in_subspace_goal,  # In subspace - will use base model
+            out_subspace_goal1,  # Out of subspace - will be fine-tuned
+            out_subspace_goal2,  # Out of subspace - will be fine-tuned
+        ],
+    )
+
+    # Setup for testing
+    property_type = domain_to_env_property(PARKING)
+    env_property = property_type("Parking-S-14-PC--v0")
+
+    # Create test actor for in-subspace goal
+    print("\nCreating test actor for in-subspace goal...")
+    problem_name_in = env_property.goal_to_problem_str(in_subspace_goal)
+    actor_in = DeepRLAgent(
+        domain_name=PARKING,
+        problem_name=problem_name_in,
+        env_prop=env_property,
+        algorithm=TD3,
+        num_timesteps=400000,
+    )
+    actor_in.learn()
+
+    # Create test actor for out-of-subspace goal
+    print("\nCreating test actor for out-of-subspace goal...")
+    problem_name_out = env_property.goal_to_problem_str(out_subspace_goal1)
+    actor_out = DeepRLAgent(
+        domain_name=PARKING,
+        problem_name=problem_name_out,
+        env_prop=env_property,
+        algorithm=TD3,
+        num_timesteps=400000,
+    )
+    actor_out.learn()
+
+    # Test inference with in-subspace goal
+    print("\nTesting inference with in-subspace goal (should use base model)...")
+    full_sequence_in = actor_in.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True,
+        with_dict=True,
+    )
+    partial_sequence_in = random_subset_with_order(
+        full_sequence_in, (int)(0.5 * len(full_sequence_in)), is_consecutive=False
+    )
+    recognized_goal_in = recognizer.inference_phase(
+        partial_sequence_in, in_subspace_goal, 0.5
+    )
+    print(f"Goal recognized for in-subspace sequence: {recognized_goal_in}")
+    print(f"Actual goal: {in_subspace_goal}")
+
+    assert (
+        recognized_goal_in == in_subspace_goal
+    ), f"In-subspace goal recognition failed, expected to recognize the parking spot {in_subspace_goal}."
+
+    # Test inference with out-of-subspace goal
+    print(
+        "\nTesting inference with out-of-subspace goal (should use fine-tuned model)..."
+    )
+    full_sequence_out = actor_out.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True,
+        with_dict=True,
+    )
+    partial_sequence_out = random_subset_with_order(
+        full_sequence_out, (int)(0.5 * len(full_sequence_out)), is_consecutive=False
+    )
+    recognized_goal_out = recognizer.inference_phase(
+        partial_sequence_out, out_subspace_goal1, 0.5
+    )
+    print(f"Goal recognized for out-of-subspace sequence: {recognized_goal_out}")
+    print(f"Actual goal: {out_subspace_goal1}")
+
+    assert (
+        recognized_goal_out == out_subspace_goal1
+    ), f"Out-of-subspace goal recognition failed, expected to recognize the parking spot {out_subspace_goal1}."
+
+    # Try another out-of-subspace goal
+    print("\nTesting inference with second out-of-subspace goal...")
+    problem_name_out2 = env_property.goal_to_problem_str(out_subspace_goal2)
+    actor_out2 = DeepRLAgent(
+        domain_name=PARKING,
+        problem_name=problem_name_out2,
+        env_prop=env_property,
+        algorithm=TD3,
+        num_timesteps=400000,
+    )
+    actor_out2.learn()
+
+    full_sequence_out2 = actor_out2.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True,
+        with_dict=True,
+    )
+    partial_sequence_out2 = random_subset_with_order(
+        full_sequence_out2, (int)(0.5 * len(full_sequence_out2)), is_consecutive=False
+    )
+    recognized_goal_out2 = recognizer.inference_phase(
+        partial_sequence_out2, out_subspace_goal2, 0.5
+    )
+    print(
+        f"Goal recognized for second out-of-subspace sequence: {recognized_goal_out2}"
+    )
+    print(f"Actual goal: {out_subspace_goal2}")
+
+    assert (
+        recognized_goal_out2 == out_subspace_goal2
+    ), f"Second out-of-subspace goal recognition failed, expected to recognize the parking spot {out_subspace_goal2}."
+
+    print("\nGCAura Parking tutorial completed successfully!")
+
+
+if __name__ == "__main__":
+    run_gcaura_parking_tutorial()