gr-libs 0.2.2__py3-none-any.whl → 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gr_libs/__init__.py +6 -1
- gr_libs/_evaluation/_generate_experiments_results.py +0 -141
- gr_libs/_version.py +2 -2
- gr_libs/all_experiments.py +73 -107
- gr_libs/environment/environment.py +126 -17
- gr_libs/evaluation/generate_experiments_results.py +100 -0
- gr_libs/ml/consts.py +1 -0
- gr_libs/ml/neural/deep_rl_learner.py +118 -34
- gr_libs/odgr_executor.py +27 -27
- gr_libs/problems/consts.py +568 -290
- gr_libs/recognizer/_utils/__init__.py +1 -0
- gr_libs/recognizer/_utils/format.py +7 -1
- gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +158 -2
- gr_libs/recognizer/graml/graml_recognizer.py +18 -10
- gr_libs/recognizer/recognizer.py +4 -4
- gr_libs/tutorials/gcaura_panda_tutorial.py +168 -0
- gr_libs/tutorials/gcaura_parking_tutorial.py +167 -0
- gr_libs/tutorials/gcaura_point_maze_tutorial.py +169 -0
- gr_libs/tutorials/gcdraco_panda_tutorial.py +6 -2
- gr_libs/tutorials/gcdraco_parking_tutorial.py +3 -1
- gr_libs/tutorials/graml_minigrid_tutorial.py +16 -12
- gr_libs/tutorials/graml_panda_tutorial.py +6 -2
- gr_libs/tutorials/graml_parking_tutorial.py +3 -1
- gr_libs/tutorials/graml_point_maze_tutorial.py +15 -2
- {gr_libs-0.2.2.dist-info → gr_libs-0.2.6.dist-info}/METADATA +31 -15
- {gr_libs-0.2.2.dist-info → gr_libs-0.2.6.dist-info}/RECORD +35 -29
- {gr_libs-0.2.2.dist-info → gr_libs-0.2.6.dist-info}/WHEEL +1 -1
- tests/test_gcaura.py +15 -0
- tests/test_odgr_executor_expertbasedgraml.py +14 -0
- tests/test_odgr_executor_gcaura.py +14 -0
- tests/test_odgr_executor_gcdraco.py +14 -0
- tests/test_odgr_executor_gcgraml.py +14 -0
- tests/test_odgr_executor_graql.py +14 -0
- gr_libs/_evaluation/_analyze_results_cross_alg_cross_domain.py +0 -260
- gr_libs/_evaluation/_generate_task_specific_statistics_plots.py +0 -497
- gr_libs/_evaluation/_get_plans_images.py +0 -61
- gr_libs/_evaluation/_increasing_and_decreasing_.py +0 -106
- /gr_libs/{_evaluation → evaluation}/__init__.py +0 -0
- {gr_libs-0.2.2.dist-info → gr_libs-0.2.6.dist-info}/top_level.txt +0 -0
gr_libs/recognizer/_utils/__init__.py
CHANGED

@@ -0,0 +1 @@
+from .format import recognizer_str_to_obj
gr_libs/recognizer/_utils/format.py
CHANGED

@@ -1,4 +1,9 @@
-from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import
+from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import (
+    Draco,
+    GCDraco,
+    Graql,
+    GCAura,
+)
 from gr_libs.recognizer.graml.graml_recognizer import (
     ExpertBasedGraml,
     GCGraml,
@@ -14,5 +19,6 @@ def recognizer_str_to_obj(recognizer_str: str):
         "Graql": Graql,
         "Draco": Draco,
         "GCDraco": GCDraco,
+        "GCAura": GCAura,
     }
     return recognizer_map.get(recognizer_str)
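With `GCAura` added to the name-to-class map, the string form used in experiment configs now resolves to the new recognizer. A minimal usage sketch, assuming gr_libs 0.2.6 with the `_utils` package shown above:

```python
# Minimal sketch of the updated mapping (assumes gr_libs 0.2.6 is installed).
from gr_libs.recognizer._utils import recognizer_str_to_obj

recognizer_cls = recognizer_str_to_obj("GCAura")   # -> the GCAura class
missing = recognizer_str_to_obj("NotARecognizer")  # -> None (dict .get fallback)
```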
gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py
CHANGED

@@ -8,12 +8,14 @@ from gr_libs.ml.base import RLAgent
 from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
 from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
 from gr_libs.ml.utils.storage import get_gr_as_rl_experiment_confidence_path
+from gymnasium.envs.registration import register, registry
 from gr_libs.recognizer.recognizer import (
     GaAdaptingRecognizer,
     GaAgentTrainerRecognizer,
     LearningRecognizer,
     Recognizer,
 )
+from gr_libs.ml.consts import FINETUNE_TIMESTEPS


 class GRAsRL(Recognizer):
@@ -193,6 +195,10 @@ class Draco(GRAsRL, GaAgentTrainerRecognizer):
         if self.rl_agent_type == None:
             self.rl_agent_type = DeepRLAgent
         self.evaluation_function = kwargs.get("evaluation_function")
+        if self.evaluation_function is None:
+            from gr_libs.metrics.metrics import mean_wasserstein_distance
+
+            self.evaluation_function = mean_wasserstein_distance
         assert callable(
             self.evaluation_function
         ), "Evaluation function must be a callable function."
@@ -218,12 +224,19 @@ class GCDraco(GRAsRL, LearningRecognizer, GaAdaptingRecognizer):
         if self.rl_agent_type == None:
             self.rl_agent_type = GCDeepRLAgent
         self.evaluation_function = kwargs.get("evaluation_function")
+        if self.evaluation_function is None:
+            from gr_libs.metrics.metrics import mean_wasserstein_distance
+
+            self.evaluation_function = mean_wasserstein_distance
         assert callable(
             self.evaluation_function
         ), "Evaluation function must be a callable function."

-    def domain_learning_phase(self,
-
+    def domain_learning_phase(self, problems):
+        base = problems["gc"]
+        base_goals = base["goals"]
+        train_configs = base["train_configs"]
+        super().domain_learning_phase(train_configs, base_goals)
         agent_kwargs = {
             "domain_name": self.env_prop.domain_name,
             "problem_name": self.env_prop.name,
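`GCDraco.domain_learning_phase` now takes a single `problems` dictionary keyed by recognizer family ("gc" here) instead of separate goal and config arguments. A hedged sketch of the expected shape; SAC, the goal strings, and the timestep count are illustrative placeholders rather than values from this diff:

```python
# Sketch of the `problems` dict the reworked domain_learning_phase expects.
# SAC, the goal strings, and 500000 timesteps are placeholder values.
from stable_baselines3 import SAC

problems = {
    "gc": {
        "goals": ["1", "8"],               # base goals (placeholders)
        "train_configs": [(SAC, 500000)],  # (algorithm, num_timesteps) pairs
    }
}
# recognizer.domain_learning_phase(problems)
```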
@@ -245,3 +258,146 @@ class GCDraco(GRAsRL, LearningRecognizer, GaAdaptingRecognizer):

     def choose_agent(self, problem_name: str) -> RLAgent:
         return next(iter(self.agents.values()))
+
+
+class GCAura(GRAsRL, LearningRecognizer, GaAdaptingRecognizer):
+    """
+    GCAura uses goal-conditioned reinforcement learning with adaptive fine-tuning.
+
+    It trains a base goal-conditioned policy over a goal subspace in the domain learning phase.
+    During the goal adaptation phase, it checks if new goals are within the original goal subspace:
+    - If a goal is within the subspace, it uses the original trained model
+    - If a goal is outside the subspace, it fine-tunes the model for that specific goal
+
+    This approach combines the efficiency of goal-conditioned RL with the precision of
+    goal-specific fine-tuning when needed.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        assert (
+            self.env_prop.gc_adaptable()
+            and not self.env_prop.is_state_discrete()
+            and not self.env_prop.is_action_discrete()
+        )
+        if self.rl_agent_type is None:
+            self.rl_agent_type = GCDeepRLAgent
+        self.evaluation_function = kwargs.get("evaluation_function")
+        if self.evaluation_function is None:
+            from gr_libs.metrics.metrics import mean_wasserstein_distance
+
+            self.evaluation_function = mean_wasserstein_distance
+        assert callable(
+            self.evaluation_function
+        ), "Evaluation function must be a callable function."
+
+        # Store fine-tuning parameters
+        self.finetune_timesteps = kwargs.get("finetune_timesteps", FINETUNE_TIMESTEPS)
+
+        # Dictionary to store fine-tuned agents for specific goals
+        self.fine_tuned_agents = {}
+
+    def domain_learning_phase(self, problems):
+        base = problems["gc"]
+        train_configs = base["train_configs"]
+
+        # Store the goal subspace for later checks
+        self.original_train_configs = train_configs
+
+        super().domain_learning_phase(train_configs)
+
+        agent_kwargs = {
+            "domain_name": self.env_prop.domain_name,
+            "problem_name": self.env_prop.name,
+            "algorithm": train_configs[0][0],
+            "num_timesteps": train_configs[0][1],
+            "env_prop": self.env_prop,
+        }
+
+        agent = self.rl_agent_type(**agent_kwargs)
+        agent.learn()
+        self.agents[self.env_prop.name] = agent
+        self.action_space = agent.env.action_space
+
+    def _is_goal_in_subspace(self, goal):
+        """
+        Check if a goal is within the original training subspace.
+
+        Delegates to the environment property's implementation.
+
+        Args:
+            goal: The goal to check
+
+        Returns:
+            bool: True if the goal is within the training subspace
+        """
+        # Use the environment property's implementation
+        return self.env_prop.is_goal_in_subspace(goal)
+
+    def goals_adaptation_phase(self, dynamic_goals):
+        """
+        Adapt to new goals, fine-tuning if necessary.
+
+        For goals outside the original training subspace, fine-tune the model.
+
+        Args:
+            dynamic_goals: List of goals to adapt to
+        """
+        self.active_goals = dynamic_goals
+        self.active_problems = [
+            self.env_prop.goal_to_problem_str(goal) for goal in dynamic_goals
+        ]
+
+        # Check each goal and fine-tune if needed
+        for goal in dynamic_goals:
+            if not self._is_goal_in_subspace(goal):
+                print(f"Goal {goal} is outside the training subspace. Fine-tuning...")
+
+                # Create a new agent for this goal
+                agent_kwargs = {
+                    "domain_name": self.env_prop.domain_name,
+                    "problem_name": self.env_prop.name,
+                    "algorithm": self.original_train_configs[0][0],
+                    "num_timesteps": self.original_train_configs[0][1],
+                    "env_prop": self.env_prop,
+                }
+
+                # Create new agent with base model
+                fine_tuned_agent = self.rl_agent_type(**agent_kwargs)
+                fine_tuned_agent.learn()  # This loads the existing model
+
+                # Fine-tune for this specific goal
+                fine_tuned_agent.fine_tune(
+                    goal=goal,
+                    num_timesteps=self.finetune_timesteps,
+                )
+
+                # Store the fine-tuned agent
+                self.fine_tuned_agents[
+                    f"{self.env_prop.goal_to_str(goal)}_{self.finetune_timesteps}"
+                ] = fine_tuned_agent
+            else:
+                print(f"Goal {goal} is within the training subspace. Using base agent.")
+
+    def choose_agent(self, problem_name: str) -> RLAgent:
+        """
+        Return the appropriate agent for the given problem.
+
+        If the goal has a fine-tuned agent, return that; otherwise return the base agent.
+
+        Args:
+            problem_name: The problem name to get agent for
+
+        Returns:
+            The appropriate agent (base or fine-tuned)
+        """
+        # Extract the goal from the problem name
+        goal = self.env_prop.str_to_goal(problem_name)
+        agent_name = f"{self.env_prop.goal_to_str(goal)}_{self.finetune_timesteps}"
+
+        # Check if we have a fine-tuned agent for this goal
+        if agent_name in self.fine_tuned_agents:
+            return self.fine_tuned_agents[agent_name]
+
+        # Otherwise return the base agent
+        return self.agents[self.env_prop.name]
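In short, fine-tuned agents are keyed by goal string plus fine-tuning budget, and `choose_agent` falls back to the single base goal-conditioned agent when no fine-tuned copy exists. A compressed, standalone sketch of that selection rule (plain dicts and strings stand in for the recognizer state; the names and values are placeholders, not library code):

```python
# Compressed sketch of GCAura's agent-selection rule; not the library code itself.
finetune_timesteps = 50000
fine_tuned_agents = {"(0.2, 0.2, 0.1)_50000": "fine_tuned_agent"}  # goal_str + budget
base_agents = {"PandaMyReachDense": "base_gc_agent"}               # env name -> base agent


def choose_agent(goal_str: str, env_name: str) -> str:
    key = f"{goal_str}_{finetune_timesteps}"
    # A goal fine-tuned during goals_adaptation_phase gets its dedicated agent...
    if key in fine_tuned_agents:
        return fine_tuned_agents[key]
    # ...otherwise the base goal-conditioned agent trained on the subspace is reused.
    return base_agents[env_name]


print(choose_agent("(0.2, 0.2, 0.1)", "PandaMyReachDense"))  # -> fine_tuned_agent
print(choose_agent("(0.0, 0.0, 0.1)", "PandaMyReachDense"))  # -> base_gc_agent
```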
gr_libs/recognizer/graml/graml_recognizer.py
CHANGED

@@ -1,4 +1,4 @@
-"""
+"""Collection of recognizers that use GRAML methods: metric learning for ODGR."""

 import os
 from abc import abstractmethod
@@ -124,7 +124,7 @@ class Graml(LearningRecognizer):
         pass

     def domain_learning_phase(self, base_goals: list[str], train_configs: list):
-        super().domain_learning_phase(
+        super().domain_learning_phase(train_configs, base_goals)
         self.train_agents_on_base_goals(base_goals, train_configs)
         # train the network so it will find a metric for the observations of the base agents such that traces of agents to different goals are far from one another
         self.model_directory = get_lstm_model_dir(
@@ -335,11 +335,15 @@ class BGGraml(Graml):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)

-    def domain_learning_phase(self,
-
-
-
-
+    def domain_learning_phase(self, problems):
+        # Always use 'bg' for BGGraml
+        base = problems["bg"]
+        base_goals = base["goals"]
+        train_configs = base["train_configs"]
+        assert len(base_goals) == len(
+            train_configs
+        ), "base_goals and train_configs should have the same length"
+        super().domain_learning_phase(train_configs=train_configs, base_goals=base_goals)

     # In case we need goal-directed agent for every goal
     def train_agents_on_base_goals(self, base_goals: list[str], train_configs: list):
@@ -544,11 +548,15 @@ class GCGraml(Graml, GaAdaptingRecognizer):
             and not self.env_prop.is_action_discrete()
         )

-    def domain_learning_phase(self,
+    def domain_learning_phase(self, problems):
+        # Always use 'gc' for GCGraml
+        base = problems["gc"]
+        base_goals = base["goals"]
+        train_configs = base["train_configs"]
         assert (
             len(train_configs) == 1
-        ), "
-
+        ), "GCGraml should only have one train config for the base goals, it uses a single agent"
+        super().domain_learning_phase(train_configs=train_configs, base_goals=base_goals)

     # In case we need goal-directed agent for every goal
     def train_agents_on_base_goals(self, base_goals: list[str], train_configs: list):
gr_libs/recognizer/recognizer.py
CHANGED
@@ -36,7 +36,7 @@ class LearningRecognizer(Recognizer):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)

-    def domain_learning_phase(self, base_goals: list[str]
+    def domain_learning_phase(self, train_configs: list, base_goals: list[str] = None):
         """
         Perform the domain learning phase.

@@ -70,18 +70,18 @@ class GaAgentTrainerRecognizer(Recognizer):
             None
         """

-    def domain_learning_phase(self, base_goals: list[str]
+    def domain_learning_phase(self, train_configs: list, base_goals: list[str] = None):
         """
         Perform the domain learning phase.

         Args:
-            base_goals (List[str]): List of base goals.
             train_configs (List): List of training configurations.
+            base_goals (List[str]): List of base goals for the learning phase.

         Returns:
             None
         """
-        super().domain_learning_phase(
+        super().domain_learning_phase(train_configs, base_goals)


 class GaAdaptingRecognizer(Recognizer):
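The base-class change above reverses the argument order: `train_configs` now comes first and `base_goals` is optional. A hedged sketch of how a subclass forwards the call under the new signature, mirroring the GCDraco/BGGraml/GCGraml hunks in this diff (`MyRecognizer` is a hypothetical example class, not part of the package):

```python
# Hypothetical subclass showing the new forwarding order; mirrors the pattern in this diff.
from gr_libs.recognizer.recognizer import LearningRecognizer


class MyRecognizer(LearningRecognizer):
    def domain_learning_phase(self, problems):
        base = problems["gc"]  # "bg" for base-goal recognizers such as BGGraml
        # New order: train_configs first, then the (now optional) base_goals.
        super().domain_learning_phase(base["train_configs"], base.get("goals"))
```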
gr_libs/tutorials/gcaura_panda_tutorial.py
ADDED

@@ -0,0 +1,168 @@
+import numpy as np
+from stable_baselines3 import PPO, SAC
+
+from gr_libs import GCAura
+from gr_libs.environment._utils.utils import domain_to_env_property
+from gr_libs.environment.environment import PANDA
+from gr_libs.metrics import mean_wasserstein_distance, stochastic_amplified_selection
+from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
+from gr_libs.ml.utils.format import random_subset_with_order
+
+
+def run_gcaura_panda_tutorial():
+    """
+    Tutorial for GCAura on the Panda environment.
+
+    This tutorial demonstrates:
+    1. Training a goal-conditioned model on a registered goal subspace (center area)
+    2. Adapting to goals both inside and outside this subspace
+    3. Testing inference on multiple goal types
+    """
+    print("Starting GCAura tutorial with Panda environment...")
+
+    print(f"Using training subspace with center-area goals")
+
+    # Initialize the recognizer with the center subspace environment
+    recognizer = GCAura(
+        domain_name=PANDA,
+        env_name="PandaMyReachDenseSubspaceCenterOnly-v3",  # Use the subspace environment
+        evaluation_function=mean_wasserstein_distance,
+        finetune_timesteps=50000,
+    )
+
+    # Domain learning phase - train on the center goal subspace
+    print("\nStarting domain learning phase - training on registered goal subspace...")
+    recognizer.domain_learning_phase(
+        {
+            "gc": {
+                "train_configs": [(SAC, 500000)],
+            }
+        }
+    )
+
+    # Define adaptation goals - mix of in-subspace and out-of-subspace goals
+    # Use predefined goals from our environment registration
+    in_subspace_goal = np.array([[0.0, 0.0, 0.1]])  # Center goal (in subspace)
+    out_subspace_goal1 = np.array([[-0.3, -0.3, 0.1]])  # Far corner (out of subspace)
+    out_subspace_goal2 = np.array([[0.2, 0.2, 0.1]])  # Far corner (out of subspace)
+
+    print(
+        "\nStarting goal adaptation phase with both in-subspace and out-of-subspace goals..."
+    )
+
+    # Goals adaptation phase with mixed goals
+    recognizer.goals_adaptation_phase(
+        dynamic_goals=[
+            in_subspace_goal,
+            out_subspace_goal1,
+            out_subspace_goal2,
+        ],
+    )
+
+    # Setup for testing
+    property_type = domain_to_env_property(PANDA)
+    env_property = property_type("PandaMyReachDense")
+
+    # Create test actor for in-subspace goal
+    print("\nCreating test actor for in-subspace goal...")
+    problem_name_in = env_property.goal_to_problem_str(in_subspace_goal)
+    actor_in = DeepRLAgent(
+        domain_name=PANDA,
+        problem_name=problem_name_in,
+        env_prop=env_property,
+        algorithm=PPO,
+        num_timesteps=250000,
+    )
+    actor_in.learn()
+
+    # Create test actor for out-of-subspace goal
+    print("\nCreating test actor for out-of-subspace goal...")
+    problem_name_out = env_property.goal_to_problem_str(out_subspace_goal1)
+    actor_out = DeepRLAgent(
+        domain_name=PANDA,
+        problem_name=problem_name_out,
+        env_prop=env_property,
+        algorithm=PPO,
+        num_timesteps=250000,
+    )
+    actor_out.learn()
+
+    # Test inference with in-subspace goal
+    print("\nTesting inference with in-subspace goal (should use base model)...")
+    full_sequence_in = actor_in.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True,
+        with_dict=True,
+    )
+    partial_sequence_in = random_subset_with_order(
+        full_sequence_in, (int)(0.5 * len(full_sequence_in)), is_consecutive=False
+    )
+    recognized_goal_in = recognizer.inference_phase(
+        partial_sequence_in, in_subspace_goal, 0.5
+    )
+    print(f"Goal recognized for in-subspace sequence: {recognized_goal_in}")
+    print(f"Actual goal: {in_subspace_goal}")
+
+    assert str(recognized_goal_in) == str(
+        in_subspace_goal
+    ), f"In-subspace goal recognition failed. Expected goal does not match recognized goal {recognized_goal_in}."
+
+    # Test inference with out-of-subspace goal
+    print(
+        "\nTesting inference with out-of-subspace goal (should use fine-tuned model)..."
+    )
+    full_sequence_out = actor_out.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True,
+        with_dict=True,
+    )
+    partial_sequence_out = random_subset_with_order(
+        full_sequence_out, (int)(0.5 * len(full_sequence_out)), is_consecutive=False
+    )
+    recognized_goal_out = recognizer.inference_phase(
+        partial_sequence_out, out_subspace_goal1, 0.5
+    )
+    print(f"Goal recognized for out-of-subspace sequence: {recognized_goal_out}")
+    print(f"Actual goal: {out_subspace_goal1}")
+
+    assert str(recognized_goal_out) == str(
+        out_subspace_goal1
+    ), f"Out-of-subspace goal recognition failed. Expected goal does not match recognized goal {recognized_goal_out}."
+
+    # Try another out-of-subspace goal
+    print("\nTesting inference with second out-of-subspace goal...")
+    problem_name_out2 = env_property.goal_to_problem_str(out_subspace_goal2)
+    actor_out2 = DeepRLAgent(
+        domain_name=PANDA,
+        problem_name=problem_name_out2,
+        env_prop=env_property,
+        algorithm=PPO,
+        num_timesteps=250000,
+    )
+    actor_out2.learn()
+
+    full_sequence_out2 = actor_out2.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True,
+        with_dict=True,
+    )
+    partial_sequence_out2 = random_subset_with_order(
+        full_sequence_out2, (int)(0.5 * len(full_sequence_out2)), is_consecutive=False
+    )
+    recognized_goal_out2 = recognizer.inference_phase(
+        partial_sequence_out2, out_subspace_goal2, 0.5
+    )
+    print(
+        f"Goal recognized for second out-of-subspace sequence: {recognized_goal_out2}"
+    )
+    print(f"Actual goal: {out_subspace_goal2}")
+
+    assert str(recognized_goal_out2) == str(
+        out_subspace_goal2
+    ), f"Out-of-subspace goal recognition failed. Expected goal does not match recognized goal {recognized_goal_out2}."
+
+    print("\nGCAura tutorial completed successfully!")
+
+
+if __name__ == "__main__":
+    run_gcaura_panda_tutorial()
gr_libs/tutorials/gcaura_parking_tutorial.py
ADDED

@@ -0,0 +1,167 @@
+from stable_baselines3 import SAC, TD3
+
+from gr_libs import GCAura
+from gr_libs.environment._utils.utils import domain_to_env_property
+from gr_libs.environment.environment import PARKING
+from gr_libs.metrics import mean_wasserstein_distance, stochastic_amplified_selection
+from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
+from gr_libs.ml.utils.format import random_subset_with_order
+
+
+def run_gcaura_parking_tutorial():
+    """
+    Tutorial for GCAura on the Parking environment.
+
+    This tutorial demonstrates:
+    1. Training a goal-conditioned model on a goal subspace (parking spots 1-10)
+    2. Adapting to goals both inside and outside this subspace
+    3. Testing inference on multiple goal types
+    """
+    print("Starting GCAura tutorial with Parking environment...")
+
+    print(f"Using training subspace with parking spots (1-10)")
+
+    # Initialize the recognizer with the standard parking environment
+    # We'll explicitly define the goal subspace in domain_learning_phase
+    recognizer = GCAura(
+        domain_name=PARKING,
+        env_name="Parking-S-14-PC--GI-8Y10Y13-v0",
+        evaluation_function=mean_wasserstein_distance,
+        finetune_timesteps=40000,  # Fine-tuning timesteps for out-of-subspace goals
+    )
+
+    # Domain learning phase - train on the goal subspace
+    print("\nStarting domain learning phase - training on goal subspace...")
+    recognizer.domain_learning_phase(
+        {
+            "gc": {
+                "train_configs": [(SAC, 500000)],
+            }
+        }
+    )
+
+    # Define adaptation goals - mix of in-subspace and out-of-subspace goals
+    in_subspace_goal = "8"  # Parking spot 8 (in subspace)
+    out_subspace_goal1 = "1"  # Parking spot 1 (out of subspace)
+    out_subspace_goal2 = "18"  # Parking spot 18 (out of subspace)
+
+    print(
+        "\nStarting goal adaptation phase with both in-subspace and out-of-subspace goals..."
+    )
+
+    # Goals adaptation phase with mixed goals
+    recognizer.goals_adaptation_phase(
+        dynamic_goals=[
+            in_subspace_goal,  # In subspace - will use base model
+            out_subspace_goal1,  # Out of subspace - will be fine-tuned
+            out_subspace_goal2,  # Out of subspace - will be fine-tuned
+        ],
+    )
+
+    # Setup for testing
+    property_type = domain_to_env_property(PARKING)
+    env_property = property_type("Parking-S-14-PC--v0")
+
+    # Create test actor for in-subspace goal
+    print("\nCreating test actor for in-subspace goal...")
+    problem_name_in = env_property.goal_to_problem_str(in_subspace_goal)
+    actor_in = DeepRLAgent(
+        domain_name=PARKING,
+        problem_name=problem_name_in,
+        env_prop=env_property,
+        algorithm=TD3,
+        num_timesteps=400000,
+    )
+    actor_in.learn()
+
+    # Create test actor for out-of-subspace goal
+    print("\nCreating test actor for out-of-subspace goal...")
+    problem_name_out = env_property.goal_to_problem_str(out_subspace_goal1)
+    actor_out = DeepRLAgent(
+        domain_name=PARKING,
+        problem_name=problem_name_out,
+        env_prop=env_property,
+        algorithm=TD3,
+        num_timesteps=400000,
+    )
+    actor_out.learn()
+
+    # Test inference with in-subspace goal
+    print("\nTesting inference with in-subspace goal (should use base model)...")
+    full_sequence_in = actor_in.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True,
+        with_dict=True,
+    )
+    partial_sequence_in = random_subset_with_order(
+        full_sequence_in, (int)(0.5 * len(full_sequence_in)), is_consecutive=False
+    )
+    recognized_goal_in = recognizer.inference_phase(
+        partial_sequence_in, in_subspace_goal, 0.5
+    )
+    print(f"Goal recognized for in-subspace sequence: {recognized_goal_in}")
+    print(f"Actual goal: {in_subspace_goal}")
+
+    assert (
+        recognized_goal_in == in_subspace_goal
+    ), f"In-subspace goal recognition failed, expected to recognize the parking spot {in_subspace_goal}."
+
+    # Test inference with out-of-subspace goal
+    print(
+        "\nTesting inference with out-of-subspace goal (should use fine-tuned model)..."
+    )
+    full_sequence_out = actor_out.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True,
+        with_dict=True,
+    )
+    partial_sequence_out = random_subset_with_order(
+        full_sequence_out, (int)(0.5 * len(full_sequence_out)), is_consecutive=False
+    )
+    recognized_goal_out = recognizer.inference_phase(
+        partial_sequence_out, out_subspace_goal1, 0.5
+    )
+    print(f"Goal recognized for out-of-subspace sequence: {recognized_goal_out}")
+    print(f"Actual goal: {out_subspace_goal1}")
+
+    assert (
+        recognized_goal_out == out_subspace_goal1
+    ), f"Out-of-subspace goal recognition failed, expected to recognize the parking spot {out_subspace_goal1}."
+
+    # Try another out-of-subspace goal
+    print("\nTesting inference with second out-of-subspace goal...")
+    problem_name_out2 = env_property.goal_to_problem_str(out_subspace_goal2)
+    actor_out2 = DeepRLAgent(
+        domain_name=PARKING,
+        problem_name=problem_name_out2,
+        env_prop=env_property,
+        algorithm=TD3,
+        num_timesteps=400000,
+    )
+    actor_out2.learn()
+
+    full_sequence_out2 = actor_out2.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True,
+        with_dict=True,
+    )
+    partial_sequence_out2 = random_subset_with_order(
+        full_sequence_out2, (int)(0.5 * len(full_sequence_out2)), is_consecutive=False
+    )
+    recognized_goal_out2 = recognizer.inference_phase(
+        partial_sequence_out2, out_subspace_goal2, 0.5
+    )
+    print(
+        f"Goal recognized for second out-of-subspace sequence: {recognized_goal_out2}"
+    )
+    print(f"Actual goal: {out_subspace_goal2}")
+
+    assert (
+        recognized_goal_out2 == out_subspace_goal2
+    ), f"Second out-of-subspace goal recognition failed, expected to recognize the parking spot {out_subspace_goal2}."
+
+    print("\nGCAura Parking tutorial completed successfully!")
+
+
+if __name__ == "__main__":
+    run_gcaura_parking_tutorial()