gr-libs 0.2.2__py3-none-any.whl → 0.2.6__py3-none-any.whl
This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- gr_libs/__init__.py +6 -1
- gr_libs/_evaluation/_generate_experiments_results.py +0 -141
- gr_libs/_version.py +2 -2
- gr_libs/all_experiments.py +73 -107
- gr_libs/environment/environment.py +126 -17
- gr_libs/evaluation/generate_experiments_results.py +100 -0
- gr_libs/ml/consts.py +1 -0
- gr_libs/ml/neural/deep_rl_learner.py +118 -34
- gr_libs/odgr_executor.py +27 -27
- gr_libs/problems/consts.py +568 -290
- gr_libs/recognizer/_utils/__init__.py +1 -0
- gr_libs/recognizer/_utils/format.py +7 -1
- gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +158 -2
- gr_libs/recognizer/graml/graml_recognizer.py +18 -10
- gr_libs/recognizer/recognizer.py +4 -4
- gr_libs/tutorials/gcaura_panda_tutorial.py +168 -0
- gr_libs/tutorials/gcaura_parking_tutorial.py +167 -0
- gr_libs/tutorials/gcaura_point_maze_tutorial.py +169 -0
- gr_libs/tutorials/gcdraco_panda_tutorial.py +6 -2
- gr_libs/tutorials/gcdraco_parking_tutorial.py +3 -1
- gr_libs/tutorials/graml_minigrid_tutorial.py +16 -12
- gr_libs/tutorials/graml_panda_tutorial.py +6 -2
- gr_libs/tutorials/graml_parking_tutorial.py +3 -1
- gr_libs/tutorials/graml_point_maze_tutorial.py +15 -2
- {gr_libs-0.2.2.dist-info → gr_libs-0.2.6.dist-info}/METADATA +31 -15
- {gr_libs-0.2.2.dist-info → gr_libs-0.2.6.dist-info}/RECORD +35 -29
- {gr_libs-0.2.2.dist-info → gr_libs-0.2.6.dist-info}/WHEEL +1 -1
- tests/test_gcaura.py +15 -0
- tests/test_odgr_executor_expertbasedgraml.py +14 -0
- tests/test_odgr_executor_gcaura.py +14 -0
- tests/test_odgr_executor_gcdraco.py +14 -0
- tests/test_odgr_executor_gcgraml.py +14 -0
- tests/test_odgr_executor_graql.py +14 -0
- gr_libs/_evaluation/_analyze_results_cross_alg_cross_domain.py +0 -260
- gr_libs/_evaluation/_generate_task_specific_statistics_plots.py +0 -497
- gr_libs/_evaluation/_get_plans_images.py +0 -61
- gr_libs/_evaluation/_increasing_and_decreasing_.py +0 -106
- /gr_libs/{_evaluation → evaluation}/__init__.py +0 -0
- {gr_libs-0.2.2.dist-info → gr_libs-0.2.6.dist-info}/top_level.txt +0 -0
gr_libs/evaluation/generate_experiments_results.py
ADDED
@@ -0,0 +1,100 @@
+import argparse
+import os
+
+import dill
+import matplotlib.pyplot as plt
+import numpy as np
+
+from gr_libs.ml.utils.storage import get_experiment_results_path
+
+
+def load_results(domain, env, task, recognizer, n_runs, percentage, cons_type):
+    # Collect accuracy for a single task and recognizer
+    accs = []
+    res_dir = get_experiment_results_path(domain, env, task, recognizer)
+    if not os.path.exists(res_dir):
+        return accs
+    for i in range(n_runs):
+        res_file = os.path.join(res_dir, f"res_{i}.pkl")
+        if not os.path.exists(res_file):
+            continue
+        with open(res_file, "rb") as f:
+            results = dill.load(f)
+        if percentage in results and cons_type in results[percentage]:
+            acc = results[percentage][cons_type].get("accuracy")
+            if acc is not None:
+                accs.append(acc)
+    return accs
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--domain", required=True)
+    parser.add_argument("--env", required=True)
+    parser.add_argument("--tasks", nargs="+", required=True)
+    parser.add_argument("--recognizers", nargs="+", required=True)
+    parser.add_argument("--n_runs", type=int, default=5)
+    parser.add_argument("--percentage", required=True)
+    parser.add_argument(
+        "--cons_type", choices=["consecutive", "non_consecutive"], required=True
+    )
+    parser.add_argument("--graph_name", type=str, default="experiment_results")
+    args = parser.parse_args()
+
+    plt.figure(figsize=(7, 5))
+    has_data = False
+    missing_recognizers = []
+
+    for recognizer in args.recognizers:
+        x_vals = []
+        y_means = []
+        y_sems = []
+        for task in args.tasks:
+            accs = load_results(
+                args.domain,
+                args.env,
+                task,
+                recognizer,
+                args.n_runs,
+                args.percentage,
+                args.cons_type,
+            )
+            if accs:
+                x_vals.append(task)
+                y_means.append(np.mean(accs))
+                y_sems.append(np.std(accs) / np.sqrt(len(accs)))
+        if x_vals:
+            has_data = True
+            x_ticks = np.arange(len(x_vals))
+            plt.plot(x_ticks, y_means, marker="o", label=recognizer)
+            plt.fill_between(
+                x_ticks,
+                np.array(y_means) - np.array(y_sems),
+                np.array(y_means) + np.array(y_sems),
+                alpha=0.2,
+            )
+            plt.xticks(x_ticks, x_vals)
+        else:
+            print(
+                f"Warning: No data found for recognizer '{recognizer}' in {args.domain} / {args.env} / {args.percentage} / {args.cons_type}"
+            )
+            missing_recognizers.append(recognizer)
+
+    if not has_data:
+        raise RuntimeError(
+            f"No data found for any recognizer in {args.domain} / {args.env} / {args.percentage} / {args.cons_type}. "
+            f"Missing recognizers: {', '.join(missing_recognizers)}"
+        )
+
+    plt.xlabel("Task")
+    plt.ylabel("Accuracy")
+    plt.title(f"{args.domain} - {args.env} ({args.percentage}, {args.cons_type})")
+    plt.legend()
+    plt.grid(True)
+    fig_path = f"{args.graph_name}_{'_'.join(args.recognizers)}_{args.domain}_{args.env}_{args.percentage}_{args.cons_type}.png"
+    plt.savefig(fig_path)
+    print(f"Figure saved at: {fig_path}")
+
+
+if __name__ == "__main__":
+    main()
gr_libs/ml/neural/deep_rl_learner.py
CHANGED
@@ -1,11 +1,12 @@
 import gc
 from collections import OrderedDict
 from types import MethodType
+from typing import Any

 import cv2
 import numpy as np

-from gr_libs.environment.environment import EnvProperty
+from gr_libs.environment.environment import EnvProperty, suppress_output

 if __name__ != "__main__":
     from gr_libs.ml.utils.storage import get_agent_model_dir
@@ -22,6 +23,10 @@ from stable_baselines3.common.base_class import BaseAlgorithm

 from gr_libs.ml.utils import device

+from gr_libs.ml.consts import (
+    FINETUNE_TIMESTEPS,
+)
+
 # TODO do we need this?
 NETWORK_SETUP = {
     SAC: OrderedDict(
@@ -184,12 +189,7 @@ class DeepRLAgent:
         """
         fourcc = cv2.VideoWriter_fourcc("m", "p", "4", "v")
         fps = 30.0
-
-        # assert goal_idx is not None
-        # self.reset_with_goal_idx(goal_idx)
-        # else:
-        # assert goal_idx is None
-        self.env.reset()
+        self.safe_env_reset()
         frame_size = (
             self.env.render(mode="rgb_array").shape[1],
             self.env.render(mode="rgb_array").shape[0],
@@ -198,7 +198,7 @@ class DeepRLAgent:
         video_writer = cv2.VideoWriter(video_path, fourcc, fps, frame_size)
         general_done, success_done = False, False
         gc.collect()
-        obs = self.
+        obs = self.safe_env_reset()
         self.env_prop.change_goal_to_specific_desired(obs, desired)
         counter = 0
         while not (general_done or success_done):
@@ -209,17 +209,11 @@ class DeepRLAgent:
             general_done = general_done[0]
             self.env_prop.change_goal_to_specific_desired(obs, desired)
             if "success" in info[0].keys():
-                success_done = info[0][
-                    "success"
-                ]  # make sure the agent actually reached the goal within the max time
+                success_done = info[0]["success"]
             elif "is_success" in info[0].keys():
-                success_done = info[0][
-                    "is_success"
-                ]  # make sure the agent actually reached the goal within the max time
+                success_done = info[0]["is_success"]
             elif "step_task_completions" in info[0].keys():
-                success_done = (
-                    len(info[0]["step_task_completions"]) == 1
-                )  # bug of dummyVecEnv, it removes the episode_task_completions from the info dict.
+                success_done = len(info[0]["step_task_completions"]) == 1
             else:
                 raise NotImplementedError(
                     "no other option for any of the environments."
@@ -247,40 +241,59 @@ class DeepRLAgent:
             self._model_file_path, env=self.env, device=device, **self.model_kwargs
         )

-    def learn(self):
+    def learn(self, goal=None, total_timesteps=None):
         """Train the agent."""
-
-
+        model_file_path = self._model_file_path
+        old_model_file_path = model_file_path
+        if goal is not None:
+            model_file_path = self._model_file_path.replace(
+                ".pth", f"_{goal}.pth"
+            ).replace(".zip", f"_{goal}.zip")
+        if total_timesteps is not None:
+            model_file_path = model_file_path.replace(
+                ".pth", f"_{total_timesteps}.pth"
+            ).replace(".zip", f"_{total_timesteps}.zip")
+
+        self._model_file_path = model_file_path
+
+        if os.path.exists(model_file_path):
+            print(f"Loading pre-existing model in {model_file_path}")
             self.load_model()
         else:
-            print(f"No existing model in {
-            if
-
-
-            self.
-
-
-
-
-
+            print(f"No existing model in {model_file_path}, starting learning")
+            if total_timesteps is None:
+                total_timesteps = self.num_timesteps
+            if self.exploration_rate is not None:
+                self._model = self.algorithm(
+                    "MultiInputPolicy",
+                    self.env,
+                    ent_coef=self.exploration_rate,
+                    verbose=1,
+                )
+            else:
+                self._model = self.algorithm(
+                    "MultiInputPolicy", self.env, verbose=1
+                )
             self._model.learn(
-                total_timesteps=
+                total_timesteps=total_timesteps, progress_bar=True
             )  # comment this in a normal env
             self.save_model()

+        self._model_file_path = old_model_file_path
+
     def safe_env_reset(self):
         """
-        Reset the environment safely.
+        Reset the environment safely, suppressing output.

         Returns:
             The initial observation.
         """
         try:
-            obs = self.env
+            obs = suppress_env_reset(self.env)
         except Exception:
             kwargs = {"id": self.problem_name, "render_mode": "rgb_array"}
             self.env = self.env_prop.create_vec_env(kwargs)
-            obs = self.env
+            obs = suppress_env_reset(self.env)
         return obs

     def get_mean_and_std_dev(self, observation):
@@ -514,6 +527,69 @@ class DeepRLAgent:
         self.env.close()
         return observations

+    def fine_tune(
+        self,
+        goal: Any,
+        num_timesteps: int = FINETUNE_TIMESTEPS,
+    ) -> None:
+        """
+        Fine-tune this goal-conditioned agent on a single specified goal.
+        Overrides optimizer LR if provided, resets the env to the goal, and continues training.
+
+        Args:
+            goal: The specific goal to fine-tune on. Type depends on the environment.
+            num_timesteps: Number of timesteps for fine-tuning. Defaults to FINETUNE_TIMESTEPS.
+            learning_rate: Learning rate for fine-tuning. Defaults to FINETUNE_LR.
+        """
+        # Store original environment and problem
+        original_env = self.env
+        original_problem = self.problem_name
+        created_new_env = False
+
+        try:
+            # Try to create a goal-specific environment
+            if hasattr(self.env_prop, "goal_to_problem_str") and callable(
+                self.env_prop.goal_to_problem_str
+            ):
+                try:
+                    goal_problem = self.env_prop.goal_to_problem_str(goal)
+
+                    # Create the goal-specific environment
+                    env_kwargs = {"id": goal_problem, "render_mode": "rgb_array"}
+                    new_env = self.env_prop.create_vec_env(env_kwargs)
+
+                    # Update the model's environment
+                    self._model.set_env(new_env)
+                    self.env = new_env
+                    self.problem_name = goal_problem
+                    created_new_env = True
+                    print(f"Created a new environment for fine-tuning: {goal_problem}")
+                except Exception as e:
+                    print(f"Warning: Could not create goal-specific environment: {e}")
+
+            if not created_new_env:
+                print(
+                    (
+                        "Fine-tuning requires a goal-specific environment."
+                        "Please ensure that the environment with the specified goal exists."
+                    )
+                )
+
+            print(f"Fine-tuning for {num_timesteps} timesteps...")
+            self.learn(
+                goal=self.env_prop.goal_to_str(goal), total_timesteps=num_timesteps
+            )
+            print("Fine-tuning complete. Model saved.")
+
+        finally:
+            # Restore original environment if needed
+            if created_new_env:
+                self.env.close()
+                self._model.set_env(original_env)
+                self.env = original_env
+                self.problem_name = original_problem
+                print("Restored original environment.")
+

 class GCDeepRLAgent(DeepRLAgent):
     """
@@ -632,3 +708,11 @@ class GCDeepRLAgent(DeepRLAgent):
             desired=goal_directed_goal,
         )
         return observations
+
+
+def suppress_env_reset(env):
+    """
+    Utility function to suppress prints during env.reset().
+    """
+    with suppress_output():
+        return env.reset()
gr_libs/odgr_executor.py
CHANGED
@@ -4,7 +4,7 @@ import time

 import dill

-from gr_libs.environment.
+from gr_libs.environment._utils.utils import domain_to_env_property
 from gr_libs.metrics.metrics import stochastic_amplified_selection
 from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
 from gr_libs.ml.utils.format import random_subset_with_order
@@ -14,10 +14,10 @@ from gr_libs.ml.utils.storage import (
     get_policy_sequences_result_path,
 )
 from gr_libs.problems.consts import PROBLEMS
-from gr_libs.recognizer.
+from gr_libs.recognizer._utils import recognizer_str_to_obj
+from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Draco, GCDraco, GCAura
 from gr_libs.recognizer.graml.graml_recognizer import Graml
 from gr_libs.recognizer.recognizer import GaAgentTrainerRecognizer, LearningRecognizer
-from gr_libs.recognizer.utils import recognizer_str_to_obj


 def validate(args, recognizer_type, task_inputs):
@@ -52,9 +52,7 @@ def run_odgr_problem(args):
         dlp_time = 0
         if issubclass(recognizer_type, LearningRecognizer):
             start_dlp_time = time.time()
-            recognizer.domain_learning_phase(
-                base_goals=value["goals"], train_configs=value["train_configs"]
-            )
+            recognizer.domain_learning_phase(value)
             dlp_time = time.time() - start_dlp_time
         elif key.startswith("G_"):
             start_ga_time = time.time()
@@ -104,7 +102,11 @@ def run_odgr_problem(args):
             }

             # need to dump the whole plan for draco because it needs it for inference phase for checking likelihood.
-            if (
+            if (
+                recognizer_type == Draco
+                or recognizer_type == GCDraco
+                or recognizer_type == GCAura
+            ) and issubclass(
                 rl_agent_type, DeepRLAgent
             ):  # TODO remove this condition, remove the assumption.
                 generate_obs_kwargs["with_dict"] = True
@@ -184,10 +186,17 @@ def run_odgr_problem(args):
             recognizer=args.recognizer,
         )
     )
-
-
+    if args.experiment_num is not None:
+        res_txt = os.path.join(res_file_path, f"res_{args.experiment_num}.txt")
+        res_pkl = os.path.join(res_file_path, f"res_{args.experiment_num}.pkl")
+    else:
+        res_txt = os.path.join(res_file_path, "res.txt")
+        res_pkl = os.path.join(res_file_path, "res.pkl")
+
+    print(f"generating results into {res_txt} and {res_pkl}")
+    with open(res_pkl, "wb") as results_file:
         dill.dump(results, results_file)
-    with open(
+    with open(res_txt, "w") as results_file:
         results_file.write(str(results))


@@ -219,29 +228,14 @@ def parse_args():
             "Graql",
             "Draco",
             "GCDraco",
+            "GCAura",
         ],
         required=True,
         help="Recognizer type. Follow readme.md and recognizer folder for more information and rules.",
     )
     required_group.add_argument(
         "--task",
-        choices=[
-            "L1",
-            "L2",
-            "L3",
-            "L4",
-            "L5",
-            "L11",
-            "L22",
-            "L33",
-            "L44",
-            "L55",
-            "L111",
-            "L222",
-            "L333",
-            "L444",
-            "L555",
-        ],
+        choices=["L1", "L2", "L3", "L4", "L5"],
         required=True,
         help="Task identifier (e.g., L1, L2,...,L5)",
     )
@@ -251,6 +245,12 @@ def parse_args():
     optional_group.add_argument(
         "--collect_stats", action="store_true", help="Whether to collect statistics"
     )
+    optional_group.add_argument(
+        "--experiment_num",
+        type=int,
+        default=None,
+        help="Experiment number for parallel runs",
+    )
     args = parser.parse_args()

     ### VALIDATE INPUTS ###