gr-libs 0.2.2__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. gr_libs/__init__.py +6 -1
  2. gr_libs/_evaluation/_generate_experiments_results.py +0 -141
  3. gr_libs/_version.py +2 -2
  4. gr_libs/all_experiments.py +73 -107
  5. gr_libs/environment/environment.py +126 -17
  6. gr_libs/evaluation/generate_experiments_results.py +100 -0
  7. gr_libs/ml/consts.py +1 -0
  8. gr_libs/ml/neural/deep_rl_learner.py +118 -34
  9. gr_libs/odgr_executor.py +27 -27
  10. gr_libs/problems/consts.py +568 -290
  11. gr_libs/recognizer/_utils/__init__.py +1 -0
  12. gr_libs/recognizer/_utils/format.py +7 -1
  13. gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +158 -2
  14. gr_libs/recognizer/graml/graml_recognizer.py +18 -10
  15. gr_libs/recognizer/recognizer.py +4 -4
  16. gr_libs/tutorials/gcaura_panda_tutorial.py +168 -0
  17. gr_libs/tutorials/gcaura_parking_tutorial.py +167 -0
  18. gr_libs/tutorials/gcaura_point_maze_tutorial.py +169 -0
  19. gr_libs/tutorials/gcdraco_panda_tutorial.py +6 -2
  20. gr_libs/tutorials/gcdraco_parking_tutorial.py +3 -1
  21. gr_libs/tutorials/graml_minigrid_tutorial.py +16 -12
  22. gr_libs/tutorials/graml_panda_tutorial.py +6 -2
  23. gr_libs/tutorials/graml_parking_tutorial.py +3 -1
  24. gr_libs/tutorials/graml_point_maze_tutorial.py +15 -2
  25. {gr_libs-0.2.2.dist-info → gr_libs-0.2.6.dist-info}/METADATA +31 -15
  26. {gr_libs-0.2.2.dist-info → gr_libs-0.2.6.dist-info}/RECORD +35 -29
  27. {gr_libs-0.2.2.dist-info → gr_libs-0.2.6.dist-info}/WHEEL +1 -1
  28. tests/test_gcaura.py +15 -0
  29. tests/test_odgr_executor_expertbasedgraml.py +14 -0
  30. tests/test_odgr_executor_gcaura.py +14 -0
  31. tests/test_odgr_executor_gcdraco.py +14 -0
  32. tests/test_odgr_executor_gcgraml.py +14 -0
  33. tests/test_odgr_executor_graql.py +14 -0
  34. gr_libs/_evaluation/_analyze_results_cross_alg_cross_domain.py +0 -260
  35. gr_libs/_evaluation/_generate_task_specific_statistics_plots.py +0 -497
  36. gr_libs/_evaluation/_get_plans_images.py +0 -61
  37. gr_libs/_evaluation/_increasing_and_decreasing_.py +0 -106
  38. /gr_libs/{_evaluation → evaluation}/__init__.py +0 -0
  39. {gr_libs-0.2.2.dist-info → gr_libs-0.2.6.dist-info}/top_level.txt +0 -0
gr_libs/evaluation/generate_experiments_results.py ADDED
@@ -0,0 +1,100 @@
+ import argparse
+ import os
+
+ import dill
+ import matplotlib.pyplot as plt
+ import numpy as np
+
+ from gr_libs.ml.utils.storage import get_experiment_results_path
+
+
+ def load_results(domain, env, task, recognizer, n_runs, percentage, cons_type):
+     # Collect accuracy for a single task and recognizer
+     accs = []
+     res_dir = get_experiment_results_path(domain, env, task, recognizer)
+     if not os.path.exists(res_dir):
+         return accs
+     for i in range(n_runs):
+         res_file = os.path.join(res_dir, f"res_{i}.pkl")
+         if not os.path.exists(res_file):
+             continue
+         with open(res_file, "rb") as f:
+             results = dill.load(f)
+         if percentage in results and cons_type in results[percentage]:
+             acc = results[percentage][cons_type].get("accuracy")
+             if acc is not None:
+                 accs.append(acc)
+     return accs
+
+
+ def main():
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--domain", required=True)
+     parser.add_argument("--env", required=True)
+     parser.add_argument("--tasks", nargs="+", required=True)
+     parser.add_argument("--recognizers", nargs="+", required=True)
+     parser.add_argument("--n_runs", type=int, default=5)
+     parser.add_argument("--percentage", required=True)
+     parser.add_argument(
+         "--cons_type", choices=["consecutive", "non_consecutive"], required=True
+     )
+     parser.add_argument("--graph_name", type=str, default="experiment_results")
+     args = parser.parse_args()
+
+     plt.figure(figsize=(7, 5))
+     has_data = False
+     missing_recognizers = []
+
+     for recognizer in args.recognizers:
+         x_vals = []
+         y_means = []
+         y_sems = []
+         for task in args.tasks:
+             accs = load_results(
+                 args.domain,
+                 args.env,
+                 task,
+                 recognizer,
+                 args.n_runs,
+                 args.percentage,
+                 args.cons_type,
+             )
+             if accs:
+                 x_vals.append(task)
+                 y_means.append(np.mean(accs))
+                 y_sems.append(np.std(accs) / np.sqrt(len(accs)))
+         if x_vals:
+             has_data = True
+             x_ticks = np.arange(len(x_vals))
+             plt.plot(x_ticks, y_means, marker="o", label=recognizer)
+             plt.fill_between(
+                 x_ticks,
+                 np.array(y_means) - np.array(y_sems),
+                 np.array(y_means) + np.array(y_sems),
+                 alpha=0.2,
+             )
+             plt.xticks(x_ticks, x_vals)
+         else:
+             print(
+                 f"Warning: No data found for recognizer '{recognizer}' in {args.domain} / {args.env} / {args.percentage} / {args.cons_type}"
+             )
+             missing_recognizers.append(recognizer)
+
+     if not has_data:
+         raise RuntimeError(
+             f"No data found for any recognizer in {args.domain} / {args.env} / {args.percentage} / {args.cons_type}. "
+             f"Missing recognizers: {', '.join(missing_recognizers)}"
+         )
+
+     plt.xlabel("Task")
+     plt.ylabel("Accuracy")
+     plt.title(f"{args.domain} - {args.env} ({args.percentage}, {args.cons_type})")
+     plt.legend()
+     plt.grid(True)
+     fig_path = f"{args.graph_name}_{'_'.join(args.recognizers)}_{args.domain}_{args.env}_{args.percentage}_{args.cons_type}.png"
+     plt.savefig(fig_path)
+     print(f"Figure saved at: {fig_path}")
+
+
+ if __name__ == "__main__":
+     main()
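
For orientation, the new module is a self-contained plotting CLI: it aggregates per-run accuracy from res_{i}.pkl files and saves a PNG with one curve per recognizer. A minimal sketch of a programmatic invocation follows; the domain, environment, percentage, and recognizer values are illustrative placeholders (not taken from this diff) and must match results already written by odgr_executor.

# Hypothetical invocation of the new plotting entry point; argparse reads
# sys.argv[1:], so main() can be driven directly from Python.
import sys

from gr_libs.evaluation.generate_experiments_results import main

sys.argv = [
    "generate_experiments_results",
    "--domain", "minigrid",                    # assumed domain name
    "--env", "MiniGrid-SimpleCrossingS13N4",   # assumed environment name
    "--tasks", "L1", "L2", "L3",
    "--recognizers", "ExpertBasedGraml", "Graql",  # assumed recognizer names
    "--percentage", "0.5",                     # assumed percentage key
    "--cons_type", "consecutive",
]
main()  # saves <graph_name>_<recognizers>_<domain>_<env>_<percentage>_<cons_type>.png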
gr_libs/ml/consts.py CHANGED
@@ -20,3 +20,4 @@ OPTIM_ALPHA = 0.99
  CLIP_EPS = 0.2
  RECURRENCE = 1
  TEXT = False
+ FINETUNE_TIMESTEPS = 100000 # for GCAura fine-tuning
gr_libs/ml/neural/deep_rl_learner.py CHANGED
@@ -1,11 +1,12 @@
  import gc
  from collections import OrderedDict
  from types import MethodType
+ from typing import Any

  import cv2
  import numpy as np

- from gr_libs.environment.environment import EnvProperty
+ from gr_libs.environment.environment import EnvProperty, suppress_output

  if __name__ != "__main__":
      from gr_libs.ml.utils.storage import get_agent_model_dir
@@ -22,6 +23,10 @@ from stable_baselines3.common.base_class import BaseAlgorithm

  from gr_libs.ml.utils import device

+ from gr_libs.ml.consts import (
+     FINETUNE_TIMESTEPS,
+ )
+
  # TODO do we need this?
  NETWORK_SETUP = {
      SAC: OrderedDict(
@@ -184,12 +189,7 @@ class DeepRLAgent:
          """
          fourcc = cv2.VideoWriter_fourcc("m", "p", "4", "v")
          fps = 30.0
-         # if is_gc:
-         #     assert goal_idx is not None
-         #     self.reset_with_goal_idx(goal_idx)
-         # else:
-         #     assert goal_idx is None
-         self.env.reset()
+         self.safe_env_reset()
          frame_size = (
              self.env.render(mode="rgb_array").shape[1],
              self.env.render(mode="rgb_array").shape[0],
@@ -198,7 +198,7 @@
          video_writer = cv2.VideoWriter(video_path, fourcc, fps, frame_size)
          general_done, success_done = False, False
          gc.collect()
-         obs = self.env.reset()
+         obs = self.safe_env_reset()
          self.env_prop.change_goal_to_specific_desired(obs, desired)
          counter = 0
          while not (general_done or success_done):
@@ -209,17 +209,11 @@
              general_done = general_done[0]
              self.env_prop.change_goal_to_specific_desired(obs, desired)
              if "success" in info[0].keys():
-                 success_done = info[0][
-                     "success"
-                 ] # make sure the agent actually reached the goal within the max time
+                 success_done = info[0]["success"]
              elif "is_success" in info[0].keys():
-                 success_done = info[0][
-                     "is_success"
-                 ] # make sure the agent actually reached the goal within the max time
+                 success_done = info[0]["is_success"]
              elif "step_task_completions" in info[0].keys():
-                 success_done = (
-                     len(info[0]["step_task_completions"]) == 1
-                 ) # bug of dummyVecEnv, it removes the episode_task_completions from the info dict.
+                 success_done = len(info[0]["step_task_completions"]) == 1
              else:
                  raise NotImplementedError(
                      "no other option for any of the environments."
@@ -247,40 +241,59 @@
              self._model_file_path, env=self.env, device=device, **self.model_kwargs
          )

-     def learn(self):
+     def learn(self, goal=None, total_timesteps=None):
          """Train the agent."""
-         if os.path.exists(self._model_file_path):
-             print(f"Loading pre-existing model in {self._model_file_path}")
+         model_file_path = self._model_file_path
+         old_model_file_path = model_file_path
+         if goal is not None:
+             model_file_path = self._model_file_path.replace(
+                 ".pth", f"_{goal}.pth"
+             ).replace(".zip", f"_{goal}.zip")
+         if total_timesteps is not None:
+             model_file_path = model_file_path.replace(
+                 ".pth", f"_{total_timesteps}.pth"
+             ).replace(".zip", f"_{total_timesteps}.zip")
+
+         self._model_file_path = model_file_path
+
+         if os.path.exists(model_file_path):
+             print(f"Loading pre-existing model in {model_file_path}")
              self.load_model()
          else:
-             print(f"No existing model in {self._model_file_path}, starting learning")
-             if self.exploration_rate is not None:
-                 self._model = self.algorithm(
-                     "MultiInputPolicy",
-                     self.env,
-                     ent_coef=self.exploration_rate,
-                     verbose=1,
-                 )
-             else:
-                 self._model = self.algorithm("MultiInputPolicy", self.env, verbose=1)
+             print(f"No existing model in {model_file_path}, starting learning")
+             if total_timesteps is None:
+                 total_timesteps = self.num_timesteps
+             if self.exploration_rate is not None:
+                 self._model = self.algorithm(
+                     "MultiInputPolicy",
+                     self.env,
+                     ent_coef=self.exploration_rate,
+                     verbose=1,
+                 )
+             else:
+                 self._model = self.algorithm(
+                     "MultiInputPolicy", self.env, verbose=1
+                 )
              self._model.learn(
-                 total_timesteps=self.num_timesteps, progress_bar=True
+                 total_timesteps=total_timesteps, progress_bar=True
              ) # comment this in a normal env
              self.save_model()

+         self._model_file_path = old_model_file_path
+
      def safe_env_reset(self):
          """
-         Reset the environment safely.
+         Reset the environment safely, suppressing output.

          Returns:
              The initial observation.
          """
          try:
-             obs = self.env.reset()
+             obs = suppress_env_reset(self.env)
          except Exception:
              kwargs = {"id": self.problem_name, "render_mode": "rgb_array"}
              self.env = self.env_prop.create_vec_env(kwargs)
-             obs = self.env.reset()
+             obs = suppress_env_reset(self.env)
          return obs

      def get_mean_and_std_dev(self, observation):
@@ -514,6 +527,69 @@
          self.env.close()
          return observations

+     def fine_tune(
+         self,
+         goal: Any,
+         num_timesteps: int = FINETUNE_TIMESTEPS,
+     ) -> None:
+         """
+         Fine-tune this goal-conditioned agent on a single specified goal.
+         Overrides optimizer LR if provided, resets the env to the goal, and continues training.
+
+         Args:
+             goal: The specific goal to fine-tune on. Type depends on the environment.
+             num_timesteps: Number of timesteps for fine-tuning. Defaults to FINETUNE_TIMESTEPS.
+             learning_rate: Learning rate for fine-tuning. Defaults to FINETUNE_LR.
+         """
+         # Store original environment and problem
+         original_env = self.env
+         original_problem = self.problem_name
+         created_new_env = False
+
+         try:
+             # Try to create a goal-specific environment
+             if hasattr(self.env_prop, "goal_to_problem_str") and callable(
+                 self.env_prop.goal_to_problem_str
+             ):
+                 try:
+                     goal_problem = self.env_prop.goal_to_problem_str(goal)
+
+                     # Create the goal-specific environment
+                     env_kwargs = {"id": goal_problem, "render_mode": "rgb_array"}
+                     new_env = self.env_prop.create_vec_env(env_kwargs)
+
+                     # Update the model's environment
+                     self._model.set_env(new_env)
+                     self.env = new_env
+                     self.problem_name = goal_problem
+                     created_new_env = True
+                     print(f"Created a new environment for fine-tuning: {goal_problem}")
+                 except Exception as e:
+                     print(f"Warning: Could not create goal-specific environment: {e}")
+
+             if not created_new_env:
+                 print(
+                     (
+                         "Fine-tuning requires a goal-specific environment."
+                         "Please ensure that the environment with the specified goal exists."
+                     )
+                 )
+
+             print(f"Fine-tuning for {num_timesteps} timesteps...")
+             self.learn(
+                 goal=self.env_prop.goal_to_str(goal), total_timesteps=num_timesteps
+             )
+             print("Fine-tuning complete. Model saved.")
+
+         finally:
+             # Restore original environment if needed
+             if created_new_env:
+                 self.env.close()
+                 self._model.set_env(original_env)
+                 self.env = original_env
+                 self.problem_name = original_problem
+                 print("Restored original environment.")
+

  class GCDeepRLAgent(DeepRLAgent):
      """
@@ -632,3 +708,11 @@
              desired=goal_directed_goal,
          )
          return observations
+
+
+ def suppress_env_reset(env):
+     """
+     Utility function to suppress prints during env.reset().
+     """
+     with suppress_output():
+         return env.reset()
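
To make the checkpoint-naming side effect of the new learn(goal=..., total_timesteps=...) concrete, here is a small standalone sketch of the same suffixing logic. The file name, goal string, and timestep value are examples only; only the string-replacement behavior mirrors the diff above.

# Standalone mirror of the suffixing DeepRLAgent.learn() applies before loading or
# saving, so fine-tuned checkpoints do not overwrite the base goal-conditioned model.
def suffixed_model_path(model_file_path, goal=None, total_timesteps=None):
    if goal is not None:
        model_file_path = model_file_path.replace(".pth", f"_{goal}.pth").replace(
            ".zip", f"_{goal}.zip"
        )
    if total_timesteps is not None:
        model_file_path = model_file_path.replace(
            ".pth", f"_{total_timesteps}.pth"
        ).replace(".zip", f"_{total_timesteps}.zip")
    return model_file_path


# Example values (hypothetical goal string and the FINETUNE_TIMESTEPS default):
print(suffixed_model_path("saved_model.zip", goal="9x1", total_timesteps=100000))
# -> saved_model_9x1_100000.zip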
gr_libs/odgr_executor.py CHANGED
@@ -4,7 +4,7 @@ import time

  import dill

- from gr_libs.environment.utils.utils import domain_to_env_property
+ from gr_libs.environment._utils.utils import domain_to_env_property
  from gr_libs.metrics.metrics import stochastic_amplified_selection
  from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
  from gr_libs.ml.utils.format import random_subset_with_order
@@ -14,10 +14,10 @@ from gr_libs.ml.utils.storage import (
      get_policy_sequences_result_path,
  )
  from gr_libs.problems.consts import PROBLEMS
- from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Draco, GCDraco
+ from gr_libs.recognizer._utils import recognizer_str_to_obj
+ from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Draco, GCDraco, GCAura
  from gr_libs.recognizer.graml.graml_recognizer import Graml
  from gr_libs.recognizer.recognizer import GaAgentTrainerRecognizer, LearningRecognizer
- from gr_libs.recognizer.utils import recognizer_str_to_obj


  def validate(args, recognizer_type, task_inputs):
@@ -52,9 +52,7 @@ def run_odgr_problem(args):
          dlp_time = 0
          if issubclass(recognizer_type, LearningRecognizer):
              start_dlp_time = time.time()
-             recognizer.domain_learning_phase(
-                 base_goals=value["goals"], train_configs=value["train_configs"]
-             )
+             recognizer.domain_learning_phase(value)
              dlp_time = time.time() - start_dlp_time
          elif key.startswith("G_"):
              start_ga_time = time.time()
@@ -104,7 +102,11 @@
          }

          # need to dump the whole plan for draco because it needs it for inference phase for checking likelihood.
-         if (recognizer_type == Draco or recognizer_type == GCDraco) and issubclass(
+         if (
+             recognizer_type == Draco
+             or recognizer_type == GCDraco
+             or recognizer_type == GCAura
+         ) and issubclass(
              rl_agent_type, DeepRLAgent
          ): # TODO remove this condition, remove the assumption.
              generate_obs_kwargs["with_dict"] = True
@@ -184,10 +186,17 @@
             recognizer=args.recognizer,
         )
     )
-     print(f"generating results into {res_file_path}")
-     with open(os.path.join(res_file_path, "res.pkl"), "wb") as results_file:
+     if args.experiment_num is not None:
+         res_txt = os.path.join(res_file_path, f"res_{args.experiment_num}.txt")
+         res_pkl = os.path.join(res_file_path, f"res_{args.experiment_num}.pkl")
+     else:
+         res_txt = os.path.join(res_file_path, "res.txt")
+         res_pkl = os.path.join(res_file_path, "res.pkl")
+
+     print(f"generating results into {res_txt} and {res_pkl}")
+     with open(res_pkl, "wb") as results_file:
          dill.dump(results, results_file)
-     with open(os.path.join(res_file_path, "res.txt"), "w") as results_file:
+     with open(res_txt, "w") as results_file:
          results_file.write(str(results))


@@ -219,29 +228,14 @@ def parse_args():
              "Graql",
              "Draco",
              "GCDraco",
+             "GCAura",
          ],
          required=True,
          help="Recognizer type. Follow readme.md and recognizer folder for more information and rules.",
      )
      required_group.add_argument(
          "--task",
-         choices=[
-             "L1",
-             "L2",
-             "L3",
-             "L4",
-             "L5",
-             "L11",
-             "L22",
-             "L33",
-             "L44",
-             "L55",
-             "L111",
-             "L222",
-             "L333",
-             "L444",
-             "L555",
-         ],
+         choices=["L1", "L2", "L3", "L4", "L5"],
          required=True,
          help="Task identifier (e.g., L1, L2,...,L5)",
      )
@@ -251,6 +245,12 @@
      optional_group.add_argument(
          "--collect_stats", action="store_true", help="Whether to collect statistics"
      )
+     optional_group.add_argument(
+         "--experiment_num",
+         type=int,
+         default=None,
+         help="Experiment number for parallel runs",
+     )
      args = parser.parse_args()

      ### VALIDATE INPUTS ###
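
The new --experiment_num flag pairs with the plotting script added above: run i writes res_{i}.pkl and res_{i}.txt, and load_results later aggregates runs 0..n_runs-1. A hedged sketch of launching several runs in a loop follows; the module-style invocation is an assumption, and any required arguments not visible in this diff still need to be supplied.

# Hypothetical driver producing res_0.pkl ... res_4.pkl for one task/recognizer pair.
# Only flags visible in this diff are shown; other required arguments must be added.
import subprocess
import sys

for i in range(5):  # should match --n_runs given to generate_experiments_results
    subprocess.run(
        [
            sys.executable, "-m", "gr_libs.odgr_executor",  # assumed entry point
            "--recognizer", "GCAura",
            "--task", "L1",
            "--experiment_num", str(i),
        ],
        check=True,
    )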