gr-libs 0.2.2__py3-none-any.whl → 0.2.6__py3-none-any.whl
This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- gr_libs/__init__.py +6 -1
- gr_libs/_evaluation/_generate_experiments_results.py +0 -141
- gr_libs/_version.py +2 -2
- gr_libs/all_experiments.py +73 -107
- gr_libs/environment/environment.py +126 -17
- gr_libs/evaluation/generate_experiments_results.py +100 -0
- gr_libs/ml/consts.py +1 -0
- gr_libs/ml/neural/deep_rl_learner.py +118 -34
- gr_libs/odgr_executor.py +27 -27
- gr_libs/problems/consts.py +568 -290
- gr_libs/recognizer/_utils/__init__.py +1 -0
- gr_libs/recognizer/_utils/format.py +7 -1
- gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +158 -2
- gr_libs/recognizer/graml/graml_recognizer.py +18 -10
- gr_libs/recognizer/recognizer.py +4 -4
- gr_libs/tutorials/gcaura_panda_tutorial.py +168 -0
- gr_libs/tutorials/gcaura_parking_tutorial.py +167 -0
- gr_libs/tutorials/gcaura_point_maze_tutorial.py +169 -0
- gr_libs/tutorials/gcdraco_panda_tutorial.py +6 -2
- gr_libs/tutorials/gcdraco_parking_tutorial.py +3 -1
- gr_libs/tutorials/graml_minigrid_tutorial.py +16 -12
- gr_libs/tutorials/graml_panda_tutorial.py +6 -2
- gr_libs/tutorials/graml_parking_tutorial.py +3 -1
- gr_libs/tutorials/graml_point_maze_tutorial.py +15 -2
- {gr_libs-0.2.2.dist-info → gr_libs-0.2.6.dist-info}/METADATA +31 -15
- {gr_libs-0.2.2.dist-info → gr_libs-0.2.6.dist-info}/RECORD +35 -29
- {gr_libs-0.2.2.dist-info → gr_libs-0.2.6.dist-info}/WHEEL +1 -1
- tests/test_gcaura.py +15 -0
- tests/test_odgr_executor_expertbasedgraml.py +14 -0
- tests/test_odgr_executor_gcaura.py +14 -0
- tests/test_odgr_executor_gcdraco.py +14 -0
- tests/test_odgr_executor_gcgraml.py +14 -0
- tests/test_odgr_executor_graql.py +14 -0
- gr_libs/_evaluation/_analyze_results_cross_alg_cross_domain.py +0 -260
- gr_libs/_evaluation/_generate_task_specific_statistics_plots.py +0 -497
- gr_libs/_evaluation/_get_plans_images.py +0 -61
- gr_libs/_evaluation/_increasing_and_decreasing_.py +0 -106
- /gr_libs/{_evaluation → evaluation}/__init__.py +0 -0
- {gr_libs-0.2.2.dist-info → gr_libs-0.2.6.dist-info}/top_level.txt +0 -0
gr_libs/evaluation/generate_experiments_results.py
ADDED
@@ -0,0 +1,100 @@
+import argparse
+import os
+
+import dill
+import matplotlib.pyplot as plt
+import numpy as np
+
+from gr_libs.ml.utils.storage import get_experiment_results_path
+
+
+def load_results(domain, env, task, recognizer, n_runs, percentage, cons_type):
+    # Collect accuracy for a single task and recognizer
+    accs = []
+    res_dir = get_experiment_results_path(domain, env, task, recognizer)
+    if not os.path.exists(res_dir):
+        return accs
+    for i in range(n_runs):
+        res_file = os.path.join(res_dir, f"res_{i}.pkl")
+        if not os.path.exists(res_file):
+            continue
+        with open(res_file, "rb") as f:
+            results = dill.load(f)
+        if percentage in results and cons_type in results[percentage]:
+            acc = results[percentage][cons_type].get("accuracy")
+            if acc is not None:
+                accs.append(acc)
+    return accs
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--domain", required=True)
+    parser.add_argument("--env", required=True)
+    parser.add_argument("--tasks", nargs="+", required=True)
+    parser.add_argument("--recognizers", nargs="+", required=True)
+    parser.add_argument("--n_runs", type=int, default=5)
+    parser.add_argument("--percentage", required=True)
+    parser.add_argument(
+        "--cons_type", choices=["consecutive", "non_consecutive"], required=True
+    )
+    parser.add_argument("--graph_name", type=str, default="experiment_results")
+    args = parser.parse_args()
+
+    plt.figure(figsize=(7, 5))
+    has_data = False
+    missing_recognizers = []
+
+    for recognizer in args.recognizers:
+        x_vals = []
+        y_means = []
+        y_sems = []
+        for task in args.tasks:
+            accs = load_results(
+                args.domain,
+                args.env,
+                task,
+                recognizer,
+                args.n_runs,
+                args.percentage,
+                args.cons_type,
+            )
+            if accs:
+                x_vals.append(task)
+                y_means.append(np.mean(accs))
+                y_sems.append(np.std(accs) / np.sqrt(len(accs)))
+        if x_vals:
+            has_data = True
+            x_ticks = np.arange(len(x_vals))
+            plt.plot(x_ticks, y_means, marker="o", label=recognizer)
+            plt.fill_between(
+                x_ticks,
+                np.array(y_means) - np.array(y_sems),
+                np.array(y_means) + np.array(y_sems),
+                alpha=0.2,
+            )
+            plt.xticks(x_ticks, x_vals)
+        else:
+            print(
+                f"Warning: No data found for recognizer '{recognizer}' in {args.domain} / {args.env} / {args.percentage} / {args.cons_type}"
+            )
+            missing_recognizers.append(recognizer)
+
+    if not has_data:
+        raise RuntimeError(
+            f"No data found for any recognizer in {args.domain} / {args.env} / {args.percentage} / {args.cons_type}. "
+            f"Missing recognizers: {', '.join(missing_recognizers)}"
+        )
+
+    plt.xlabel("Task")
+    plt.ylabel("Accuracy")
+    plt.title(f"{args.domain} - {args.env} ({args.percentage}, {args.cons_type})")
+    plt.legend()
+    plt.grid(True)
+    fig_path = f"{args.graph_name}_{'_'.join(args.recognizers)}_{args.domain}_{args.env}_{args.percentage}_{args.cons_type}.png"
+    plt.savefig(fig_path)
+    print(f"Figure saved at: {fig_path}")
+
+
+if __name__ == "__main__":
+    main()
gr_libs/ml/neural/deep_rl_learner.py
CHANGED
@@ -1,11 +1,12 @@
 import gc
 from collections import OrderedDict
 from types import MethodType
+from typing import Any

 import cv2
 import numpy as np

-from gr_libs.environment.environment import EnvProperty
+from gr_libs.environment.environment import EnvProperty, suppress_output

 if __name__ != "__main__":
     from gr_libs.ml.utils.storage import get_agent_model_dir
@@ -22,6 +23,10 @@ from stable_baselines3.common.base_class import BaseAlgorithm

 from gr_libs.ml.utils import device

+from gr_libs.ml.consts import (
+    FINETUNE_TIMESTEPS,
+)
+
 # TODO do we need this?
 NETWORK_SETUP = {
     SAC: OrderedDict(
@@ -184,12 +189,7 @@ class DeepRLAgent:
         """
         fourcc = cv2.VideoWriter_fourcc("m", "p", "4", "v")
         fps = 30.0
-
-        # assert goal_idx is not None
-        # self.reset_with_goal_idx(goal_idx)
-        # else:
-        # assert goal_idx is None
-        self.env.reset()
+        self.safe_env_reset()
         frame_size = (
             self.env.render(mode="rgb_array").shape[1],
             self.env.render(mode="rgb_array").shape[0],
@@ -198,7 +198,7 @@ class DeepRLAgent:
         video_writer = cv2.VideoWriter(video_path, fourcc, fps, frame_size)
         general_done, success_done = False, False
         gc.collect()
-        obs = self.
+        obs = self.safe_env_reset()
         self.env_prop.change_goal_to_specific_desired(obs, desired)
         counter = 0
         while not (general_done or success_done):
@@ -209,17 +209,11 @@ class DeepRLAgent:
             general_done = general_done[0]
             self.env_prop.change_goal_to_specific_desired(obs, desired)
             if "success" in info[0].keys():
-                success_done = info[0][
-                    "success"
-                ]  # make sure the agent actually reached the goal within the max time
+                success_done = info[0]["success"]
             elif "is_success" in info[0].keys():
-                success_done = info[0][
-                    "is_success"
-                ]  # make sure the agent actually reached the goal within the max time
+                success_done = info[0]["is_success"]
             elif "step_task_completions" in info[0].keys():
-                success_done = (
-                    len(info[0]["step_task_completions"]) == 1
-                )  # bug of dummyVecEnv, it removes the episode_task_completions from the info dict.
+                success_done = len(info[0]["step_task_completions"]) == 1
             else:
                 raise NotImplementedError(
                     "no other option for any of the environments."
@@ -247,40 +241,59 @@ class DeepRLAgent:
             self._model_file_path, env=self.env, device=device, **self.model_kwargs
         )

-    def learn(self):
+    def learn(self, goal=None, total_timesteps=None):
         """Train the agent."""
-
-
+        model_file_path = self._model_file_path
+        old_model_file_path = model_file_path
+        if goal is not None:
+            model_file_path = self._model_file_path.replace(
+                ".pth", f"_{goal}.pth"
+            ).replace(".zip", f"_{goal}.zip")
+        if total_timesteps is not None:
+            model_file_path = model_file_path.replace(
+                ".pth", f"_{total_timesteps}.pth"
+            ).replace(".zip", f"_{total_timesteps}.zip")
+
+        self._model_file_path = model_file_path
+
+        if os.path.exists(model_file_path):
+            print(f"Loading pre-existing model in {model_file_path}")
             self.load_model()
         else:
-            print(f"No existing model in {
-            if
-
-
-            self.
-
-
-
-
-
+            print(f"No existing model in {model_file_path}, starting learning")
+            if total_timesteps is None:
+                total_timesteps = self.num_timesteps
+            if self.exploration_rate is not None:
+                self._model = self.algorithm(
+                    "MultiInputPolicy",
+                    self.env,
+                    ent_coef=self.exploration_rate,
+                    verbose=1,
+                )
+            else:
+                self._model = self.algorithm(
+                    "MultiInputPolicy", self.env, verbose=1
+                )
             self._model.learn(
-                total_timesteps=
+                total_timesteps=total_timesteps, progress_bar=True
             )  # comment this in a normal env
             self.save_model()

+        self._model_file_path = old_model_file_path
+
     def safe_env_reset(self):
         """
-        Reset the environment safely.
+        Reset the environment safely, suppressing output.

         Returns:
             The initial observation.
         """
         try:
-            obs = self.env
+            obs = suppress_env_reset(self.env)
         except Exception:
             kwargs = {"id": self.problem_name, "render_mode": "rgb_array"}
             self.env = self.env_prop.create_vec_env(kwargs)
-            obs = self.env
+            obs = suppress_env_reset(self.env)
         return obs

     def get_mean_and_std_dev(self, observation):
@@ -514,6 +527,69 @@ class DeepRLAgent:
         self.env.close()
         return observations

+    def fine_tune(
+        self,
+        goal: Any,
+        num_timesteps: int = FINETUNE_TIMESTEPS,
+    ) -> None:
+        """
+        Fine-tune this goal-conditioned agent on a single specified goal.
+        Overrides optimizer LR if provided, resets the env to the goal, and continues training.
+
+        Args:
+            goal: The specific goal to fine-tune on. Type depends on the environment.
+            num_timesteps: Number of timesteps for fine-tuning. Defaults to FINETUNE_TIMESTEPS.
+            learning_rate: Learning rate for fine-tuning. Defaults to FINETUNE_LR.
+        """
+        # Store original environment and problem
+        original_env = self.env
+        original_problem = self.problem_name
+        created_new_env = False
+
+        try:
+            # Try to create a goal-specific environment
+            if hasattr(self.env_prop, "goal_to_problem_str") and callable(
+                self.env_prop.goal_to_problem_str
+            ):
+                try:
+                    goal_problem = self.env_prop.goal_to_problem_str(goal)
+
+                    # Create the goal-specific environment
+                    env_kwargs = {"id": goal_problem, "render_mode": "rgb_array"}
+                    new_env = self.env_prop.create_vec_env(env_kwargs)
+
+                    # Update the model's environment
+                    self._model.set_env(new_env)
+                    self.env = new_env
+                    self.problem_name = goal_problem
+                    created_new_env = True
+                    print(f"Created a new environment for fine-tuning: {goal_problem}")
+                except Exception as e:
+                    print(f"Warning: Could not create goal-specific environment: {e}")
+
+            if not created_new_env:
+                print(
+                    (
+                        "Fine-tuning requires a goal-specific environment."
+                        "Please ensure that the environment with the specified goal exists."
+                    )
+                )
+
+            print(f"Fine-tuning for {num_timesteps} timesteps...")
+            self.learn(
+                goal=self.env_prop.goal_to_str(goal), total_timesteps=num_timesteps
+            )
+            print("Fine-tuning complete. Model saved.")
+
+        finally:
+            # Restore original environment if needed
+            if created_new_env:
+                self.env.close()
+                self._model.set_env(original_env)
+                self.env = original_env
+                self.problem_name = original_problem
+                print("Restored original environment.")
+

 class GCDeepRLAgent(DeepRLAgent):
     """
@@ -632,3 +708,11 @@ class GCDeepRLAgent(DeepRLAgent):
             desired=goal_directed_goal,
         )
         return observations
+
+
+def suppress_env_reset(env):
+    """
+    Utility function to suppress prints during env.reset().
+    """
+    with suppress_output():
+        return env.reset()
gr_libs/odgr_executor.py
CHANGED
@@ -4,7 +4,7 @@ import time

 import dill

-from gr_libs.environment.
+from gr_libs.environment._utils.utils import domain_to_env_property
 from gr_libs.metrics.metrics import stochastic_amplified_selection
 from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
 from gr_libs.ml.utils.format import random_subset_with_order
@@ -14,10 +14,10 @@ from gr_libs.ml.utils.storage import (
     get_policy_sequences_result_path,
 )
 from gr_libs.problems.consts import PROBLEMS
-from gr_libs.recognizer.
+from gr_libs.recognizer._utils import recognizer_str_to_obj
+from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Draco, GCDraco, GCAura
 from gr_libs.recognizer.graml.graml_recognizer import Graml
 from gr_libs.recognizer.recognizer import GaAgentTrainerRecognizer, LearningRecognizer
-from gr_libs.recognizer.utils import recognizer_str_to_obj


 def validate(args, recognizer_type, task_inputs):
@@ -52,9 +52,7 @@ def run_odgr_problem(args):
         dlp_time = 0
         if issubclass(recognizer_type, LearningRecognizer):
             start_dlp_time = time.time()
-            recognizer.domain_learning_phase(
-                base_goals=value["goals"], train_configs=value["train_configs"]
-            )
+            recognizer.domain_learning_phase(value)
             dlp_time = time.time() - start_dlp_time
         elif key.startswith("G_"):
             start_ga_time = time.time()
@@ -104,7 +102,11 @@ def run_odgr_problem(args):
             }

             # need to dump the whole plan for draco because it needs it for inference phase for checking likelihood.
-            if (
+            if (
+                recognizer_type == Draco
+                or recognizer_type == GCDraco
+                or recognizer_type == GCAura
+            ) and issubclass(
                 rl_agent_type, DeepRLAgent
             ):  # TODO remove this condition, remove the assumption.
                 generate_obs_kwargs["with_dict"] = True
@@ -184,10 +186,17 @@ def run_odgr_problem(args):
             recognizer=args.recognizer,
         )
     )
-
-
+    if args.experiment_num is not None:
+        res_txt = os.path.join(res_file_path, f"res_{args.experiment_num}.txt")
+        res_pkl = os.path.join(res_file_path, f"res_{args.experiment_num}.pkl")
+    else:
+        res_txt = os.path.join(res_file_path, "res.txt")
+        res_pkl = os.path.join(res_file_path, "res.pkl")
+
+    print(f"generating results into {res_txt} and {res_pkl}")
+    with open(res_pkl, "wb") as results_file:
         dill.dump(results, results_file)
-    with open(
+    with open(res_txt, "w") as results_file:
         results_file.write(str(results))


@@ -219,29 +228,14 @@ def parse_args():
             "Graql",
             "Draco",
             "GCDraco",
+            "GCAura",
         ],
         required=True,
         help="Recognizer type. Follow readme.md and recognizer folder for more information and rules.",
     )
     required_group.add_argument(
         "--task",
-        choices=[
-            "L1",
-            "L2",
-            "L3",
-            "L4",
-            "L5",
-            "L11",
-            "L22",
-            "L33",
-            "L44",
-            "L55",
-            "L111",
-            "L222",
-            "L333",
-            "L444",
-            "L555",
-        ],
+        choices=["L1", "L2", "L3", "L4", "L5"],
         required=True,
         help="Task identifier (e.g., L1, L2,...,L5)",
     )
@@ -251,6 +245,12 @@ def parse_args():
     optional_group.add_argument(
         "--collect_stats", action="store_true", help="Whether to collect statistics"
     )
+    optional_group.add_argument(
+        "--experiment_num",
+        type=int,
+        default=None,
+        help="Experiment number for parallel runs",
+    )
     args = parser.parse_args()

     ### VALIDATE INPUTS ###