gr-libs 0.1.6.post1__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff compares the content of two publicly released versions of the package as published to their public registry. It is provided for informational purposes only.
- evaluation/analyze_results_cross_alg_cross_domain.py +236 -246
- evaluation/create_minigrid_map_image.py +10 -6
- evaluation/file_system.py +16 -5
- evaluation/generate_experiments_results.py +123 -74
- evaluation/generate_experiments_results_new_ver1.py +227 -243
- evaluation/generate_experiments_results_new_ver2.py +317 -317
- evaluation/generate_task_specific_statistics_plots.py +481 -253
- evaluation/get_plans_images.py +41 -26
- evaluation/increasing_and_decreasing_.py +97 -56
- gr_libs/__init__.py +6 -1
- gr_libs/_version.py +2 -2
- gr_libs/environment/__init__.py +17 -9
- gr_libs/environment/environment.py +167 -39
- gr_libs/environment/utils/utils.py +22 -12
- gr_libs/metrics/__init__.py +5 -0
- gr_libs/metrics/metrics.py +76 -34
- gr_libs/ml/__init__.py +2 -0
- gr_libs/ml/agent.py +21 -6
- gr_libs/ml/base/__init__.py +1 -1
- gr_libs/ml/base/rl_agent.py +13 -10
- gr_libs/ml/consts.py +1 -1
- gr_libs/ml/neural/deep_rl_learner.py +433 -352
- gr_libs/ml/neural/utils/__init__.py +1 -1
- gr_libs/ml/neural/utils/dictlist.py +3 -3
- gr_libs/ml/neural/utils/penv.py +5 -2
- gr_libs/ml/planner/mcts/mcts_model.py +524 -302
- gr_libs/ml/planner/mcts/utils/__init__.py +1 -1
- gr_libs/ml/planner/mcts/utils/node.py +11 -7
- gr_libs/ml/planner/mcts/utils/tree.py +14 -10
- gr_libs/ml/sequential/__init__.py +1 -1
- gr_libs/ml/sequential/lstm_model.py +256 -175
- gr_libs/ml/tabular/state.py +7 -7
- gr_libs/ml/tabular/tabular_q_learner.py +123 -73
- gr_libs/ml/tabular/tabular_rl_agent.py +20 -19
- gr_libs/ml/utils/__init__.py +8 -2
- gr_libs/ml/utils/format.py +78 -70
- gr_libs/ml/utils/math.py +2 -1
- gr_libs/ml/utils/other.py +1 -1
- gr_libs/ml/utils/storage.py +95 -28
- gr_libs/problems/consts.py +1549 -1227
- gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +145 -80
- gr_libs/recognizer/graml/gr_dataset.py +209 -110
- gr_libs/recognizer/graml/graml_recognizer.py +431 -231
- gr_libs/recognizer/recognizer.py +38 -27
- gr_libs/recognizer/utils/__init__.py +1 -1
- gr_libs/recognizer/utils/format.py +8 -3
- {gr_libs-0.1.6.post1.dist-info → gr_libs-0.1.8.dist-info}/METADATA +1 -1
- gr_libs-0.1.8.dist-info/RECORD +70 -0
- {gr_libs-0.1.6.post1.dist-info → gr_libs-0.1.8.dist-info}/WHEEL +1 -1
- {gr_libs-0.1.6.post1.dist-info → gr_libs-0.1.8.dist-info}/top_level.txt +0 -1
- tests/test_gcdraco.py +10 -0
- tests/test_graml.py +8 -4
- tests/test_graql.py +2 -1
- tutorials/gcdraco_panda_tutorial.py +66 -0
- tutorials/gcdraco_parking_tutorial.py +61 -0
- tutorials/graml_minigrid_tutorial.py +42 -12
- tutorials/graml_panda_tutorial.py +35 -14
- tutorials/graml_parking_tutorial.py +37 -19
- tutorials/graml_point_maze_tutorial.py +33 -13
- tutorials/graql_minigrid_tutorial.py +31 -15
- CI/README.md +0 -12
- CI/docker_build_context/Dockerfile +0 -15
- gr_libs/recognizer/recognizer_doc.md +0 -61
- gr_libs-0.1.6.post1.dist-info/RECORD +0 -70
@@ -1,17 +1,27 @@
 import logging
 import sys
-from gr_libs.environment.environment import
+from gr_libs.environment.environment import (
+    MINIGRID,
+    PANDA,
+    PARKING,
+    POINT_MAZE,
+    EnvProperty,
+    MinigridProperty,
+    PandaProperty,
+    ParkingProperty,
+    PointMazeProperty,
+)
 
 
 def domain_to_env_property(domain_name: str):
-
-
-
-
-
-
-
-
-
-
-
+    if domain_name == MINIGRID:
+        return MinigridProperty
+    elif domain_name == PARKING:
+        return ParkingProperty
+    elif domain_name == PANDA:
+        return PandaProperty
+    elif domain_name == POINT_MAZE:
+        return PointMazeProperty
+    else:
+        logging.error(f"Domain {domain_name} is not supported.")
+        sys.exit(1)
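The hunk above rewrites `domain_to_env_property` as an explicit dispatch from a domain constant to its property class. A minimal usage sketch follows; the module path of the helper (`gr_libs.environment.utils.utils`) is an assumption inferred from the file list above, not verified against the wheel.

```python
# Minimal usage sketch. The helper's import path is assumed from this diff.
from gr_libs.environment.environment import MINIGRID
from gr_libs.environment.utils.utils import domain_to_env_property

property_type = domain_to_env_property(MINIGRID)
print(property_type)  # the MinigridProperty class, per the dispatch above
```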
gr_libs/metrics/__init__.py CHANGED
gr_libs/metrics/metrics.py CHANGED
@@ -7,6 +7,7 @@ from typing import Callable, Generator, List, Dict, Tuple, Any
 from math import log2
 from scipy.stats import wasserstein_distance
 from gymnasium.spaces.discrete import Discrete
+
 # import torch
 # from torch.distributions.categorical import Categorical
 
@@ -26,21 +27,24 @@ def kl_divergence(p1: List[float], p2: List[float]) -> float:
     Returns:
         float: The KL-divergence between p1 and p2
     """
-    assert
+    assert len(p1) == len(p2)
     return sum(p1[i] * log2(p1[i] / p2[i]) for i in range(len(p1)))
 
 
-def kl_divergence_norm_softmax(
+def kl_divergence_norm_softmax(
+    observations: List[Tuple[State, Any]], agent, actions: Discrete
+):
     distances = []
     p_traj = traj_to_policy(observations=observations, actions=actions)
 
     for (observation, agent_pos), action in observations:
-        state = observation[
+        state = observation["image"]
         state_pickled = dill.dumps(state)
 
         qp1 = p_traj[state_pickled]
         qp2_flatten_distribution_list: List[float] = agent.get_actions_probabilities(
-            observation=(observation, agent_pos)
+            observation=(observation, agent_pos)
+        )
         distances.append(kl_divergence(qp1, qp2_flatten_distribution_list))
     return np.mean(distances)
 
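The `kl_divergence` changes above are formatting only (the assert and the long signature are wrapped); the formula is unchanged. For reference, a self-contained numeric check of that formula with made-up distributions:

```python
from math import log2

def kl_divergence(p1, p2):
    # same formula as in the hunk above: sum_i p1[i] * log2(p1[i] / p2[i])
    assert len(p1) == len(p2)
    return sum(p1[i] * log2(p1[i] / p2[i]) for i in range(len(p1)))

p = [0.7, 0.2, 0.1]  # illustrative distributions
q = [0.5, 0.3, 0.2]
print(round(kl_divergence(p, q), 4))  # 0.1228 (bits, since log base 2)
```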
@@ -53,9 +57,10 @@ def amplify(values, alpha=1.0):
     Returns:
         np.array: amplified softmax probabilities
     """
-    values = values[:3]**alpha
+    values = values[:3] ** alpha  # currently only choose to turn or move forward
     return values / np.sum(values)
 
+
 def stochastic_amplified_selection(actions_probs, alpha=8.0):
     action_probs_amplified = amplify(actions_probs, alpha)
     choice = np.random.choice(len(action_probs_amplified), p=action_probs_amplified)
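As the new inline comment notes, `amplify()` keeps only the first three action probabilities (the turn/forward actions, per that comment) before sharpening them with the exponent and renormalizing. A small self-contained illustration with invented probabilities:

```python
import numpy as np

def amplify(values, alpha=1.0):
    # as in the hunk above: keep the first three actions, sharpen, renormalize
    values = values[:3] ** alpha
    return values / np.sum(values)

probs = np.array([0.2, 0.1, 0.5, 0.1, 0.05, 0.05, 0.0])  # invented policy output
print(amplify(probs, alpha=8.0))  # index 2 ends up with ~0.999 of the mass
```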
@@ -63,12 +68,15 @@ def stochastic_amplified_selection(actions_probs, alpha=8.0):
         choice = 6
     return choice
 
+
 def stochastic_selection(actions_probs):
     return np.random.choice(len(actions_probs), p=actions_probs)
 
+
 def greedy_selection(actions_probs):
     return np.argmax(actions_probs)
 
+
 def measure_average_sequence_distance(seq1, seq2):
     """Measures the sequence similarity between two sequences of observations and actions.
 
@@ -82,19 +90,22 @@ def measure_average_sequence_distance(seq1, seq2):
 
     # Ensure both sequences have the same length
     min_seq_len = np.min([len(seq1), len(seq2)])
-    assert
+    assert (
+        np.max([len(seq1), len(seq2)]) <= 30 * min_seq_len
+    ), "We can't really measure similarity in case the sequences are really not the same... maybe just return a default NOT_SIMILAR here."
 
     # Calculate the Euclidean distance between corresponding elements in the sequences
     distances = []
     for i in range(0, min_seq_len):
-        distances.append(np.sum(np.abs(np.array(seq1[i])-np.array(seq2[i]))))
+        distances.append(np.sum(np.abs(np.array(seq1[i]) - np.array(seq2[i]))))
 
     # Calculate the average distance over all elements
     return np.mean(np.array(distances))
 
 
-def traj_to_policy(
-
+def traj_to_policy(
+    observations: List[Tuple[State, Any]], actions: Discrete, epsilon: float = 0.0
+) -> Dict[str, List[float]]:
     # converts a trajectory from a planner to a policy
     # where the taken action has 99.99999% probability
     trajectory_as_policy = {}
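`measure_average_sequence_distance` itself only changes formatting: it averages the per-step L1 distance between aligned elements of the two sequences, and the assert merely guards against wildly different lengths. A toy example with invented sequences:

```python
import numpy as np

def measure_average_sequence_distance(seq1, seq2):
    # per-step L1 distance, averaged over the shorter sequence (assert omitted)
    min_seq_len = np.min([len(seq1), len(seq2)])
    distances = [
        np.sum(np.abs(np.array(seq1[i]) - np.array(seq2[i])))
        for i in range(min_seq_len)
    ]
    return np.mean(np.array(distances))

a = [[0, 0], [1, 1], [2, 2]]
b = [[0, 1], [1, 3], [2, 2]]
print(measure_average_sequence_distance(a, b))  # (1 + 2 + 0) / 3 = 1.0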
@@ -104,22 +115,28 @@ def traj_to_policy(observations: List[Tuple[State, Any]], actions: Discrete, eps
 
         actions_len = actions.n
         qs = [1e-6 + epsilon / actions_len for _ in range(actions_len)]
-        qs[action_index] = 1. - 1e-6 * (actions_len - 1) - epsilon
+        qs[action_index] = 1.0 - 1e-6 * (actions_len - 1) - epsilon
 
-        state = observation[
+        state = observation["image"]
         state_pickled = dill.dumps(state)
         trajectory_as_policy[state_pickled] = qs
     return trajectory_as_policy
 
-
+
+def pass_observation_patcher(
+    observations: List[Any], agent: RLAgent
+) -> Generator[None, None, None]:
     for observation in observations:
         yield observation
 
+
 def mean_wasserstein_distance(
-
-
-
-
+    observations: List[Tuple[State, Any]],
+    agent: DeepRLAgent,
+    actions: gymnasium.spaces.Box,
+    observation_patcher: Callable[
+        [List[Any], RLAgent], Generator[None, None, None]
+    ] = pass_observation_patcher,
 ):
     distances = []
 
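`traj_to_policy` keys the returned dict by the pickled observation image and stores an almost one-hot distribution over the action space. The snippet below reproduces just that distribution construction for a hypothetical 4-action discrete space:

```python
actions_len = 4   # hypothetical Discrete action space size
action_index = 2  # action actually taken at this step
epsilon = 0.0

qs = [1e-6 + epsilon / actions_len for _ in range(actions_len)]
qs[action_index] = 1.0 - 1e-6 * (actions_len - 1) - epsilon
print(qs)  # [1e-06, 1e-06, 0.999997, 1e-06]
```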
@@ -141,22 +158,28 @@ def mean_wasserstein_distance(
             wasserstein_distances.append(
                 wasserstein_distance([observation_action], [actor_mean])
             )
-        distances.append(mean(wasserstein_distances))
-    return mean(distances)
+        distances.append(np.mean(wasserstein_distances))
+    return np.mean(distances)
 
 
-def mean_action_distance_continuous(
+def mean_action_distance_continuous(
+    observations: List[Tuple[State, Any]],
+    agent: DeepRLAgent,
+    actions: gymnasium.spaces.Box,
+):
     distances = []
     for observation, action in observations:
         action2, _ = agent.model.predict(
             observation,
             state=None,
             deterministic=True,
-            episode_start=np.ones((1,), dtype=bool)
+            episode_start=np.ones((1,), dtype=bool),
        )
         action_arr, action2_arr = action[0], action2[0]
         print(f"actor means:{action2}")
-        assert len(action_arr) == len(
+        assert len(action_arr) == len(
+            action2_arr
+        ), f"Actions should be on the same length:{action},{action2}"
 
         total_diff = 0
         # total_diff = []
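In `mean_wasserstein_distance` each action dimension is compared via scipy's 1-D Wasserstein distance between two single-sample "distributions", which reduces to the absolute difference of the two values. A quick sanity check with arbitrary numbers:

```python
from scipy.stats import wasserstein_distance

observation_action, actor_mean = 0.3, -0.1  # arbitrary values
print(wasserstein_distance([observation_action], [actor_mean]))  # 0.4
print(abs(observation_action - actor_mean))                      # 0.4
```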
@@ -165,24 +188,29 @@ def mean_action_distance_continuous(observations: List[Tuple[State, Any]], agent
         # distances.append(statistics.mean(total_diff))
         distances.append(total_diff)
     # print(f"distances:{distances}")
-    return mean(distances)
+    return np.mean(distances)
 
 
-def set_agent_goal_observation(
+def set_agent_goal_observation(
+    observations: List[Any], agent: RLAgent
+) -> Generator[None, None, None]:
     copy_observation = observations.copy()
     for observation, action in copy_observation:
-        observation[
+        observation["desired_goal"] = agent.goal
         yield observation, action
 
 
 def z_score(x, mean_action: float, std_dev: float):
     return (x - mean_action) / std_dev
 
+
 def mean_p_value(
-
-
-
-
+    observations: List[Tuple[State, Any]],
+    agent: DeepRLAgent,
+    actions: gymnasium.spaces.Box,
+    observation_patcher: Callable[
+        [List[Any], RLAgent], Generator[None, None, None]
+    ] = pass_observation_patcher,
 ):
     distances = []
     for observation, observed_action in observation_patcher(observations, agent):
@@ -194,25 +222,39 @@ def mean_p_value(
         observed_actions = observed_action[0]
         log_std_dev = log_std_dev[0]
 
-        if
+        if (
+            len(actor_means) != len(observed_actions)
+            or len(actor_means) != len(log_std_dev)
+            or len(observed_actions) != len(log_std_dev)
+        ):
             raise Exception(
                 f"Length of observed actions, actor mean and std-dev should be equal! "
                 f"{len(observed_actions)},{len(actor_means)},{len(log_std_dev)}"
             )
         z_scores = []
-        for actor_mean, observation_action, action_log_std_dev in zip(
+        for actor_mean, observation_action, action_log_std_dev in zip(
+            actor_means, observed_actions, log_std_dev
+        ):
             z_scores.append(
-                math.fabs(
+                math.fabs(
+                    z_score(
+                        observation_action,
+                        actor_mean,
+                        math.pow(2, math.fabs(action_log_std_dev)),
+                    )
+                )
             )
-        mean_distances = mean(z_scores)
+        mean_distances = np.mean(z_scores)
 
         distances.append(mean_distances)
-    return mean(distances)
+    return np.mean(distances)
+
 
 def normalize(values: List[float]) -> List[float]:
     values /= sum(values)
     return values
 
+
 def max(values: List[float]) -> List[float]:
     if not len(values):
         return values
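`mean_p_value` standardizes each observed action dimension against the actor mean, recovering a standard deviation as `2 ** abs(log_std_dev)` exactly as written in the hunk above, and averages the absolute z-scores. A short numeric illustration with invented values:

```python
import math

def z_score(x, mean_action: float, std_dev: float):
    return (x - mean_action) / std_dev

observed_action, actor_mean, log_std_dev = 1.5, 0.5, -1.0  # invented values
std_dev = math.pow(2, math.fabs(log_std_dev))               # 2.0
print(math.fabs(z_score(observed_action, actor_mean, std_dev)))  # 0.5
```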
@@ -220,4 +262,4 @@ def max(values: List[float]) -> List[float]:
     argmax = vals.argmax()
     vals[:] = 0.0
     vals[argmax] = 1.0
-    return vals
+    return vals
gr_libs/ml/__init__.py CHANGED
gr_libs/ml/agent.py CHANGED
@@ -2,6 +2,7 @@ import torch
 
 from gr_libs.ml import utils
 from gr_libs.ml.utils.other import device
+
 # from ml.neural import ACModel
 
 
@@ -12,15 +13,27 @@ class Agent:
     - to choose an action given an observation,
     - to analyze the feedback (i.e. reward and done state) of its action."""
 
-    def __init__(
-
+    def __init__(
+        self,
+        obs_space,
+        action_space,
+        model_dir,
+        argmax=False,
+        num_envs=1,
+        use_memory=True,
+        use_text=False,
+    ):
         obs_space, self.preprocess_obss = utils.get_obss_preprocessor(obs_space)
-        self.acmodel = ACModel(
+        self.acmodel = ACModel(
+            obs_space, action_space, use_memory=use_memory, use_text=use_text
+        )
         self.argmax = argmax
         self.num_envs = num_envs
 
         if self.acmodel.recurrent:
-            self.memories = torch.zeros(
+            self.memories = torch.zeros(
+                self.num_envs, self.acmodel.memory_size, device=device
+            )
 
         self.acmodel.load_state_dict(utils.get_model_state(model_dir))
         self.acmodel.to(device)
@@ -49,8 +62,10 @@ class Agent:
 
     def analyze_feedbacks(self, rewards, dones):
         if self.acmodel.recurrent:
-            masks = 1 - torch.tensor(dones, dtype=torch.float, device=device).unsqueeze(
+            masks = 1 - torch.tensor(dones, dtype=torch.float, device=device).unsqueeze(
+                1
+            )
             self.memories *= masks
 
     def analyze_feedback(self, reward, done):
-        return self.analyze_feedbacks([reward], [done])
+        return self.analyze_feedbacks([reward], [done])
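The reformatted `analyze_feedbacks` keeps the same masking trick: a done flag zeroes the corresponding row of the recurrent memory so the next episode starts from a clean state. A small standalone demonstration (shapes are illustrative only):

```python
import torch

memories = torch.ones(2, 4)  # 2 parallel envs, memory size 4 (illustrative)
dones = [True, False]
masks = 1 - torch.tensor(dones, dtype=torch.float).unsqueeze(1)  # shape (2, 1)
memories *= masks
print(memories)  # first row zeroed, second row untouched
```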
gr_libs/ml/base/__init__.py CHANGED
@@ -1 +1 @@
-from gr_libs.ml.base.rl_agent import RLAgent, State, ContextualAgent
+from gr_libs.ml.base.rl_agent import RLAgent, State, ContextualAgent
gr_libs/ml/base/rl_agent.py CHANGED
@@ -4,22 +4,24 @@ import numpy as np
 
 State = Any
 
+
 class ContextualAgent:
     def __init__(self, problem_name, problem_goal, agent):
         self.problem_name = problem_name
         self.problem_goal = problem_goal
         self.agent = agent
 
+
 class RLAgent(ABC):
     def __init__(
-
-
-
-
-
-
-
-
+        self,
+        episodes: int,
+        decaying_eps: bool,
+        epsilon: float,
+        learning_rate: float,
+        gamma: float,
+        problem_name: str,
+        domain_name: str,
     ):
         self.episodes = episodes
         self.decaying_eps = decaying_eps
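The `RLAgent` constructor now lists its hyperparameters one per line. A hypothetical subclass sketch showing the keyword-style call this enables; the class name and values here are invented for illustration only:

```python
from gr_libs.ml.base.rl_agent import RLAgent

class MyTabularAgent(RLAgent):  # hypothetical subclass, not part of the package
    def __init__(self, problem_name: str, domain_name: str):
        super().__init__(
            episodes=1000,
            decaying_eps=True,
            epsilon=1.0,
            learning_rate=0.1,
            gamma=0.99,
            problem_name=problem_name,
            domain_name=domain_name,
        )
```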
@@ -46,9 +48,10 @@ class RLAgent(ABC):
 
     def update_states_counter(self, observation_str: str):
         if observation_str in self.states_counter:
-            self.states_counter[observation_str] =
+            self.states_counter[observation_str] = (
+                self.states_counter[observation_str] + 1
+            )
         else:
             self.states_counter[observation_str] = 1
         if len(self.states_counter) % 10000 == 0:
             print(f"probably error to many {len(self.states_counter)}")
-
gr_libs/ml/consts.py CHANGED