gr-libs 0.1.7.post0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff shows the content of publicly available package versions as released to their public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- gr_libs/__init__.py +4 -1
- gr_libs/_evaluation/__init__.py +1 -0
- gr_libs/_evaluation/_analyze_results_cross_alg_cross_domain.py +260 -0
- gr_libs/_evaluation/_generate_experiments_results.py +141 -0
- gr_libs/_evaluation/_generate_task_specific_statistics_plots.py +497 -0
- gr_libs/_evaluation/_get_plans_images.py +61 -0
- gr_libs/_evaluation/_increasing_and_decreasing_.py +106 -0
- gr_libs/_version.py +2 -2
- gr_libs/all_experiments.py +294 -0
- gr_libs/environment/__init__.py +30 -9
- gr_libs/environment/_utils/utils.py +27 -0
- gr_libs/environment/environment.py +417 -54
- gr_libs/metrics/__init__.py +7 -0
- gr_libs/metrics/metrics.py +231 -54
- gr_libs/ml/__init__.py +2 -5
- gr_libs/ml/agent.py +21 -6
- gr_libs/ml/base/__init__.py +3 -1
- gr_libs/ml/base/rl_agent.py +81 -13
- gr_libs/ml/consts.py +1 -1
- gr_libs/ml/neural/__init__.py +1 -3
- gr_libs/ml/neural/deep_rl_learner.py +619 -378
- gr_libs/ml/neural/utils/__init__.py +1 -2
- gr_libs/ml/neural/utils/dictlist.py +3 -3
- gr_libs/ml/planner/mcts/{utils → _utils}/__init__.py +1 -1
- gr_libs/ml/planner/mcts/{utils → _utils}/node.py +11 -7
- gr_libs/ml/planner/mcts/{utils → _utils}/tree.py +15 -11
- gr_libs/ml/planner/mcts/mcts_model.py +571 -312
- gr_libs/ml/sequential/__init__.py +0 -1
- gr_libs/ml/sequential/_lstm_model.py +270 -0
- gr_libs/ml/tabular/__init__.py +1 -3
- gr_libs/ml/tabular/state.py +7 -7
- gr_libs/ml/tabular/tabular_q_learner.py +150 -82
- gr_libs/ml/tabular/tabular_rl_agent.py +42 -28
- gr_libs/ml/utils/__init__.py +2 -3
- gr_libs/ml/utils/format.py +28 -97
- gr_libs/ml/utils/math.py +5 -3
- gr_libs/ml/utils/other.py +3 -3
- gr_libs/ml/utils/storage.py +88 -81
- gr_libs/odgr_executor.py +268 -0
- gr_libs/problems/consts.py +1549 -1227
- gr_libs/recognizer/_utils/__init__.py +0 -0
- gr_libs/recognizer/_utils/format.py +18 -0
- gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +233 -88
- gr_libs/recognizer/graml/_gr_dataset.py +233 -0
- gr_libs/recognizer/graml/graml_recognizer.py +586 -252
- gr_libs/recognizer/recognizer.py +90 -30
- gr_libs/tutorials/draco_panda_tutorial.py +58 -0
- gr_libs/tutorials/draco_parking_tutorial.py +56 -0
- gr_libs/tutorials/gcdraco_panda_tutorial.py +62 -0
- gr_libs/tutorials/gcdraco_parking_tutorial.py +57 -0
- gr_libs/tutorials/graml_minigrid_tutorial.py +64 -0
- gr_libs/tutorials/graml_panda_tutorial.py +57 -0
- gr_libs/tutorials/graml_parking_tutorial.py +52 -0
- gr_libs/tutorials/graml_point_maze_tutorial.py +60 -0
- gr_libs/tutorials/graql_minigrid_tutorial.py +50 -0
- {gr_libs-0.1.7.post0.dist-info → gr_libs-0.2.2.dist-info}/METADATA +84 -29
- gr_libs-0.2.2.dist-info/RECORD +71 -0
- {gr_libs-0.1.7.post0.dist-info → gr_libs-0.2.2.dist-info}/WHEEL +1 -1
- gr_libs-0.2.2.dist-info/top_level.txt +2 -0
- tests/test_draco.py +14 -0
- tests/test_gcdraco.py +10 -0
- tests/test_graml.py +12 -8
- tests/test_graql.py +3 -2
- evaluation/analyze_results_cross_alg_cross_domain.py +0 -277
- evaluation/create_minigrid_map_image.py +0 -34
- evaluation/file_system.py +0 -42
- evaluation/generate_experiments_results.py +0 -92
- evaluation/generate_experiments_results_new_ver1.py +0 -254
- evaluation/generate_experiments_results_new_ver2.py +0 -331
- evaluation/generate_task_specific_statistics_plots.py +0 -272
- evaluation/get_plans_images.py +0 -47
- evaluation/increasing_and_decreasing_.py +0 -63
- gr_libs/environment/utils/utils.py +0 -17
- gr_libs/ml/neural/utils/penv.py +0 -57
- gr_libs/ml/sequential/lstm_model.py +0 -192
- gr_libs/recognizer/graml/gr_dataset.py +0 -134
- gr_libs/recognizer/utils/__init__.py +0 -1
- gr_libs/recognizer/utils/format.py +0 -13
- gr_libs-0.1.7.post0.dist-info/RECORD +0 -67
- gr_libs-0.1.7.post0.dist-info/top_level.txt +0 -4
- tutorials/graml_minigrid_tutorial.py +0 -34
- tutorials/graml_panda_tutorial.py +0 -41
- tutorials/graml_parking_tutorial.py +0 -39
- tutorials/graml_point_maze_tutorial.py +0 -39
- tutorials/graql_minigrid_tutorial.py +0 -34
- /gr_libs/environment/{utils → _utils}/__init__.py +0 -0
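The listing shows that the standalone evaluation/ and tutorials/ packages, along with several helper modules, moved into the gr_libs namespace in 0.2.2, mostly under underscore-prefixed (private) names. The sketch below is a hypothetical import migration for downstream code, inferred only from the renamed paths above; it assumes the public symbols (for example run_graml_minigrid_tutorial and domain_to_env_property) kept their names in the new modules.

# Hypothetical import migration from 0.1.7.post0 to 0.2.2 (assumed from the paths above, not verified against the new code).
# 0.1.7.post0:
#   from tutorials.graml_minigrid_tutorial import run_graml_minigrid_tutorial
#   from gr_libs.environment.utils.utils import domain_to_env_property
# 0.2.2:
from gr_libs.tutorials.graml_minigrid_tutorial import run_graml_minigrid_tutorial
from gr_libs.environment._utils.utils import domain_to_env_property
run_graml_minigrid_tutorial()  # entry point defined in the old tutorial; assumed unchanged in 0.2.2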
gr_libs/recognizer/graml/gr_dataset.py (removed)
@@ -1,134 +0,0 @@
-import numpy as np
-from torch.utils.data import Dataset
-import random
-from types import MethodType
-from typing import List
-from gr_libs.environment.environment import EnvProperty
-from gr_libs.metrics.metrics import measure_average_sequence_distance
-from gr_libs.ml.base.rl_agent import ContextualAgent
-from gr_libs.ml.utils import get_siamese_dataset_path
-from gr_libs.ml.base import RLAgent
-import os
-import dill
-import torch
-
-class GRDataset(Dataset):
-    def __init__(self, num_samples, samples):
-        self.num_samples = num_samples
-        self.samples = samples
-
-    def __len__(self):
-        return self.num_samples
-
-    def __getitem__(self, idx):
-        return self.samples[idx] # returns a tuple - as appended in 'generate_dataset' last line
-
-def check_diff_goals(first_agent_goal, second_agent_goal):
-    try:
-        assert first_agent_goal != second_agent_goal
-    except Exception as e:
-        try:
-            assert any(first_agent_goal != second_agent_goal)
-        except Exception as e:
-            for arr1, arr2 in zip(first_agent_goal, second_agent_goal):
-                assert any(elm1!=elm2 for elm1, elm2 in zip(arr1, arr2))
-
-def generate_datasets(num_samples, agents: List[ContextualAgent], observation_creation_method : MethodType, problems: List[str], env_prop:EnvProperty, recognizer_name:str, gc_goal_set=None):
-    if gc_goal_set: model_name = env_prop.name
-    else: model_name = env_prop.problem_list_to_str_tuple(problems)
-    dataset_directory = get_siamese_dataset_path(domain_name=env_prop.domain_name, env_name=env_prop.name, model_name=model_name, recognizer=recognizer_name)
-    dataset_train_path, dataset_dev_path = os.path.join(dataset_directory, 'train.pkl'), os.path.join(dataset_directory, 'dev.pkl')
-    if os.path.exists(dataset_train_path) and os.path.exists(dataset_dev_path):
-        print(f"Loading pre-existing datasets in {dataset_directory}")
-        with open(dataset_train_path, 'rb') as train_file:
-            train_samples = dill.load(train_file)
-        with open(dataset_dev_path, 'rb') as dev_file:
-            dev_samples = dill.load(dev_file)
-    else:
-        print(f"{dataset_directory} doesn't exist, generating datasets")
-        if not os.path.exists(dataset_directory):
-            os.makedirs(dataset_directory)
-        all_samples = []
-        for i in range(num_samples):
-            if gc_goal_set != None: # TODO change to having one flow for both cases and injecting according to gc_goal_set or not
-                assert env_prop.gc_adaptable() == True, "shouldn't specify a goal directed representation if not generating datasets with a general agent."
-                is_same_goal = (np.random.choice([1, 0], 1, p=[1/max(len(gc_goal_set), 6), 1 - 1/max(len(gc_goal_set), 6)]))[0]
-                first_is_consecutive = np.random.choice([True, False], 1, p=[0.5, 0.5])[0]
-                first_random_index = np.random.randint(0, len(gc_goal_set)) # works for lists of every object type, while np.choice only works for 1d arrays
-                first_agent_goal = gc_goal_set[first_random_index] # could be either a real goal or a goal-directed problem name
-                #first_agent_goal = np.random.choice(gc_goal_set)
-                first_trace_percentage = random.choice([0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
-                first_observation = []
-                first_agent_kwargs = {
-                    "action_selection_method": observation_creation_method,
-                    "percentage": first_trace_percentage,
-                    "is_consecutive": first_is_consecutive,
-                    "save_fig": False
-                }
-                while first_observation == []:
-                    # needs to be different than agents[0] problem_name, it should be from the gc_goal_set.
-                    # but the problem is with the panda because it
-                    if env_prop.use_goal_directed_problem(): first_agent_kwargs["goal_directed_problem"] = first_agent_goal
-                    else: first_agent_kwargs["goal_directed_goal"] = first_agent_goal
-                    first_observation = agents[0].agent.generate_partial_observation(**first_agent_kwargs)
-                first_observation = agents[0].agent.simplify_observation(first_observation)
-
-                second_is_consecutive = np.random.choice([True, False], 1, p=[0.5, 0.5])[0]
-                second_agent_goal = first_agent_goal
-                second_random_index = first_random_index
-                if not is_same_goal:
-                    second_random_index = np.random.choice([i for i in range(len(gc_goal_set)) if i != first_random_index])
-                    assert first_random_index != second_random_index
-                second_agent_goal = gc_goal_set[second_random_index]
-                if not is_same_goal: check_diff_goals(first_agent_goal, second_agent_goal)
-                second_trace_percentage = first_trace_percentage
-                second_observation = []
-                second_agent_kwargs = {
-                    "action_selection_method": observation_creation_method,
-                    "percentage": second_trace_percentage,
-                    "is_consecutive": second_is_consecutive,
-                    "save_fig": False
-                }
-                while second_observation == []:
-                    if env_prop.use_goal_directed_problem() == True: second_agent_kwargs["goal_directed_problem"] = second_agent_goal
-                    else: second_agent_kwargs["goal_directed_goal"] = second_agent_goal
-                    second_observation = agents[0].agent.generate_partial_observation(**second_agent_kwargs)
-                second_observation = agents[0].agent.simplify_observation(second_observation)
-            else:
-                is_same_goal = (np.random.choice([1, 0], 1, p=[1/max(len(agents), 6), 1 - 1/max(len(agents), 6)]))[0]
-                first_is_consecutive = np.random.choice([True, False], 1, p=[0.5, 0.5])[0]
-                first_agent = np.random.choice(agents)
-                first_trace_percentage = random.choice([0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
-                first_observation = first_agent.agent.generate_partial_observation(action_selection_method=observation_creation_method, percentage=first_trace_percentage, is_consecutive=first_is_consecutive, save_fig=False, random_optimalism=True)
-                first_observation = first_agent.agent.simplify_observation(first_observation)
-
-                second_agent = first_agent
-                if not is_same_goal:
-                    second_agent = np.random.choice([agent for agent in agents if agent != first_agent])
-                    assert second_agent != first_agent
-                second_is_consecutive = np.random.choice([True, False], 1, p=[0.5, 0.5])[0]
-                second_trace_percentage = first_trace_percentage
-                second_observation = second_agent.agent.generate_partial_observation(action_selection_method=observation_creation_method, percentage=second_trace_percentage, is_consecutive=second_is_consecutive, save_fig=False, random_optimalism=True)
-                second_observation = second_agent.agent.simplify_observation(second_observation)
-            if is_same_goal:
-                observations_distance = measure_average_sequence_distance(first_observation, second_observation) # for debugging mate
-            all_samples.append((
-                [torch.tensor(observation, dtype=torch.float32) for observation in first_observation],
-                [torch.tensor(observation, dtype=torch.float32) for observation in second_observation],
-                torch.tensor(is_same_goal, dtype=torch.float32)))
-            # all_samples.append((first_observation, second_observation, torch.tensor(is_same_goal, dtype=torch.float32)))
-            if i % 1000 == 0:
-                print(f'generated {i} samples')
-
-        total_samples = len(all_samples)
-        train_size = int(0.8 * total_samples)
-        train_samples = all_samples[:train_size]
-        dev_samples = all_samples[train_size:]
-        with open(dataset_train_path, 'wb') as train_file:
-            dill.dump(train_samples, train_file)
-        with open(dataset_dev_path, 'wb') as dev_file:
-            dill.dump(dev_samples, dev_file)
-
-    return train_samples, dev_samples
-
-
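In 0.2.2 this module is replaced by the private gr_libs/recognizer/graml/_gr_dataset.py (see the listing above). As a consumption note, not gr-libs API: generate_datasets above builds each sample as (per-step tensors of trace A, per-step tensors of trace B, same-goal label), and the two traces generally have different lengths, so PyTorch's default collate cannot stack a batch of them. The sketch below is a minimal, self-contained example with toy data showing a pass-through collate that keeps each trace separate.

import torch
from torch.utils.data import DataLoader

def collate_pairs(batch):
    # stack each trace into a (steps, obs_dim) tensor; keep traces in lists because lengths differ
    first = [torch.stack(obs_a) for obs_a, _, _ in batch]
    second = [torch.stack(obs_b) for _, obs_b, _ in batch]
    labels = torch.stack([label for _, _, label in batch])
    return first, second, labels

# toy samples with the same tuple layout generate_datasets produced
samples = [
    ([torch.zeros(4), torch.zeros(4)], [torch.ones(4)], torch.tensor(0.0)),  # different goals
    ([torch.ones(4)], [torch.ones(4), torch.ones(4)], torch.tensor(1.0)),    # same goal
]
loader = DataLoader(samples, batch_size=2, collate_fn=collate_pairs)
first, second, labels = next(iter(loader))  # two variable-length traces per side, labels of shape (2,)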
gr_libs/recognizer/utils/__init__.py (removed)
@@ -1 +0,0 @@
-from .format import recognizer_str_to_obj
gr_libs/recognizer/utils/format.py (removed)
@@ -1,13 +0,0 @@
-from gr_libs.recognizer.graml.graml_recognizer import GCGraml, ExpertBasedGraml, MCTSBasedGraml
-from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Graql, Draco, GCDraco
-
-def recognizer_str_to_obj(recognizer_str: str):
-    recognizer_map = {
-        "GCGraml": GCGraml,
-        "ExpertBasedGraml": ExpertBasedGraml,
-        "MCTSBasedGraml": MCTSBasedGraml,
-        "Graql": Graql,
-        "Draco": Draco,
-        "GCDraco": GCDraco
-    }
-    return recognizer_map.get(recognizer_str)
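This helper reappears in 0.2.2 under gr_libs/recognizer/_utils/format.py (per the listing above); whether it is re-exported from a public module is not visible in this diff. A brief usage sketch of the lookup follows, assuming the function keeps its name at the new location; the constructor arguments mirror the Graql minigrid tutorial shown later in this diff.

from gr_libs.recognizer._utils.format import recognizer_str_to_obj  # assumed 0.2.2 path; 0.1.7 used gr_libs.recognizer.utils.format

recognizer_cls = recognizer_str_to_obj("Graql")
if recognizer_cls is None:  # dict.get() returns None for names outside the map
    raise ValueError("unknown recognizer name")
recognizer = recognizer_cls(domain_name="minigrid", env_name="MiniGrid-SimpleCrossingS13N4")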
gr_libs-0.1.7.post0.dist-info/RECORD (removed)
@@ -1,67 +0,0 @@
-evaluation/analyze_results_cross_alg_cross_domain.py,sha256=s_DDh4rNfRnvQ0PDa2d5411jYOa7CaI1YeB8Dpup7QU,9803
-evaluation/create_minigrid_map_image.py,sha256=jaSW3n3tY222iFUeAMqedBP9cvD88GCzPrQ6_XHv5oQ,1242
-evaluation/file_system.py,sha256=SSYnj8QGFkq-8V_0s7x2MWbD88aFaoFY4Ogc_Pt8m6U,1601
-evaluation/generate_experiments_results.py,sha256=oMFt2-TX7g3O6aBflFtQ5q0PT6sngEb8104kpPVMi0s,4051
-evaluation/generate_experiments_results_new_ver1.py,sha256=P9gz3xa0DoRRMQ16GQL3_wVSDYUfh8oZ3BCIUjphKaM,8909
-evaluation/generate_experiments_results_new_ver2.py,sha256=jeKj_wgdM50o2vi8WZI-s3GbsQdsjultHX-8H4Xvus4,12276
-evaluation/generate_task_specific_statistics_plots.py,sha256=rBsqaMe2irP_Cfo-icwIg4_dsleFjEH6eiQCcUBj6WU,15286
-evaluation/get_plans_images.py,sha256=BT-bGWuOPUAYpZVDwk7YMRBLdgKaDbNOBjMrtcl1Vjk,2346
-evaluation/increasing_and_decreasing_.py,sha256=fu1hkEjhOQC3jEsjiS7emW_UPRpVFCaae0d0E2MGZqI,2991
-gr_libs/__init__.py,sha256=WlSRpZIpz5GxLNk96nhympbk3Z5nsMiSOyiAWj17S88,280
-gr_libs/_version.py,sha256=Zy3HQFB_Viry2Rl81-7LPU4kL2FTQegnwLvl0VxTs3E,526
-gr_libs/environment/__init__.py,sha256=KlRp3qdgxEmej31zDoDsYPwbcAqyDglx6x0mH0KRmHU,1032
-gr_libs/environment/environment.py,sha256=d6ZbiAQ4H1aLrUFI8sm0BN9DVW3JtzpkodSi_70Z_PY,6780
-gr_libs/environment/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-gr_libs/environment/utils/utils.py,sha256=4yM3s30KjyuEmWR8UuICE5rR03zsLi3tzqNDvBkdPcU,537
-gr_libs/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-gr_libs/metrics/metrics.py,sha256=4bnvs5suv-QrK9i1NuOzkE_E8uIzS1nlEazNDRXvZGs,8700
-gr_libs/ml/__init__.py,sha256=jrjxYqvSRgWwFWw7XQP9DzOwvmprMZ2umwT7t-DYtDU,233
-gr_libs/ml/agent.py,sha256=DSnK8nRx9SS76fAOZZEEvA68_meLjzm9lfQpMUXmGQU,1957
-gr_libs/ml/consts.py,sha256=mrbZk8n6QoGzLGaKmaxq4QlAsBbk4fhkCgXLuO9jXKw,365
-gr_libs/ml/base/__init__.py,sha256=MfIYhl_UqH8H7YoTCih8wBFA_gpTOUFq8Ph0_Nq0XQk,68
-gr_libs/ml/base/rl_agent.py,sha256=u9rnb-ma9iDM5b_BlwjcTJGSFezIGrxXINw6b-Dbl8s,1598
-gr_libs/ml/neural/__init__.py,sha256=g-0D5oFX8W52To4OR8vO8kDoBLSxAupVqwcQw8XjT5E,180
-gr_libs/ml/neural/deep_rl_learner.py,sha256=b41_b4GVlYqxhjrr1_YMcGdU9iwcMXsf3zH8D2kEucs,20659
-gr_libs/ml/neural/utils/__init__.py,sha256=bJgPfRnmfDQxdnb0OyRGwzgebEc1PnlO7-GpqszPBcc,106
-gr_libs/ml/neural/utils/dictlist.py,sha256=WpHfdWpVZ_T3PcSnOQUC--ro_tsS0dvam2WG3LcsHDw,1039
-gr_libs/ml/neural/utils/penv.py,sha256=R1uW8sePQqvTlJjpAuMx16eDU6TuGAjQF3hTR1QasMo,1862
-gr_libs/ml/planner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-gr_libs/ml/planner/mcts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-gr_libs/ml/planner/mcts/mcts_model.py,sha256=N4B2SRWAySW7sJ1JIIkKHbzpxMYo2GcuaSB-eauJmBg,21068
-gr_libs/ml/planner/mcts/utils/__init__.py,sha256=8OE_XolCHiWIZZwS23lqLaLd72tsHwO8cQRRToTC0Lk,45
-gr_libs/ml/planner/mcts/utils/node.py,sha256=WXXaEjfU857yIBF8gKVjr0ZGmU2Du9s1d-dBcA4QS10,1220
-gr_libs/ml/planner/mcts/utils/tree.py,sha256=mLtLtPoqoU0eauNEExY94px5mdbmH-HCsYAYQDZqioI,3382
-gr_libs/ml/sequential/__init__.py,sha256=rusN4ahTvAeAq1Saz6qS_9HEU7WuXDJu2zwhc9WUEYQ,61
-gr_libs/ml/sequential/lstm_model.py,sha256=Vzm-C1URR84PGNEecj69GUtn3ZmOVyh1BAY6CUnfL1Q,8978
-gr_libs/ml/tabular/__init__.py,sha256=jAfjfTFZLLlVm1KUiJdxdnaNGFp1J2KBU89q_vvradM,177
-gr_libs/ml/tabular/state.py,sha256=8xroKF3y3nRX0LK1QX5fRT2PS2WmvcDPp0UvPFdSx2A,733
-gr_libs/ml/tabular/tabular_q_learner.py,sha256=5QU9ZWC-Cu5jxv5K1TohoRjQrRDhCgTs1Mt18cqM_Sc,18970
-gr_libs/ml/tabular/tabular_rl_agent.py,sha256=7w8PYbKi8QgxHJyECWU_rURtT89spg0tHIMI1cDwYc8,3764
-gr_libs/ml/utils/__init__.py,sha256=qH3pcnem5Z6rkQ4RTZi47AXJRe1RkFEST_-DrBmfWcM,258
-gr_libs/ml/utils/env.py,sha256=AWVN0OXYmFU-J3FUiwvEAIY93Suf1oL6VNcxtyWJraM,171
-gr_libs/ml/utils/format.py,sha256=nu7RzVwn_raG_fqqmnqlJgUjtA0yzKztkB3a5QZnRYo,3071
-gr_libs/ml/utils/math.py,sha256=n62zssVOLHnUb4dPofAoFhoLOKl5n_xBzaKQOUQBoNc,440
-gr_libs/ml/utils/other.py,sha256=HKUfeLBbd4DgJxSTs3ya9KQ85Acx4TjycRrtGD9WQ3s,505
-gr_libs/ml/utils/storage.py,sha256=52wR2pgFmcCOhqbu5Km3tegjAmtI1Fb4HYAVUnUubZk,4626
-gr_libs/problems/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-gr_libs/problems/consts.py,sha256=ON7yfKTAKETg7i3okDYuOzEU7KWvynyubl0m7TlU6Hs,38808
-gr_libs/recognizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-gr_libs/recognizer/recognizer.py,sha256=ZrApJVdBQxKRYhhDiWLCNGmlxgi674nwgb30BgVggC8,1705
-gr_libs/recognizer/gr_as_rl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py,sha256=84GdfohC2dZoNH_QEo7GpSt8nZWdfqSRKCTY99X_iME,5215
-gr_libs/recognizer/graml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-gr_libs/recognizer/graml/gr_dataset.py,sha256=lG6m3ulxFELpH1oURnlcmNDWOrxyuzvlAR28ZTqB7L8,7224
-gr_libs/recognizer/graml/graml_recognizer.py,sha256=1xLl1gHj3JxWhHtV9h3SvsW7oJdxsQQV0F-VLtlTmKQ,15911
-gr_libs/recognizer/utils/__init__.py,sha256=ewSroxL7aATvvm-Xzc1_-61mP2LU2U28YaOEqvVVDB0,41
-gr_libs/recognizer/utils/format.py,sha256=e0AnqtPeYoJsV9Z7cEBpgbzTM0hLNxFIjn07fQ3YbQw,492
-tests/test_graml.py,sha256=ZJB2jqtf4Q2-KZredkJq90teqmHBIvigCAQpvR5G110,559
-tests/test_graql.py,sha256=-onMi13e2wStOmB5bYv2f3Ita3QFFiw416XMBkby0OI,141
-tutorials/graml_minigrid_tutorial.py,sha256=ONvxFi79R7d8dcd6gy083Z_yy9A2flhGTDIDRxurdx8,1782
-tutorials/graml_panda_tutorial.py,sha256=wtv_lsw0vsU7j45GKeWecTfE7jzfh4iVGEVnQyaWthM,2063
-tutorials/graml_parking_tutorial.py,sha256=M6bt1WQOOgn8_CRyG2kjxF14PMeyXVAWRDq1ZRwGTXo,1808
-tutorials/graml_point_maze_tutorial.py,sha256=mYq3IxYbf9jidq-4VdT3MdStV80Q5lytFv6Xzzn22Ys,1835
-tutorials/graql_minigrid_tutorial.py,sha256=Jb0TCUhiZQkFeafJWUTPnCISd4FKfPrqP-xfHiqCGKE,1635
-gr_libs-0.1.7.post0.dist-info/METADATA,sha256=aS7y9Nl1JErXYdpAHstuQP_W1QMcKMbGet6IfxfJ_Do,9620
-gr_libs-0.1.7.post0.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
-gr_libs-0.1.7.post0.dist-info/top_level.txt,sha256=fJQF8Q8Dfh_D3pA2mhNodazNjzW6b3oWfnx6Jdo-pBU,35
-gr_libs-0.1.7.post0.dist-info/RECORD,,
tutorials/graml_minigrid_tutorial.py (removed)
@@ -1,34 +0,0 @@
-from gr_libs.environment.environment import MINIGRID, QLEARNING
-from gr_libs.metrics.metrics import stochastic_amplified_selection
-from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
-from gr_libs.ml.utils.format import random_subset_with_order
-from gr_libs import ExpertBasedGraml
-
-def run_graml_minigrid_tutorial():
-    recognizer = ExpertBasedGraml(
-        domain_name=MINIGRID,
-        env_name="MiniGrid-SimpleCrossingS13N4"
-    )
-
-    recognizer.domain_learning_phase(base_goals=[(11,1), (11,11), (1,11), (7,11), (8,1), (10,6), (6,9), (11,3), (11,5)],
-                                     train_configs=[(QLEARNING, 100000) for _ in range(9)])
-
-    recognizer.goals_adaptation_phase(
-        dynamic_goals = [(11,1), (11,11), (1,11)],
-        dynamic_train_configs=[(QLEARNING, 100000) for _ in range(3)] # for expert sequence generation.
-    )
-    # TD3 is different from recognizer and expert algorithms, which are SAC #
-    actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
-    actor.learn()
-    # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
-    full_sequence = actor.generate_observation(
-        action_selection_method=stochastic_amplified_selection,
-        random_optimalism=True, # the noise that's added to the actions
-    )
-
-    partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
-    closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
-    print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
-
-if __name__ == "__main__":
-    run_graml_minigrid_tutorial()
tutorials/graml_panda_tutorial.py (removed)
@@ -1,41 +0,0 @@
-
-import numpy as np
-from stable_baselines3 import PPO, SAC
-import gr_libs.environment.environment
-from gr_libs.environment.environment import PANDA, EnvProperty, GCEnvProperty, PandaProperty
-from gr_libs.environment.utils.utils import domain_to_env_property
-from gr_libs.metrics.metrics import stochastic_amplified_selection
-from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
-from gr_libs.ml.utils.format import random_subset_with_order
-from gr_libs import GCGraml
-
-def run_graml_panda_tutorial():
-    recognizer = GCGraml( # TODO make these tutorials into pytests
-        domain_name=PANDA,
-        env_name="PandaMyReachDense"
-    )
-    recognizer.domain_learning_phase(
-        base_goals=[np.array([PandaProperty.sample_goal()]) for _ in range(1,30)],
-        train_configs=[(SAC, 800000)]
-    )
-    recognizer.goals_adaptation_phase(
-        dynamic_goals=[np.array([[-0.1, -0.1, 0.1]]), np.array([[-0.1, 0.1, 0.1]]), np.array([[0.2, 0.2, 0.1]])]
-    )
-    # TD3 is different from recognizer and expert algorithms, which are SAC #
-    property_type = domain_to_env_property(PANDA)
-    env_property = property_type("PandaMyReachDense")
-    problem_name = env_property.goal_to_problem_str(np.array([[-0.1, -0.1, 0.1]]))
-    actor = DeepRLAgent(domain_name=PANDA, problem_name=problem_name, algorithm=PPO, num_timesteps=400000)
-    actor.learn()
-    # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
-    full_sequence = actor.generate_observation(
-        action_selection_method=stochastic_amplified_selection,
-        random_optimalism=True, # the noise that's added to the actions
-    )
-
-    partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
-    closest_goal = recognizer.inference_phase(partial_sequence, np.array([[-0.1, -0.1, 0.1]]), 0.5)
-    print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: [-0.1, -0.1, 0.1]")
-
-if __name__ == "__main__":
-    run_graml_panda_tutorial()
tutorials/graml_parking_tutorial.py (removed)
@@ -1,39 +0,0 @@
-
-from stable_baselines3 import PPO, SAC, TD3
-from gr_libs.environment.environment import PARKING, EnvProperty, GCEnvProperty, ParkingProperty
-from gr_libs.metrics.metrics import stochastic_amplified_selection
-from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
-from gr_libs.ml.utils.format import random_subset_with_order
-from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml, GCGraml
-import gr_libs.environment.environment
-
-def run_graml_parking_tutorial():
-    recognizer = GCGraml(
-        domain_name=PARKING,
-        env_name="Parking-S-14-PC-"
-    )
-
-    recognizer.domain_learning_phase(
-        [i for i in range(1,21)],
-        [(PPO, 200000)]
-    )
-    recognizer.goals_adaptation_phase(
-        dynamic_goals = ["1", "11", "21"]
-        # no need for expert sequence generation since GCRL is used
-    )
-
-    # TD3 is different from recognizer and expert algorithms, which are SAC #
-    actor = DeepRLAgent(domain_name="parking", problem_name="Parking-S-14-PC--GI-11-v0", algorithm=TD3, num_timesteps=400000)
-    actor.learn()
-    # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
-    full_sequence = actor.generate_observation(
-        action_selection_method=stochastic_amplified_selection,
-        random_optimalism=True, # the noise that's added to the actions
-    )
-
-    partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
-    closest_goal = recognizer.inference_phase(partial_sequence, ParkingProperty("Parking-S-14-PC--GI-11-v0").str_to_goal(), 0.5)
-    print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: 11")
-
-if __name__ == "__main__":
-    run_graml_parking_tutorial()
tutorials/graml_point_maze_tutorial.py (removed)
@@ -1,39 +0,0 @@
-
-from stable_baselines3 import SAC, TD3
-from gr_libs.environment.environment import POINT_MAZE, PointMazeProperty
-from gr_libs.metrics.metrics import stochastic_amplified_selection
-from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
-from gr_libs.ml.utils.format import random_subset_with_order
-from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml
-
-def run_graml_point_maze_tutorial():
-    recognizer = ExpertBasedGraml(
-        domain_name=POINT_MAZE,
-        env_name="PointMaze-FourRoomsEnvDense-11x11"
-    )
-
-    recognizer.domain_learning_phase(
-        [(9,1), (9,9), (1,9), (3,3), (3,4), (8,2), (3,7), (2,8)],
-        [(SAC, 200000) for _ in range(8)]
-    )
-
-    recognizer.goals_adaptation_phase(
-        dynamic_goals = [(4,4), (7,3), (3,7)],
-        dynamic_train_configs=[(SAC, 200000) for _ in range(3)] # for expert sequence generation.
-    )
-
-    # TD3 is different from recognizer and expert algorithms, which are SAC #
-    actor = DeepRLAgent(domain_name="point_maze", problem_name="PointMaze-FourRoomsEnvDense-11x11-Goal-4x4", algorithm=TD3, num_timesteps=200000)
-    actor.learn()
-    # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
-    full_sequence = actor.generate_observation(
-        action_selection_method=stochastic_amplified_selection,
-        random_optimalism=True, # the noise that's added to the actions
-    )
-
-    partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)))
-    closest_goal = recognizer.inference_phase(partial_sequence, PointMazeProperty("PointMaze-FourRoomsEnvDense-11x11-Goal-4x4").str_to_goal(), 0.5)
-    print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (4, 4)")
-
-if __name__ == "__main__":
-    run_graml_point_maze_tutorial()
tutorials/graql_minigrid_tutorial.py (removed)
@@ -1,34 +0,0 @@
-from gr_libs.environment.environment import QLEARNING
-from gr_libs.metrics.metrics import stochastic_amplified_selection
-from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
-from gr_libs.ml.utils.format import random_subset_with_order
-from gr_libs import Graql
-
-def run_graql_minigrid_tutorial():
-    recognizer = Graql(
-        domain_name="minigrid",
-        env_name="MiniGrid-SimpleCrossingS13N4"
-    )
-
-    #Graql doesn't have a domain learning phase, so we skip it
-
-    recognizer.goals_adaptation_phase(
-        dynamic_goals = [(11,1), (11,11), (1,11)],
-        dynamic_train_configs=[(QLEARNING, 100000) for _ in range(3)] # for expert sequence generation.
-    )
-    # TD3 is different from recognizer and expert algorithms, which are SAC #
-    actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
-    actor.learn()
-    # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
-    full_sequence = actor.generate_observation(
-        action_selection_method=stochastic_amplified_selection,
-        random_optimalism=True, # the noise that's added to the actions
-    )
-
-    partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
-    closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
-    print(f"closest_goal returned by Graql: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
-    return closest_goal, (11,1)
-
-if __name__ == "__main__":
-    run_graql_minigrid_tutorial()