gr-libs 0.1.7.post0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. gr_libs/__init__.py +4 -1
  2. gr_libs/_evaluation/__init__.py +1 -0
  3. gr_libs/_evaluation/_analyze_results_cross_alg_cross_domain.py +260 -0
  4. gr_libs/_evaluation/_generate_experiments_results.py +141 -0
  5. gr_libs/_evaluation/_generate_task_specific_statistics_plots.py +497 -0
  6. gr_libs/_evaluation/_get_plans_images.py +61 -0
  7. gr_libs/_evaluation/_increasing_and_decreasing_.py +106 -0
  8. gr_libs/_version.py +2 -2
  9. gr_libs/all_experiments.py +294 -0
  10. gr_libs/environment/__init__.py +30 -9
  11. gr_libs/environment/_utils/utils.py +27 -0
  12. gr_libs/environment/environment.py +417 -54
  13. gr_libs/metrics/__init__.py +7 -0
  14. gr_libs/metrics/metrics.py +231 -54
  15. gr_libs/ml/__init__.py +2 -5
  16. gr_libs/ml/agent.py +21 -6
  17. gr_libs/ml/base/__init__.py +3 -1
  18. gr_libs/ml/base/rl_agent.py +81 -13
  19. gr_libs/ml/consts.py +1 -1
  20. gr_libs/ml/neural/__init__.py +1 -3
  21. gr_libs/ml/neural/deep_rl_learner.py +619 -378
  22. gr_libs/ml/neural/utils/__init__.py +1 -2
  23. gr_libs/ml/neural/utils/dictlist.py +3 -3
  24. gr_libs/ml/planner/mcts/{utils → _utils}/__init__.py +1 -1
  25. gr_libs/ml/planner/mcts/{utils → _utils}/node.py +11 -7
  26. gr_libs/ml/planner/mcts/{utils → _utils}/tree.py +15 -11
  27. gr_libs/ml/planner/mcts/mcts_model.py +571 -312
  28. gr_libs/ml/sequential/__init__.py +0 -1
  29. gr_libs/ml/sequential/_lstm_model.py +270 -0
  30. gr_libs/ml/tabular/__init__.py +1 -3
  31. gr_libs/ml/tabular/state.py +7 -7
  32. gr_libs/ml/tabular/tabular_q_learner.py +150 -82
  33. gr_libs/ml/tabular/tabular_rl_agent.py +42 -28
  34. gr_libs/ml/utils/__init__.py +2 -3
  35. gr_libs/ml/utils/format.py +28 -97
  36. gr_libs/ml/utils/math.py +5 -3
  37. gr_libs/ml/utils/other.py +3 -3
  38. gr_libs/ml/utils/storage.py +88 -81
  39. gr_libs/odgr_executor.py +268 -0
  40. gr_libs/problems/consts.py +1549 -1227
  41. gr_libs/recognizer/_utils/__init__.py +0 -0
  42. gr_libs/recognizer/_utils/format.py +18 -0
  43. gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +233 -88
  44. gr_libs/recognizer/graml/_gr_dataset.py +233 -0
  45. gr_libs/recognizer/graml/graml_recognizer.py +586 -252
  46. gr_libs/recognizer/recognizer.py +90 -30
  47. gr_libs/tutorials/draco_panda_tutorial.py +58 -0
  48. gr_libs/tutorials/draco_parking_tutorial.py +56 -0
  49. gr_libs/tutorials/gcdraco_panda_tutorial.py +62 -0
  50. gr_libs/tutorials/gcdraco_parking_tutorial.py +57 -0
  51. gr_libs/tutorials/graml_minigrid_tutorial.py +64 -0
  52. gr_libs/tutorials/graml_panda_tutorial.py +57 -0
  53. gr_libs/tutorials/graml_parking_tutorial.py +52 -0
  54. gr_libs/tutorials/graml_point_maze_tutorial.py +60 -0
  55. gr_libs/tutorials/graql_minigrid_tutorial.py +50 -0
  56. {gr_libs-0.1.7.post0.dist-info → gr_libs-0.2.2.dist-info}/METADATA +84 -29
  57. gr_libs-0.2.2.dist-info/RECORD +71 -0
  58. {gr_libs-0.1.7.post0.dist-info → gr_libs-0.2.2.dist-info}/WHEEL +1 -1
  59. gr_libs-0.2.2.dist-info/top_level.txt +2 -0
  60. tests/test_draco.py +14 -0
  61. tests/test_gcdraco.py +10 -0
  62. tests/test_graml.py +12 -8
  63. tests/test_graql.py +3 -2
  64. evaluation/analyze_results_cross_alg_cross_domain.py +0 -277
  65. evaluation/create_minigrid_map_image.py +0 -34
  66. evaluation/file_system.py +0 -42
  67. evaluation/generate_experiments_results.py +0 -92
  68. evaluation/generate_experiments_results_new_ver1.py +0 -254
  69. evaluation/generate_experiments_results_new_ver2.py +0 -331
  70. evaluation/generate_task_specific_statistics_plots.py +0 -272
  71. evaluation/get_plans_images.py +0 -47
  72. evaluation/increasing_and_decreasing_.py +0 -63
  73. gr_libs/environment/utils/utils.py +0 -17
  74. gr_libs/ml/neural/utils/penv.py +0 -57
  75. gr_libs/ml/sequential/lstm_model.py +0 -192
  76. gr_libs/recognizer/graml/gr_dataset.py +0 -134
  77. gr_libs/recognizer/utils/__init__.py +0 -1
  78. gr_libs/recognizer/utils/format.py +0 -13
  79. gr_libs-0.1.7.post0.dist-info/RECORD +0 -67
  80. gr_libs-0.1.7.post0.dist-info/top_level.txt +0 -4
  81. tutorials/graml_minigrid_tutorial.py +0 -34
  82. tutorials/graml_panda_tutorial.py +0 -41
  83. tutorials/graml_parking_tutorial.py +0 -39
  84. tutorials/graml_point_maze_tutorial.py +0 -39
  85. tutorials/graql_minigrid_tutorial.py +0 -34
  86. /gr_libs/environment/{utils → _utils}/__init__.py +0 -0
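The file list above reflects the 0.2.2 restructuring: the top-level evaluation/ and tutorials/ packages move inside the gr_libs package (as gr_libs/_evaluation/ and gr_libs/tutorials/), and internal helper packages such as recognizer/utils and environment/utils are renamed to _utils. As a minimal, hypothetical migration sketch (not part of the diff; it assumes the relocated tutorial module keeps the same run_* function name shown in the removed file further below), imports would change roughly like this:

# 0.1.7.post0: tutorials was a separate top-level package
# from tutorials.graml_minigrid_tutorial import run_graml_minigrid_tutorial

# 0.2.2: the tutorials ship inside the gr_libs package
from gr_libs.tutorials.graml_minigrid_tutorial import run_graml_minigrid_tutorial

run_graml_minigrid_tutorial()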
gr_libs/recognizer/graml/gr_dataset.py
@@ -1,134 +0,0 @@
- import numpy as np
- from torch.utils.data import Dataset
- import random
- from types import MethodType
- from typing import List
- from gr_libs.environment.environment import EnvProperty
- from gr_libs.metrics.metrics import measure_average_sequence_distance
- from gr_libs.ml.base.rl_agent import ContextualAgent
- from gr_libs.ml.utils import get_siamese_dataset_path
- from gr_libs.ml.base import RLAgent
- import os
- import dill
- import torch
-
- class GRDataset(Dataset):
-     def __init__(self, num_samples, samples):
-         self.num_samples = num_samples
-         self.samples = samples
-
-     def __len__(self):
-         return self.num_samples
-
-     def __getitem__(self, idx):
-         return self.samples[idx] # returns a tuple - as appended in 'generate_dataset' last line
-
- def check_diff_goals(first_agent_goal, second_agent_goal):
-     try:
-         assert first_agent_goal != second_agent_goal
-     except Exception as e:
-         try:
-             assert any(first_agent_goal != second_agent_goal)
-         except Exception as e:
-             for arr1, arr2 in zip(first_agent_goal, second_agent_goal):
-                 assert any(elm1!=elm2 for elm1, elm2 in zip(arr1, arr2))
-
- def generate_datasets(num_samples, agents: List[ContextualAgent], observation_creation_method : MethodType, problems: List[str], env_prop:EnvProperty, recognizer_name:str, gc_goal_set=None):
-     if gc_goal_set: model_name = env_prop.name
-     else: model_name = env_prop.problem_list_to_str_tuple(problems)
-     dataset_directory = get_siamese_dataset_path(domain_name=env_prop.domain_name, env_name=env_prop.name, model_name=model_name, recognizer=recognizer_name)
-     dataset_train_path, dataset_dev_path = os.path.join(dataset_directory, 'train.pkl'), os.path.join(dataset_directory, 'dev.pkl')
-     if os.path.exists(dataset_train_path) and os.path.exists(dataset_dev_path):
-         print(f"Loading pre-existing datasets in {dataset_directory}")
-         with open(dataset_train_path, 'rb') as train_file:
-             train_samples = dill.load(train_file)
-         with open(dataset_dev_path, 'rb') as dev_file:
-             dev_samples = dill.load(dev_file)
-     else:
-         print(f"{dataset_directory} doesn't exist, generating datasets")
-         if not os.path.exists(dataset_directory):
-             os.makedirs(dataset_directory)
-         all_samples = []
-         for i in range(num_samples):
-             if gc_goal_set != None: # TODO change to having one flow for both cases and injecting according to gc_goal_set or not
-                 assert env_prop.gc_adaptable() == True, "shouldn't specify a goal directed representation if not generating datasets with a general agent."
-                 is_same_goal = (np.random.choice([1, 0], 1, p=[1/max(len(gc_goal_set), 6), 1 - 1/max(len(gc_goal_set), 6)]))[0]
-                 first_is_consecutive = np.random.choice([True, False], 1, p=[0.5, 0.5])[0]
-                 first_random_index = np.random.randint(0, len(gc_goal_set)) # works for lists of every object type, while np.choice only works for 1d arrays
-                 first_agent_goal = gc_goal_set[first_random_index] # could be either a real goal or a goal-directed problem name
-                 #first_agent_goal = np.random.choice(gc_goal_set)
-                 first_trace_percentage = random.choice([0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
-                 first_observation = []
-                 first_agent_kwargs = {
-                     "action_selection_method": observation_creation_method,
-                     "percentage": first_trace_percentage,
-                     "is_consecutive": first_is_consecutive,
-                     "save_fig": False
-                 }
-                 while first_observation == []:
-                     # needs to be different than agents[0] problem_name, it should be from the gc_goal_set.
-                     # but the problem is with the panda because it
-                     if env_prop.use_goal_directed_problem(): first_agent_kwargs["goal_directed_problem"] = first_agent_goal
-                     else: first_agent_kwargs["goal_directed_goal"] = first_agent_goal
-                     first_observation = agents[0].agent.generate_partial_observation(**first_agent_kwargs)
-                 first_observation = agents[0].agent.simplify_observation(first_observation)
-
-                 second_is_consecutive = np.random.choice([True, False], 1, p=[0.5, 0.5])[0]
-                 second_agent_goal = first_agent_goal
-                 second_random_index = first_random_index
-                 if not is_same_goal:
-                     second_random_index = np.random.choice([i for i in range(len(gc_goal_set)) if i != first_random_index])
-                     assert first_random_index != second_random_index
-                     second_agent_goal = gc_goal_set[second_random_index]
-                 if not is_same_goal: check_diff_goals(first_agent_goal, second_agent_goal)
-                 second_trace_percentage = first_trace_percentage
-                 second_observation = []
-                 second_agent_kwargs = {
-                     "action_selection_method": observation_creation_method,
-                     "percentage": second_trace_percentage,
-                     "is_consecutive": second_is_consecutive,
-                     "save_fig": False
-                 }
-                 while second_observation == []:
-                     if env_prop.use_goal_directed_problem() == True: second_agent_kwargs["goal_directed_problem"] = second_agent_goal
-                     else: second_agent_kwargs["goal_directed_goal"] = second_agent_goal
-                     second_observation = agents[0].agent.generate_partial_observation(**second_agent_kwargs)
-                 second_observation = agents[0].agent.simplify_observation(second_observation)
-             else:
-                 is_same_goal = (np.random.choice([1, 0], 1, p=[1/max(len(agents), 6), 1 - 1/max(len(agents), 6)]))[0]
-                 first_is_consecutive = np.random.choice([True, False], 1, p=[0.5, 0.5])[0]
-                 first_agent = np.random.choice(agents)
-                 first_trace_percentage = random.choice([0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
-                 first_observation = first_agent.agent.generate_partial_observation(action_selection_method=observation_creation_method, percentage=first_trace_percentage, is_consecutive=first_is_consecutive, save_fig=False, random_optimalism=True)
-                 first_observation = first_agent.agent.simplify_observation(first_observation)
-
-                 second_agent = first_agent
-                 if not is_same_goal:
-                     second_agent = np.random.choice([agent for agent in agents if agent != first_agent])
-                     assert second_agent != first_agent
-                 second_is_consecutive = np.random.choice([True, False], 1, p=[0.5, 0.5])[0]
-                 second_trace_percentage = first_trace_percentage
-                 second_observation = second_agent.agent.generate_partial_observation(action_selection_method=observation_creation_method, percentage=second_trace_percentage, is_consecutive=second_is_consecutive, save_fig=False, random_optimalism=True)
-                 second_observation = second_agent.agent.simplify_observation(second_observation)
-             if is_same_goal:
-                 observations_distance = measure_average_sequence_distance(first_observation, second_observation) # for debugging mate
-             all_samples.append((
-                 [torch.tensor(observation, dtype=torch.float32) for observation in first_observation],
-                 [torch.tensor(observation, dtype=torch.float32) for observation in second_observation],
-                 torch.tensor(is_same_goal, dtype=torch.float32)))
-             # all_samples.append((first_observation, second_observation, torch.tensor(is_same_goal, dtype=torch.float32)))
-             if i % 1000 == 0:
-                 print(f'generated {i} samples')
-
-         total_samples = len(all_samples)
-         train_size = int(0.8 * total_samples)
-         train_samples = all_samples[:train_size]
-         dev_samples = all_samples[train_size:]
-         with open(dataset_train_path, 'wb') as train_file:
-             dill.dump(train_samples, train_file)
-         with open(dataset_dev_path, 'wb') as dev_file:
-             dill.dump(dev_samples, dev_file)
-
-     return train_samples, dev_samples
-
-
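The removed gr_dataset.py built paired observation samples for GRAML's sequence model. As a minimal, hypothetical sketch (not part of either package version; the actual training loop lives in graml_recognizer.py and may batch differently), the (sequence, sequence, label) tuples produced by generate_datasets could be batched with a padding collate function:

import torch
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader

def collate_pairs(batch):
    # each sample: (first_obs: list of 1-D Tensors, second_obs: list of 1-D Tensors, label: scalar Tensor)
    firsts, seconds, labels = zip(*batch)
    # stack each variable-length sequence into (length, dim), then pad to (batch, max_len, dim)
    first_padded = pad_sequence([torch.stack(seq) for seq in firsts], batch_first=True)
    second_padded = pad_sequence([torch.stack(seq) for seq in seconds], batch_first=True)
    return first_padded, second_padded, torch.stack(labels)

# train_samples, dev_samples = generate_datasets(...)
# loader = DataLoader(GRDataset(len(train_samples), train_samples),
#                     batch_size=32, shuffle=True, collate_fn=collate_pairs)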
gr_libs/recognizer/utils/__init__.py
@@ -1 +0,0 @@
- from .format import recognizer_str_to_obj
gr_libs/recognizer/utils/format.py
@@ -1,13 +0,0 @@
- from gr_libs.recognizer.graml.graml_recognizer import GCGraml, ExpertBasedGraml, MCTSBasedGraml
- from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Graql, Draco, GCDraco
-
- def recognizer_str_to_obj(recognizer_str: str):
-     recognizer_map = {
-         "GCGraml": GCGraml,
-         "ExpertBasedGraml": ExpertBasedGraml,
-         "MCTSBasedGraml": MCTSBasedGraml,
-         "Graql": Graql,
-         "Draco": Draco,
-         "GCDraco": GCDraco
-     }
-     return recognizer_map.get(recognizer_str)
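The removed helper mapped recognizer names to classes; its 0.2.2 counterpart lives in gr_libs/recognizer/_utils/format.py, whose contents are not shown in this diff. A hedged usage sketch of the removed version, with constructor arguments borrowed from the graql tutorial below:

recognizer_cls = recognizer_str_to_obj("Graql")
if recognizer_cls is None:
    # dict.get returns None for names outside the map
    raise ValueError("unknown recognizer name")
recognizer = recognizer_cls(domain_name="minigrid", env_name="MiniGrid-SimpleCrossingS13N4")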
gr_libs-0.1.7.post0.dist-info/RECORD
@@ -1,67 +0,0 @@
- evaluation/analyze_results_cross_alg_cross_domain.py,sha256=s_DDh4rNfRnvQ0PDa2d5411jYOa7CaI1YeB8Dpup7QU,9803
- evaluation/create_minigrid_map_image.py,sha256=jaSW3n3tY222iFUeAMqedBP9cvD88GCzPrQ6_XHv5oQ,1242
- evaluation/file_system.py,sha256=SSYnj8QGFkq-8V_0s7x2MWbD88aFaoFY4Ogc_Pt8m6U,1601
- evaluation/generate_experiments_results.py,sha256=oMFt2-TX7g3O6aBflFtQ5q0PT6sngEb8104kpPVMi0s,4051
- evaluation/generate_experiments_results_new_ver1.py,sha256=P9gz3xa0DoRRMQ16GQL3_wVSDYUfh8oZ3BCIUjphKaM,8909
- evaluation/generate_experiments_results_new_ver2.py,sha256=jeKj_wgdM50o2vi8WZI-s3GbsQdsjultHX-8H4Xvus4,12276
- evaluation/generate_task_specific_statistics_plots.py,sha256=rBsqaMe2irP_Cfo-icwIg4_dsleFjEH6eiQCcUBj6WU,15286
- evaluation/get_plans_images.py,sha256=BT-bGWuOPUAYpZVDwk7YMRBLdgKaDbNOBjMrtcl1Vjk,2346
- evaluation/increasing_and_decreasing_.py,sha256=fu1hkEjhOQC3jEsjiS7emW_UPRpVFCaae0d0E2MGZqI,2991
- gr_libs/__init__.py,sha256=WlSRpZIpz5GxLNk96nhympbk3Z5nsMiSOyiAWj17S88,280
- gr_libs/_version.py,sha256=Zy3HQFB_Viry2Rl81-7LPU4kL2FTQegnwLvl0VxTs3E,526
- gr_libs/environment/__init__.py,sha256=KlRp3qdgxEmej31zDoDsYPwbcAqyDglx6x0mH0KRmHU,1032
- gr_libs/environment/environment.py,sha256=d6ZbiAQ4H1aLrUFI8sm0BN9DVW3JtzpkodSi_70Z_PY,6780
- gr_libs/environment/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- gr_libs/environment/utils/utils.py,sha256=4yM3s30KjyuEmWR8UuICE5rR03zsLi3tzqNDvBkdPcU,537
- gr_libs/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- gr_libs/metrics/metrics.py,sha256=4bnvs5suv-QrK9i1NuOzkE_E8uIzS1nlEazNDRXvZGs,8700
- gr_libs/ml/__init__.py,sha256=jrjxYqvSRgWwFWw7XQP9DzOwvmprMZ2umwT7t-DYtDU,233
- gr_libs/ml/agent.py,sha256=DSnK8nRx9SS76fAOZZEEvA68_meLjzm9lfQpMUXmGQU,1957
- gr_libs/ml/consts.py,sha256=mrbZk8n6QoGzLGaKmaxq4QlAsBbk4fhkCgXLuO9jXKw,365
- gr_libs/ml/base/__init__.py,sha256=MfIYhl_UqH8H7YoTCih8wBFA_gpTOUFq8Ph0_Nq0XQk,68
- gr_libs/ml/base/rl_agent.py,sha256=u9rnb-ma9iDM5b_BlwjcTJGSFezIGrxXINw6b-Dbl8s,1598
- gr_libs/ml/neural/__init__.py,sha256=g-0D5oFX8W52To4OR8vO8kDoBLSxAupVqwcQw8XjT5E,180
- gr_libs/ml/neural/deep_rl_learner.py,sha256=b41_b4GVlYqxhjrr1_YMcGdU9iwcMXsf3zH8D2kEucs,20659
- gr_libs/ml/neural/utils/__init__.py,sha256=bJgPfRnmfDQxdnb0OyRGwzgebEc1PnlO7-GpqszPBcc,106
- gr_libs/ml/neural/utils/dictlist.py,sha256=WpHfdWpVZ_T3PcSnOQUC--ro_tsS0dvam2WG3LcsHDw,1039
- gr_libs/ml/neural/utils/penv.py,sha256=R1uW8sePQqvTlJjpAuMx16eDU6TuGAjQF3hTR1QasMo,1862
- gr_libs/ml/planner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- gr_libs/ml/planner/mcts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- gr_libs/ml/planner/mcts/mcts_model.py,sha256=N4B2SRWAySW7sJ1JIIkKHbzpxMYo2GcuaSB-eauJmBg,21068
- gr_libs/ml/planner/mcts/utils/__init__.py,sha256=8OE_XolCHiWIZZwS23lqLaLd72tsHwO8cQRRToTC0Lk,45
- gr_libs/ml/planner/mcts/utils/node.py,sha256=WXXaEjfU857yIBF8gKVjr0ZGmU2Du9s1d-dBcA4QS10,1220
- gr_libs/ml/planner/mcts/utils/tree.py,sha256=mLtLtPoqoU0eauNEExY94px5mdbmH-HCsYAYQDZqioI,3382
- gr_libs/ml/sequential/__init__.py,sha256=rusN4ahTvAeAq1Saz6qS_9HEU7WuXDJu2zwhc9WUEYQ,61
- gr_libs/ml/sequential/lstm_model.py,sha256=Vzm-C1URR84PGNEecj69GUtn3ZmOVyh1BAY6CUnfL1Q,8978
- gr_libs/ml/tabular/__init__.py,sha256=jAfjfTFZLLlVm1KUiJdxdnaNGFp1J2KBU89q_vvradM,177
- gr_libs/ml/tabular/state.py,sha256=8xroKF3y3nRX0LK1QX5fRT2PS2WmvcDPp0UvPFdSx2A,733
- gr_libs/ml/tabular/tabular_q_learner.py,sha256=5QU9ZWC-Cu5jxv5K1TohoRjQrRDhCgTs1Mt18cqM_Sc,18970
- gr_libs/ml/tabular/tabular_rl_agent.py,sha256=7w8PYbKi8QgxHJyECWU_rURtT89spg0tHIMI1cDwYc8,3764
- gr_libs/ml/utils/__init__.py,sha256=qH3pcnem5Z6rkQ4RTZi47AXJRe1RkFEST_-DrBmfWcM,258
- gr_libs/ml/utils/env.py,sha256=AWVN0OXYmFU-J3FUiwvEAIY93Suf1oL6VNcxtyWJraM,171
- gr_libs/ml/utils/format.py,sha256=nu7RzVwn_raG_fqqmnqlJgUjtA0yzKztkB3a5QZnRYo,3071
- gr_libs/ml/utils/math.py,sha256=n62zssVOLHnUb4dPofAoFhoLOKl5n_xBzaKQOUQBoNc,440
- gr_libs/ml/utils/other.py,sha256=HKUfeLBbd4DgJxSTs3ya9KQ85Acx4TjycRrtGD9WQ3s,505
- gr_libs/ml/utils/storage.py,sha256=52wR2pgFmcCOhqbu5Km3tegjAmtI1Fb4HYAVUnUubZk,4626
- gr_libs/problems/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- gr_libs/problems/consts.py,sha256=ON7yfKTAKETg7i3okDYuOzEU7KWvynyubl0m7TlU6Hs,38808
- gr_libs/recognizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- gr_libs/recognizer/recognizer.py,sha256=ZrApJVdBQxKRYhhDiWLCNGmlxgi674nwgb30BgVggC8,1705
- gr_libs/recognizer/gr_as_rl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py,sha256=84GdfohC2dZoNH_QEo7GpSt8nZWdfqSRKCTY99X_iME,5215
- gr_libs/recognizer/graml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- gr_libs/recognizer/graml/gr_dataset.py,sha256=lG6m3ulxFELpH1oURnlcmNDWOrxyuzvlAR28ZTqB7L8,7224
- gr_libs/recognizer/graml/graml_recognizer.py,sha256=1xLl1gHj3JxWhHtV9h3SvsW7oJdxsQQV0F-VLtlTmKQ,15911
- gr_libs/recognizer/utils/__init__.py,sha256=ewSroxL7aATvvm-Xzc1_-61mP2LU2U28YaOEqvVVDB0,41
- gr_libs/recognizer/utils/format.py,sha256=e0AnqtPeYoJsV9Z7cEBpgbzTM0hLNxFIjn07fQ3YbQw,492
- tests/test_graml.py,sha256=ZJB2jqtf4Q2-KZredkJq90teqmHBIvigCAQpvR5G110,559
- tests/test_graql.py,sha256=-onMi13e2wStOmB5bYv2f3Ita3QFFiw416XMBkby0OI,141
- tutorials/graml_minigrid_tutorial.py,sha256=ONvxFi79R7d8dcd6gy083Z_yy9A2flhGTDIDRxurdx8,1782
- tutorials/graml_panda_tutorial.py,sha256=wtv_lsw0vsU7j45GKeWecTfE7jzfh4iVGEVnQyaWthM,2063
- tutorials/graml_parking_tutorial.py,sha256=M6bt1WQOOgn8_CRyG2kjxF14PMeyXVAWRDq1ZRwGTXo,1808
- tutorials/graml_point_maze_tutorial.py,sha256=mYq3IxYbf9jidq-4VdT3MdStV80Q5lytFv6Xzzn22Ys,1835
- tutorials/graql_minigrid_tutorial.py,sha256=Jb0TCUhiZQkFeafJWUTPnCISd4FKfPrqP-xfHiqCGKE,1635
- gr_libs-0.1.7.post0.dist-info/METADATA,sha256=aS7y9Nl1JErXYdpAHstuQP_W1QMcKMbGet6IfxfJ_Do,9620
- gr_libs-0.1.7.post0.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
- gr_libs-0.1.7.post0.dist-info/top_level.txt,sha256=fJQF8Q8Dfh_D3pA2mhNodazNjzW6b3oWfnx6Jdo-pBU,35
- gr_libs-0.1.7.post0.dist-info/RECORD,,
gr_libs-0.1.7.post0.dist-info/top_level.txt
@@ -1,4 +0,0 @@
- evaluation
- gr_libs
- tests
- tutorials
tutorials/graml_minigrid_tutorial.py
@@ -1,34 +0,0 @@
- from gr_libs.environment.environment import MINIGRID, QLEARNING
- from gr_libs.metrics.metrics import stochastic_amplified_selection
- from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
- from gr_libs.ml.utils.format import random_subset_with_order
- from gr_libs import ExpertBasedGraml
-
- def run_graml_minigrid_tutorial():
-     recognizer = ExpertBasedGraml(
-         domain_name=MINIGRID,
-         env_name="MiniGrid-SimpleCrossingS13N4"
-     )
-
-     recognizer.domain_learning_phase(base_goals=[(11,1), (11,11), (1,11), (7,11), (8,1), (10,6), (6,9), (11,3), (11,5)],
-                                      train_configs=[(QLEARNING, 100000) for _ in range(9)])
-
-     recognizer.goals_adaptation_phase(
-         dynamic_goals = [(11,1), (11,11), (1,11)],
-         dynamic_train_configs=[(QLEARNING, 100000) for _ in range(3)] # for expert sequence generation.
-     )
-     # TD3 is different from recognizer and expert algorithms, which are SAC #
-     actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
-     actor.learn()
-     # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
-     full_sequence = actor.generate_observation(
-         action_selection_method=stochastic_amplified_selection,
-         random_optimalism=True, # the noise that's added to the actions
-     )
-
-     partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
-     closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
-     print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
-
- if __name__ == "__main__":
-     run_graml_minigrid_tutorial()
tutorials/graml_panda_tutorial.py
@@ -1,41 +0,0 @@
-
- import numpy as np
- from stable_baselines3 import PPO, SAC
- import gr_libs.environment.environment
- from gr_libs.environment.environment import PANDA, EnvProperty, GCEnvProperty, PandaProperty
- from gr_libs.environment.utils.utils import domain_to_env_property
- from gr_libs.metrics.metrics import stochastic_amplified_selection
- from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
- from gr_libs.ml.utils.format import random_subset_with_order
- from gr_libs import GCGraml
-
- def run_graml_panda_tutorial():
-     recognizer = GCGraml( # TODO make these tutorials into pytests
-         domain_name=PANDA,
-         env_name="PandaMyReachDense"
-     )
-     recognizer.domain_learning_phase(
-         base_goals=[np.array([PandaProperty.sample_goal()]) for _ in range(1,30)],
-         train_configs=[(SAC, 800000)]
-     )
-     recognizer.goals_adaptation_phase(
-         dynamic_goals=[np.array([[-0.1, -0.1, 0.1]]), np.array([[-0.1, 0.1, 0.1]]), np.array([[0.2, 0.2, 0.1]])]
-     )
-     # TD3 is different from recognizer and expert algorithms, which are SAC #
-     property_type = domain_to_env_property(PANDA)
-     env_property = property_type("PandaMyReachDense")
-     problem_name = env_property.goal_to_problem_str(np.array([[-0.1, -0.1, 0.1]]))
-     actor = DeepRLAgent(domain_name=PANDA, problem_name=problem_name, algorithm=PPO, num_timesteps=400000)
-     actor.learn()
-     # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
-     full_sequence = actor.generate_observation(
-         action_selection_method=stochastic_amplified_selection,
-         random_optimalism=True, # the noise that's added to the actions
-     )
-
-     partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
-     closest_goal = recognizer.inference_phase(partial_sequence, np.array([[-0.1, -0.1, 0.1]]), 0.5)
-     print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: [-0.1, -0.1, 0.1]")
-
- if __name__ == "__main__":
-     run_graml_panda_tutorial()
tutorials/graml_parking_tutorial.py
@@ -1,39 +0,0 @@
-
- from stable_baselines3 import PPO, SAC, TD3
- from gr_libs.environment.environment import PARKING, EnvProperty, GCEnvProperty, ParkingProperty
- from gr_libs.metrics.metrics import stochastic_amplified_selection
- from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
- from gr_libs.ml.utils.format import random_subset_with_order
- from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml, GCGraml
- import gr_libs.environment.environment
-
- def run_graml_parking_tutorial():
-     recognizer = GCGraml(
-         domain_name=PARKING,
-         env_name="Parking-S-14-PC-"
-     )
-
-     recognizer.domain_learning_phase(
-         [i for i in range(1,21)],
-         [(PPO, 200000)]
-     )
-     recognizer.goals_adaptation_phase(
-         dynamic_goals = ["1", "11", "21"]
-         # no need for expert sequence generation since GCRL is used
-     )
-
-     # TD3 is different from recognizer and expert algorithms, which are SAC #
-     actor = DeepRLAgent(domain_name="parking", problem_name="Parking-S-14-PC--GI-11-v0", algorithm=TD3, num_timesteps=400000)
-     actor.learn()
-     # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
-     full_sequence = actor.generate_observation(
-         action_selection_method=stochastic_amplified_selection,
-         random_optimalism=True, # the noise that's added to the actions
-     )
-
-     partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
-     closest_goal = recognizer.inference_phase(partial_sequence, ParkingProperty("Parking-S-14-PC--GI-11-v0").str_to_goal(), 0.5)
-     print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: 11")
-
- if __name__ == "__main__":
-     run_graml_parking_tutorial()
tutorials/graml_point_maze_tutorial.py
@@ -1,39 +0,0 @@
-
- from stable_baselines3 import SAC, TD3
- from gr_libs.environment.environment import POINT_MAZE, PointMazeProperty
- from gr_libs.metrics.metrics import stochastic_amplified_selection
- from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
- from gr_libs.ml.utils.format import random_subset_with_order
- from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml
-
- def run_graml_point_maze_tutorial():
-     recognizer = ExpertBasedGraml(
-         domain_name=POINT_MAZE,
-         env_name="PointMaze-FourRoomsEnvDense-11x11"
-     )
-
-     recognizer.domain_learning_phase(
-         [(9,1), (9,9), (1,9), (3,3), (3,4), (8,2), (3,7), (2,8)],
-         [(SAC, 200000) for _ in range(8)]
-     )
-
-     recognizer.goals_adaptation_phase(
-         dynamic_goals = [(4,4), (7,3), (3,7)],
-         dynamic_train_configs=[(SAC, 200000) for _ in range(3)] # for expert sequence generation.
-     )
-
-     # TD3 is different from recognizer and expert algorithms, which are SAC #
-     actor = DeepRLAgent(domain_name="point_maze", problem_name="PointMaze-FourRoomsEnvDense-11x11-Goal-4x4", algorithm=TD3, num_timesteps=200000)
-     actor.learn()
-     # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
-     full_sequence = actor.generate_observation(
-         action_selection_method=stochastic_amplified_selection,
-         random_optimalism=True, # the noise that's added to the actions
-     )
-
-     partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)))
-     closest_goal = recognizer.inference_phase(partial_sequence, PointMazeProperty("PointMaze-FourRoomsEnvDense-11x11-Goal-4x4").str_to_goal(), 0.5)
-     print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (4, 4)")
-
- if __name__ == "__main__":
-     run_graml_point_maze_tutorial()
tutorials/graql_minigrid_tutorial.py
@@ -1,34 +0,0 @@
- from gr_libs.environment.environment import QLEARNING
- from gr_libs.metrics.metrics import stochastic_amplified_selection
- from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
- from gr_libs.ml.utils.format import random_subset_with_order
- from gr_libs import Graql
-
- def run_graql_minigrid_tutorial():
-     recognizer = Graql(
-         domain_name="minigrid",
-         env_name="MiniGrid-SimpleCrossingS13N4"
-     )
-
-     #Graql doesn't have a domain learning phase, so we skip it
-
-     recognizer.goals_adaptation_phase(
-         dynamic_goals = [(11,1), (11,11), (1,11)],
-         dynamic_train_configs=[(QLEARNING, 100000) for _ in range(3)] # for expert sequence generation.
-     )
-     # TD3 is different from recognizer and expert algorithms, which are SAC #
-     actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
-     actor.learn()
-     # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
-     full_sequence = actor.generate_observation(
-         action_selection_method=stochastic_amplified_selection,
-         random_optimalism=True, # the noise that's added to the actions
-     )
-
-     partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
-     closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
-     print(f"closest_goal returned by Graql: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
-     return closest_goal, (11,1)
-
- if __name__ == "__main__":
-     run_graql_minigrid_tutorial()
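Unlike the GRAML tutorials above, run_graql_minigrid_tutorial returns both the predicted and the actual goal, which makes it convenient to test. A hypothetical pytest-style check, assuming the relocated 0.2.2 module (gr_libs/tutorials/graql_minigrid_tutorial.py) keeps the same function name and return value:

from gr_libs.tutorials.graql_minigrid_tutorial import run_graql_minigrid_tutorial

def test_graql_minigrid():
    predicted_goal, actual_goal = run_graql_minigrid_tutorial()
    assert predicted_goal == actual_goal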
gr_libs/environment/{utils → _utils}/__init__.py: renamed, file without changes