gr-libs 0.1.8__tar.gz → 0.2.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gr_libs-0.1.8 → gr_libs-0.2.5}/PKG-INFO +95 -29
- {gr_libs-0.1.8 → gr_libs-0.2.5}/README.md +94 -28
- {gr_libs-0.1.8 → gr_libs-0.2.5}/gr_libs/__init__.py +3 -1
- {gr_libs-0.1.8 → gr_libs-0.2.5}/gr_libs/_version.py +2 -2
- gr_libs-0.2.5/gr_libs/all_experiments.py +260 -0
- {gr_libs-0.1.8 → gr_libs-0.2.5}/gr_libs/environment/__init__.py +14 -1
- {gr_libs-0.1.8/gr_libs/environment/utils → gr_libs-0.2.5/gr_libs/environment/_utils}/utils.py +1 -1
- {gr_libs-0.1.8 → gr_libs-0.2.5}/gr_libs/environment/environment.py +278 -23
- gr_libs-0.2.5/gr_libs/evaluation/__init__.py +1 -0
- gr_libs-0.2.5/gr_libs/evaluation/generate_experiments_results.py +100 -0
- {gr_libs-0.1.8 → gr_libs-0.2.5}/gr_libs/metrics/__init__.py +2 -0
- {gr_libs-0.1.8 → gr_libs-0.2.5}/gr_libs/metrics/metrics.py +166 -31
- gr_libs-0.2.5/gr_libs/ml/__init__.py +3 -0
- gr_libs-0.2.5/gr_libs/ml/base/__init__.py +3 -0
- gr_libs-0.2.5/gr_libs/ml/base/rl_agent.py +122 -0
- gr_libs-0.2.5/gr_libs/ml/neural/__init__.py +1 -0
- {gr_libs-0.1.8 → gr_libs-0.2.5}/gr_libs/ml/neural/deep_rl_learner.py +241 -84
- gr_libs-0.2.5/gr_libs/ml/neural/utils/__init__.py +1 -0
- {gr_libs-0.1.8/gr_libs/ml/planner/mcts/utils → gr_libs-0.2.5/gr_libs/ml/planner/mcts/_utils}/tree.py +1 -1
- {gr_libs-0.1.8 → gr_libs-0.2.5}/gr_libs/ml/planner/mcts/mcts_model.py +71 -34
- gr_libs-0.1.8/gr_libs/ml/sequential/lstm_model.py → gr_libs-0.2.5/gr_libs/ml/sequential/_lstm_model.py +11 -14
- gr_libs-0.2.5/gr_libs/ml/tabular/__init__.py +1 -0
- {gr_libs-0.1.8 → gr_libs-0.2.5}/gr_libs/ml/tabular/tabular_q_learner.py +27 -9
- {gr_libs-0.1.8 → gr_libs-0.2.5}/gr_libs/ml/tabular/tabular_rl_agent.py +22 -9
- gr_libs-0.2.5/gr_libs/ml/utils/__init__.py +5 -0
- gr_libs-0.2.5/gr_libs/ml/utils/format.py +31 -0
- {gr_libs-0.1.8 → gr_libs-0.2.5}/gr_libs/ml/utils/math.py +3 -2
- gr_libs-0.2.5/gr_libs/ml/utils/storage.py +141 -0
- gr_libs-0.2.5/gr_libs/odgr_executor.py +263 -0
- {gr_libs-0.1.8 → gr_libs-0.2.5}/gr_libs/problems/consts.py +570 -292
- {gr_libs-0.1.8/gr_libs/recognizer/utils → gr_libs-0.2.5/gr_libs/recognizer/_utils}/format.py +2 -2
- gr_libs-0.2.5/gr_libs/recognizer/gr_as_rl/__init__.py +0 -0
- {gr_libs-0.1.8 → gr_libs-0.2.5}/gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +127 -36
- gr_libs-0.2.5/gr_libs/recognizer/graml/__init__.py +0 -0
- gr_libs-0.1.8/gr_libs/recognizer/graml/gr_dataset.py → gr_libs-0.2.5/gr_libs/recognizer/graml/_gr_dataset.py +11 -11
- {gr_libs-0.1.8 → gr_libs-0.2.5}/gr_libs/recognizer/graml/graml_recognizer.py +186 -35
- gr_libs-0.2.5/gr_libs/recognizer/recognizer.py +105 -0
- gr_libs-0.2.5/gr_libs/tutorials/draco_panda_tutorial.py +58 -0
- gr_libs-0.2.5/gr_libs/tutorials/draco_parking_tutorial.py +56 -0
- {gr_libs-0.1.8 → gr_libs-0.2.5/gr_libs}/tutorials/gcdraco_panda_tutorial.py +11 -11
- {gr_libs-0.1.8 → gr_libs-0.2.5/gr_libs}/tutorials/gcdraco_parking_tutorial.py +6 -8
- {gr_libs-0.1.8 → gr_libs-0.2.5/gr_libs}/tutorials/graml_minigrid_tutorial.py +18 -14
- {gr_libs-0.1.8 → gr_libs-0.2.5/gr_libs}/tutorials/graml_panda_tutorial.py +11 -12
- {gr_libs-0.1.8 → gr_libs-0.2.5/gr_libs}/tutorials/graml_parking_tutorial.py +8 -10
- {gr_libs-0.1.8 → gr_libs-0.2.5/gr_libs}/tutorials/graml_point_maze_tutorial.py +17 -3
- {gr_libs-0.1.8 → gr_libs-0.2.5/gr_libs}/tutorials/graql_minigrid_tutorial.py +2 -2
- {gr_libs-0.1.8 → gr_libs-0.2.5}/gr_libs.egg-info/PKG-INFO +95 -29
- {gr_libs-0.1.8 → gr_libs-0.2.5}/gr_libs.egg-info/SOURCES.txt +28 -26
- {gr_libs-0.1.8 → gr_libs-0.2.5}/gr_libs.egg-info/top_level.txt +1 -2
- gr_libs-0.2.5/tests/test_draco.py +14 -0
- gr_libs-0.2.5/tests/test_gcdraco.py +10 -0
- gr_libs-0.2.5/tests/test_graml.py +20 -0
- gr_libs-0.2.5/tests/test_graql.py +5 -0
- gr_libs-0.2.5/tests/test_odgr_executor_expertbasedgraml.py +14 -0
- gr_libs-0.2.5/tests/test_odgr_executor_gcdraco.py +14 -0
- gr_libs-0.2.5/tests/test_odgr_executor_gcgraml.py +14 -0
- gr_libs-0.2.5/tests/test_odgr_executor_graql.py +14 -0
- gr_libs-0.1.8/evaluation/analyze_results_cross_alg_cross_domain.py +0 -267
- gr_libs-0.1.8/evaluation/create_minigrid_map_image.py +0 -38
- gr_libs-0.1.8/evaluation/file_system.py +0 -53
- gr_libs-0.1.8/evaluation/generate_experiments_results.py +0 -141
- gr_libs-0.1.8/evaluation/generate_experiments_results_new_ver1.py +0 -238
- gr_libs-0.1.8/evaluation/generate_experiments_results_new_ver2.py +0 -331
- gr_libs-0.1.8/evaluation/generate_task_specific_statistics_plots.py +0 -500
- gr_libs-0.1.8/evaluation/get_plans_images.py +0 -62
- gr_libs-0.1.8/evaluation/increasing_and_decreasing_.py +0 -104
- gr_libs-0.1.8/gr_libs/ml/__init__.py +0 -8
- gr_libs-0.1.8/gr_libs/ml/base/__init__.py +0 -1
- gr_libs-0.1.8/gr_libs/ml/base/rl_agent.py +0 -57
- gr_libs-0.1.8/gr_libs/ml/neural/__init__.py +0 -3
- gr_libs-0.1.8/gr_libs/ml/neural/utils/__init__.py +0 -2
- gr_libs-0.1.8/gr_libs/ml/neural/utils/penv.py +0 -60
- gr_libs-0.1.8/gr_libs/ml/sequential/__init__.py +0 -1
- gr_libs-0.1.8/gr_libs/ml/tabular/__init__.py +0 -3
- gr_libs-0.1.8/gr_libs/ml/utils/__init__.py +0 -12
- gr_libs-0.1.8/gr_libs/ml/utils/format.py +0 -108
- gr_libs-0.1.8/gr_libs/ml/utils/storage.py +0 -194
- gr_libs-0.1.8/gr_libs/recognizer/recognizer.py +0 -56
- gr_libs-0.1.8/tests/test_gcdraco.py +0 -10
- gr_libs-0.1.8/tests/test_graml.py +0 -20
- gr_libs-0.1.8/tests/test_graql.py +0 -5
- gr_libs-0.1.8/gr_libs/environment/utils/__init__.py → gr_libs-0.2.5/gr_libs/_evaluation/_generate_experiments_results.py +0 -0
- {gr_libs-0.1.8/gr_libs/ml/planner → gr_libs-0.2.5/gr_libs/environment/_utils}/__init__.py +0 -0
- {gr_libs-0.1.8 → gr_libs-0.2.5}/gr_libs/ml/agent.py +0 -0
- {gr_libs-0.1.8 → gr_libs-0.2.5}/gr_libs/ml/consts.py +0 -0
- {gr_libs-0.1.8 → gr_libs-0.2.5}/gr_libs/ml/neural/utils/dictlist.py +0 -0
- {gr_libs-0.1.8/gr_libs/ml/planner/mcts → gr_libs-0.2.5/gr_libs/ml/planner}/__init__.py +0 -0
- {gr_libs-0.1.8/gr_libs/problems → gr_libs-0.2.5/gr_libs/ml/planner/mcts}/__init__.py +0 -0
- {gr_libs-0.1.8/gr_libs/ml/planner/mcts/utils → gr_libs-0.2.5/gr_libs/ml/planner/mcts/_utils}/__init__.py +0 -0
- {gr_libs-0.1.8/gr_libs/ml/planner/mcts/utils → gr_libs-0.2.5/gr_libs/ml/planner/mcts/_utils}/node.py +0 -0
- {gr_libs-0.1.8/gr_libs/recognizer → gr_libs-0.2.5/gr_libs/ml/sequential}/__init__.py +0 -0
- {gr_libs-0.1.8 → gr_libs-0.2.5}/gr_libs/ml/tabular/state.py +0 -0
- {gr_libs-0.1.8 → gr_libs-0.2.5}/gr_libs/ml/utils/env.py +0 -0
- {gr_libs-0.1.8 → gr_libs-0.2.5}/gr_libs/ml/utils/other.py +2 -2
- {gr_libs-0.1.8/gr_libs/recognizer/gr_as_rl → gr_libs-0.2.5/gr_libs/problems}/__init__.py +0 -0
- {gr_libs-0.1.8/gr_libs/recognizer/graml → gr_libs-0.2.5/gr_libs/recognizer}/__init__.py +0 -0
- {gr_libs-0.1.8/gr_libs/recognizer/utils → gr_libs-0.2.5/gr_libs/recognizer/_utils}/__init__.py +0 -0
- {gr_libs-0.1.8 → gr_libs-0.2.5}/gr_libs.egg-info/dependency_links.txt +0 -0
- {gr_libs-0.1.8 → gr_libs-0.2.5}/gr_libs.egg-info/requires.txt +0 -0
- {gr_libs-0.1.8 → gr_libs-0.2.5}/pyproject.toml +0 -0
- {gr_libs-0.1.8 → gr_libs-0.2.5}/setup.cfg +0 -0

{gr_libs-0.1.8 → gr_libs-0.2.5}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: gr_libs
-Version: 0.1.8
+Version: 0.2.5
 Summary: Package with goal recognition frameworks baselines
 Author: Ben Nageris
 Author-email: Matan Shamir <matan.shamir@live.biu.ac.il>, Osher Elhadad <osher.elhadad@live.biu.ac.il>

@@ -106,6 +106,28 @@ If you prefer using Conda, follow these steps:
 
 For any issues or troubleshooting, please refer to the repository's issue tracker.
 
+## Supported Algorithms
+
+Successors of algorithms that don't differ in their specifics are added in parentheses after the algorithm name. For example, since GC-DRACO and DRACO share the same column values, they're written on one line as DRACO (GC).
+
+| **Algorithm** | **Supervised** | **Reinforcement Learning** | **Discrete States** | **Continuous States** | **Discrete Actions** | **Continuous Actions** | **Model-Based** | **Model-Free** | **Action-Only** | **Supported Environments** |
+|---------------------|----------------|---------------------------|---------------------|----------------------|----------------------|-----------------------|------------------|----------------|----------------|--------------------------------------------|
+| Graql | ❌ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | Minigrid |
+| Draco | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | PointMaze, Panda Reach, Parking |
+| GCDraco | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | Panda Reach, Parking |
+| ExpertBasedGraml | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | Panda Reach, Parking |
+| BGGraml | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | Minigrid, PointMaze |
+| GCGraml | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | Panda Reach, Parking |
+
+## Supported Domains
+
+| **Domain** | **Action Space** | **State Space** |
+|------------|----------------|----------------|
+| Minigrid | Discrete | Discrete |
+| PointMaze | Continuous | Continuous |
+| Parking | Continuous | Continuous |
+| Panda | Continuous | Continuous |
+
 ## Usage Guide
 
 After installing GRLib, you will have access to custom Gym environments, allowing you to set up and execute an Online Dynamic Goal Recognition (ODGR) scenario with the algorithm of your choice.

@@ -116,9 +138,10 @@ Tutorials demonstrating basic ODGR scenarios is available in the sub-package `tu
 gr_libs also includes a library of trained agents for the various supported environments within the package.
 To get the dataset of trained agents, you can run:
 ```sh
+pip install gdown
 python download_dataset.py
 ```
-
+Alternatively, you can visit the google-drive links where download_dataset.py points to and manually download the zipped folders, and unzip them into the project directory.
 An alternative is to use our docker image, which includes the dataset in it.
 You can:
 1. pull the image:

@@ -191,42 +214,85 @@ docker run -it ghcr.io/MatanShamir1/gr_test_base:latest bash
 
 The `consts.py` file contains predefined ODGR problem configurations. You can use existing configurations or define new ones.
 
-To execute
+To execute an ODGR problem using the configuration file, you specify a recognizer, a domain, a gym environment within that domain and the task:
 ```sh
-python odgr_executor.py --recognizer
+python odgr_executor.py --recognizer ExpertBasedGraml --domain minigrid --task L1 --env_name MiniGrid-SimpleCrossingS13N4
 ```
 
-
+If you also add the flag:
+```sh
+--collect_stats
+```
+to the cmd, 3 kinds of outputs will be generated from the ODGR problem's execution:
+a. Into:
+```sh
+outputs\\minigrid\MiniGrid-SimpleCrossingS13N4\MiniGrid-SimpleCrossingS13N4\L1\experiment_results
+```
+a .pkl and a .txt summary in a dictionary format will be generated, including the summary of all ODGR executions, including runtime and overall accuracies for all lengths and types of input sequences.
 
-
+b. Into:
+```sh
+outputs\ExpertBasedGraml\minigrid\MiniGrid-SimpleCrossingS13N4\policy_sequences\MiniGrid-SimpleCrossingS13N4-DynamicGoal-1x11-v0_inference_seq/plan_image.png
+```
+a visulzation of the sequence the agent generated will be dumped, either in a png or an mp4 format, depending on the domain, for debugability.
 
-
-
-
-
-
+c. Into:
+either:
+```sh
+outputs\ExpertBasedGraml\minigrid\MiniGrid-SimpleCrossingS13N4\goal_embeddings
+```
+In Graml algorithms, or:
+```sh
+outputs\Graql\minigrid\MiniGrid-SimpleCrossingS13N4\confidence
+```
+In GRAsRL algorithms,
+pickled results from which confidence of the results can be obtained, for offline analysis.
 
-
+For GRAsRL outputs, for every possible goal, the likelihood of it being the true goal from the input sequence, based on the policy distance metric.
 
-
-|------------|----------------|----------------|
-| Minigrid | Discrete | Discrete |
-| PointMaze | Continuous | Continuous |
-| Parking | Continuous | Continuous |
-| Panda | Continuous | Continuous |
+For GRAML outputs, the embeddings of the sequences are pickled for every goal-directed sequence. Offline, since, since in the embdding space of GRAML's metric model- sequences towards the same sequences are close and vice versa, one could reproduce the most likely goal by measuring the elementwise vector distance of the embeddings, and retrieve a confidence of it.
 
 ## Running Experiments
 
-
+In light of the previous section, the user should already know how to scale the experiments using odgr_executor, and they should also understand how to use the 3 types of outputs for offline analysis of the algorithms.
+gr_libs also provides another scaling method to run odgr_executor on multiple domains and environments, for many ODGR problems, as well as python scripts for analysis of these results, to create plots and statistics over the executions.
 
-
-
-
-
+### Scaling odgr_executor runs
+A part of the contribution of this package is standardizing the evaluations of MDP-based GR frameworks.
+consts.py provides a set of ODGR problems on which the framework can be evaluated.
+The 'evaluations' sub-package provides scripts to analyze the results of the all_experiments.py execution, done over the ODGR the problems defined at consts.py.
 
-
-
-
-
-
-
+#### Running all_experiments.py
+
+You can now run `all_experiments.py` with your desired combination of domains, environments, tasks, and recognizers directly from the command line, without editing the script:
+
+```sh
+python gr_libs/all_experiments.py \
+--domains minigrid parking \
+--envs MiniGrid-SimpleCrossingS13N4 Parking-S-14-PC- \
+--tasks L1 L2 L3 L4 L5 \
+--recognizers ExpertBasedGraml Graql \
+--n 5
+```
+
+- `--domains`: List of domains to run experiments on.
+- `--envs`: List of environments (must be in the same order as domains).
+- `--tasks`: List of tasks (applied to all domain/env pairs).
+- `--recognizers`: List of recognizers/algorithms to evaluate.
+- `--n`: Number of times to execute each task (default: 5).
+
+This script uses multiprocessing to simultaneously execute many `odgr_executor.py` runs as child processes. It logs failures and successful executions for debugability.
+
+After execution, summary files are generated in `outputs/summaries/` for further analysis and plotting.
+
+### Using analysis scripts
+The repository provides benchmark domains and scripts for analyzing experimental results. The `evaluation` directory contains tools for processing and visualizing the results from odgr_executor.py and all_experiments.py.
+Please follow the README.md file in the 'evaluation' directory for more details.
+
+## For Developers
+Developers will need to work slightly different: instead of installing the packages, they need to clone the repos and either install them as editables or add their paths to PYTHONPATH so they will function as packages effectively.
+Additional packages to install as a developer:
+```sh
+pip install pre-commit
+pre-commit install
+```

{gr_libs-0.1.8 → gr_libs-0.2.5}/README.md

@@ -77,6 +77,28 @@ If you prefer using Conda, follow these steps:
 
 For any issues or troubleshooting, please refer to the repository's issue tracker.
 
+## Supported Algorithms
+
+Successors of algorithms that don't differ in their specifics are added in parentheses after the algorithm name. For example, since GC-DRACO and DRACO share the same column values, they're written on one line as DRACO (GC).
+
+| **Algorithm** | **Supervised** | **Reinforcement Learning** | **Discrete States** | **Continuous States** | **Discrete Actions** | **Continuous Actions** | **Model-Based** | **Model-Free** | **Action-Only** | **Supported Environments** |
+|---------------------|----------------|---------------------------|---------------------|----------------------|----------------------|-----------------------|------------------|----------------|----------------|--------------------------------------------|
+| Graql | ❌ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | Minigrid |
+| Draco | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | PointMaze, Panda Reach, Parking |
+| GCDraco | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | Panda Reach, Parking |
+| ExpertBasedGraml | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | Panda Reach, Parking |
+| BGGraml | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | Minigrid, PointMaze |
+| GCGraml | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | Panda Reach, Parking |
+
+## Supported Domains
+
+| **Domain** | **Action Space** | **State Space** |
+|------------|----------------|----------------|
+| Minigrid | Discrete | Discrete |
+| PointMaze | Continuous | Continuous |
+| Parking | Continuous | Continuous |
+| Panda | Continuous | Continuous |
+
 ## Usage Guide
 
 After installing GRLib, you will have access to custom Gym environments, allowing you to set up and execute an Online Dynamic Goal Recognition (ODGR) scenario with the algorithm of your choice.

@@ -87,9 +109,10 @@ Tutorials demonstrating basic ODGR scenarios is available in the sub-package `tu
 gr_libs also includes a library of trained agents for the various supported environments within the package.
 To get the dataset of trained agents, you can run:
 ```sh
+pip install gdown
 python download_dataset.py
 ```
-
+Alternatively, you can visit the google-drive links where download_dataset.py points to and manually download the zipped folders, and unzip them into the project directory.
 An alternative is to use our docker image, which includes the dataset in it.
 You can:
 1. pull the image:

@@ -162,42 +185,85 @@ docker run -it ghcr.io/MatanShamir1/gr_test_base:latest bash
 
 The `consts.py` file contains predefined ODGR problem configurations. You can use existing configurations or define new ones.
 
-To execute
+To execute an ODGR problem using the configuration file, you specify a recognizer, a domain, a gym environment within that domain and the task:
 ```sh
-python odgr_executor.py --recognizer
+python odgr_executor.py --recognizer ExpertBasedGraml --domain minigrid --task L1 --env_name MiniGrid-SimpleCrossingS13N4
 ```
 
-
+If you also add the flag:
+```sh
+--collect_stats
+```
+to the cmd, 3 kinds of outputs will be generated from the ODGR problem's execution:
+a. Into:
+```sh
+outputs\\minigrid\MiniGrid-SimpleCrossingS13N4\MiniGrid-SimpleCrossingS13N4\L1\experiment_results
+```
+a .pkl and a .txt summary in a dictionary format will be generated, including the summary of all ODGR executions, including runtime and overall accuracies for all lengths and types of input sequences.
 
-
+b. Into:
+```sh
+outputs\ExpertBasedGraml\minigrid\MiniGrid-SimpleCrossingS13N4\policy_sequences\MiniGrid-SimpleCrossingS13N4-DynamicGoal-1x11-v0_inference_seq/plan_image.png
+```
+a visulzation of the sequence the agent generated will be dumped, either in a png or an mp4 format, depending on the domain, for debugability.
 
-
-
-
-
-
+c. Into:
+either:
+```sh
+outputs\ExpertBasedGraml\minigrid\MiniGrid-SimpleCrossingS13N4\goal_embeddings
+```
+In Graml algorithms, or:
+```sh
+outputs\Graql\minigrid\MiniGrid-SimpleCrossingS13N4\confidence
+```
+In GRAsRL algorithms,
+pickled results from which confidence of the results can be obtained, for offline analysis.
 
-
+For GRAsRL outputs, for every possible goal, the likelihood of it being the true goal from the input sequence, based on the policy distance metric.
 
-
-|------------|----------------|----------------|
-| Minigrid | Discrete | Discrete |
-| PointMaze | Continuous | Continuous |
-| Parking | Continuous | Continuous |
-| Panda | Continuous | Continuous |
+For GRAML outputs, the embeddings of the sequences are pickled for every goal-directed sequence. Offline, since, since in the embdding space of GRAML's metric model- sequences towards the same sequences are close and vice versa, one could reproduce the most likely goal by measuring the elementwise vector distance of the embeddings, and retrieve a confidence of it.
 
 ## Running Experiments
 
-
+In light of the previous section, the user should already know how to scale the experiments using odgr_executor, and they should also understand how to use the 3 types of outputs for offline analysis of the algorithms.
+gr_libs also provides another scaling method to run odgr_executor on multiple domains and environments, for many ODGR problems, as well as python scripts for analysis of these results, to create plots and statistics over the executions.
 
-
-
-
-
+### Scaling odgr_executor runs
+A part of the contribution of this package is standardizing the evaluations of MDP-based GR frameworks.
+consts.py provides a set of ODGR problems on which the framework can be evaluated.
+The 'evaluations' sub-package provides scripts to analyze the results of the all_experiments.py execution, done over the ODGR the problems defined at consts.py.
 
-
-
-
-
-
-
+#### Running all_experiments.py
+
+You can now run `all_experiments.py` with your desired combination of domains, environments, tasks, and recognizers directly from the command line, without editing the script:
+
+```sh
+python gr_libs/all_experiments.py \
+--domains minigrid parking \
+--envs MiniGrid-SimpleCrossingS13N4 Parking-S-14-PC- \
+--tasks L1 L2 L3 L4 L5 \
+--recognizers ExpertBasedGraml Graql \
+--n 5
+```
+
+- `--domains`: List of domains to run experiments on.
+- `--envs`: List of environments (must be in the same order as domains).
+- `--tasks`: List of tasks (applied to all domain/env pairs).
+- `--recognizers`: List of recognizers/algorithms to evaluate.
+- `--n`: Number of times to execute each task (default: 5).
+
+This script uses multiprocessing to simultaneously execute many `odgr_executor.py` runs as child processes. It logs failures and successful executions for debugability.
+
+After execution, summary files are generated in `outputs/summaries/` for further analysis and plotting.
+
+### Using analysis scripts
+The repository provides benchmark domains and scripts for analyzing experimental results. The `evaluation` directory contains tools for processing and visualizing the results from odgr_executor.py and all_experiments.py.
+Please follow the README.md file in the 'evaluation' directory for more details.
+
+## For Developers
+Developers will need to work slightly different: instead of installing the packages, they need to clone the repos and either install them as editables or add their paths to PYTHONPATH so they will function as packages effectively.
+Additional packages to install as a developer:
+```sh
+pip install pre-commit
+pre-commit install
+```
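
For the GRAML `goal_embeddings` output mentioned above, the exact pickle layout is not specified anywhere in this diff, so the following is only an illustrative sketch of the distance-based argument quoted in the README: it assumes the embeddings can be reduced to a mapping from goal name to a vector plus one vector for the observed sequence, and every name and value in it is hypothetical.

```python
# Illustrative only: assumes goal embeddings reduce to {goal_name: vector}
# plus a vector for the observed (partial) sequence. Neither the keys nor
# the vectors below come from the package; they are toy values.
import numpy as np


def rank_goals_by_embedding_distance(observed_embedding, goal_embeddings):
    """Sort goals from most to least likely: closer in embedding space
    means more likely, following the README's distance argument."""
    observed = np.asarray(observed_embedding, dtype=float)
    distances = {
        goal: float(np.linalg.norm(np.asarray(emb, dtype=float) - observed))
        for goal, emb in goal_embeddings.items()
    }
    return sorted(distances.items(), key=lambda item: item[1])


# Toy usage with made-up goal names and two-dimensional embeddings:
print(
    rank_goals_by_embedding_distance(
        observed_embedding=[0.1, 0.9],
        goal_embeddings={"goal_A": [0.2, 0.8], "goal_B": [0.9, 0.1]},
    )
)
```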

{gr_libs-0.1.8 → gr_libs-0.2.5}/gr_libs/__init__.py

@@ -1,5 +1,7 @@
+"""gr_libs: Baselines for goal recognition executions on gym environments."""
+
+from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Draco, GCDraco, Graql
 from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml, GCGraml
-from gr_libs.recognizer.gr_as_rl.gr_as_rl_recognizer import Graql, Draco, GCDraco
 
 try:
     from ._version import version as __version__
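
The reordered `gr_libs/__init__.py` above keeps the recognizer classes importable from the package root and adds a module docstring. A small check based only on the imports shown in that hunk (constructor arguments are intentionally not shown, since they are not part of this diff):

```python
# Based only on the __init__.py hunk above: the recognizer classes are bound
# in the gr_libs namespace, and __version__ comes from gr_libs._version
# when that module is present (it is listed in this diff).
import gr_libs
from gr_libs import Draco, ExpertBasedGraml, GCDraco, GCGraml, Graql

print(gr_libs.__version__)  # expected to report 0.2.5 for this release
print([cls.__name__ for cls in (Draco, GCDraco, Graql, ExpertBasedGraml, GCGraml)])
```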

gr_libs-0.2.5/gr_libs/all_experiments.py (new file)

@@ -0,0 +1,260 @@
+""" executes odgr_executor parallely on a set of problems defined in consts.py """
+
+import argparse
+import concurrent.futures
+import os
+import subprocess
+import sys
+
+import dill
+import numpy as np
+
+from gr_libs.ml.utils.storage import get_experiment_results_path
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--domains", nargs="+", required=True, help="List of domains")
+parser.add_argument(
+    "--envs",
+    nargs="+",
+    required=True,
+    help="List of environments (same order as domains)",
+)
+parser.add_argument(
+    "--tasks", nargs="+", required=True, help="List of tasks (e.g. L1 L2 L3 L4 L5)"
+)
+parser.add_argument(
+    "--recognizers", nargs="+", required=True, help="List of recognizers"
+)
+parser.add_argument(
+    "--n", type=int, default=5, help="Number of times to execute each task"
+)
+args = parser.parse_args()
+
+# Build configs dynamically
+configs = {}
+for domain, env in zip(args.domains, args.envs):
+    configs.setdefault(domain, {})
+    configs[domain][env] = args.tasks
+
+recognizers = args.recognizers
+n = args.n
+
+
+# Function to read results from the result file
+def read_results(res_file_path):
+    """
+    Read the results from a result file.
+
+    Args:
+        res_file_path (str): The path to the result file.
+
+    Returns:
+        The results read from the file.
+    """
+    with open(res_file_path, "rb") as f:
+        results = dill.load(f)
+    return results
+
+
+# Every thread worker executes this function.
+def run_experiment(domain, env, task, recognizer, i, generate_new=False):
+    """
+    Run an experiment.
+
+    Args:
+        domain (str): The domain of the experiment.
+        env (str): The environment of the experiment.
+        task (str): The task of the experiment.
+        recognizer (str): The recognizer used in the experiment.
+        i (int): The index of the experiment.
+        generate_new (bool, optional): Whether to generate new results.
+            Defaults to False.
+
+    Returns:
+        tuple: A tuple containing the experiment details and the results.
+    """
+    cmd = f"python gr_libs/odgr_executor.py --domain {domain} --recognizer {recognizer} --env_name {env} --task {task} --collect_stats --experiment_num {i}"
+    try:
+        res_file_path = get_experiment_results_path(domain, env, task, recognizer)
+        i_res_file_path_pkl = os.path.join(res_file_path, f"res_{i}.pkl")
+        i_res_file_path_txt = os.path.join(res_file_path, f"res_{i}.txt")
+        if generate_new or (
+            not os.path.exists(i_res_file_path_txt)
+            or not os.path.exists(i_res_file_path_pkl)
+        ):
+            process = subprocess.Popen(
+                cmd,
+                shell=True,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                text=True,
+            )
+            stdout, stderr = process.communicate()
+            if process.returncode != 0:
+                print(f"Execution failed: {cmd}\nSTDOUT:\n{stdout}\nSTDERR:\n{stderr}")
+                return None
+            else:
+                print(f"Finished execution successfully: {cmd}")
+        else:
+            print(
+                f"File {i_res_file_path_txt} already exists. Skipping execution of {cmd}"
+            )
+        return ((domain, env, task, recognizer), read_results(i_res_file_path_pkl))
+    except Exception as e:
+        print(f"Exception occurred while running experiment: {e}")
+        return None
+
+
+# Collect results
+results = {}
+
+# create an executor that manages a pool of threads.
+# Note that any failure in the threads will not stop the main thread
+# from continuing and vice versa, nor will the debugger view the
+# failure if in debug mode.
+# Use prints and if any thread's printing stops suspect failure.
+# If failure happened, use breakpoints before failure and use the
+# watch to see the failure by pasting the problematic piece of code.
+with concurrent.futures.ThreadPoolExecutor() as executor:
+    futures = []
+    for domain, envs in configs.items():
+        for env, tasks in envs.items():
+            for task in tasks:
+                for recognizer in recognizers:
+                    for i in range(n):
+                        futures.append(
+                            executor.submit(
+                                run_experiment,
+                                domain,
+                                env,
+                                task,
+                                recognizer,
+                                i,
+                                generate_new=(
+                                    True
+                                    if len(sys.argv) > 1
+                                    and sys.argv[1] == "--generate_new"
+                                    else False
+                                ),
+                            )
+                        )
+
+    for future in concurrent.futures.as_completed(futures):
+        if future.result() is None:
+            print(
+                f"for future {future}, future.result() is None. \
+                    Continuing to next future."
+            )
+            continue
+        key, result = future.result()
+        print(f"main thread reading results from future {key}")
+        if key not in results:
+            results[key] = []
+        results[key].append(result)
+
+# Calculate average accuracy and standard deviation for each percentage
+detailed_summary = {}
+compiled_accuracies = {}
+for key, result_list in results.items():
+    domain, env, task, recognizer = key
+    percentages = result_list[0].keys()
+    detailed_summary[key] = {}
+    if (domain, recognizer) not in compiled_accuracies:
+        compiled_accuracies[(domain, recognizer)] = {}
+    for percentage in percentages:
+        if percentage == "total":
+            continue
+        if percentage not in compiled_accuracies[(domain, recognizer)].keys():
+            compiled_accuracies[(domain, recognizer)][percentage] = {}
+        if percentage not in detailed_summary[key].keys():
+            detailed_summary[key][percentage] = {}
+        consecutive_accuracies = [
+            result[percentage]["consecutive"]["accuracy"] for result in result_list
+        ]
+        non_consecutive_accuracies = [
+            result[percentage]["non_consecutive"]["accuracy"] for result in result_list
+        ]
+        if (
+            "consecutive"
+            in compiled_accuracies[(domain, recognizer)][percentage].keys()
+        ):
+            compiled_accuracies[(domain, recognizer)][percentage]["consecutive"].extend(
+                consecutive_accuracies
+            )
+        else:
+            compiled_accuracies[(domain, recognizer)][percentage][
+                "consecutive"
+            ] = consecutive_accuracies
+        if (
+            "non_consecutive"
+            in compiled_accuracies[(domain, recognizer)][percentage].keys()
+        ):
+            compiled_accuracies[(domain, recognizer)][percentage][
+                "non_consecutive"
+            ].extend(non_consecutive_accuracies)
+        else:
+            compiled_accuracies[(domain, recognizer)][percentage][
+                "non_consecutive"
+            ] = non_consecutive_accuracies
+        avg_consecutive_accuracy = np.mean(consecutive_accuracies)
+        consecutive_std_dev = np.std(consecutive_accuracies)
+        detailed_summary[key][percentage]["consecutive"] = (
+            avg_consecutive_accuracy,
+            consecutive_std_dev,
+        )
+        avg_non_consecutive_accuracy = np.mean(non_consecutive_accuracies)
+        non_consecutive_std_dev = np.std(non_consecutive_accuracies)
+        detailed_summary[key][percentage]["non_consecutive"] = (
+            avg_non_consecutive_accuracy,
+            non_consecutive_std_dev,
+        )
+
+compiled_summary = {}
+for key, percentage_dict in compiled_accuracies.items():
+    compiled_summary[key] = {}
+    for percentage, cons_accuracies in percentage_dict.items():
+        compiled_summary[key][percentage] = {}
+        for is_cons, accuracies in cons_accuracies.items():
+            avg_accuracy = np.mean(accuracies)
+            std_dev = np.std(accuracies)
+            compiled_summary[key][percentage][is_cons] = (avg_accuracy, std_dev)
+
+# Write different summary results to different files, one per recognizer
+if not os.path.exists(os.path.join("outputs", "summaries")):
+    os.makedirs(os.path.join("outputs", "summaries"))
+
+for recognizer in recognizers:
+    compiled_summary_file_path = os.path.join(
+        "outputs",
+        "summaries",
+        f"compiled_summary_{''.join(configs.keys())}_{recognizer}.txt",
+    )
+    with open(compiled_summary_file_path, "w") as f:
+        for key, percentage_dict in compiled_summary.items():
+            domain, recog = key
+            if recog != recognizer:
+                continue  # Only write results for this recognizer
+            for percentage, cons_info in percentage_dict.items():
+                for is_cons, (avg_accuracy, std_dev) in cons_info.items():
+                    f.write(
+                        f"{domain}\t{recog}\t{percentage}\t{is_cons}\t{avg_accuracy:.4f}\t{std_dev:.4f}\n"
+                    )
+    print(f"Compiled summary results written to {compiled_summary_file_path}")
+
+    detailed_summary_file_path = os.path.join(
+        "outputs",
+        "summaries",
+        f"detailed_summary_{''.join(configs.keys())}_{recognizer}.txt",
+    )
+    with open(detailed_summary_file_path, "w") as f:
+        for key, percentage_dict in detailed_summary.items():
+            domain, env, task, recog = key
+            if recog != recognizer:
+                continue  # Only write results for this recognizer
+            f.write(f"{domain}\t{env}\t{task}\t{recog}\n")
+            for percentage, cons_info in percentage_dict.items():
+                for is_cons, (avg_accuracy, std_dev) in cons_info.items():
+                    f.write(
+                        f"\t\t{percentage}\t{is_cons}\t{avg_accuracy:.4f}\t{std_dev:.4f}\n"
+                    )
+    print(f"Detailed summary results written to {detailed_summary_file_path}")
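
Since `all_experiments.py` above writes each compiled summary as tab-separated lines of the form `domain<TAB>recognizer<TAB>percentage<TAB>is_cons<TAB>avg<TAB>std`, the files under `outputs/summaries/` can be read back for plotting. A minimal reader sketch; the file name in the usage line is only a placeholder following the `compiled_summary_{domains}_{recognizer}.txt` pattern used above:

```python
# Sketch: read back a compiled_summary_*.txt produced by all_experiments.py.
# Column order matches the f.write(...) call in the script above.
import csv


def load_compiled_summary(path):
    """Return one dict per summary line."""
    rows = []
    with open(path, newline="") as f:
        for domain, recognizer, percentage, is_cons, avg, std in csv.reader(
            f, delimiter="\t"
        ):
            rows.append(
                {
                    "domain": domain,
                    "recognizer": recognizer,
                    "percentage": percentage,
                    "is_cons": is_cons,
                    "avg_accuracy": float(avg),
                    "std_dev": float(std),
                }
            )
    return rows


# Placeholder path following the naming pattern above:
for row in load_compiled_summary(
    "outputs/summaries/compiled_summary_minigrid_ExpertBasedGraml.txt"
):
    print(row)
```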

{gr_libs-0.1.8 → gr_libs-0.2.5}/gr_libs/environment/__init__.py

@@ -1,9 +1,22 @@
+"""
+A module GR algorithms can store hard-coded parameters anf functionalities
+that are environment-related.
+"""
+
 import importlib.metadata
 import warnings
 
 
 def is_extra_installed(package: str, extra: str) -> bool:
-    """Check if an extra was installed for a given package.
+    """Check if an extra was installed for a given package.
+
+    Args:
+        package (str): The name of the package.
+        extra (str): The name of the extra to check.
+
+    Returns:
+        bool: True if the extra is installed, False otherwise.
+    """
     try:
         # Get metadata for the installed package
         dist = importlib.metadata.metadata(package)
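
The hunk above documents `is_extra_installed(package, extra)` in `gr_libs/environment/__init__.py`. A hedged usage sketch follows; the extra name `"minigrid"` is an assumed example, since the available extras are not listed in this part of the diff:

```python
# Usage sketch for the helper documented above. "minigrid" is a hypothetical
# extra name used purely for illustration; substitute a real extra of gr_libs.
import warnings

from gr_libs.environment import is_extra_installed

if not is_extra_installed("gr_libs", "minigrid"):
    warnings.warn(
        "Assumed extra 'minigrid' is not installed; "
        "environment-specific features may be unavailable."
    )
```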