gr-libs 0.1.4__tar.gz → 0.1.6.post1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. gr_libs-0.1.6.post1/.github/workflows/common_test_steps.yml +26 -0
  2. gr_libs-0.1.6.post1/.github/workflows/pr_flow.yml +10 -0
  3. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/.github/workflows/release.yml +10 -9
  4. gr_libs-0.1.6.post1/CI/README.md +12 -0
  5. gr_libs-0.1.6.post1/CI/docker_build_context/Dockerfile +15 -0
  6. {gr_libs-0.1.4/gr_libs.egg-info → gr_libs-0.1.6.post1}/PKG-INFO +22 -1
  7. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/README.md +20 -0
  8. gr_libs-0.1.6.post1/download_dataset.py +19 -0
  9. gr_libs-0.1.6.post1/gr_libs/_version.py +21 -0
  10. gr_libs-0.1.6.post1/gr_libs/environment/__init__.py +22 -0
  11. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/environment/environment.py +1 -3
  12. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/metrics/metrics.py +1 -2
  13. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/ml/neural/deep_rl_learner.py +10 -12
  14. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/recognizer/graml/graml_recognizer.py +1 -2
  15. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/recognizer/recognizer.py +3 -4
  16. {gr_libs-0.1.4 → gr_libs-0.1.6.post1/gr_libs.egg-info}/PKG-INFO +22 -1
  17. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs.egg-info/SOURCES.txt +10 -1
  18. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs.egg-info/requires.txt +1 -0
  19. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs.egg-info/top_level.txt +2 -0
  20. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/odgr_executor.py +1 -1
  21. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/pyproject.toml +3 -1
  22. gr_libs-0.1.6.post1/tests/test_graml.py +16 -0
  23. gr_libs-0.1.6.post1/tests/test_graql.py +4 -0
  24. gr_libs-0.1.6.post1/tutorials/graml_minigrid_tutorial.py +34 -0
  25. gr_libs-0.1.6.post1/tutorials/graml_panda_tutorial.py +41 -0
  26. gr_libs-0.1.6.post1/tutorials/graml_parking_tutorial.py +38 -0
  27. gr_libs-0.1.6.post1/tutorials/graml_point_maze_tutorial.py +39 -0
  28. gr_libs-0.1.6.post1/tutorials/graql_minigrid_tutorial.py +34 -0
  29. gr_libs-0.1.4/tutorials/graml_minigrid_tutorial.py +0 -30
  30. gr_libs-0.1.4/tutorials/graml_panda_tutorial.py +0 -32
  31. gr_libs-0.1.4/tutorials/graml_parking_tutorial.py +0 -38
  32. gr_libs-0.1.4/tutorials/graml_point_maze_tutorial.py +0 -43
  33. gr_libs-0.1.4/tutorials/graql_minigrid_tutorial.py +0 -29
  34. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/.gitignore +0 -0
  35. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/all_experiments.py +0 -0
  36. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/evaluation/analyze_results_cross_alg_cross_domain.py +0 -0
  37. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/evaluation/create_minigrid_map_image.py +0 -0
  38. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/evaluation/file_system.py +0 -0
  39. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/evaluation/generate_experiments_results.py +0 -0
  40. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/evaluation/generate_experiments_results_new_ver1.py +0 -0
  41. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/evaluation/generate_experiments_results_new_ver2.py +0 -0
  42. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/evaluation/generate_task_specific_statistics_plots.py +0 -0
  43. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/evaluation/get_plans_images.py +0 -0
  44. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/evaluation/increasing_and_decreasing_.py +0 -0
  45. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/__init__.py +0 -0
  46. {gr_libs-0.1.4/gr_libs/environment → gr_libs-0.1.6.post1/gr_libs/environment/utils}/__init__.py +0 -0
  47. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/environment/utils/utils.py +0 -0
  48. {gr_libs-0.1.4/gr_libs/environment/utils → gr_libs-0.1.6.post1/gr_libs/metrics}/__init__.py +0 -0
  49. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/ml/__init__.py +0 -0
  50. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/ml/agent.py +0 -0
  51. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/ml/base/__init__.py +0 -0
  52. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/ml/base/rl_agent.py +0 -0
  53. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/ml/consts.py +0 -0
  54. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/ml/neural/__init__.py +0 -0
  55. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/ml/neural/utils/__init__.py +0 -0
  56. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/ml/neural/utils/dictlist.py +0 -0
  57. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/ml/neural/utils/penv.py +0 -0
  58. {gr_libs-0.1.4/gr_libs/metrics → gr_libs-0.1.6.post1/gr_libs/ml/planner}/__init__.py +0 -0
  59. {gr_libs-0.1.4/gr_libs/ml/planner → gr_libs-0.1.6.post1/gr_libs/ml/planner/mcts}/__init__.py +0 -0
  60. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/ml/planner/mcts/mcts_model.py +0 -0
  61. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/ml/planner/mcts/utils/__init__.py +0 -0
  62. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/ml/planner/mcts/utils/node.py +0 -0
  63. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/ml/planner/mcts/utils/tree.py +0 -0
  64. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/ml/sequential/__init__.py +0 -0
  65. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/ml/sequential/lstm_model.py +0 -0
  66. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/ml/tabular/__init__.py +0 -0
  67. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/ml/tabular/state.py +0 -0
  68. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/ml/tabular/tabular_q_learner.py +0 -0
  69. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/ml/tabular/tabular_rl_agent.py +0 -0
  70. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/ml/utils/__init__.py +0 -0
  71. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/ml/utils/env.py +0 -0
  72. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/ml/utils/format.py +0 -0
  73. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/ml/utils/math.py +0 -0
  74. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/ml/utils/other.py +0 -0
  75. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/ml/utils/storage.py +0 -0
  76. {gr_libs-0.1.4/gr_libs/ml/planner/mcts → gr_libs-0.1.6.post1/gr_libs/problems}/__init__.py +0 -0
  77. {gr_libs-0.1.4 → gr_libs-0.1.6.post1/gr_libs/problems}/consts.py +0 -0
  78. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/recognizer/__init__.py +0 -0
  79. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/recognizer/gr_as_rl/__init__.py +0 -0
  80. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +0 -0
  81. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/recognizer/graml/__init__.py +0 -0
  82. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/recognizer/graml/gr_dataset.py +0 -0
  83. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/recognizer/recognizer_doc.md +0 -0
  84. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/recognizer/utils/__init__.py +0 -0
  85. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs/recognizer/utils/format.py +0 -0
  86. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/gr_libs.egg-info/dependency_links.txt +0 -0
  87. {gr_libs-0.1.4 → gr_libs-0.1.6.post1}/setup.cfg +0 -0
@@ -0,0 +1,26 @@
+ name: Common Test Steps
+
+ on:
+   workflow_call:
+
+ jobs:
+   test_steps:
+     runs-on: ubuntu-latest
+     container:
+       image: ghcr.io/matanshamir1/gr_test_base_slim:latest
+     steps:
+       - name: Check out the repository
+         uses: actions/checkout@v4
+
+       - name: Install gr_libs with all extras and test tools
+         env:
+           SETUPTOOLS_SCM_PRETEND_VERSION_FOR_GR_LIBS: "0.0.0"
+         run: |
+           python -m pip install --upgrade pip
+           pip install setuptools_scm
+           pip install gr_envs[minigrid,panda,parking,maze]
+           pip install .[minigrid,panda,parking,maze]
+           pip install pytest
+
+       - name: Run tests
+         run: pytest tests/
@@ -0,0 +1,10 @@
+ name: PR Test Flow
+
+ on:
+   pull_request:
+     branches:
+       - main # or whichever branch you're targeting for PRs
+
+ jobs:
+   run_tests:
+     uses: ./.github/workflows/common_test_steps.yml
@@ -6,27 +6,28 @@ on:
        - "v*"

  jobs:
-   build-and-publish:
+   release:
      runs-on: ubuntu-latest
-
      steps:
-       - name: Check out the repository
+       # from here to remov when returning uses: ./.github/workflows/common_test_steps.yml
+       - name: Checkout code
          uses: actions/checkout@v4

        - name: Set up Python
-         uses: actions/setup-python@v4
+         uses: actions/setup-python@v5
          with:
            python-version: "3.11"

-       - name: Install build dependencies
+       - name: Install build tools
          run: |
            python -m pip install --upgrade pip
            pip install build twine
-
+       # until here!
        - name: Build the package
-         run: python -m build # Uses pyproject.toml instead of setup.py
+         run: python -m build

        - name: Publish to PyPI
          env:
-           PYPY_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
-         run: python -m twine upload dist/* -u __token__ -p $PYPY_API_TOKEN
+           TWINE_USERNAME: __token__
+           TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
+         run: python -m twine upload dist/*
@@ -0,0 +1,12 @@
+ ## How to build a new docker image including new trained agents:
+ 1. Install docker
+ 2. Make sure you have a dataset.zip at your repo root
+ 3. Make sure you have a classic token in github: https://github.com/settings/tokens . If you don't, create one with package write, read and delete permissions and copy it somewhere safe.
+ 4. Authenticate to ghcr with docker by running:
+ ```sh
+ echo ghp_REST_OF_TOKEN | docker login ghcr.io -u MatanShamir1 --password-stdin
+ ```
+ 3. docker build -t ghcr.io/<your-username>/gr_test_base:latest -f CI/Dockerfile .
+ (the -f Dockerfile tells docker which Dockerfile to use and the '.' tells docker what's the build context, or where the dataset.zip should live)
+ 4. docker push ghcr.io/<your-username>/gr_test_base:latest
+ docker push ghcr.io/MatanShamir1/gr_test_base:latest
@@ -0,0 +1,15 @@
+ FROM python:3.11-slim
+
+ # Set workdir
+ WORKDIR /app
+
+ # Install unzip
+ RUN apt-get update && apt-get install -y unzip && rm -rf /var/lib/apt/lists/*
+
+ # Copy and unzip the dataset
+ COPY dataset.zip .
+ RUN unzip dataset.zip && rm dataset.zip
+ RUN mv dataset_new dataset
+
+ # Just start with bash by default
+ CMD [ "bash" ]
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: gr_libs
- Version: 0.1.4
+ Version: 0.1.6.post1
  Summary: Package with goal recognition frameworks baselines
  Author: Ben Nageris
  Author-email: Matan Shamir <matan.shamir@live.biu.ac.il>, Osher Elhadad <osher.elhadad@live.biu.ac.il>
@@ -17,6 +17,7 @@ Requires-Dist: torchvision
  Requires-Dist: rl_zoo3
  Requires-Dist: stable_baselines3[extra]
  Requires-Dist: sb3_contrib
+ Requires-Dist: pytest
  Provides-Extra: minigrid
  Requires-Dist: gr_envs[minigrid]; extra == "minigrid"
  Provides-Extra: highway
@@ -111,6 +112,25 @@ After installing GRLib, you will have access to custom Gym environments, allowin

  Tutorials demonstrating basic ODGR scenarios is available in the sub-package `tutorials`. These tutorials walk through the initialization and deployment process, showcasing how different GR algorithms adapt to emerging goals in various Gym environments.

+ ## Working with an initial dataset of trained agents
+ gr_libs also includes a library of trained agents for the various supported environments within the package.
+ To get the dataset of trained agents, you can run:
+ ```sh
+ python download_dataset.py
+ ```
+
+ An alternative is to use our docker image, which includes the dataset in it.
+ You can:
+ 1. pull the image:
+ ```sh
+ docker pull ghcr.io/MatanShamir1/gr_test_base:latest
+ ```
+ 2. run a container:
+ ```sh
+ docker run -it ghcr.io/MatanShamir1/gr_test_base:latest bash
+ ```
+ 3. don't forget to install the package from within the container, go back to 'Setup' for that.
+
  ### Method 1: Writing a Custom Script

  1. **Create a recognizer**
@@ -118,6 +138,7 @@ Tutorials demonstrating basic ODGR scenarios is available in the sub-package `tu
  Specify the domain name and specific environment for the recognizer, effectively telling it the domain theory - the collection of states and actions in the environment.

  ```python
+ import gr_libs.environment # Triggers gym env registration - you must run it!
  recognizer = Graql(
      domain_name="minigrid",
      env_name="MiniGrid-SimpleCrossingS13N4"
@@ -83,6 +83,25 @@ After installing GRLib, you will have access to custom Gym environments, allowin

  Tutorials demonstrating basic ODGR scenarios is available in the sub-package `tutorials`. These tutorials walk through the initialization and deployment process, showcasing how different GR algorithms adapt to emerging goals in various Gym environments.

+ ## Working with an initial dataset of trained agents
+ gr_libs also includes a library of trained agents for the various supported environments within the package.
+ To get the dataset of trained agents, you can run:
+ ```sh
+ python download_dataset.py
+ ```
+
+ An alternative is to use our docker image, which includes the dataset in it.
+ You can:
+ 1. pull the image:
+ ```sh
+ docker pull ghcr.io/MatanShamir1/gr_test_base:latest
+ ```
+ 2. run a container:
+ ```sh
+ docker run -it ghcr.io/MatanShamir1/gr_test_base:latest bash
+ ```
+ 3. don't forget to install the package from within the container, go back to 'Setup' for that.
+
  ### Method 1: Writing a Custom Script

  1. **Create a recognizer**
@@ -90,6 +109,7 @@ Tutorials demonstrating basic ODGR scenarios is available in the sub-package `tu
  Specify the domain name and specific environment for the recognizer, effectively telling it the domain theory - the collection of states and actions in the environment.

  ```python
+ import gr_libs.environment # Triggers gym env registration - you must run it!
  recognizer = Graql(
      domain_name="minigrid",
      env_name="MiniGrid-SimpleCrossingS13N4"
@@ -0,0 +1,19 @@
+ import requests
+ import zipfile
+ import os
+
+ def download_and_extract_dataset(google_drive_url, extract_to):
+     os.makedirs(extract_to, exist_ok=True)
+     download_url = google_drive_url + "&export=download"
+     response = requests.get(download_url)
+     response.raise_for_status()
+     with open('dataset.zip', 'wb') as f:
+         f.write(response.content)
+     with zipfile.ZipFile('dataset.zip', 'r') as zip_ref:
+         zip_ref.extractall(extract_to)
+     os.remove('dataset.zip')
+
+ if __name__ == "__main__":
+     google_drive_url = "https://drive.google.com/file/d/1PK1iZONTyiQZBgLErUO88p1YWdL4B9Xn/view?usp=sharing"
+     extract_to = "dataset"
+     download_and_extract_dataset(google_drive_url, extract_to)
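For reference, the new `download_dataset.py` above builds its download URL by appending `&export=download` to a Google Drive share link. A commonly used alternative pattern is to call Drive's `uc` endpoint with the file ID taken from that link; the sketch below is hypothetical and not part of the package, shown only to illustrate that pattern with the same file ID.

```python
# Hypothetical sketch (not part of gr_libs): fetch a public Google Drive file
# through the uc endpoint, using the file ID parsed from the share link above.
import re
import requests

share_url = "https://drive.google.com/file/d/1PK1iZONTyiQZBgLErUO88p1YWdL4B9Xn/view?usp=sharing"
file_id = re.search(r"/d/([^/]+)", share_url).group(1)

# Stream the archive to disk rather than holding it all in memory.
with requests.get(f"https://drive.google.com/uc?export=download&id={file_id}", stream=True) as resp:
    resp.raise_for_status()
    with open("dataset.zip", "wb") as f:
        for chunk in resp.iter_content(chunk_size=1 << 20):
            f.write(chunk)
```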
@@ -0,0 +1,21 @@
+ # file generated by setuptools-scm
+ # don't change, don't track in version control
+
+ __all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
+
+ TYPE_CHECKING = False
+ if TYPE_CHECKING:
+     from typing import Tuple
+     from typing import Union
+
+     VERSION_TUPLE = Tuple[Union[int, str], ...]
+ else:
+     VERSION_TUPLE = object
+
+ version: str
+ __version__: str
+ __version_tuple__: VERSION_TUPLE
+ version_tuple: VERSION_TUPLE
+
+ __version__ = version = '0.1.6.post1'
+ __version_tuple__ = version_tuple = (0, 1, 6)
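Since `_version.py` is generated by setuptools-scm at build time, the installed version can be read either from that module or from the package metadata. A small sketch, assuming `gr_libs` is installed:

```python
# Two equivalent ways to read the version that setuptools-scm wrote.
import importlib.metadata

from gr_libs._version import __version__  # the generated module shown above

print(__version__)                            # e.g. "0.1.6.post1"
print(importlib.metadata.version("gr_libs"))  # same value, from the installed metadata
```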
@@ -0,0 +1,22 @@
+ import importlib.metadata
+ import warnings
+
+ def is_extra_installed(package: str, extra: str) -> bool:
+     """Check if an extra was installed for a given package."""
+     try:
+         # Get metadata for the installed package
+         dist = importlib.metadata.metadata(package)
+         requires = dist.get_all("Requires-Dist", []) # Dependencies listed in the package metadata
+         return any(extra in req for req in requires)
+     except importlib.metadata.PackageNotFoundError:
+         return False # The package is not installed
+
+ # Check if `gr_libs[minigrid]` was installed
+ for env in ["minigrid", "panda", "highway", "point_maze"]:
+     if is_extra_installed("gr_libs", f"gr_envs[{env}]"):
+         try:
+             importlib.import_module(f"gr_envs.{env}_scripts.envs")
+         except ImportError:
+             raise ImportError(f"gr_envs[{env}] was not installed, but gr_libs[{env}] requires it! if you messed with gr_envs installation, you can reinstall gr_libs.")
+     else:
+         warnings.warn(f"gr_libs[{env}] was not installed, skipping {env} imports.", RuntimeWarning)
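For reference, the `Requires-Dist` metadata that `is_extra_installed` scans can be inspected directly with the standard library. A minimal sketch, assuming `gr_libs` is installed:

```python
# Print the dependency metadata that the extras check above iterates over.
import importlib.metadata

requires = importlib.metadata.metadata("gr_libs").get_all("Requires-Dist", [])
for req in requires:
    print(req)  # e.g. 'gr_envs[minigrid]; extra == "minigrid"'

# Same test as is_extra_installed("gr_libs", "gr_envs[minigrid]") above:
print(any("gr_envs[minigrid]" in req for req in requires))
```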
@@ -105,7 +105,7 @@ class MinigridProperty(EnvProperty):
          env_id = problem_name.split("-DynamicGoal-")[0] + "-DynamicGoal-" + problem_name.split("-DynamicGoal-")[1]
          result = register(
              id=env_id,
-             entry_point="gr_libss.minigrid_scripts.envs:CustomColorEnv",
+             entry_point="gr_envs.minigrid_scripts.envs:CustomColorEnv",
              kwargs={"size": 13 if 'Simple' in problem_name else 9,
                      "num_crossings": 4 if 'Simple' in problem_name else 3,
                      "goal_pos": self.str_to_goal(problem_name),
@@ -168,8 +168,6 @@ class PandaProperty(GCEnvProperty):


  class ParkingProperty(GCEnvProperty):
-     # def str_to_goal(self): # TODO not use it, goal is not a part of the env property anymore.
-     #     return self.name.split("-")[-2]

      def __init__(self, name):
          super().__init__(name)
@@ -5,7 +5,6 @@ import numpy as np

  from typing import Callable, Generator, List, Dict, Tuple, Any
  from math import log2
- from numpy.core.fromnumeric import mean
  from scipy.stats import wasserstein_distance
  from gymnasium.spaces.discrete import Discrete
  # import torch
@@ -43,7 +42,7 @@ def kl_divergence_norm_softmax(observations: List[Tuple[State, Any]], agent, act
          qp2_flatten_distribution_list: List[float] = agent.get_actions_probabilities(
              observation=(observation, agent_pos))
          distances.append(kl_divergence(qp1, qp2_flatten_distribution_list))
-     return mean(distances)
+     return np.mean(distances)


  def amplify(values, alpha=1.0):
@@ -13,11 +13,6 @@ if __name__ != "__main__":
13
13
  from gr_libs.ml.utils.format import random_subset_with_order
14
14
  from stable_baselines3 import SAC, PPO
15
15
  from stable_baselines3.common.vec_env import DummyVecEnv
16
- from gr_envs.custom_env_wrappers.flat_obs_wrapper import CombineAchievedGoalAndObservationWrapper
17
-
18
- # important for registration of envs! do not remove lad
19
- import gr_envs.maze_scripts.envs.maze
20
- import gr_envs.highway_env_scripts.envs.parking_env
21
16
  from gr_libs.ml.utils import device
22
17
 
23
18
  # built-in python modules
@@ -32,13 +27,15 @@ def create_vec_env(kwargs):
32
27
  return DummyVecEnv([lambda: env])
33
28
 
34
29
  def change_goal_to_specific_desired(obs, desired):
35
- try:
36
- if desired!=None: obs['desired_goal'] = desired
37
- except Exception as e:
38
- try:
39
- if all(desired!=None): obs['desired_goal'] = desired
40
- except Exception as e:
41
- if all([desiredy!=None for desiredish in desired for desiredy in desiredish]): obs['desired_goal'] = desired
30
+ if desired is not None:
31
+ obs['desired_goal'] = desired
32
+ # try:
33
+ # if desired!=None: obs['desired_goal'] = desired
34
+ # except Exception as e:
35
+ # try:
36
+ # if all(desired!=None): obs['desired_goal'] = desired
37
+ # except Exception as e:
38
+ # if all([desiredy!=None for desiredish in desired for desiredy in desiredish]): obs['desired_goal'] = desired
42
39
 
43
40
 
44
41
  NETWORK_SETUP = {
@@ -265,6 +262,7 @@ class DeepRLAgent():
              assert fig_path == None, "You can't specify a vid path when you don't even save the figure."
          else:
              assert fig_path != None, "You need to specify a vid path when you save the figure."
+         # The try-except is a bug fix for the env not being reset properly in panda. If someone wants to check why and provide a robust solution they're welcome.
          try:
              obs = self.env.reset()
              change_goal_to_specific_desired(obs, desired)
@@ -103,7 +103,6 @@ class Graml(LearningRecognizer):
              self.plans_dict[f"{true_goal}_true"] = true_sequence

          with open(embeddings_path + f'/{true_goal}_{percentage}_plans_dict.pkl', 'wb') as plans_file:
-             # TODO erase AGENT_BASED macros
              to_dump = {}
              for goal, obss in self.plans_dict.items():
                  if goal == f"{true_goal}_true":
@@ -243,7 +242,7 @@ class GCGraml(Graml, GaAdaptingRecognizer):
          if num_timesteps != None: kwargs["num_timesteps"] = num_timesteps
          gc_agent = self.rl_agent_type(**kwargs)
          gc_agent.learn()
-         self.agents.append(ContextualAgent(problem_name=self.env_prop.name, problem_goal="general", agent=gc_agent)) # TODO change
+         self.agents.append(ContextualAgent(problem_name=self.env_prop.name, problem_goal="general", agent=gc_agent))

      def generate_sequences_library(self, goal: str) -> List[List[Tuple[np.ndarray, np.ndarray]]]:
          problem_name = self.env_prop.goal_to_problem_str(goal)
@@ -1,6 +1,5 @@
  from abc import ABC, abstractmethod
  from typing import List, Type
-
  from gr_libs.environment.environment import EnvProperty, SUPPORTED_DOMAINS
  from gr_libs.environment.utils.utils import domain_to_env_property
  from gr_libs.ml.base.rl_agent import RLAgent
@@ -18,7 +17,7 @@ class Recognizer(ABC):
      def inference_phase(self, inf_sequence, true_goal, percentage) -> str:
          pass

- class LearningRecognizer(Recognizer): # TODO add a class diagram with the inheritance of all calsses
+ class LearningRecognizer(Recognizer):
      def __init__(self, *args, **kwargs):
          super().__init__(*args, **kwargs)

@@ -26,7 +25,7 @@ class LearningRecognizer(Recognizer): # TODO add a class diagram with the inheri
          self.original_train_configs = train_configs

  # a recognizer that needs to train agents for every new goal as part of the goal adaptation phase (that's why it needs dynamic train configs)
- class GaAgentTrainerRecognizer(Recognizer): # TODO add a class diagram with the inheritance of all calsses
+ class GaAgentTrainerRecognizer(Recognizer):
      def __init__(self, *args, **kwargs):
          super().__init__(*args, **kwargs)

@@ -37,7 +36,7 @@ class GaAgentTrainerRecognizer(Recognizer): # TODO add a class diagram with the
      def domain_learning_phase(self, base_goals: List[str], train_configs: List):
          super().domain_learning_phase(base_goals, train_configs)

- class GaAdaptingRecognizer(Recognizer): # TODO add a class diagram with the inheritance of all calsses
+ class GaAdaptingRecognizer(Recognizer):
      def __init__(self, *args, **kwargs):
          super().__init__(*args, **kwargs)

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: gr_libs
- Version: 0.1.4
+ Version: 0.1.6.post1
  Summary: Package with goal recognition frameworks baselines
  Author: Ben Nageris
  Author-email: Matan Shamir <matan.shamir@live.biu.ac.il>, Osher Elhadad <osher.elhadad@live.biu.ac.il>
@@ -17,6 +17,7 @@ Requires-Dist: torchvision
  Requires-Dist: rl_zoo3
  Requires-Dist: stable_baselines3[extra]
  Requires-Dist: sb3_contrib
+ Requires-Dist: pytest
  Provides-Extra: minigrid
  Requires-Dist: gr_envs[minigrid]; extra == "minigrid"
  Provides-Extra: highway
@@ -111,6 +112,25 @@ After installing GRLib, you will have access to custom Gym environments, allowin

  Tutorials demonstrating basic ODGR scenarios is available in the sub-package `tutorials`. These tutorials walk through the initialization and deployment process, showcasing how different GR algorithms adapt to emerging goals in various Gym environments.

+ ## Working with an initial dataset of trained agents
+ gr_libs also includes a library of trained agents for the various supported environments within the package.
+ To get the dataset of trained agents, you can run:
+ ```sh
+ python download_dataset.py
+ ```
+
+ An alternative is to use our docker image, which includes the dataset in it.
+ You can:
+ 1. pull the image:
+ ```sh
+ docker pull ghcr.io/MatanShamir1/gr_test_base:latest
+ ```
+ 2. run a container:
+ ```sh
+ docker run -it ghcr.io/MatanShamir1/gr_test_base:latest bash
+ ```
+ 3. don't forget to install the package from within the container, go back to 'Setup' for that.
+
  ### Method 1: Writing a Custom Script

  1. **Create a recognizer**
@@ -118,6 +138,7 @@ Tutorials demonstrating basic ODGR scenarios is available in the sub-package `tu
  Specify the domain name and specific environment for the recognizer, effectively telling it the domain theory - the collection of states and actions in the environment.

  ```python
+ import gr_libs.environment # Triggers gym env registration - you must run it!
  recognizer = Graql(
      domain_name="minigrid",
      env_name="MiniGrid-SimpleCrossingS13N4"
@@ -1,10 +1,14 @@
  .gitignore
  README.md
  all_experiments.py
- consts.py
+ download_dataset.py
  odgr_executor.py
  pyproject.toml
+ .github/workflows/common_test_steps.yml
+ .github/workflows/pr_flow.yml
  .github/workflows/release.yml
+ CI/README.md
+ CI/docker_build_context/Dockerfile
  evaluation/analyze_results_cross_alg_cross_domain.py
  evaluation/create_minigrid_map_image.py
  evaluation/file_system.py
@@ -15,6 +19,7 @@ evaluation/generate_task_specific_statistics_plots.py
  evaluation/get_plans_images.py
  evaluation/increasing_and_decreasing_.py
  gr_libs/__init__.py
+ gr_libs/_version.py
  gr_libs.egg-info/PKG-INFO
  gr_libs.egg-info/SOURCES.txt
  gr_libs.egg-info/dependency_links.txt
@@ -54,6 +59,8 @@ gr_libs/ml/utils/format.py
  gr_libs/ml/utils/math.py
  gr_libs/ml/utils/other.py
  gr_libs/ml/utils/storage.py
+ gr_libs/problems/__init__.py
+ gr_libs/problems/consts.py
  gr_libs/recognizer/__init__.py
  gr_libs/recognizer/recognizer.py
  gr_libs/recognizer/recognizer_doc.md
@@ -64,6 +71,8 @@ gr_libs/recognizer/graml/gr_dataset.py
  gr_libs/recognizer/graml/graml_recognizer.py
  gr_libs/recognizer/utils/__init__.py
  gr_libs/recognizer/utils/format.py
+ tests/test_graml.py
+ tests/test_graql.py
  tutorials/graml_minigrid_tutorial.py
  tutorials/graml_panda_tutorial.py
  tutorials/graml_parking_tutorial.py
@@ -6,6 +6,7 @@ torchvision
  rl_zoo3
  stable_baselines3[extra]
  sb3_contrib
+ pytest

  [highway]
  gr_envs[highway]
@@ -1,4 +1,6 @@
+ CI
  dist
  evaluation
  gr_libs
+ tests
  tutorials
@@ -13,7 +13,7 @@ from gr_libs.recognizer.recognizer import GaAgentTrainerRecognizer, LearningReco
  from gr_libs.recognizer.utils import recognizer_str_to_obj
  from gr_libs.ml.utils.storage import create_folders_if_necessary, get_and_create, get_experiment_results_path, get_policy_sequences_result_path

- from consts import PROBLEMS
+ from gr_libs.problems.consts import PROBLEMS

  def validate(args, recognizer_type, task_inputs):
      if "base" in task_inputs.keys():
@@ -22,7 +22,8 @@ dependencies = [
      "torchvision",
      "rl_zoo3",
      "stable_baselines3[extra]",
-     "sb3_contrib"
+     "sb3_contrib",
+     "pytest"
  ]
  classifiers = [
      "Programming Language :: Python :: 3",
@@ -42,3 +43,4 @@ packages = {find = {}}
  [tool.setuptools_scm]
  version_scheme = "post-release"
  local_scheme = "node-and-date"
+ write_to = "gr_libs/_version.py" # This line writes the version to a file within the package
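With this configuration, setuptools-scm derives the version from git tags (post-release scheme) and writes it into `gr_libs/_version.py`. A sketch of querying the same configuration from a git checkout, assuming `setuptools_scm` is installed; the printed value depends on the tag and commit distance:

```python
# Ask setuptools-scm what version it would assign to the current checkout.
from setuptools_scm import get_version

version = get_version(
    root=".",                        # repository root containing pyproject.toml
    version_scheme="post-release",   # matches [tool.setuptools_scm] above
    local_scheme="node-and-date",
)
print(version)  # e.g. "0.1.6" on a tag, or a .postN(+local) version past it
```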
@@ -0,0 +1,16 @@
+ from tutorials.graml_minigrid_tutorial import run_graml_minigrid_tutorial
+ from tutorials.graml_panda_tutorial import run_graml_panda_tutorial
+ from tutorials.graml_parking_tutorial import run_graml_parking_tutorial
+ from tutorials.graml_point_maze_tutorial import run_graml_point_maze_tutorial
+
+ def test_graml_minigrid_tutorial():
+     run_graml_minigrid_tutorial()
+
+ def test_graml_panda_tutorial():
+     run_graml_panda_tutorial()
+
+ def test_graml_parking_tutorial():
+     run_graml_parking_tutorial()
+
+ def test_graml_point_maze_tutorial():
+     run_graml_point_maze_tutorial()
@@ -0,0 +1,4 @@
+ from tutorials.graql_minigrid_tutorial import run_graql_minigrid_tutorial
+
+ def test_graql_minigrid_tutorial():
+     run_graql_minigrid_tutorial()
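These test modules are thin wrappers around the tutorials, so the CI step `pytest tests/` runs the tutorials end to end (which trains agents and can take a long time). A minimal sketch of a programmatic equivalent, assuming pytest and the relevant extras are installed:

```python
# Run the wrapper tests programmatically, mirroring the CI "pytest tests/" step.
import sys

import pytest

if __name__ == "__main__":
    # Select a single wrapper module; use ["tests/"] to run everything.
    sys.exit(pytest.main(["-q", "tests/test_graql.py"]))
```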
@@ -0,0 +1,34 @@
+ from gr_libs.environment.environment import MINIGRID, QLEARNING
+ from gr_libs.metrics.metrics import stochastic_amplified_selection
+ from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
+ from gr_libs.ml.utils.format import random_subset_with_order
+ from gr_libs import ExpertBasedGraml
+
+ def run_graml_minigrid_tutorial():
+     recognizer = ExpertBasedGraml(
+         domain_name=MINIGRID,
+         env_name="MiniGrid-SimpleCrossingS13N4"
+     )
+
+     recognizer.domain_learning_phase(base_goals=[(11,1), (11,11), (1,11), (7,11), (8,1), (10,6), (6,9), (11,3), (11,5)],
+                                      train_configs=[(QLEARNING, 100000) for _ in range(9)])
+
+     recognizer.goals_adaptation_phase(
+         dynamic_goals = [(11,1), (11,11), (1,11)],
+         dynamic_train_configs=[(QLEARNING, 100000) for _ in range(3)] # for expert sequence generation.
+     )
+     # TD3 is different from recognizer and expert algorithms, which are SAC #
+     actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
+     actor.learn()
+     # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+     full_sequence = actor.generate_observation(
+         action_selection_method=stochastic_amplified_selection,
+         random_optimalism=True, # the noise that's added to the actions
+     )
+
+     partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
+     closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
+     print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
+
+ if __name__ == "__main__":
+     run_graml_minigrid_tutorial()
@@ -0,0 +1,41 @@
+
+ import numpy as np
+ from stable_baselines3 import PPO, SAC
+ import gr_libs.environment.environment
+ from gr_libs.environment.environment import PANDA, EnvProperty, GCEnvProperty, PandaProperty
+ from gr_libs.environment.utils.utils import domain_to_env_property
+ from gr_libs.metrics.metrics import stochastic_amplified_selection
+ from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
+ from gr_libs.ml.utils.format import random_subset_with_order
+ from gr_libs import GCGraml
+
+ def run_graml_panda_tutorial():
+     recognizer = GCGraml( # TODO make these tutorials into pytests
+         domain_name=PANDA,
+         env_name="PandaMyReachDense"
+     )
+     recognizer.domain_learning_phase(
+         base_goals=[np.array([PandaProperty.sample_goal()]) for _ in range(1,30)],
+         train_configs=[(SAC, 800000)]
+     )
+     recognizer.goals_adaptation_phase(
+         dynamic_goals=[np.array([[-0.1, -0.1, 0.1]]), np.array([[-0.1, 0.1, 0.1]]), np.array([[0.2, 0.2, 0.1]])]
+     )
+     # TD3 is different from recognizer and expert algorithms, which are SAC #
+     property_type = domain_to_env_property(PANDA)
+     env_property = property_type("PandaMyReachDense")
+     problem_name = env_property.goal_to_problem_str(np.array([[-0.1, -0.1, 0.1]]))
+     actor = DeepRLAgent(domain_name=PANDA, problem_name=problem_name, algorithm=PPO, num_timesteps=400000)
+     actor.learn()
+     # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+     full_sequence = actor.generate_observation(
+         action_selection_method=stochastic_amplified_selection,
+         random_optimalism=True, # the noise that's added to the actions
+     )
+
+     partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
+     closest_goal = recognizer.inference_phase(partial_sequence, np.array([[-0.1, -0.1, 0.1]]), 0.5)
+     print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: [-0.1, -0.1, 0.1]")
+
+ if __name__ == "__main__":
+     run_graml_panda_tutorial()
@@ -0,0 +1,38 @@
+
+ from stable_baselines3 import PPO, SAC, TD3
+ from gr_libs.environment.environment import PARKING, EnvProperty, GCEnvProperty, ParkingProperty
+ from gr_libs.metrics.metrics import stochastic_amplified_selection
+ from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
+ from gr_libs.ml.utils.format import random_subset_with_order
+ from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml, GCGraml
+
+ def run_graml_parking_tutorial():
+     recognizer = GCGraml(
+         domain_name=PARKING,
+         env_name="Parking-S-14-PC-"
+     )
+
+     recognizer.domain_learning_phase(
+         [i for i in range(1,21)],
+         [(PPO, 200000)]
+     )
+     recognizer.goals_adaptation_phase(
+         dynamic_goals = ["1", "11", "21"]
+         # no need for expert sequence generation since GCRL is used
+     )
+
+     # TD3 is different from recognizer and expert algorithms, which are SAC #
+     actor = DeepRLAgent(domain_name="parking", problem_name="Parking-S-14-PC--GI-11-v0", algorithm=TD3, num_timesteps=400000)
+     actor.learn()
+     # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+     full_sequence = actor.generate_observation(
+         action_selection_method=stochastic_amplified_selection,
+         random_optimalism=True, # the noise that's added to the actions
+     )
+
+     partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
+     closest_goal = recognizer.inference_phase(partial_sequence, ParkingProperty("Parking-S-14-PC--GI-11-v0").str_to_goal(), 0.5)
+     print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: 11")
+
+ if __name__ == "__main__":
+     run_graml_parking_tutorial()
@@ -0,0 +1,39 @@
+
+ from stable_baselines3 import SAC, TD3
+ from gr_libs.environment.environment import POINT_MAZE, PointMazeProperty
+ from gr_libs.metrics.metrics import stochastic_amplified_selection
+ from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
+ from gr_libs.ml.utils.format import random_subset_with_order
+ from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml
+
+ def run_graml_point_maze_tutorial():
+     recognizer = ExpertBasedGraml(
+         domain_name=POINT_MAZE,
+         env_name="PointMaze-FourRoomsEnvDense-11x11"
+     )
+
+     recognizer.domain_learning_phase(
+         [(9,1), (9,9), (1,9), (3,3), (3,4), (8,2), (3,7), (2,8)],
+         [(SAC, 200000) for _ in range(8)]
+     )
+
+     recognizer.goals_adaptation_phase(
+         dynamic_goals = [(4,4), (7,3), (3,7)],
+         dynamic_train_configs=[(SAC, 200000) for _ in range(3)] # for expert sequence generation.
+     )
+
+     # TD3 is different from recognizer and expert algorithms, which are SAC #
+     actor = DeepRLAgent(domain_name="point_maze", problem_name="PointMaze-FourRoomsEnvDense-11x11-Goal-4x4", algorithm=TD3, num_timesteps=200000)
+     actor.learn()
+     # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+     full_sequence = actor.generate_observation(
+         action_selection_method=stochastic_amplified_selection,
+         random_optimalism=True, # the noise that's added to the actions
+     )
+
+     partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)))
+     closest_goal = recognizer.inference_phase(partial_sequence, PointMazeProperty("PointMaze-FourRoomsEnvDense-11x11-Goal-4x4").str_to_goal(), 0.5)
+     print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (4, 4)")
+
+ if __name__ == "__main__":
+     run_graml_point_maze_tutorial()
@@ -0,0 +1,34 @@
+ from gr_libs.environment.environment import QLEARNING
+ from gr_libs.metrics.metrics import stochastic_amplified_selection
+ from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
+ from gr_libs.ml.utils.format import random_subset_with_order
+ from gr_libs import Graql
+
+ def run_graql_minigrid_tutorial():
+     recognizer = Graql(
+         domain_name="minigrid",
+         env_name="MiniGrid-SimpleCrossingS13N4"
+     )
+
+     #Graql doesn't have a domain learning phase, so we skip it
+
+     recognizer.goals_adaptation_phase(
+         dynamic_goals = [(11,1), (11,11), (1,11)],
+         dynamic_train_configs=[(QLEARNING, 100000) for _ in range(3)] # for expert sequence generation.
+     )
+     # TD3 is different from recognizer and expert algorithms, which are SAC #
+     actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
+     actor.learn()
+     # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+     full_sequence = actor.generate_observation(
+         action_selection_method=stochastic_amplified_selection,
+         random_optimalism=True, # the noise that's added to the actions
+     )
+
+     partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
+     closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
+     print(f"closest_goal returned by Graql: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
+     return closest_goal, (11,1)
+
+ if __name__ == "__main__":
+     run_graql_minigrid_tutorial()
@@ -1,30 +0,0 @@
- from gr_libs.environment.environment import QLEARNING
- from gr_libs.metrics.metrics import stochastic_amplified_selection
- from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
- from gr_libs.ml.utils.format import random_subset_with_order
- from gr_libs import ExpertBasedGraml
-
- recognizer = ExpertBasedGraml(
-     domain_name="minigrid",
-     env_name="MiniGrid-SimpleCrossingS13N4"
- )
-
- recognizer.domain_learning_phase(base_goals=[(11,1), (11,11), (1,11), (7,11), (8,1), (10,6), (6,9), (11,3), (11,5)],
-                                  train_configs=[(QLEARNING, 100000) for _ in range(9)])
-
- recognizer.goals_adaptation_phase(
-     dynamic_goals = [(11,1), (11,11), (1,11)],
-     dynamic_train_configs=[(QLEARNING, 100000) for _ in range(3)] # for expert sequence generation.
- )
- # TD3 is different from recognizer and expert algorithms, which are SAC #
- actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
- actor.learn()
- # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
- full_sequence = actor.generate_observation(
-     action_selection_method=stochastic_amplified_selection,
-     random_optimalism=True, # the noise that's added to the actions
- )
-
- partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
- closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
- print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
@@ -1,32 +0,0 @@
-
- import numpy as np
- from stable_baselines3 import PPO, SAC
- from gr_libs.environment.environment import PANDA, GCEnvProperty, PandaProperty
- from gr_libs.environment.utils.utils import domain_to_env_property
- from gr_libs.metrics.metrics import stochastic_amplified_selection
- from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
- from gr_libs.ml.utils.format import random_subset_with_order
- from gr_libs import GCGraml
-
- recognizer = GCGraml( # TODO make these tutorials into pytests
-     domain_name=PANDA,
-     env_name="PandaMyReachDense"
- )
- recognizer.domain_learning_phase(base_goals=[np.array([PandaProperty.sample_goal()]) for _ in range(1,30)],
-                                  train_configs=[(SAC, 800000)])
- recognizer.goals_adaptation_phase(dynamic_goals=[np.array([[-0.1, -0.1, 0.1]]), np.array([[-0.1, 0.1, 0.1]]), np.array([[0.2, 0.2, 0.1]])])
- # TD3 is different from recognizer and expert algorithms, which are SAC #
- property_type = domain_to_env_property(PANDA)
- env_property = property_type("PandaMyReachDense")
- problem_name = env_property.goal_to_problem_str(np.array([[-0.1, -0.1, 0.1]]))
- actor = DeepRLAgent(domain_name=PANDA, problem_name=problem_name, algorithm=PPO, num_timesteps=400000)
- actor.learn()
- # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
- full_sequence = actor.generate_observation(
-     action_selection_method=stochastic_amplified_selection,
-     random_optimalism=True, # the noise that's added to the actions
- )
-
- partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
- closest_goal = recognizer.inference_phase(partial_sequence, np.array([[-0.1, -0.1, 0.1]]), 0.5)
- print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: [-0.1, -0.1, 0.1]")
@@ -1,38 +0,0 @@
-
- from stable_baselines3 import PPO, SAC, TD3
- from gr_libs.environment.environment import EnvProperty, GCEnvProperty, ParkingProperty
- from gr_libs.metrics.metrics import stochastic_amplified_selection
- from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
- from gr_libs.ml.utils.format import random_subset_with_order
- from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml, GCGraml
-
- # Consider extracting all these to "default point_maze (or every other domain) variables" module which would simplify things like the problem_list_to_str_tuple function, sizes of inputs, etc.
- recognizer = GCGraml(
-     env_name="parking", # TODO change to macros which are importable from some info or env module of enums.
-     problems=[ParkingProperty("parking-v0")],
-     train_configs=[(PPO, 400000)],
-     gc_goal_set=[f"Parking-S-14-PC--GI-{i}-v0" for i in range(1,21)]
- )
- recognizer.domain_learning_phase()
- recognizer.goals_adaptation_phase(
-     dynamic_goals_problems = [ParkingProperty(p) for p in ["Parking-S-14-PC--GI-1-v0",
-                                                            "Parking-S-14-PC--GI-4-v0",
-                                                            "Parking-S-14-PC--GI-8-v0",
-                                                            "Parking-S-14-PC--GI-11-v0",
-                                                            "Parking-S-14-PC--GI-14-v0",
-                                                            "Parking-S-14-PC--GI-18-v0",
-                                                            "Parking-S-14-PC--GI-21-v0"]] # TODO detach the goal from the environment instance in every gym env, add the ability to alter it from outside.
-     #dynamic_train_configs=[(SAC, 400000) for _ in range(7)] # for expert sequence generation. TODO change to require this only if sequence generation method is EXPERT.
- )
- # TD3 is different from recognizer and expert algorithms, which are SAC #
- actor = DeepRLAgent(env_name="parking", problem_name="Parking-S-14-PC--GI-8-v0", algorithm=TD3, num_timesteps=400000)
- actor.learn()
- # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
- full_sequence = actor.generate_observation(
-     action_selection_method=stochastic_amplified_selection,
-     random_optimalism=True, # the noise that's added to the actions
- )
-
- partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
- closest_goal = recognizer.inference_phase(partial_sequence, ParkingProperty("Parking-S-14-PC--GI-8-v0").str_to_goal(), 0.5)
- print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: 8")
@@ -1,43 +0,0 @@
-
- from stable_baselines3 import SAC, TD3
- from gr_libs.environment.utils.format import maze_str_to_goal
- from gr_libs.metrics.metrics import stochastic_amplified_selection
- from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
- from gr_libs.ml.utils.format import random_subset_with_order
- from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml
-
- # Consider extracting all these to "default point_maze (or every other domain) variables" module which would simplify things like the problem_list_to_str_tuple function, sizes of inputs, etc.
- recognizer = ExpertBasedGraml(
-     env_name="point_maze", # TODO change to macros which are importable from some info or env module of enums.
-     problems=[("PointMaze-FourRoomsEnvDense-11x11-Goal-9x1"),
-               ("PointMaze-FourRoomsEnv-11x11-Goal-9x9"), # this one doesn't work with dense rewards because of encountering local minima
-               ("PointMaze-FourRoomsEnvDense-11x11-Goal-1x9"),
-               ("PointMaze-FourRoomsEnvDense-11x11-Goal-3x3"),
-               ("PointMaze-FourRoomsEnvDense-11x11-Goal-3x4"),
-               ("PointMaze-FourRoomsEnvDense-11x11-Goal-8x2"),
-               ("PointMaze-FourRoomsEnvDense-11x11-Goal-3x7"),
-               ("PointMaze-FourRoomsEnvDense-11x11-Goal-2x8")],
-     task_str_to_goal=maze_str_to_goal,
-     method=DeepRLAgent,
-     collect_statistics=False,
-     train_configs=[(SAC, 200000) for _ in range(8)],
- )
- recognizer.domain_learning_phase()
- recognizer.goals_adaptation_phase(
-     dynamic_goals_problems = ["PointMaze-FourRoomsEnvDense-11x11-Goal-4x4",
-                               "PointMaze-FourRoomsEnvDense-11x11-Goal-7x3",
-                               "PointMaze-FourRoomsEnvDense-11x11-Goal-3x7"],
-     dynamic_train_configs=[(SAC, 200000) for _ in range(3)] # for expert sequence generation. TODO change to require this only if sequence generation method is EXPERT.
- )
- # TD3 is different from recognizer and expert algorithms, which are SAC #
- actor = DeepRLAgent(env_name="point_maze", problem_name="PointMaze-FourRoomsEnvDense-11x11-Goal-4x4", algorithm=TD3, num_timesteps=200000)
- actor.learn()
- # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
- full_sequence = actor.generate_observation(
-     action_selection_method=stochastic_amplified_selection,
-     random_optimalism=True, # the noise that's added to the actions
- )
-
- partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)))
- closest_goal = recognizer.inference_phase(partial_sequence, maze_str_to_goal("PointMaze-FourRoomsEnvDense-11x11-Goal-4x4"), 0.5)
- print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (4, 4)")
@@ -1,29 +0,0 @@
- from gr_libs.environment.environment import QLEARNING
- from gr_libs.metrics.metrics import stochastic_amplified_selection
- from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
- from gr_libs.ml.utils.format import random_subset_with_order
- from gr_libs import Graql
-
- recognizer = Graql(
-     domain_name="minigrid",
-     env_name="MiniGrid-SimpleCrossingS13N4"
- )
-
- #Graql doesn't have a domain learning phase, so we skip it
-
- recognizer.goals_adaptation_phase(
-     dynamic_goals = [(11,1), (11,11), (1,11)],
-     dynamic_train_configs=[(QLEARNING, 100000) for _ in range(3)] # for expert sequence generation.
- )
- # TD3 is different from recognizer and expert algorithms, which are SAC #
- actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
- actor.learn()
- # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
- full_sequence = actor.generate_observation(
-     action_selection_method=stochastic_amplified_selection,
-     random_optimalism=True, # the noise that's added to the actions
- )
-
- partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
- closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
- print(f"closest_goal returned by Graql: {closest_goal}\nactual goal actor aimed towards: (11, 1)")