gr-libs 0.1.5__tar.gz → 0.1.6.post1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. gr_libs-0.1.6.post1/.github/workflows/common_test_steps.yml +26 -0
  2. gr_libs-0.1.6.post1/.github/workflows/pr_flow.yml +10 -0
  3. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/.github/workflows/release.yml +10 -9
  4. gr_libs-0.1.6.post1/CI/README.md +12 -0
  5. gr_libs-0.1.6.post1/CI/docker_build_context/Dockerfile +15 -0
  6. {gr_libs-0.1.5/gr_libs.egg-info → gr_libs-0.1.6.post1}/PKG-INFO +22 -1
  7. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/README.md +20 -0
  8. gr_libs-0.1.6.post1/download_dataset.py +19 -0
  9. gr_libs-0.1.6.post1/gr_libs/_version.py +21 -0
  10. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/environment/__init__.py +2 -2
  11. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/environment/environment.py +1 -1
  12. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/metrics/metrics.py +1 -2
  13. gr_libs-0.1.6.post1/gr_libs/recognizer/graml/__init__.py +0 -0
  14. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/recognizer/recognizer.py +0 -1
  15. {gr_libs-0.1.5 → gr_libs-0.1.6.post1/gr_libs.egg-info}/PKG-INFO +22 -1
  16. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs.egg-info/SOURCES.txt +10 -1
  17. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs.egg-info/requires.txt +1 -0
  18. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs.egg-info/top_level.txt +2 -0
  19. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/odgr_executor.py +1 -1
  20. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/pyproject.toml +3 -1
  21. gr_libs-0.1.6.post1/tests/test_graml.py +16 -0
  22. gr_libs-0.1.6.post1/tests/test_graql.py +4 -0
  23. gr_libs-0.1.6.post1/tutorials/graml_minigrid_tutorial.py +34 -0
  24. gr_libs-0.1.6.post1/tutorials/graml_panda_tutorial.py +41 -0
  25. gr_libs-0.1.6.post1/tutorials/graml_parking_tutorial.py +38 -0
  26. gr_libs-0.1.6.post1/tutorials/graml_point_maze_tutorial.py +39 -0
  27. gr_libs-0.1.6.post1/tutorials/graql_minigrid_tutorial.py +34 -0
  28. gr_libs-0.1.5/tutorials/graml_minigrid_tutorial.py +0 -30
  29. gr_libs-0.1.5/tutorials/graml_panda_tutorial.py +0 -37
  30. gr_libs-0.1.5/tutorials/graml_parking_tutorial.py +0 -34
  31. gr_libs-0.1.5/tutorials/graml_point_maze_tutorial.py +0 -35
  32. gr_libs-0.1.5/tutorials/graql_minigrid_tutorial.py +0 -29
  33. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/.gitignore +0 -0
  34. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/all_experiments.py +0 -0
  35. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/evaluation/analyze_results_cross_alg_cross_domain.py +0 -0
  36. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/evaluation/create_minigrid_map_image.py +0 -0
  37. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/evaluation/file_system.py +0 -0
  38. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/evaluation/generate_experiments_results.py +0 -0
  39. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/evaluation/generate_experiments_results_new_ver1.py +0 -0
  40. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/evaluation/generate_experiments_results_new_ver2.py +0 -0
  41. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/evaluation/generate_task_specific_statistics_plots.py +0 -0
  42. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/evaluation/get_plans_images.py +0 -0
  43. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/evaluation/increasing_and_decreasing_.py +0 -0
  44. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/__init__.py +0 -0
  45. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/environment/utils/__init__.py +0 -0
  46. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/environment/utils/utils.py +0 -0
  47. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/metrics/__init__.py +0 -0
  48. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/__init__.py +0 -0
  49. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/agent.py +0 -0
  50. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/base/__init__.py +0 -0
  51. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/base/rl_agent.py +0 -0
  52. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/consts.py +0 -0
  53. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/neural/__init__.py +0 -0
  54. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/neural/deep_rl_learner.py +0 -0
  55. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/neural/utils/__init__.py +0 -0
  56. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/neural/utils/dictlist.py +0 -0
  57. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/neural/utils/penv.py +0 -0
  58. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/planner/__init__.py +0 -0
  59. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/planner/mcts/__init__.py +0 -0
  60. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/planner/mcts/mcts_model.py +0 -0
  61. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/planner/mcts/utils/__init__.py +0 -0
  62. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/planner/mcts/utils/node.py +0 -0
  63. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/planner/mcts/utils/tree.py +0 -0
  64. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/sequential/__init__.py +0 -0
  65. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/sequential/lstm_model.py +0 -0
  66. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/tabular/__init__.py +0 -0
  67. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/tabular/state.py +0 -0
  68. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/tabular/tabular_q_learner.py +0 -0
  69. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/tabular/tabular_rl_agent.py +0 -0
  70. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/utils/__init__.py +0 -0
  71. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/utils/env.py +0 -0
  72. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/utils/format.py +0 -0
  73. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/utils/math.py +0 -0
  74. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/utils/other.py +0 -0
  75. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/ml/utils/storage.py +0 -0
  76. {gr_libs-0.1.5/gr_libs/recognizer → gr_libs-0.1.6.post1/gr_libs/problems}/__init__.py +0 -0
  77. {gr_libs-0.1.5 → gr_libs-0.1.6.post1/gr_libs/problems}/consts.py +0 -0
  78. {gr_libs-0.1.5/gr_libs/recognizer/gr_as_rl → gr_libs-0.1.6.post1/gr_libs/recognizer}/__init__.py +0 -0
  79. {gr_libs-0.1.5/gr_libs/recognizer/graml → gr_libs-0.1.6.post1/gr_libs/recognizer/gr_as_rl}/__init__.py +0 -0
  80. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/recognizer/gr_as_rl/gr_as_rl_recognizer.py +0 -0
  81. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/recognizer/graml/gr_dataset.py +0 -0
  82. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/recognizer/graml/graml_recognizer.py +0 -0
  83. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/recognizer/recognizer_doc.md +0 -0
  84. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/recognizer/utils/__init__.py +0 -0
  85. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs/recognizer/utils/format.py +0 -0
  86. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/gr_libs.egg-info/dependency_links.txt +0 -0
  87. {gr_libs-0.1.5 → gr_libs-0.1.6.post1}/setup.cfg +0 -0
@@ -0,0 +1,26 @@
+name: Common Test Steps
+
+on:
+  workflow_call:
+
+jobs:
+  test_steps:
+    runs-on: ubuntu-latest
+    container:
+      image: ghcr.io/matanshamir1/gr_test_base_slim:latest
+    steps:
+      - name: Check out the repository
+        uses: actions/checkout@v4
+
+      - name: Install gr_libs with all extras and test tools
+        env:
+          SETUPTOOLS_SCM_PRETEND_VERSION_FOR_GR_LIBS: "0.0.0"
+        run: |
+          python -m pip install --upgrade pip
+          pip install setuptools_scm
+          pip install gr_envs[minigrid,panda,parking,maze]
+          pip install .[minigrid,panda,parking,maze]
+          pip install pytest
+
+      - name: Run tests
+        run: pytest tests/
@@ -0,0 +1,10 @@
+name: PR Test Flow
+
+on:
+  pull_request:
+    branches:
+      - main # or whichever branch you're targeting for PRs
+
+jobs:
+  run_tests:
+    uses: ./.github/workflows/common_test_steps.yml
@@ -6,27 +6,28 @@ on:
     - "v*"
 
 jobs:
-  build-and-publish:
+  release:
     runs-on: ubuntu-latest
-
     steps:
-      - name: Check out the repository
+      # from here to remov when returning uses: ./.github/workflows/common_test_steps.yml
+      - name: Checkout code
         uses: actions/checkout@v4
 
       - name: Set up Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: "3.11"
 
-      - name: Install build dependencies
+      - name: Install build tools
         run: |
           python -m pip install --upgrade pip
           pip install build twine
-
+      # until here!
       - name: Build the package
-        run: python -m build # Uses pyproject.toml instead of setup.py
+        run: python -m build
 
       - name: Publish to PyPI
         env:
-          PYPY_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
-        run: python -m twine upload dist/* -u __token__ -p $PYPY_API_TOKEN
+          TWINE_USERNAME: __token__
+          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
+        run: python -m twine upload dist/*
@@ -0,0 +1,12 @@
+## How to build a new docker image including new trained agents:
+1. Install docker
+2. Make sure you have a dataset.zip at your repo root
+3. Make sure you have a classic token in github: https://github.com/settings/tokens . If you don't, create one with package write, read and delete permissions and copy it somewhere safe.
+4. Authenticate to ghcr with docker by running:
+```sh
+echo ghp_REST_OF_TOKEN | docker login ghcr.io -u MatanShamir1 --password-stdin
+```
+3. docker build -t ghcr.io/<your-username>/gr_test_base:latest -f CI/Dockerfile .
+(the -f Dockerfile tells docker which Dockerfile to use and the '.' tells docker what's the build context, or where the dataset.zip should live)
+4. docker push ghcr.io/<your-username>/gr_test_base:latest
+docker push ghcr.io/MatanShamir1/gr_test_base:latest
@@ -0,0 +1,15 @@
+FROM python:3.11-slim
+
+# Set workdir
+WORKDIR /app
+
+# Install unzip
+RUN apt-get update && apt-get install -y unzip && rm -rf /var/lib/apt/lists/*
+
+# Copy and unzip the dataset
+COPY dataset.zip .
+RUN unzip dataset.zip && rm dataset.zip
+RUN mv dataset_new dataset
+
+# Just start with bash by default
+CMD [ "bash" ]
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: gr_libs
-Version: 0.1.5
+Version: 0.1.6.post1
 Summary: Package with goal recognition frameworks baselines
 Author: Ben Nageris
 Author-email: Matan Shamir <matan.shamir@live.biu.ac.il>, Osher Elhadad <osher.elhadad@live.biu.ac.il>
@@ -17,6 +17,7 @@ Requires-Dist: torchvision
 Requires-Dist: rl_zoo3
 Requires-Dist: stable_baselines3[extra]
 Requires-Dist: sb3_contrib
+Requires-Dist: pytest
 Provides-Extra: minigrid
 Requires-Dist: gr_envs[minigrid]; extra == "minigrid"
 Provides-Extra: highway
@@ -111,6 +112,25 @@ After installing GRLib, you will have access to custom Gym environments, allowin
 
 Tutorials demonstrating basic ODGR scenarios is available in the sub-package `tutorials`. These tutorials walk through the initialization and deployment process, showcasing how different GR algorithms adapt to emerging goals in various Gym environments.
 
+## Working with an initial dataset of trained agents
+gr_libs also includes a library of trained agents for the various supported environments within the package.
+To get the dataset of trained agents, you can run:
+```sh
+python download_dataset.py
+```
+
+An alternative is to use our docker image, which includes the dataset in it.
+You can:
+1. pull the image:
+```sh
+docker pull ghcr.io/MatanShamir1/gr_test_base:latest
+```
+2. run a container:
+```sh
+docker run -it ghcr.io/MatanShamir1/gr_test_base:latest bash
+```
+3. don't forget to install the package from within the container, go back to 'Setup' for that.
+
 ### Method 1: Writing a Custom Script
 
 1. **Create a recognizer**
@@ -118,6 +138,7 @@ Tutorials demonstrating basic ODGR scenarios is available in the sub-package `tu
 Specify the domain name and specific environment for the recognizer, effectively telling it the domain theory - the collection of states and actions in the environment.
 
 ```python
+import gr_libs.environment # Triggers gym env registration - you must run it!
 recognizer = Graql(
     domain_name="minigrid",
     env_name="MiniGrid-SimpleCrossingS13N4"
@@ -83,6 +83,25 @@ After installing GRLib, you will have access to custom Gym environments, allowin
 
 Tutorials demonstrating basic ODGR scenarios is available in the sub-package `tutorials`. These tutorials walk through the initialization and deployment process, showcasing how different GR algorithms adapt to emerging goals in various Gym environments.
 
+## Working with an initial dataset of trained agents
+gr_libs also includes a library of trained agents for the various supported environments within the package.
+To get the dataset of trained agents, you can run:
+```sh
+python download_dataset.py
+```
+
+An alternative is to use our docker image, which includes the dataset in it.
+You can:
+1. pull the image:
+```sh
+docker pull ghcr.io/MatanShamir1/gr_test_base:latest
+```
+2. run a container:
+```sh
+docker run -it ghcr.io/MatanShamir1/gr_test_base:latest bash
+```
+3. don't forget to install the package from within the container, go back to 'Setup' for that.
+
 ### Method 1: Writing a Custom Script
 
 1. **Create a recognizer**
@@ -90,6 +109,7 @@ Tutorials demonstrating basic ODGR scenarios is available in the sub-package `tu
 Specify the domain name and specific environment for the recognizer, effectively telling it the domain theory - the collection of states and actions in the environment.
 
 ```python
+import gr_libs.environment # Triggers gym env registration - you must run it!
 recognizer = Graql(
     domain_name="minigrid",
     env_name="MiniGrid-SimpleCrossingS13N4"
@@ -0,0 +1,19 @@
+import requests
+import zipfile
+import os
+
+def download_and_extract_dataset(google_drive_url, extract_to):
+    os.makedirs(extract_to, exist_ok=True)
+    download_url = google_drive_url + "&export=download"
+    response = requests.get(download_url)
+    response.raise_for_status()
+    with open('dataset.zip', 'wb') as f:
+        f.write(response.content)
+    with zipfile.ZipFile('dataset.zip', 'r') as zip_ref:
+        zip_ref.extractall(extract_to)
+    os.remove('dataset.zip')
+
+if __name__ == "__main__":
+    google_drive_url = "https://drive.google.com/file/d/1PK1iZONTyiQZBgLErUO88p1YWdL4B9Xn/view?usp=sharing"
+    extract_to = "dataset"
+    download_and_extract_dataset(google_drive_url, extract_to)
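The new download_dataset.py helper above takes the sharing URL and a target directory. A minimal usage sketch, assuming it is run from the repository root; the `my_dataset` directory name is hypothetical:

```python
# Hedged sketch: reuse the helper from download_dataset.py to unpack the
# trained-agent dataset into a custom folder instead of the default "dataset".
from download_dataset import download_and_extract_dataset

url = "https://drive.google.com/file/d/1PK1iZONTyiQZBgLErUO88p1YWdL4B9Xn/view?usp=sharing"
download_and_extract_dataset(url, extract_to="my_dataset")  # "my_dataset" is a hypothetical target
```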
@@ -0,0 +1,21 @@
+# file generated by setuptools-scm
+# don't change, don't track in version control
+
+__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
+
+TYPE_CHECKING = False
+if TYPE_CHECKING:
+    from typing import Tuple
+    from typing import Union
+
+    VERSION_TUPLE = Tuple[Union[int, str], ...]
+else:
+    VERSION_TUPLE = object
+
+version: str
+__version__: str
+__version_tuple__: VERSION_TUPLE
+version_tuple: VERSION_TUPLE
+
+__version__ = version = '0.1.6.post1'
+__version_tuple__ = version_tuple = (0, 1, 6)
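Since pyproject.toml now sets `write_to = "gr_libs/_version.py"`, setuptools-scm regenerates this file at build time. A small sketch of reading it at runtime, assuming gr_libs is installed:

```python
# Hedged sketch: read the version string written by setuptools-scm.
from gr_libs._version import __version__, __version_tuple__

print(__version__)        # e.g. "0.1.6.post1"
print(__version_tuple__)  # e.g. (0, 1, 6)
```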
@@ -12,11 +12,11 @@ def is_extra_installed(package: str, extra: str) -> bool:
     return False # The package is not installed
 
 # Check if `gr_libs[minigrid]` was installed
-for env in ["minigrid", "panda", "parking", "point_maze"]:
+for env in ["minigrid", "panda", "highway", "point_maze"]:
     if is_extra_installed("gr_libs", f"gr_envs[{env}]"):
         try:
             importlib.import_module(f"gr_envs.{env}_scripts.envs")
         except ImportError:
-            raise ImportError(f"gr_libs[{env}] was not installed, but gr_libs[{env}] requires it! if you messed with gr_libs installation, you can reinstall gr_libs.")
+            raise ImportError(f"gr_envs[{env}] was not installed, but gr_libs[{env}] requires it! if you messed with gr_envs installation, you can reinstall gr_libs.")
     else:
         warnings.warn(f"gr_libs[{env}] was not installed, skipping {env} imports.", RuntimeWarning)
@@ -105,7 +105,7 @@ class MinigridProperty(EnvProperty):
         env_id = problem_name.split("-DynamicGoal-")[0] + "-DynamicGoal-" + problem_name.split("-DynamicGoal-")[1]
         result = register(
             id=env_id,
-            entry_point="gr_libss.minigrid_scripts.envs:CustomColorEnv",
+            entry_point="gr_envs.minigrid_scripts.envs:CustomColorEnv",
             kwargs={"size": 13 if 'Simple' in problem_name else 9,
                     "num_crossings": 4 if 'Simple' in problem_name else 3,
                     "goal_pos": self.str_to_goal(problem_name),
@@ -5,7 +5,6 @@ import numpy as np
 
 from typing import Callable, Generator, List, Dict, Tuple, Any
 from math import log2
-from numpy.core.fromnumeric import mean
 from scipy.stats import wasserstein_distance
 from gymnasium.spaces.discrete import Discrete
 # import torch
@@ -43,7 +42,7 @@ def kl_divergence_norm_softmax(observations: List[Tuple[State, Any]], agent, act
         qp2_flatten_distribution_list: List[float] = agent.get_actions_probabilities(
             observation=(observation, agent_pos))
         distances.append(kl_divergence(qp1, qp2_flatten_distribution_list))
-    return mean(distances)
+    return np.mean(distances)
 
 
 def amplify(values, alpha=1.0):
@@ -1,6 +1,5 @@
 from abc import ABC, abstractmethod
 from typing import List, Type
-
 from gr_libs.environment.environment import EnvProperty, SUPPORTED_DOMAINS
 from gr_libs.environment.utils.utils import domain_to_env_property
 from gr_libs.ml.base.rl_agent import RLAgent
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: gr_libs
-Version: 0.1.5
+Version: 0.1.6.post1
 Summary: Package with goal recognition frameworks baselines
 Author: Ben Nageris
 Author-email: Matan Shamir <matan.shamir@live.biu.ac.il>, Osher Elhadad <osher.elhadad@live.biu.ac.il>
@@ -17,6 +17,7 @@ Requires-Dist: torchvision
 Requires-Dist: rl_zoo3
 Requires-Dist: stable_baselines3[extra]
 Requires-Dist: sb3_contrib
+Requires-Dist: pytest
 Provides-Extra: minigrid
 Requires-Dist: gr_envs[minigrid]; extra == "minigrid"
 Provides-Extra: highway
@@ -111,6 +112,25 @@ After installing GRLib, you will have access to custom Gym environments, allowin
 
 Tutorials demonstrating basic ODGR scenarios is available in the sub-package `tutorials`. These tutorials walk through the initialization and deployment process, showcasing how different GR algorithms adapt to emerging goals in various Gym environments.
 
+## Working with an initial dataset of trained agents
+gr_libs also includes a library of trained agents for the various supported environments within the package.
+To get the dataset of trained agents, you can run:
+```sh
+python download_dataset.py
+```
+
+An alternative is to use our docker image, which includes the dataset in it.
+You can:
+1. pull the image:
+```sh
+docker pull ghcr.io/MatanShamir1/gr_test_base:latest
+```
+2. run a container:
+```sh
+docker run -it ghcr.io/MatanShamir1/gr_test_base:latest bash
+```
+3. don't forget to install the package from within the container, go back to 'Setup' for that.
+
 ### Method 1: Writing a Custom Script
 
 1. **Create a recognizer**
@@ -118,6 +138,7 @@ Tutorials demonstrating basic ODGR scenarios is available in the sub-package `tu
 Specify the domain name and specific environment for the recognizer, effectively telling it the domain theory - the collection of states and actions in the environment.
 
 ```python
+import gr_libs.environment # Triggers gym env registration - you must run it!
 recognizer = Graql(
     domain_name="minigrid",
     env_name="MiniGrid-SimpleCrossingS13N4"
@@ -1,10 +1,14 @@
 .gitignore
 README.md
 all_experiments.py
-consts.py
+download_dataset.py
 odgr_executor.py
 pyproject.toml
+.github/workflows/common_test_steps.yml
+.github/workflows/pr_flow.yml
 .github/workflows/release.yml
+CI/README.md
+CI/docker_build_context/Dockerfile
 evaluation/analyze_results_cross_alg_cross_domain.py
 evaluation/create_minigrid_map_image.py
 evaluation/file_system.py
@@ -15,6 +19,7 @@ evaluation/generate_task_specific_statistics_plots.py
 evaluation/get_plans_images.py
 evaluation/increasing_and_decreasing_.py
 gr_libs/__init__.py
+gr_libs/_version.py
 gr_libs.egg-info/PKG-INFO
 gr_libs.egg-info/SOURCES.txt
 gr_libs.egg-info/dependency_links.txt
@@ -54,6 +59,8 @@ gr_libs/ml/utils/format.py
 gr_libs/ml/utils/math.py
 gr_libs/ml/utils/other.py
 gr_libs/ml/utils/storage.py
+gr_libs/problems/__init__.py
+gr_libs/problems/consts.py
 gr_libs/recognizer/__init__.py
 gr_libs/recognizer/recognizer.py
 gr_libs/recognizer/recognizer_doc.md
@@ -64,6 +71,8 @@ gr_libs/recognizer/graml/gr_dataset.py
 gr_libs/recognizer/graml/graml_recognizer.py
 gr_libs/recognizer/utils/__init__.py
 gr_libs/recognizer/utils/format.py
+tests/test_graml.py
+tests/test_graql.py
 tutorials/graml_minigrid_tutorial.py
 tutorials/graml_panda_tutorial.py
 tutorials/graml_parking_tutorial.py
@@ -6,6 +6,7 @@ torchvision
 rl_zoo3
 stable_baselines3[extra]
 sb3_contrib
+pytest
 
 [highway]
 gr_envs[highway]
@@ -1,4 +1,6 @@
+CI
 dist
 evaluation
 gr_libs
+tests
 tutorials
@@ -13,7 +13,7 @@ from gr_libs.recognizer.recognizer import GaAgentTrainerRecognizer, LearningReco
 from gr_libs.recognizer.utils import recognizer_str_to_obj
 from gr_libs.ml.utils.storage import create_folders_if_necessary, get_and_create, get_experiment_results_path, get_policy_sequences_result_path
 
-from consts import PROBLEMS
+from gr_libs.problems.consts import PROBLEMS
 
 def validate(args, recognizer_type, task_inputs):
     if "base" in task_inputs.keys():
@@ -22,7 +22,8 @@ dependencies = [
     "torchvision",
     "rl_zoo3",
     "stable_baselines3[extra]",
-    "sb3_contrib"
+    "sb3_contrib",
+    "pytest"
 ]
 classifiers = [
     "Programming Language :: Python :: 3",
@@ -42,3 +43,4 @@ packages = {find = {}}
 [tool.setuptools_scm]
 version_scheme = "post-release"
 local_scheme = "node-and-date"
+write_to = "gr_libs/_version.py" # This line writes the version to a file within the package
@@ -0,0 +1,16 @@
+from tutorials.graml_minigrid_tutorial import run_graml_minigrid_tutorial
+from tutorials.graml_panda_tutorial import run_graml_panda_tutorial
+from tutorials.graml_parking_tutorial import run_graml_parking_tutorial
+from tutorials.graml_point_maze_tutorial import run_graml_point_maze_tutorial
+
+def test_graml_minigrid_tutorial():
+    run_graml_minigrid_tutorial()
+
+def test_graml_panda_tutorial():
+    run_graml_panda_tutorial()
+
+def test_graml_parking_tutorial():
+    run_graml_parking_tutorial()
+
+def test_graml_point_maze_tutorial():
+    run_graml_point_maze_tutorial()
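These new tests simply wrap the tutorial entry points, so a subset can be run on its own. A sketch using pytest's Python API, assuming the trained-agent dataset (or the docker image) is available:

```python
# Hedged sketch: run only the GRAML tutorial tests programmatically.
import sys
import pytest

# Equivalent to `pytest -q tests/test_graml.py` on the command line.
sys.exit(pytest.main(["-q", "tests/test_graml.py"]))
```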
@@ -0,0 +1,4 @@
+from tutorials.graql_minigrid_tutorial import run_graql_minigrid_tutorial
+
+def test_graql_minigrid_tutorial():
+    run_graql_minigrid_tutorial()
@@ -0,0 +1,34 @@
+from gr_libs.environment.environment import MINIGRID, QLEARNING
+from gr_libs.metrics.metrics import stochastic_amplified_selection
+from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
+from gr_libs.ml.utils.format import random_subset_with_order
+from gr_libs import ExpertBasedGraml
+
+def run_graml_minigrid_tutorial():
+    recognizer = ExpertBasedGraml(
+        domain_name=MINIGRID,
+        env_name="MiniGrid-SimpleCrossingS13N4"
+    )
+
+    recognizer.domain_learning_phase(base_goals=[(11,1), (11,11), (1,11), (7,11), (8,1), (10,6), (6,9), (11,3), (11,5)],
+                                     train_configs=[(QLEARNING, 100000) for _ in range(9)])
+
+    recognizer.goals_adaptation_phase(
+        dynamic_goals = [(11,1), (11,11), (1,11)],
+        dynamic_train_configs=[(QLEARNING, 100000) for _ in range(3)] # for expert sequence generation.
+    )
+    # TD3 is different from recognizer and expert algorithms, which are SAC #
+    actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
+    actor.learn()
+    # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+    full_sequence = actor.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True, # the noise that's added to the actions
+    )
+
+    partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
+    closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
+    print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
+
+if __name__ == "__main__":
+    run_graml_minigrid_tutorial()
@@ -0,0 +1,41 @@
+
+import numpy as np
+from stable_baselines3 import PPO, SAC
+import gr_libs.environment.environment
+from gr_libs.environment.environment import PANDA, EnvProperty, GCEnvProperty, PandaProperty
+from gr_libs.environment.utils.utils import domain_to_env_property
+from gr_libs.metrics.metrics import stochastic_amplified_selection
+from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
+from gr_libs.ml.utils.format import random_subset_with_order
+from gr_libs import GCGraml
+
+def run_graml_panda_tutorial():
+    recognizer = GCGraml( # TODO make these tutorials into pytests
+        domain_name=PANDA,
+        env_name="PandaMyReachDense"
+    )
+    recognizer.domain_learning_phase(
+        base_goals=[np.array([PandaProperty.sample_goal()]) for _ in range(1,30)],
+        train_configs=[(SAC, 800000)]
+    )
+    recognizer.goals_adaptation_phase(
+        dynamic_goals=[np.array([[-0.1, -0.1, 0.1]]), np.array([[-0.1, 0.1, 0.1]]), np.array([[0.2, 0.2, 0.1]])]
+    )
+    # TD3 is different from recognizer and expert algorithms, which are SAC #
+    property_type = domain_to_env_property(PANDA)
+    env_property = property_type("PandaMyReachDense")
+    problem_name = env_property.goal_to_problem_str(np.array([[-0.1, -0.1, 0.1]]))
+    actor = DeepRLAgent(domain_name=PANDA, problem_name=problem_name, algorithm=PPO, num_timesteps=400000)
+    actor.learn()
+    # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+    full_sequence = actor.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True, # the noise that's added to the actions
+    )
+
+    partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
+    closest_goal = recognizer.inference_phase(partial_sequence, np.array([[-0.1, -0.1, 0.1]]), 0.5)
+    print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: [-0.1, -0.1, 0.1]")
+
+if __name__ == "__main__":
+    run_graml_panda_tutorial()
@@ -0,0 +1,38 @@
+
+from stable_baselines3 import PPO, SAC, TD3
+from gr_libs.environment.environment import PARKING, EnvProperty, GCEnvProperty, ParkingProperty
+from gr_libs.metrics.metrics import stochastic_amplified_selection
+from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
+from gr_libs.ml.utils.format import random_subset_with_order
+from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml, GCGraml
+
+def run_graml_parking_tutorial():
+    recognizer = GCGraml(
+        domain_name=PARKING,
+        env_name="Parking-S-14-PC-"
+    )
+
+    recognizer.domain_learning_phase(
+        [i for i in range(1,21)],
+        [(PPO, 200000)]
+    )
+    recognizer.goals_adaptation_phase(
+        dynamic_goals = ["1", "11", "21"]
+        # no need for expert sequence generation since GCRL is used
+    )
+
+    # TD3 is different from recognizer and expert algorithms, which are SAC #
+    actor = DeepRLAgent(domain_name="parking", problem_name="Parking-S-14-PC--GI-11-v0", algorithm=TD3, num_timesteps=400000)
+    actor.learn()
+    # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+    full_sequence = actor.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True, # the noise that's added to the actions
+    )
+
+    partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
+    closest_goal = recognizer.inference_phase(partial_sequence, ParkingProperty("Parking-S-14-PC--GI-11-v0").str_to_goal(), 0.5)
+    print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: 11")
+
+if __name__ == "__main__":
+    run_graml_parking_tutorial()
@@ -0,0 +1,39 @@
+
+from stable_baselines3 import SAC, TD3
+from gr_libs.environment.environment import POINT_MAZE, PointMazeProperty
+from gr_libs.metrics.metrics import stochastic_amplified_selection
+from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
+from gr_libs.ml.utils.format import random_subset_with_order
+from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml
+
+def run_graml_point_maze_tutorial():
+    recognizer = ExpertBasedGraml(
+        domain_name=POINT_MAZE,
+        env_name="PointMaze-FourRoomsEnvDense-11x11"
+    )
+
+    recognizer.domain_learning_phase(
+        [(9,1), (9,9), (1,9), (3,3), (3,4), (8,2), (3,7), (2,8)],
+        [(SAC, 200000) for _ in range(8)]
+    )
+
+    recognizer.goals_adaptation_phase(
+        dynamic_goals = [(4,4), (7,3), (3,7)],
+        dynamic_train_configs=[(SAC, 200000) for _ in range(3)] # for expert sequence generation.
+    )
+
+    # TD3 is different from recognizer and expert algorithms, which are SAC #
+    actor = DeepRLAgent(domain_name="point_maze", problem_name="PointMaze-FourRoomsEnvDense-11x11-Goal-4x4", algorithm=TD3, num_timesteps=200000)
+    actor.learn()
+    # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+    full_sequence = actor.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True, # the noise that's added to the actions
+    )
+
+    partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)))
+    closest_goal = recognizer.inference_phase(partial_sequence, PointMazeProperty("PointMaze-FourRoomsEnvDense-11x11-Goal-4x4").str_to_goal(), 0.5)
+    print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (4, 4)")
+
+if __name__ == "__main__":
+    run_graml_point_maze_tutorial()
@@ -0,0 +1,34 @@
+from gr_libs.environment.environment import QLEARNING
+from gr_libs.metrics.metrics import stochastic_amplified_selection
+from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
+from gr_libs.ml.utils.format import random_subset_with_order
+from gr_libs import Graql
+
+def run_graql_minigrid_tutorial():
+    recognizer = Graql(
+        domain_name="minigrid",
+        env_name="MiniGrid-SimpleCrossingS13N4"
+    )
+
+    #Graql doesn't have a domain learning phase, so we skip it
+
+    recognizer.goals_adaptation_phase(
+        dynamic_goals = [(11,1), (11,11), (1,11)],
+        dynamic_train_configs=[(QLEARNING, 100000) for _ in range(3)] # for expert sequence generation.
+    )
+    # TD3 is different from recognizer and expert algorithms, which are SAC #
+    actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
+    actor.learn()
+    # sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
+    full_sequence = actor.generate_observation(
+        action_selection_method=stochastic_amplified_selection,
+        random_optimalism=True, # the noise that's added to the actions
+    )
+
+    partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
+    closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
+    print(f"closest_goal returned by Graql: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
+    return closest_goal, (11,1)
+
+if __name__ == "__main__":
+    run_graql_minigrid_tutorial()
@@ -1,30 +0,0 @@
-from gr_libs.environment.environment import MINIGRID, QLEARNING
-from gr_libs.metrics.metrics import stochastic_amplified_selection
-from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
-from gr_libs.ml.utils.format import random_subset_with_order
-from gr_libs import ExpertBasedGraml
-
-recognizer = ExpertBasedGraml(
-    domain_name=MINIGRID,
-    env_name="MiniGrid-SimpleCrossingS13N4"
-)
-
-recognizer.domain_learning_phase(base_goals=[(11,1), (11,11), (1,11), (7,11), (8,1), (10,6), (6,9), (11,3), (11,5)],
-                                 train_configs=[(QLEARNING, 100000) for _ in range(9)])
-
-recognizer.goals_adaptation_phase(
-    dynamic_goals = [(11,1), (11,11), (1,11)],
-    dynamic_train_configs=[(QLEARNING, 100000) for _ in range(3)] # for expert sequence generation.
-)
-# TD3 is different from recognizer and expert algorithms, which are SAC #
-actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
-actor.learn()
-# sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
-full_sequence = actor.generate_observation(
-    action_selection_method=stochastic_amplified_selection,
-    random_optimalism=True, # the noise that's added to the actions
-)
-
-partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
-closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
-print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (11, 1)")
@@ -1,37 +0,0 @@
-
-import numpy as np
-from stable_baselines3 import PPO, SAC
-import gr_libs.environment.environment
-from gr_libs.environment.environment import PANDA, EnvProperty, GCEnvProperty, PandaProperty
-from gr_libs.environment.utils.utils import domain_to_env_property
-from gr_libs.metrics.metrics import stochastic_amplified_selection
-from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
-from gr_libs.ml.utils.format import random_subset_with_order
-from gr_libs import GCGraml
-
-recognizer = GCGraml( # TODO make these tutorials into pytests
-    domain_name=PANDA,
-    env_name="PandaMyReachDense"
-)
-recognizer.domain_learning_phase(
-    base_goals=[np.array([PandaProperty.sample_goal()]) for _ in range(1,30)],
-    train_configs=[(SAC, 800000)]
-)
-recognizer.goals_adaptation_phase(
-    dynamic_goals=[np.array([[-0.1, -0.1, 0.1]]), np.array([[-0.1, 0.1, 0.1]]), np.array([[0.2, 0.2, 0.1]])]
-)
-# TD3 is different from recognizer and expert algorithms, which are SAC #
-property_type = domain_to_env_property(PANDA)
-env_property = property_type("PandaMyReachDense")
-problem_name = env_property.goal_to_problem_str(np.array([[-0.1, -0.1, 0.1]]))
-actor = DeepRLAgent(domain_name=PANDA, problem_name=problem_name, algorithm=PPO, num_timesteps=400000)
-actor.learn()
-# sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
-full_sequence = actor.generate_observation(
-    action_selection_method=stochastic_amplified_selection,
-    random_optimalism=True, # the noise that's added to the actions
-)
-
-partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
-closest_goal = recognizer.inference_phase(partial_sequence, np.array([[-0.1, -0.1, 0.1]]), 0.5)
-print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: [-0.1, -0.1, 0.1]")
@@ -1,34 +0,0 @@
-
-from stable_baselines3 import PPO, SAC, TD3
-from gr_libs.environment.environment import PARKING, EnvProperty, GCEnvProperty, ParkingProperty
-from gr_libs.metrics.metrics import stochastic_amplified_selection
-from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent, GCDeepRLAgent
-from gr_libs.ml.utils.format import random_subset_with_order
-from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml, GCGraml
-
-recognizer = GCGraml(
-    domain_name=PARKING,
-    env_name="Parking-S-14-PC-"
-)
-
-recognizer.domain_learning_phase(
-    [i for i in range(1,21)],
-    [(PPO, 200000)]
-)
-recognizer.goals_adaptation_phase(
-    dynamic_goals = ["1", "11", "21"]
-    # no need for expert sequence generation since GCRL is used
-)
-
-# TD3 is different from recognizer and expert algorithms, which are SAC #
-actor = DeepRLAgent(domain_name="parking", problem_name="Parking-S-14-PC--GI-11-v0", algorithm=TD3, num_timesteps=400000)
-actor.learn()
-# sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
-full_sequence = actor.generate_observation(
-    action_selection_method=stochastic_amplified_selection,
-    random_optimalism=True, # the noise that's added to the actions
-)
-
-partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
-closest_goal = recognizer.inference_phase(partial_sequence, ParkingProperty("Parking-S-14-PC--GI-11-v0").str_to_goal(), 0.5)
-print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: 11")
@@ -1,35 +0,0 @@
-
-from stable_baselines3 import SAC, TD3
-from gr_libs.environment.environment import POINT_MAZE, PointMazeProperty
-from gr_libs.metrics.metrics import stochastic_amplified_selection
-from gr_libs.ml.neural.deep_rl_learner import DeepRLAgent
-from gr_libs.ml.utils.format import random_subset_with_order
-from gr_libs.recognizer.graml.graml_recognizer import ExpertBasedGraml
-
-recognizer = ExpertBasedGraml(
-    domain_name=POINT_MAZE,
-    env_name="PointMaze-FourRoomsEnvDense-11x11"
-)
-
-recognizer.domain_learning_phase(
-    [(9,1), (9,9), (1,9), (3,3), (3,4), (8,2), (3,7), (2,8)],
-    [(SAC, 200000) for _ in range(8)]
-)
-
-recognizer.goals_adaptation_phase(
-    dynamic_goals = [(4,4), (7,3), (3,7)],
-    dynamic_train_configs=[(SAC, 200000) for _ in range(3)] # for expert sequence generation.
-)
-
-# TD3 is different from recognizer and expert algorithms, which are SAC #
-actor = DeepRLAgent(domain_name="point_maze", problem_name="PointMaze-FourRoomsEnvDense-11x11-Goal-4x4", algorithm=TD3, num_timesteps=200000)
-actor.learn()
-# sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
-full_sequence = actor.generate_observation(
-    action_selection_method=stochastic_amplified_selection,
-    random_optimalism=True, # the noise that's added to the actions
-)
-
-partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)))
-closest_goal = recognizer.inference_phase(partial_sequence, PointMazeProperty("PointMaze-FourRoomsEnvDense-11x11-Goal-4x4").str_to_goal(), 0.5)
-print(f"closest_goal returned by GRAML: {closest_goal}\nactual goal actor aimed towards: (4, 4)")
@@ -1,29 +0,0 @@
-from gr_libs.environment.environment import QLEARNING
-from gr_libs.metrics.metrics import stochastic_amplified_selection
-from gr_libs.ml.tabular.tabular_q_learner import TabularQLearner
-from gr_libs.ml.utils.format import random_subset_with_order
-from gr_libs import Graql
-
-recognizer = Graql(
-    domain_name="minigrid",
-    env_name="MiniGrid-SimpleCrossingS13N4"
-)
-
-#Graql doesn't have a domain learning phase, so we skip it
-
-recognizer.goals_adaptation_phase(
-    dynamic_goals = [(11,1), (11,11), (1,11)],
-    dynamic_train_configs=[(QLEARNING, 100000) for _ in range(3)] # for expert sequence generation.
-)
-# TD3 is different from recognizer and expert algorithms, which are SAC #
-actor = TabularQLearner(domain_name="minigrid", problem_name="MiniGrid-SimpleCrossingS13N4-DynamicGoal-11x1-v0", algorithm=QLEARNING, num_timesteps=100000)
-actor.learn()
-# sample is generated stochastically to simulate suboptimal behavior, noise is added to the actions values #
-full_sequence = actor.generate_observation(
-    action_selection_method=stochastic_amplified_selection,
-    random_optimalism=True, # the noise that's added to the actions
-)
-
-partial_sequence = random_subset_with_order(full_sequence, (int)(0.5 * len(full_sequence)), is_consecutive=False)
-closest_goal = recognizer.inference_phase(partial_sequence, (11,1), 0.5)
-print(f"closest_goal returned by Graql: {closest_goal}\nactual goal actor aimed towards: (11, 1)")