gymcts 1.0.0__tar.gz → 1.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gymcts-1.0.0/src/gymcts.egg-info → gymcts-1.2.1}/PKG-INFO +54 -56
- {gymcts-1.0.0 → gymcts-1.2.1}/README.md +43 -51
- {gymcts-1.0.0 → gymcts-1.2.1}/pyproject.toml +10 -4
- {gymcts-1.0.0 → gymcts-1.2.1}/setup.cfg +2 -5
- {gymcts-1.0.0 → gymcts-1.2.1}/src/gymcts/colorful_console_utils.py +26 -3
- gymcts-1.0.0/src/gymcts/gymcts_deterministic_wrapper.py → gymcts-1.2.1/src/gymcts/gymcts_action_history_wrapper.py +74 -4
- {gymcts-1.0.0 → gymcts-1.2.1}/src/gymcts/gymcts_agent.py +29 -69
- gymcts-1.0.0/src/gymcts/gymcts_naive_wrapper.py → gymcts-1.2.1/src/gymcts/gymcts_deepcopy_wrapper.py +60 -3
- gymcts-1.2.1/src/gymcts/gymcts_distributed_agent.py +299 -0
- gymcts-1.2.1/src/gymcts/gymcts_env_abc.py +61 -0
- gymcts-1.2.1/src/gymcts/gymcts_node.py +276 -0
- gymcts-1.2.1/src/gymcts/gymcts_tree_plotter.py +96 -0
- {gymcts-1.0.0 → gymcts-1.2.1}/src/gymcts/logger.py +1 -4
- {gymcts-1.0.0 → gymcts-1.2.1/src/gymcts.egg-info}/PKG-INFO +54 -56
- {gymcts-1.0.0 → gymcts-1.2.1}/src/gymcts.egg-info/SOURCES.txt +5 -3
- {gymcts-1.0.0 → gymcts-1.2.1}/src/gymcts.egg-info/requires.txt +6 -2
- {gymcts-1.0.0 → gymcts-1.2.1}/tests/test_graph_matrix_jsp_env.py +6 -15
- {gymcts-1.0.0 → gymcts-1.2.1}/tests/test_gymnasium_envs.py +8 -8
- {gymcts-1.0.0 → gymcts-1.2.1}/tests/test_number_of_visits.py +6 -8
- gymcts-1.0.0/src/gymcts/gymcts_gym_env.py +0 -28
- gymcts-1.0.0/src/gymcts/gymcts_node.py +0 -213
- {gymcts-1.0.0 → gymcts-1.2.1}/LICENSE +0 -0
- {gymcts-1.0.0 → gymcts-1.2.1}/MANIFEST.in +0 -0
- {gymcts-1.0.0 → gymcts-1.2.1}/setup.py +0 -0
- {gymcts-1.0.0 → gymcts-1.2.1}/src/gymcts/__init__.py +0 -0
- {gymcts-1.0.0 → gymcts-1.2.1}/src/gymcts.egg-info/dependency_links.txt +0 -0
- {gymcts-1.0.0 → gymcts-1.2.1}/src/gymcts.egg-info/not-zip-safe +0 -0
- {gymcts-1.0.0 → gymcts-1.2.1}/src/gymcts.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: gymcts
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.2.1
|
|
4
4
|
Summary: A minimalistic implementation of the Monte Carlo Tree Search algorithm for planning problems formulated as Gymnasium reinforcement learning environments.
|
|
5
5
|
Author: Alexander Nasuta
|
|
6
6
|
Author-email: Alexander Nasuta <alexander.nasuta@wzl-iqs.rwth-aachen.de>
|
|
@@ -25,7 +25,7 @@ License: MIT License
|
|
|
25
25
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
26
26
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
27
27
|
SOFTWARE.
|
|
28
|
-
Project-URL: Homepage, https://github.com/Alexander-Nasuta/
|
|
28
|
+
Project-URL: Homepage, https://github.com/Alexander-Nasuta/gymcts
|
|
29
29
|
Platform: unix
|
|
30
30
|
Platform: linux
|
|
31
31
|
Platform: osx
|
|
@@ -34,7 +34,7 @@ Platform: win32
|
|
|
34
34
|
Classifier: License :: OSI Approved :: MIT License
|
|
35
35
|
Classifier: Programming Language :: Python
|
|
36
36
|
Classifier: Programming Language :: Python :: 3
|
|
37
|
-
Requires-Python: >=3.
|
|
37
|
+
Requires-Python: >=3.11
|
|
38
38
|
Description-Content-Type: text/markdown
|
|
39
39
|
License-File: LICENSE
|
|
40
40
|
Requires-Dist: rich
|
|
@@ -47,7 +47,7 @@ Requires-Dist: graph-matrix-jsp-env; extra == "examples"
|
|
|
47
47
|
Requires-Dist: graph-jsp-env; extra == "examples"
|
|
48
48
|
Provides-Extra: dev
|
|
49
49
|
Requires-Dist: jsp-instance-utils; extra == "dev"
|
|
50
|
-
Requires-Dist: graph-matrix-jsp-env; extra == "dev"
|
|
50
|
+
Requires-Dist: graph-matrix-jsp-env>=0.3.0; extra == "dev"
|
|
51
51
|
Requires-Dist: graph-jsp-env; extra == "dev"
|
|
52
52
|
Requires-Dist: JSSEnv; extra == "dev"
|
|
53
53
|
Requires-Dist: pip-tools; extra == "dev"
|
|
@@ -59,18 +59,24 @@ Requires-Dist: stable_baselines3; extra == "dev"
|
|
|
59
59
|
Requires-Dist: sphinx; extra == "dev"
|
|
60
60
|
Requires-Dist: myst-parser; extra == "dev"
|
|
61
61
|
Requires-Dist: sphinx-autobuild; extra == "dev"
|
|
62
|
+
Requires-Dist: sphinx-copybutton; extra == "dev"
|
|
62
63
|
Requires-Dist: furo; extra == "dev"
|
|
63
64
|
Requires-Dist: twine; extra == "dev"
|
|
64
65
|
Requires-Dist: sphinx-copybutton; extra == "dev"
|
|
65
66
|
Requires-Dist: nbsphinx; extra == "dev"
|
|
67
|
+
Requires-Dist: pandoc; extra == "dev"
|
|
68
|
+
Requires-Dist: jupytext; extra == "dev"
|
|
69
|
+
Requires-Dist: jupyter; extra == "dev"
|
|
70
|
+
Requires-Dist: typing_extensions>=4.12.0; extra == "dev"
|
|
71
|
+
Dynamic: license-file
|
|
66
72
|
|
|
67
73
|
# Graph Matrix Job Shop Env
|
|
68
74
|
|
|
69
75
|
A Monte Carlo Tree Search Implementation for Gymnasium-style Environments.
|
|
70
76
|
|
|
71
|
-
- Github: [GYMCTS on Github](https://github.com/Alexander-Nasuta/
|
|
72
|
-
- Pypi: [GYMCTS on PyPi](https://pypi.org/project/
|
|
73
|
-
- Documentation: [GYMCTS Docs](https://
|
|
77
|
+
- Github: [GYMCTS on Github](https://github.com/Alexander-Nasuta/gymcts)
|
|
78
|
+
- Pypi: [GYMCTS on PyPi](https://pypi.org/project/gymcts/)
|
|
79
|
+
- Documentation: [GYMCTS Docs](https://gymcts.readthedocs.io/en/latest/)
|
|
74
80
|
|
|
75
81
|
## Description
|
|
76
82
|
|
|
@@ -98,28 +104,32 @@ The usage of a MCTS agent can roughly organised into the following steps:
|
|
|
98
104
|
- Render the solution
|
|
99
105
|
|
|
100
106
|
The GYMCTS package provides two types of wrappers for Gymnasium-style environments:
|
|
101
|
-
- `
|
|
102
|
-
- `
|
|
107
|
+
- `DeepCopyMCTSGymEnvWrapper`: A wrapper that uses deepcopies of the environment to save a snapshot of the environment state for each node in the MCTS tree.
|
|
108
|
+
- `ActionHistoryMCTSGymEnvWrapper`: A wrapper that saves the action sequence that led to the current state in the MCTS node.
|
|
103
109
|
|
|
104
|
-
These wrappers can be used with the `
|
|
105
|
-
The wrapper implement methods that are required by the `
|
|
110
|
+
These wrappers can be used with the `GymctsAgent` to solve the environment.
|
|
111
|
+
The wrappers implement the methods that are required by the `GymctsAgent` to interact with the environment.
|
|
106
112
|
GYMCTS is designed to use a single environment instance and to reconstruct the environment state from a state snapshot when needed.
|
|
107
113
|
|
|
108
114
|
NOTE: MCTS works best when the return of an episode is in the range of [-1, 1]. Please adjust the reward function of the environment accordingly (or change the ubc-scaling parameter of the MCTS agent).
|
|
109
115
|
Adjusting the reward function of the environment is easily done with a [NormalizeReward](https://gymnasium.farama.org/api/wrappers/reward_wrappers/#gymnasium.wrappers.NormalizeReward) or [TransformReward](https://gymnasium.farama.org/api/wrappers/reward_wrappers/#gymnasium.wrappers.TransformReward) Wrapper.
|
|
116
|
+
```python
|
|
117
|
+
env = NormalizeReward(env, gamma=0.99, epsilon=1e-8)
|
|
118
|
+
```
|
|
110
119
|
|
|
111
|
-
|
|
112
|
-
env = TransformReward(env, lambda r: r /
|
|
113
|
-
|
|
120
|
+
```python
|
|
121
|
+
env = TransformReward(env, lambda r: r / n_steps_per_episode)
|
|
122
|
+
```
|
|
123
|
+
### FrozenLake Example (DeepCopyMCTSGymEnvWrapper)
|
|
114
124
|
|
|
115
125
|
A minimal example of how to use the package with the FrozenLake environment and the NaiveSoloMCTSGymEnvWrapper is provided in the following code snippet below.
|
|
116
|
-
The
|
|
126
|
+
The DeepCopyMCTSGymEnvWrapper can be used with non-deterministic environments, such as the FrozenLake environment with slippery ice.
|
|
117
127
|
|
|
118
128
|
```python
|
|
119
129
|
import gymnasium as gym
|
|
120
130
|
|
|
121
|
-
from gymcts.gymcts_agent import
|
|
122
|
-
from gymcts.
|
|
131
|
+
from gymcts.gymcts_agent import GymctsAgent
|
|
132
|
+
from gymcts.gymcts_deepcopy_wrapper import DeepCopyMCTSGymEnvWrapper
|
|
123
133
|
|
|
124
134
|
from gymcts.logger import log
|
|
125
135
|
|
|
@@ -132,11 +142,11 @@ if __name__ == '__main__':
|
|
|
132
142
|
env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=True, render_mode="ansi")
|
|
133
143
|
env.reset()
|
|
134
144
|
|
|
135
|
-
# 1. wrap the environment with the
|
|
136
|
-
env =
|
|
145
|
+
# 1. wrap the environment with the deep copy wrapper or a custom gymcts wrapper
|
|
146
|
+
env = DeepCopyMCTSGymEnvWrapper(env)
|
|
137
147
|
|
|
138
148
|
# 2. create the agent
|
|
139
|
-
agent =
|
|
149
|
+
agent = GymctsAgent(
|
|
140
150
|
env=env,
|
|
141
151
|
clear_mcts_tree_after_step=False,
|
|
142
152
|
render_tree_after_step=True,
|
|
@@ -155,7 +165,7 @@ if __name__ == '__main__':
|
|
|
155
165
|
|
|
156
166
|
# 5. print the solution
|
|
157
167
|
# read the solution from the info provided by the RecordEpisodeStatistics wrapper
|
|
158
|
-
# (that
|
|
168
|
+
# (that DeepCopyMCTSGymEnvWrapper uses internally)
|
|
159
169
|
episode_length = info["episode"]["l"]
|
|
160
170
|
episode_return = info["episode"]["r"]
|
|
161
171
|
|
|
@@ -170,13 +180,13 @@ if __name__ == '__main__':
|
|
|
170
180
|
A minimal example of how to use the package with the FrozenLake environment and the DeterministicSoloMCTSGymEnvWrapper is provided in the following code snippet below.
|
|
171
181
|
The DeterministicSoloMCTSGymEnvWrapper can be used with deterministic environments, such as the FrozenLake environment without slippery ice.
|
|
172
182
|
|
|
173
|
-
The DeterministicSoloMCTSGymEnvWrapper saves the action sequence that lead to the current state in the MCTS node.
|
|
183
|
+
The DeterministicSoloMCTSGymEnvWrapper saves the action sequence that lead to the current state in the MCTS node.
|
|
174
184
|
|
|
175
185
|
```python
|
|
176
186
|
import gymnasium as gym
|
|
177
187
|
|
|
178
|
-
from gymcts.gymcts_agent import
|
|
179
|
-
from gymcts.
|
|
188
|
+
from gymcts.gymcts_agent import GymctsAgent
|
|
189
|
+
from gymcts.gymcts_action_history_wrapper import ActionHistoryMCTSGymEnvWrapper
|
|
180
190
|
|
|
181
191
|
from gymcts.logger import log
|
|
182
192
|
|
|
@@ -190,10 +200,10 @@ if __name__ == '__main__':
|
|
|
190
200
|
env.reset()
|
|
191
201
|
|
|
192
202
|
# 1. wrap the environment with the wrapper
|
|
193
|
-
env =
|
|
203
|
+
env = ActionHistoryMCTSGymEnvWrapper(env)
|
|
194
204
|
|
|
195
205
|
# 2. create the agent
|
|
196
|
-
agent =
|
|
206
|
+
agent = GymctsAgent(
|
|
197
207
|
env=env,
|
|
198
208
|
clear_mcts_tree_after_step=False,
|
|
199
209
|
render_tree_after_step=True,
|
|
@@ -232,8 +242,8 @@ To create a video of the solution of the FrozenLake environment, you can use the
|
|
|
232
242
|
```python
|
|
233
243
|
import gymnasium as gym
|
|
234
244
|
|
|
235
|
-
from gymcts.gymcts_agent import
|
|
236
|
-
from gymcts.
|
|
245
|
+
from gymcts.gymcts_agent import GymctsAgent
|
|
246
|
+
from gymcts.gymcts_deepcopy_wrapper import DeepCopyMCTSGymEnvWrapper
|
|
237
247
|
|
|
238
248
|
from gymcts.logger import log
|
|
239
249
|
|
|
@@ -248,11 +258,11 @@ if __name__ == '__main__':
|
|
|
248
258
|
env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=False, render_mode="rgb_array")
|
|
249
259
|
env.reset()
|
|
250
260
|
|
|
251
|
-
# 1. wrap the environment with the
|
|
252
|
-
env =
|
|
261
|
+
# 1. wrap the environment with the deep copy wrapper or a custom gymcts wrapper
|
|
262
|
+
env = DeepCopyMCTSGymEnvWrapper(env)
|
|
253
263
|
|
|
254
264
|
# 2. create the agent
|
|
255
|
-
agent =
|
|
265
|
+
agent = GymctsAgent(
|
|
256
266
|
env=env,
|
|
257
267
|
clear_mcts_tree_after_step=False,
|
|
258
268
|
render_tree_after_step=True,
|
|
@@ -277,7 +287,7 @@ if __name__ == '__main__':
|
|
|
277
287
|
env.close()
|
|
278
288
|
|
|
279
289
|
# 5. print the solution
|
|
280
|
-
# read the solution from the info provided by the RecordEpisodeStatistics wrapper (that
|
|
290
|
+
# read the solution from the info provided by the RecordEpisodeStatistics wrapper (that DeepCopyMCTSGymEnvWrapper wraps internally)
|
|
281
291
|
episode_length = info["episode"]["l"]
|
|
282
292
|
episode_return = info["episode"]["r"]
|
|
283
293
|
|
|
@@ -318,13 +328,13 @@ import gymnasium as gym
|
|
|
318
328
|
from graph_jsp_env.disjunctive_graph_jsp_env import DisjunctiveGraphJspEnv
|
|
319
329
|
from jsp_instance_utils.instances import ft06, ft06_makespan
|
|
320
330
|
|
|
321
|
-
from gymcts.gymcts_agent import
|
|
322
|
-
from gymcts.
|
|
331
|
+
from gymcts.gymcts_agent import GymctsAgent
|
|
332
|
+
from gymcts.gymcts_env_abc import GymctsABC
|
|
323
333
|
|
|
324
334
|
from gymcts.logger import log
|
|
325
335
|
|
|
326
336
|
|
|
327
|
-
class GraphJspGYMCTSWrapper(
|
|
337
|
+
class GraphJspGYMCTSWrapper(GymctsABC, gym.Wrapper):
|
|
328
338
|
|
|
329
339
|
def __init__(self, env: DisjunctiveGraphJspEnv):
|
|
330
340
|
gym.Wrapper.__init__(self, env)
|
|
@@ -375,7 +385,7 @@ if __name__ == '__main__':
|
|
|
375
385
|
|
|
376
386
|
env = GraphJspGYMCTSWrapper(env)
|
|
377
387
|
|
|
378
|
-
agent =
|
|
388
|
+
agent = GymctsAgent(
|
|
379
389
|
env=env,
|
|
380
390
|
clear_mcts_tree_after_step=True,
|
|
381
391
|
render_tree_after_step=True,
|
|
@@ -413,13 +423,11 @@ The color gradient is based on the minimum and maximum values of the respective
|
|
|
413
423
|
The visualisation is rendered in the terminal and can be limited to a certain depth of the tree.
|
|
414
424
|
The default depth is 2.
|
|
415
425
|
|
|
416
|
-
|
|
417
426
|
```python
|
|
418
427
|
import gymnasium as gym
|
|
419
428
|
|
|
420
|
-
from gymcts.gymcts_agent import
|
|
421
|
-
from gymcts.
|
|
422
|
-
from gymcts.gymcts_naive_wrapper import NaiveSoloMCTSGymEnvWrapper
|
|
429
|
+
from gymcts.gymcts_agent import GymctsAgent
|
|
430
|
+
from gymcts.gymcts_action_history_wrapper import ActionHistoryMCTSGymEnvWrapper
|
|
423
431
|
|
|
424
432
|
from gymcts.logger import log
|
|
425
433
|
|
|
@@ -432,11 +440,11 @@ if __name__ == '__main__':
|
|
|
432
440
|
env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=False, render_mode="ansi")
|
|
433
441
|
env.reset()
|
|
434
442
|
|
|
435
|
-
# wrap the environment with the
|
|
436
|
-
env =
|
|
443
|
+
# wrap the environment with the wrapper or a custom gymcts wrapper
|
|
444
|
+
env = ActionHistoryMCTSGymEnvWrapper(env)
|
|
437
445
|
|
|
438
446
|
# create the agent
|
|
439
|
-
agent =
|
|
447
|
+
agent = GymctsAgent(
|
|
440
448
|
env=env,
|
|
441
449
|
clear_mcts_tree_after_step=False,
|
|
442
450
|
render_tree_after_step=False,
|
|
@@ -503,11 +511,11 @@ clone the repository in your favorite code editor (for example PyCharm, VSCode,
|
|
|
503
511
|
|
|
504
512
|
using https:
|
|
505
513
|
```shell
|
|
506
|
-
git clone https://github.com/Alexander-Nasuta/
|
|
514
|
+
git clone https://github.com/Alexander-Nasuta/gymcts.git
|
|
507
515
|
```
|
|
508
516
|
or by using the GitHub CLI:
|
|
509
517
|
```shell
|
|
510
|
-
gh repo clone Alexander-Nasuta/
|
|
518
|
+
gh repo clone Alexander-Nasuta/gymcts
|
|
511
519
|
```
|
|
512
520
|
|
|
513
521
|
if you are using PyCharm, I recommend doing the following additional steps:
|
|
@@ -516,9 +524,6 @@ if you are using PyCharm, I recommend doing the following additional steps:
|
|
|
516
524
|
- mark the `tests` folder as test root (by right-clicking on the folder and selecting `Mark Directory as` -> `Test Sources Root`)
|
|
517
525
|
- mark the `resources` folder as resources root (by right-clicking on the folder and selecting `Mark Directory as` -> `Resources Root`)
|
|
518
526
|
|
|
519
|
-
at the end your project structure should look like this:
|
|
520
|
-
|
|
521
|
-
todo
|
|
522
527
|
|
|
523
528
|
### Create a Virtual Environment (optional)
|
|
524
529
|
|
|
@@ -584,12 +589,6 @@ For testing with `tox` run the following command:
|
|
|
584
589
|
tox
|
|
585
590
|
```
|
|
586
591
|
|
|
587
|
-
Here is a screenshot of what the output might look like:
|
|
588
|
-
|
|
589
|
-

|
|
590
|
-
|
|
591
|
-
Tox will run the tests in a separate environment and will also check if the requirements are installed correctly.
|
|
592
|
-
|
|
593
592
|
### Building and Publishing the Project to PyPi
|
|
594
593
|
|
|
595
594
|
In order to publish the project to PyPi, the project needs to be built and then uploaded to PyPi.
|
|
@@ -628,7 +627,6 @@ sphinx-autobuild ./docs/source/ ./docs/build/html/
|
|
|
628
627
|
This project features most of the extensions featured in this Tutorial: [Document Your Scientific Project With Markdown, Sphinx, and Read the Docs | PyData Global 2021](https://www.youtube.com/watch?v=qRSb299awB0).
|
|
629
628
|
|
|
630
629
|
|
|
631
|
-
|
|
632
630
|
## Contact
|
|
633
631
|
|
|
634
632
|
If you have any questions or feedback, feel free to contact me via [email](mailto:alexander.nasuta@wzl-iqs.rwth-aachen.de) or open an issue on the repository.
|
|
@@ -2,9 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
A Monte Carlo Tree Search Implementation for Gymnasium-style Environments.
|
|
4
4
|
|
|
5
|
-
- Github: [GYMCTS on Github](https://github.com/Alexander-Nasuta/
|
|
6
|
-
- Pypi: [GYMCTS on PyPi](https://pypi.org/project/
|
|
7
|
-
- Documentation: [GYMCTS Docs](https://
|
|
5
|
+
- Github: [GYMCTS on Github](https://github.com/Alexander-Nasuta/gymcts)
|
|
6
|
+
- Pypi: [GYMCTS on PyPi](https://pypi.org/project/gymcts/)
|
|
7
|
+
- Documentation: [GYMCTS Docs](https://gymcts.readthedocs.io/en/latest/)
|
|
8
8
|
|
|
9
9
|
## Description
|
|
10
10
|
|
|
@@ -32,28 +32,32 @@ The usage of a MCTS agent can roughly organised into the following steps:
|
|
|
32
32
|
- Render the solution
|
|
33
33
|
|
|
34
34
|
The GYMCTS package provides two types of wrappers for Gymnasium-style environments:
|
|
35
|
-
- `
|
|
36
|
-
- `
|
|
35
|
+
- `DeepCopyMCTSGymEnvWrapper`: A wrapper that uses deepcopies of the environment to save a snapshot of the environment state for each node in the MCTS tree.
|
|
36
|
+
- `ActionHistoryMCTSGymEnvWrapper`: A wrapper that saves the action sequence that led to the current state in the MCTS node.
|
|
37
37
|
|
|
38
|
-
These wrappers can be used with the `
|
|
39
|
-
The wrapper implement methods that are required by the `
|
|
38
|
+
These wrappers can be used with the `GymctsAgent` to solve the environment.
|
|
39
|
+
The wrappers implement the methods that are required by the `GymctsAgent` to interact with the environment.
|
|
40
40
|
GYMCTS is designed to use a single environment instance and to reconstruct the environment state from a state snapshot when needed.
|
|
41
41
|
|
|
42
42
|
NOTE: MCTS works best when the return of an episode is in the range of [-1, 1]. Please adjust the reward function of the environment accordingly (or change the ubc-scaling parameter of the MCTS agent).
|
|
43
43
|
Adjusting the reward function of the environment is easily done with a [NormalizeReward](https://gymnasium.farama.org/api/wrappers/reward_wrappers/#gymnasium.wrappers.NormalizeReward) or [TransformReward](https://gymnasium.farama.org/api/wrappers/reward_wrappers/#gymnasium.wrappers.TransformReward) Wrapper.
|
|
44
|
+
```python
|
|
45
|
+
env = NormalizeReward(env, gamma=0.99, epsilon=1e-8)
|
|
46
|
+
```
|
|
44
47
|
|
|
45
|
-
|
|
46
|
-
env = TransformReward(env, lambda r: r /
|
|
47
|
-
|
|
48
|
+
```python
|
|
49
|
+
env = TransformReward(env, lambda r: r / n_steps_per_episode)
|
|
50
|
+
```
|
|
51
|
+
### FrozenLake Example (DeepCopyMCTSGymEnvWrapper)
|
|
48
52
|
|
|
49
53
|
A minimal example of how to use the package with the FrozenLake environment and the NaiveSoloMCTSGymEnvWrapper is provided in the following code snippet below.
|
|
50
|
-
The
|
|
54
|
+
The DeepCopyMCTSGymEnvWrapper can be used with non-deterministic environments, such as the FrozenLake environment with slippery ice.
|
|
51
55
|
|
|
52
56
|
```python
|
|
53
57
|
import gymnasium as gym
|
|
54
58
|
|
|
55
|
-
from gymcts.gymcts_agent import
|
|
56
|
-
from gymcts.
|
|
59
|
+
from gymcts.gymcts_agent import GymctsAgent
|
|
60
|
+
from gymcts.gymcts_deepcopy_wrapper import DeepCopyMCTSGymEnvWrapper
|
|
57
61
|
|
|
58
62
|
from gymcts.logger import log
|
|
59
63
|
|
|
@@ -66,11 +70,11 @@ if __name__ == '__main__':
|
|
|
66
70
|
env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=True, render_mode="ansi")
|
|
67
71
|
env.reset()
|
|
68
72
|
|
|
69
|
-
# 1. wrap the environment with the
|
|
70
|
-
env =
|
|
73
|
+
# 1. wrap the environment with the deep copy wrapper or a custom gymcts wrapper
|
|
74
|
+
env = DeepCopyMCTSGymEnvWrapper(env)
|
|
71
75
|
|
|
72
76
|
# 2. create the agent
|
|
73
|
-
agent =
|
|
77
|
+
agent = GymctsAgent(
|
|
74
78
|
env=env,
|
|
75
79
|
clear_mcts_tree_after_step=False,
|
|
76
80
|
render_tree_after_step=True,
|
|
@@ -89,7 +93,7 @@ if __name__ == '__main__':
|
|
|
89
93
|
|
|
90
94
|
# 5. print the solution
|
|
91
95
|
# read the solution from the info provided by the RecordEpisodeStatistics wrapper
|
|
92
|
-
# (that
|
|
96
|
+
# (that DeepCopyMCTSGymEnvWrapper uses internally)
|
|
93
97
|
episode_length = info["episode"]["l"]
|
|
94
98
|
episode_return = info["episode"]["r"]
|
|
95
99
|
|
|
@@ -104,13 +108,13 @@ if __name__ == '__main__':
|
|
|
104
108
|
A minimal example of how to use the package with the FrozenLake environment and the DeterministicSoloMCTSGymEnvWrapper is provided in the following code snippet below.
|
|
105
109
|
The DeterministicSoloMCTSGymEnvWrapper can be used with deterministic environments, such as the FrozenLake environment without slippery ice.
|
|
106
110
|
|
|
107
|
-
The DeterministicSoloMCTSGymEnvWrapper saves the action sequence that lead to the current state in the MCTS node.
|
|
111
|
+
The DeterministicSoloMCTSGymEnvWrapper saves the action sequence that lead to the current state in the MCTS node.
|
|
108
112
|
|
|
109
113
|
```python
|
|
110
114
|
import gymnasium as gym
|
|
111
115
|
|
|
112
|
-
from gymcts.gymcts_agent import
|
|
113
|
-
from gymcts.
|
|
116
|
+
from gymcts.gymcts_agent import GymctsAgent
|
|
117
|
+
from gymcts.gymcts_action_history_wrapper import ActionHistoryMCTSGymEnvWrapper
|
|
114
118
|
|
|
115
119
|
from gymcts.logger import log
|
|
116
120
|
|
|
@@ -124,10 +128,10 @@ if __name__ == '__main__':
|
|
|
124
128
|
env.reset()
|
|
125
129
|
|
|
126
130
|
# 1. wrap the environment with the wrapper
|
|
127
|
-
env =
|
|
131
|
+
env = ActionHistoryMCTSGymEnvWrapper(env)
|
|
128
132
|
|
|
129
133
|
# 2. create the agent
|
|
130
|
-
agent =
|
|
134
|
+
agent = GymctsAgent(
|
|
131
135
|
env=env,
|
|
132
136
|
clear_mcts_tree_after_step=False,
|
|
133
137
|
render_tree_after_step=True,
|
|
@@ -166,8 +170,8 @@ To create a video of the solution of the FrozenLake environment, you can use the
|
|
|
166
170
|
```python
|
|
167
171
|
import gymnasium as gym
|
|
168
172
|
|
|
169
|
-
from gymcts.gymcts_agent import
|
|
170
|
-
from gymcts.
|
|
173
|
+
from gymcts.gymcts_agent import GymctsAgent
|
|
174
|
+
from gymcts.gymcts_deepcopy_wrapper import DeepCopyMCTSGymEnvWrapper
|
|
171
175
|
|
|
172
176
|
from gymcts.logger import log
|
|
173
177
|
|
|
@@ -182,11 +186,11 @@ if __name__ == '__main__':
|
|
|
182
186
|
env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=False, render_mode="rgb_array")
|
|
183
187
|
env.reset()
|
|
184
188
|
|
|
185
|
-
# 1. wrap the environment with the
|
|
186
|
-
env =
|
|
189
|
+
# 1. wrap the environment with the deep copy wrapper or a custom gymcts wrapper
|
|
190
|
+
env = DeepCopyMCTSGymEnvWrapper(env)
|
|
187
191
|
|
|
188
192
|
# 2. create the agent
|
|
189
|
-
agent =
|
|
193
|
+
agent = GymctsAgent(
|
|
190
194
|
env=env,
|
|
191
195
|
clear_mcts_tree_after_step=False,
|
|
192
196
|
render_tree_after_step=True,
|
|
@@ -211,7 +215,7 @@ if __name__ == '__main__':
|
|
|
211
215
|
env.close()
|
|
212
216
|
|
|
213
217
|
# 5. print the solution
|
|
214
|
-
# read the solution from the info provided by the RecordEpisodeStatistics wrapper (that
|
|
218
|
+
# read the solution from the info provided by the RecordEpisodeStatistics wrapper (that DeepCopyMCTSGymEnvWrapper wraps internally)
|
|
215
219
|
episode_length = info["episode"]["l"]
|
|
216
220
|
episode_return = info["episode"]["r"]
|
|
217
221
|
|
|
@@ -252,13 +256,13 @@ import gymnasium as gym
|
|
|
252
256
|
from graph_jsp_env.disjunctive_graph_jsp_env import DisjunctiveGraphJspEnv
|
|
253
257
|
from jsp_instance_utils.instances import ft06, ft06_makespan
|
|
254
258
|
|
|
255
|
-
from gymcts.gymcts_agent import
|
|
256
|
-
from gymcts.
|
|
259
|
+
from gymcts.gymcts_agent import GymctsAgent
|
|
260
|
+
from gymcts.gymcts_env_abc import GymctsABC
|
|
257
261
|
|
|
258
262
|
from gymcts.logger import log
|
|
259
263
|
|
|
260
264
|
|
|
261
|
-
class GraphJspGYMCTSWrapper(
|
|
265
|
+
class GraphJspGYMCTSWrapper(GymctsABC, gym.Wrapper):
|
|
262
266
|
|
|
263
267
|
def __init__(self, env: DisjunctiveGraphJspEnv):
|
|
264
268
|
gym.Wrapper.__init__(self, env)
|
|
@@ -309,7 +313,7 @@ if __name__ == '__main__':
|
|
|
309
313
|
|
|
310
314
|
env = GraphJspGYMCTSWrapper(env)
|
|
311
315
|
|
|
312
|
-
agent =
|
|
316
|
+
agent = GymctsAgent(
|
|
313
317
|
env=env,
|
|
314
318
|
clear_mcts_tree_after_step=True,
|
|
315
319
|
render_tree_after_step=True,
|
|
@@ -347,13 +351,11 @@ The color gradient is based on the minimum and maximum values of the respective
|
|
|
347
351
|
The visualisation is rendered in the terminal and can be limited to a certain depth of the tree.
|
|
348
352
|
The default depth is 2.
|
|
349
353
|
|
|
350
|
-
|
|
351
354
|
```python
|
|
352
355
|
import gymnasium as gym
|
|
353
356
|
|
|
354
|
-
from gymcts.gymcts_agent import
|
|
355
|
-
from gymcts.
|
|
356
|
-
from gymcts.gymcts_naive_wrapper import NaiveSoloMCTSGymEnvWrapper
|
|
357
|
+
from gymcts.gymcts_agent import GymctsAgent
|
|
358
|
+
from gymcts.gymcts_action_history_wrapper import ActionHistoryMCTSGymEnvWrapper
|
|
357
359
|
|
|
358
360
|
from gymcts.logger import log
|
|
359
361
|
|
|
@@ -366,11 +368,11 @@ if __name__ == '__main__':
|
|
|
366
368
|
env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=False, render_mode="ansi")
|
|
367
369
|
env.reset()
|
|
368
370
|
|
|
369
|
-
# wrap the environment with the
|
|
370
|
-
env =
|
|
371
|
+
# wrap the environment with the wrapper or a custom gymcts wrapper
|
|
372
|
+
env = ActionHistoryMCTSGymEnvWrapper(env)
|
|
371
373
|
|
|
372
374
|
# create the agent
|
|
373
|
-
agent =
|
|
375
|
+
agent = GymctsAgent(
|
|
374
376
|
env=env,
|
|
375
377
|
clear_mcts_tree_after_step=False,
|
|
376
378
|
render_tree_after_step=False,
|
|
@@ -437,11 +439,11 @@ clone the repository in your favorite code editor (for example PyCharm, VSCode,
|
|
|
437
439
|
|
|
438
440
|
using https:
|
|
439
441
|
```shell
|
|
440
|
-
git clone https://github.com/Alexander-Nasuta/
|
|
442
|
+
git clone https://github.com/Alexander-Nasuta/gymcts.git
|
|
441
443
|
```
|
|
442
444
|
or by using the GitHub CLI:
|
|
443
445
|
```shell
|
|
444
|
-
gh repo clone Alexander-Nasuta/
|
|
446
|
+
gh repo clone Alexander-Nasuta/gymcts
|
|
445
447
|
```
|
|
446
448
|
|
|
447
449
|
if you are using PyCharm, I recommend doing the following additional steps:
|
|
@@ -450,9 +452,6 @@ if you are using PyCharm, I recommend doing the following additional steps:
|
|
|
450
452
|
- mark the `tests` folder as test root (by right-clicking on the folder and selecting `Mark Directory as` -> `Test Sources Root`)
|
|
451
453
|
- mark the `resources` folder as resources root (by right-clicking on the folder and selecting `Mark Directory as` -> `Resources Root`)
|
|
452
454
|
|
|
453
|
-
at the end your project structure should look like this:
|
|
454
|
-
|
|
455
|
-
todo
|
|
456
455
|
|
|
457
456
|
### Create a Virtual Environment (optional)
|
|
458
457
|
|
|
@@ -518,12 +517,6 @@ For testing with `tox` run the following command:
|
|
|
518
517
|
tox
|
|
519
518
|
```
|
|
520
519
|
|
|
521
|
-
Here is a screenshot of what the output might look like:
|
|
522
|
-
|
|
523
|
-

|
|
524
|
-
|
|
525
|
-
Tox will run the tests in a separate environment and will also check if the requirements are installed correctly.
|
|
526
|
-
|
|
527
520
|
### Building and Publishing the Project to PyPi
|
|
528
521
|
|
|
529
522
|
In order to publish the project to PyPi, the project needs to be built and then uploaded to PyPi.
|
|
@@ -562,7 +555,6 @@ sphinx-autobuild ./docs/source/ ./docs/build/html/
|
|
|
562
555
|
This project features most of the extensions featured in this Tutorial: [Document Your Scientific Project With Markdown, Sphinx, and Read the Docs | PyData Global 2021](https://www.youtube.com/watch?v=qRSb299awB0).
|
|
563
556
|
|
|
564
557
|
|
|
565
|
-
|
|
566
558
|
## Contact
|
|
567
559
|
|
|
568
560
|
If you have any questions or feedback, feel free to contact me via [email](mailto:alexander.nasuta@wzl-iqs.rwth-aachen.de) or open an issue on the repository.
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "gymcts"
|
|
7
|
-
version = "1.0.0"
|
|
7
|
+
version = "1.2.1"
|
|
8
8
|
description = "A minimalistic implementation of the Monte Carlo Tree Search algorithm for planning problems formulated as Gymnasium reinforcement learning environments."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
authors = [{ name = "Alexander Nasuta", email = "alexander.nasuta@wzl-iqs.rwth-aachen.de" }]
|
|
@@ -21,7 +21,7 @@ dependencies = [
|
|
|
21
21
|
"gymnasium",
|
|
22
22
|
"matplotlib<3.9",
|
|
23
23
|
]
|
|
24
|
-
requires-python = ">=3.
|
|
24
|
+
requires-python = ">=3.11"
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
[project.optional-dependencies]
|
|
@@ -32,7 +32,7 @@ examples = [
|
|
|
32
32
|
]
|
|
33
33
|
dev = [
|
|
34
34
|
"jsp-instance-utils",
|
|
35
|
-
"graph-matrix-jsp-env",
|
|
35
|
+
"graph-matrix-jsp-env>=0.3.0",
|
|
36
36
|
"graph-jsp-env",
|
|
37
37
|
"JSSEnv",
|
|
38
38
|
|
|
@@ -49,14 +49,20 @@ dev = [
|
|
|
49
49
|
"myst-parser", # .md support for sphinx
|
|
50
50
|
"sphinx-autobuild",
|
|
51
51
|
#
|
|
52
|
+
"sphinx-copybutton", # for code copy buttons
|
|
52
53
|
"furo", # cool theme
|
|
53
54
|
"twine",
|
|
54
55
|
"sphinx-copybutton", # for code copy buttons
|
|
55
56
|
"nbsphinx", # for jupyter notebook support in sphinx
|
|
57
|
+
"pandoc",
|
|
58
|
+
|
|
59
|
+
"jupytext", # converting .py examples to jupyter notebook jupytext --to notebook *.py
|
|
60
|
+
"jupyter", # for jupyter notebook kernel
|
|
61
|
+
"typing_extensions>=4.12.0",
|
|
56
62
|
]
|
|
57
63
|
|
|
58
64
|
[project.urls]
|
|
59
|
-
Homepage = "https://github.com/Alexander-Nasuta/
|
|
65
|
+
Homepage = "https://github.com/Alexander-Nasuta/gymcts"
|
|
60
66
|
|
|
61
67
|
[tool.pytest.ini_options]
|
|
62
68
|
addopts = "--cov=gymcts -p no:warnings"
|
|
@@ -7,12 +7,12 @@ platforms = unix, linux, osx, cygwin, win32
|
|
|
7
7
|
classifiers =
|
|
8
8
|
Programming Language :: Python :: 3
|
|
9
9
|
Programming Language :: Python :: 3 :: Only
|
|
10
|
-
Programming Language :: Python :: 3.
|
|
10
|
+
Programming Language :: Python :: 3.11
|
|
11
11
|
|
|
12
12
|
[options]
|
|
13
13
|
packages =
|
|
14
14
|
gymcts
|
|
15
|
-
python_requires = >=3.
|
|
15
|
+
python_requires = >=3.11
|
|
16
16
|
package_dir =
|
|
17
17
|
=src
|
|
18
18
|
zip_safe = no
|
|
@@ -25,9 +25,6 @@ testing =
|
|
|
25
25
|
flake8>=3.9
|
|
26
26
|
tox>=3.24
|
|
27
27
|
|
|
28
|
-
[options.package_data]
|
|
29
|
-
phantomderopfa = py.typed
|
|
30
|
-
|
|
31
28
|
[flake8]
|
|
32
29
|
max-line-length = 160
|
|
33
30
|
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
1
3
|
import matplotlib.pyplot as plt
|
|
2
4
|
import numpy as np
|
|
3
5
|
|
|
@@ -103,8 +105,19 @@ def wrap_with_color_codes(s: object, /, r: int | float, g: int | float, b: int |
|
|
|
103
105
|
f"{CEND}"
|
|
104
106
|
|
|
105
107
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
+
def wrap_evenly_spaced_color(s: Any, n_of_item: int, n_classes: int, c_map="rainbow") -> str:
|
|
109
|
+
"""
|
|
110
|
+
Wraps a string with a color scale (a matplotlib c_map) based on the n_of_item and n_classes.
|
|
111
|
+
This function is used to color code the available actions in the MCTS tree visualisation.
|
|
112
|
+
The children of the MCTS tree are colored based on their action for a clearer visualisation.
|
|
113
|
+
|
|
114
|
+
:param s: the string (or object) to be wrapped. objects are converted to string (using the __str__ function).
|
|
115
|
+
:param n_of_item: the index of the item to be colored. In a mcts tree, this is the (parent-)action of the node.
|
|
116
|
+
:param n_classes: the number of classes (or items) to be colored. In a mcts tree, this is the number of available actions.
|
|
117
|
+
:param c_map: the colormap to be used (default is 'rainbow').
|
|
118
|
+
The colormap can be any matplotlib colormap, e.g. 'viridis', 'plasma', 'inferno', 'magma', 'cividis'.
|
|
119
|
+
:return: a string that contains the color-codes (prefix and suffix) and the string s in between.
|
|
120
|
+
"""
|
|
108
121
|
if s is None or n_of_item is None or n_classes is None:
|
|
109
122
|
return s
|
|
110
123
|
|
|
@@ -117,7 +130,17 @@ def wrap_evenly_spaced_color(s: str, n_of_item:int, n_classes:int, c_map="rainbo
|
|
|
117
130
|
return f"{color_asni}{s}{CEND}"
|
|
118
131
|
|
|
119
132
|
|
|
120
|
-
def wrap_with_color_scale(s: str, value: float, min_val:float, max_val:float, c_map=None) -> str:
|
|
133
|
+
def wrap_with_color_scale(s: str, value: float, min_val: float, max_val: float, c_map=None) -> str:
|
|
134
|
+
"""
|
|
135
|
+
Wraps a string with a color scale (a matplotlib c_map) based on the value, min_val, and max_val.
|
|
136
|
+
|
|
137
|
+
:param s: the string to be wrapped
|
|
138
|
+
:param value: the value to be mapped to a color
|
|
139
|
+
:param min_val: the minimum value of the scale
|
|
140
|
+
:param max_val: the maximum value of the scale
|
|
141
|
+
:param c_map: the colormap to be used (default is 'rainbow')
|
|
142
|
+
:return:
|
|
143
|
+
"""
|
|
121
144
|
if s is None or min_val is None or max_val is None or min_val >= max_val:
|
|
122
145
|
return s
|
|
123
146
|
|