gymcts 1.2.0.tar.gz → 1.2.1.tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (27)
  1. {gymcts-1.2.0/src/gymcts.egg-info → gymcts-1.2.1}/PKG-INFO +31 -35
  2. {gymcts-1.2.0 → gymcts-1.2.1}/README.md +26 -33
  3. {gymcts-1.2.0 → gymcts-1.2.1}/pyproject.toml +5 -2
  4. {gymcts-1.2.0 → gymcts-1.2.1}/setup.cfg +0 -3
  5. {gymcts-1.2.0 → gymcts-1.2.1}/src/gymcts/colorful_console_utils.py +22 -0
  6. {gymcts-1.2.0 → gymcts-1.2.1}/src/gymcts/gymcts_action_history_wrapper.py +72 -2
  7. {gymcts-1.2.0 → gymcts-1.2.1}/src/gymcts/gymcts_agent.py +5 -1
  8. {gymcts-1.2.0 → gymcts-1.2.1}/src/gymcts/gymcts_deepcopy_wrapper.py +59 -2
  9. {gymcts-1.2.0 → gymcts-1.2.1}/src/gymcts/gymcts_distributed_agent.py +30 -12
  10. gymcts-1.2.1/src/gymcts/gymcts_env_abc.py +61 -0
  11. {gymcts-1.2.0 → gymcts-1.2.1}/src/gymcts/gymcts_node.py +85 -8
  12. {gymcts-1.2.0 → gymcts-1.2.1}/src/gymcts/gymcts_tree_plotter.py +22 -1
  13. {gymcts-1.2.0 → gymcts-1.2.1}/src/gymcts/logger.py +1 -4
  14. {gymcts-1.2.0 → gymcts-1.2.1/src/gymcts.egg-info}/PKG-INFO +31 -35
  15. {gymcts-1.2.0 → gymcts-1.2.1}/src/gymcts.egg-info/requires.txt +4 -2
  16. gymcts-1.2.0/src/gymcts/gymcts_env_abc.py +0 -28
  17. {gymcts-1.2.0 → gymcts-1.2.1}/LICENSE +0 -0
  18. {gymcts-1.2.0 → gymcts-1.2.1}/MANIFEST.in +0 -0
  19. {gymcts-1.2.0 → gymcts-1.2.1}/setup.py +0 -0
  20. {gymcts-1.2.0 → gymcts-1.2.1}/src/gymcts/__init__.py +0 -0
  21. {gymcts-1.2.0 → gymcts-1.2.1}/src/gymcts.egg-info/SOURCES.txt +0 -0
  22. {gymcts-1.2.0 → gymcts-1.2.1}/src/gymcts.egg-info/dependency_links.txt +0 -0
  23. {gymcts-1.2.0 → gymcts-1.2.1}/src/gymcts.egg-info/not-zip-safe +0 -0
  24. {gymcts-1.2.0 → gymcts-1.2.1}/src/gymcts.egg-info/top_level.txt +0 -0
  25. {gymcts-1.2.0 → gymcts-1.2.1}/tests/test_graph_matrix_jsp_env.py +0 -0
  26. {gymcts-1.2.0 → gymcts-1.2.1}/tests/test_gymnasium_envs.py +0 -0
  27. {gymcts-1.2.0 → gymcts-1.2.1}/tests/test_number_of_visits.py +0 -0
{gymcts-1.2.0/src/gymcts.egg-info → gymcts-1.2.1}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: gymcts
- Version: 1.2.0
+ Version: 1.2.1
  Summary: A minimalistic implementation of the Monte Carlo Tree Search algorithm for planning problems fomulated as gymnaisum reinforcement learning environments.
  Author: Alexander Nasuta
  Author-email: Alexander Nasuta <alexander.nasuta@wzl-iqs.rwth-aachen.de>
@@ -47,7 +47,7 @@ Requires-Dist: graph-matrix-jsp-env; extra == "examples"
  Requires-Dist: graph-jsp-env; extra == "examples"
  Provides-Extra: dev
  Requires-Dist: jsp-instance-utils; extra == "dev"
- Requires-Dist: graph-matrix-jsp-env; extra == "dev"
+ Requires-Dist: graph-matrix-jsp-env>=0.3.0; extra == "dev"
  Requires-Dist: graph-jsp-env; extra == "dev"
  Requires-Dist: JSSEnv; extra == "dev"
  Requires-Dist: pip-tools; extra == "dev"
@@ -59,21 +59,24 @@ Requires-Dist: stable_baselines3; extra == "dev"
  Requires-Dist: sphinx; extra == "dev"
  Requires-Dist: myst-parser; extra == "dev"
  Requires-Dist: sphinx-autobuild; extra == "dev"
+ Requires-Dist: sphinx-copybutton; extra == "dev"
  Requires-Dist: furo; extra == "dev"
  Requires-Dist: twine; extra == "dev"
  Requires-Dist: sphinx-copybutton; extra == "dev"
  Requires-Dist: nbsphinx; extra == "dev"
+ Requires-Dist: pandoc; extra == "dev"
  Requires-Dist: jupytext; extra == "dev"
  Requires-Dist: jupyter; extra == "dev"
+ Requires-Dist: typing_extensions>=4.12.0; extra == "dev"
  Dynamic: license-file

  # Graph Matrix Job Shop Env

  A Monte Carlo Tree Search Implementation for Gymnasium-style Environments.

- - Github: [GYMCTS on Github](https://github.com/Alexander-Nasuta/GraphMatrixJobShopEnv)
- - Pypi: [GYMCTS on PyPi](https://pypi.org/project/graph-matrix-jsp-env/)
- - Documentation: [GYMCTS Docs](https://graphmatrixjobshopenv.readthedocs.io/en/latest/)
+ - Github: [GYMCTS on Github](https://github.com/Alexander-Nasuta/gymcts)
+ - Pypi: [GYMCTS on PyPi](https://pypi.org/project/gymcts/)
+ - Documentation: [GYMCTS Docs](https://gymcts.readthedocs.io/en/latest/)

  ## Description

@@ -101,22 +104,26 @@ The usage of a MCTS agent can roughly organised into the following steps:
  - Render the solution

  The GYMCTS package provides a two types of wrappers for Gymnasium-style environments:
- - `NaiveSoloMCTSGymEnvWrapper`: A wrapper that uses deepcopies of the environment to save a snapshot of the environment state for each node in the MCTS tree.
- - `DeterministicSoloMCTSGymEnvWrapper`: A wrapper that saves the action sequence that lead to the current state in the MCTS node.
+ - `DeepCopyMCTSGymEnvWrapper`: A wrapper that uses deepcopies of the environment to save a snapshot of the environment state for each node in the MCTS tree.
+ - `ActionHistoryMCTSGymEnvWrapper`: A wrapper that saves the action sequence that lead to the current state in the MCTS node.

- These wrappers can be used with the `SoloMCTSAgent` to solve the environment.
- The wrapper implement methods that are required by the `SoloMCTSAgent` to interact with the environment.
+ These wrappers can be used with the `GymctsAgent` to solve the environment.
+ The wrapper implement methods that are required by the `GymctsAgent` to interact with the environment.
  GYMCTS is designed to use a single environment instance and reconstructing the environment state form a state snapshot, when needed.

  NOTE: MCTS works best when the return of an episode is in the range of [-1, 1]. Please adjust the reward function of the environment accordingly (or change the ubc-scaling parameter of the MCTS agent).
  Adjusting the reward function of the environment is easily done with a [NormalizeReward](https://gymnasium.farama.org/api/wrappers/reward_wrappers/#gymnasium.wrappers.NormalizeReward) or [TransformReward](https://gymnasium.farama.org/api/wrappers/reward_wrappers/#gymnasium.wrappers.TransformReward) Wrapper.
+ ```python
+ env = NormalizeReward(env, gamma=0.99, epsilon=1e-8)
+ ```

- NormalizeReward(env, gamma=0.99, epsilon=1e-8)
- env = TransformReward(env, lambda r: r / 36)
- ### FrozenLake Example (NaiveSoloMCTSGymEnvWrapper)
+ ```python
+ env = TransformReward(env, lambda r: r / n_steps_per_episode)
+ ```
+ ### FrozenLake Example (DeepCopyMCTSGymEnvWrapper)

  A minimal example of how to use the package with the FrozenLake environment and the NaiveSoloMCTSGymEnvWrapper is provided in the following code snippet below.
- The NaiveSoloMCTSGymEnvWrapper can be used with non-deterministic environments, such as the FrozenLake environment with slippery ice.
+ The DeepCopyMCTSGymEnvWrapper can be used with non-deterministic environments, such as the FrozenLake environment with slippery ice.

  ```python
  import gymnasium as gym
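Note: the two reward-scaling one-liners in the hunk above assume the Gymnasium reward wrappers are in scope, and `n_steps_per_episode` is a placeholder rather than a name from the package. A minimal, self-contained sketch:

```python
import gymnasium as gym
from gymnasium.wrappers import NormalizeReward, TransformReward

env = gym.make("FrozenLake-v1")

# either: normalize rewards using running statistics of the discounted return
env = NormalizeReward(env, gamma=0.99, epsilon=1e-8)

# or: a fixed rescaling so a full episode's return lands roughly in [-1, 1]
# n_steps_per_episode = 16  # placeholder: the episode length of your environment
# env = TransformReward(env, lambda r: r / n_steps_per_episode)
```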
@@ -135,7 +142,7 @@ if __name__ == '__main__':
  env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=True, render_mode="ansi")
  env.reset()

- # 1. wrap the environment with the naive wrapper or a custom gymcts wrapper
+ # 1. wrap the environment with the deep copy wrapper or a custom gymcts wrapper
  env = DeepCopyMCTSGymEnvWrapper(env)

  # 2. create the agent
@@ -158,7 +165,7 @@ if __name__ == '__main__':

  # 5. print the solution
  # read the solution from the info provided by the RecordEpisodeStatistics wrapper
- # (that NaiveSoloMCTSGymEnvWrapper uses internally)
+ # (that DeepCopyMCTSGymEnvWrapper uses internally)
  episode_length = info["episode"]["l"]
  episode_return = info["episode"]["r"]

@@ -251,7 +258,7 @@ if __name__ == '__main__':
  env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=False, render_mode="rgb_array")
  env.reset()

- # 1. wrap the environment with the naive wrapper or a custom gymcts wrapper
+ # 1. wrap the environment with the deep copy wrapper or a custom gymcts wrapper
  env = DeepCopyMCTSGymEnvWrapper(env)

  # 2. create the agent
@@ -280,7 +287,7 @@ if __name__ == '__main__':
  env.close()

  # 5. print the solution
- # read the solution from the info provided by the RecordEpisodeStatistics wrapper (that NaiveSoloMCTSGymEnvWrapper wraps internally)
+ # read the solution from the info provided by the RecordEpisodeStatistics wrapper (that DeepCopyMCTSGymEnvWrapper wraps internally)
  episode_length = info["episode"]["l"]
  episode_return = info["episode"]["r"]

@@ -321,13 +328,13 @@ import gymnasium as gym
  from graph_jsp_env.disjunctive_graph_jsp_env import DisjunctiveGraphJspEnv
  from jsp_instance_utils.instances import ft06, ft06_makespan

- from gymcts.gymcts_agent import SoloMCTSAgent
- from gymcts.gymcts_gym_env import SoloMCTSGymEnv
+ from gymcts.gymcts_agent import GymctsAgent
+ from gymcts.gymcts_env_abc import GymctsABC

  from gymcts.logger import log


- class GraphJspGYMCTSWrapper(SoloMCTSGymEnv, gym.Wrapper):
+ class GraphJspGYMCTSWrapper(GymctsABC, gym.Wrapper):

      def __init__(self, env: DisjunctiveGraphJspEnv):
          gym.Wrapper.__init__(self, env)
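Note: the hunk cuts the wrapper off after `__init__`. For orientation, here is a hypothetical skeleton of a custom `GymctsABC` wrapper; the method set is inferred from the wrappers shipped with the package, so treat the exact signatures as assumptions:

```python
import gymnasium as gym
from gymcts.gymcts_env_abc import GymctsABC


class MyGymctsWrapper(GymctsABC, gym.Wrapper):
    # hypothetical skeleton; the method set mirrors ActionHistoryMCTSGymEnvWrapper
    def get_state(self):                       # snapshot of the current env state
        raise NotImplementedError

    def load_state(self, state) -> None:       # restore the env from a snapshot
        raise NotImplementedError

    def is_terminal(self) -> bool:             # has the episode ended?
        raise NotImplementedError

    def get_valid_actions(self) -> list[int]:  # actions allowed in this state
        raise NotImplementedError

    def rollout(self) -> float:                # return of a rollout from this state
        raise NotImplementedError
```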
@@ -378,7 +385,7 @@ if __name__ == '__main__':

  env = GraphJspGYMCTSWrapper(env)

- agent = SoloMCTSAgent(
+ agent = GymctsAgent(
      env=env,
      clear_mcts_tree_after_step=True,
      render_tree_after_step=True,
@@ -421,7 +428,6 @@ import gymnasium as gym

  from gymcts.gymcts_agent import GymctsAgent
  from gymcts.gymcts_action_history_wrapper import ActionHistoryMCTSGymEnvWrapper
- from gymcts.gymcts_deepcopy_wrapper import DeepCopyMCTSGymEnvWrapper

  from gymcts.logger import log

@@ -434,7 +440,7 @@ if __name__ == '__main__':
  env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=False, render_mode="ansi")
  env.reset()

- # wrap the environment with the naive wrapper or a custom gymcts wrapper
+ # wrap the environment with the wrapper or a custom gymcts wrapper
  env = ActionHistoryMCTSGymEnvWrapper(env)

  # create the agent
@@ -505,11 +511,11 @@ clone the repository in your favorite code editor (for example PyCharm, VSCode,

  using https:
  ```shell
- git clone https://github.com/Alexander-Nasuta/todo
+ git clone https://github.com/Alexander-Nasuta/gymcts.git
  ```
  or by using the GitHub CLI:
  ```shell
- gh repo clone Alexander-Nasuta/todo
+ gh repo clone Alexander-Nasuta/gymcts
  ```

  if you are using PyCharm, I recommend doing the following additional steps:
@@ -518,9 +524,6 @@ if you are using PyCharm, I recommend doing the following additional steps:
  - mark the `tests` folder as test root (by right-clicking on the folder and selecting `Mark Directory as` -> `Test Sources Root`)
  - mark the `resources` folder as resources root (by right-clicking on the folder and selecting `Mark Directory as` -> `Resources Root`)

- at the end your project structure should look like this:
-
- todo

  ### Create a Virtual Environment (optional)

@@ -586,12 +589,6 @@ For testing with `tox` run the following command:
  tox
  ```

- Here is a screenshot of what the output might look like:
-
- ![](https://github.com/Alexander-Nasuta/GraphMatrixJobShopEnv/raw/master/resources/tox-screenshot.png)
-
- Tox will run the tests in a separate environment and will also check if the requirements are installed correctly.
-
  ### Builing and Publishing the Project to PyPi

  In order to publish the project to PyPi, the project needs to be built and then uploaded to PyPi.
@@ -630,7 +627,6 @@ sphinx-autobuild ./docs/source/ ./docs/build/html/
  This project features most of the extensions featured in this Tutorial: [Document Your Scientific Project With Markdown, Sphinx, and Read the Docs | PyData Global 2021](https://www.youtube.com/watch?v=qRSb299awB0).


-
  ## Contact

  If you have any questions or feedback, feel free to contact me via [email](mailto:alexander.nasuta@wzl-iqs.rwth-aachen.de) or open an issue on repository.
{gymcts-1.2.0 → gymcts-1.2.1}/README.md

@@ -2,9 +2,9 @@

  A Monte Carlo Tree Search Implementation for Gymnasium-style Environments.

- - Github: [GYMCTS on Github](https://github.com/Alexander-Nasuta/GraphMatrixJobShopEnv)
- - Pypi: [GYMCTS on PyPi](https://pypi.org/project/graph-matrix-jsp-env/)
- - Documentation: [GYMCTS Docs](https://graphmatrixjobshopenv.readthedocs.io/en/latest/)
+ - Github: [GYMCTS on Github](https://github.com/Alexander-Nasuta/gymcts)
+ - Pypi: [GYMCTS on PyPi](https://pypi.org/project/gymcts/)
+ - Documentation: [GYMCTS Docs](https://gymcts.readthedocs.io/en/latest/)

  ## Description

@@ -32,22 +32,26 @@ The usage of a MCTS agent can roughly organised into the following steps:
  - Render the solution

  The GYMCTS package provides a two types of wrappers for Gymnasium-style environments:
- - `NaiveSoloMCTSGymEnvWrapper`: A wrapper that uses deepcopies of the environment to save a snapshot of the environment state for each node in the MCTS tree.
- - `DeterministicSoloMCTSGymEnvWrapper`: A wrapper that saves the action sequence that lead to the current state in the MCTS node.
+ - `DeepCopyMCTSGymEnvWrapper`: A wrapper that uses deepcopies of the environment to save a snapshot of the environment state for each node in the MCTS tree.
+ - `ActionHistoryMCTSGymEnvWrapper`: A wrapper that saves the action sequence that lead to the current state in the MCTS node.

- These wrappers can be used with the `SoloMCTSAgent` to solve the environment.
- The wrapper implement methods that are required by the `SoloMCTSAgent` to interact with the environment.
+ These wrappers can be used with the `GymctsAgent` to solve the environment.
+ The wrapper implement methods that are required by the `GymctsAgent` to interact with the environment.
  GYMCTS is designed to use a single environment instance and reconstructing the environment state form a state snapshot, when needed.

  NOTE: MCTS works best when the return of an episode is in the range of [-1, 1]. Please adjust the reward function of the environment accordingly (or change the ubc-scaling parameter of the MCTS agent).
  Adjusting the reward function of the environment is easily done with a [NormalizeReward](https://gymnasium.farama.org/api/wrappers/reward_wrappers/#gymnasium.wrappers.NormalizeReward) or [TransformReward](https://gymnasium.farama.org/api/wrappers/reward_wrappers/#gymnasium.wrappers.TransformReward) Wrapper.
+ ```python
+ env = NormalizeReward(env, gamma=0.99, epsilon=1e-8)
+ ```

- NormalizeReward(env, gamma=0.99, epsilon=1e-8)
- env = TransformReward(env, lambda r: r / 36)
- ### FrozenLake Example (NaiveSoloMCTSGymEnvWrapper)
+ ```python
+ env = TransformReward(env, lambda r: r / n_steps_per_episode)
+ ```
+ ### FrozenLake Example (DeepCopyMCTSGymEnvWrapper)

  A minimal example of how to use the package with the FrozenLake environment and the NaiveSoloMCTSGymEnvWrapper is provided in the following code snippet below.
- The NaiveSoloMCTSGymEnvWrapper can be used with non-deterministic environments, such as the FrozenLake environment with slippery ice.
+ The DeepCopyMCTSGymEnvWrapper can be used with non-deterministic environments, such as the FrozenLake environment with slippery ice.

  ```python
  import gymnasium as gym
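Note: as a rule of thumb (an editorial sketch, not wording from the package): the deepcopy wrapper also handles stochastic environments, while the action-history wrapper restores states by replaying actions and therefore suits deterministic environments:

```python
import gymnasium as gym
from gymcts.gymcts_deepcopy_wrapper import DeepCopyMCTSGymEnvWrapper
from gymcts.gymcts_action_history_wrapper import ActionHistoryMCTSGymEnvWrapper

stochastic = True
env = gym.make("FrozenLake-v1", is_slippery=stochastic, render_mode="ansi")
env.reset()

if stochastic:
    # deepcopy snapshots stay valid even when transitions are random
    env = DeepCopyMCTSGymEnvWrapper(env)
else:
    # replaying the action history only restores deterministic environments
    env = ActionHistoryMCTSGymEnvWrapper(env)
```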
@@ -66,7 +70,7 @@ if __name__ == '__main__':
  env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=True, render_mode="ansi")
  env.reset()

- # 1. wrap the environment with the naive wrapper or a custom gymcts wrapper
+ # 1. wrap the environment with the deep copy wrapper or a custom gymcts wrapper
  env = DeepCopyMCTSGymEnvWrapper(env)

  # 2. create the agent
@@ -89,7 +93,7 @@ if __name__ == '__main__':

  # 5. print the solution
  # read the solution from the info provided by the RecordEpisodeStatistics wrapper
- # (that NaiveSoloMCTSGymEnvWrapper uses internally)
+ # (that DeepCopyMCTSGymEnvWrapper uses internally)
  episode_length = info["episode"]["l"]
  episode_return = info["episode"]["r"]

@@ -182,7 +186,7 @@ if __name__ == '__main__':
  env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=False, render_mode="rgb_array")
  env.reset()

- # 1. wrap the environment with the naive wrapper or a custom gymcts wrapper
+ # 1. wrap the environment with the deep copy wrapper or a custom gymcts wrapper
  env = DeepCopyMCTSGymEnvWrapper(env)

  # 2. create the agent
@@ -211,7 +215,7 @@ if __name__ == '__main__':
  env.close()

  # 5. print the solution
- # read the solution from the info provided by the RecordEpisodeStatistics wrapper (that NaiveSoloMCTSGymEnvWrapper wraps internally)
+ # read the solution from the info provided by the RecordEpisodeStatistics wrapper (that DeepCopyMCTSGymEnvWrapper wraps internally)
  episode_length = info["episode"]["l"]
  episode_return = info["episode"]["r"]

@@ -252,13 +256,13 @@ import gymnasium as gym
  from graph_jsp_env.disjunctive_graph_jsp_env import DisjunctiveGraphJspEnv
  from jsp_instance_utils.instances import ft06, ft06_makespan

- from gymcts.gymcts_agent import SoloMCTSAgent
- from gymcts.gymcts_gym_env import SoloMCTSGymEnv
+ from gymcts.gymcts_agent import GymctsAgent
+ from gymcts.gymcts_env_abc import GymctsABC

  from gymcts.logger import log


- class GraphJspGYMCTSWrapper(SoloMCTSGymEnv, gym.Wrapper):
+ class GraphJspGYMCTSWrapper(GymctsABC, gym.Wrapper):

      def __init__(self, env: DisjunctiveGraphJspEnv):
          gym.Wrapper.__init__(self, env)
@@ -309,7 +313,7 @@ if __name__ == '__main__':

  env = GraphJspGYMCTSWrapper(env)

- agent = SoloMCTSAgent(
+ agent = GymctsAgent(
      env=env,
      clear_mcts_tree_after_step=True,
      render_tree_after_step=True,
@@ -352,7 +356,6 @@ import gymnasium as gym

  from gymcts.gymcts_agent import GymctsAgent
  from gymcts.gymcts_action_history_wrapper import ActionHistoryMCTSGymEnvWrapper
- from gymcts.gymcts_deepcopy_wrapper import DeepCopyMCTSGymEnvWrapper

  from gymcts.logger import log

@@ -365,7 +368,7 @@ if __name__ == '__main__':
  env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=False, render_mode="ansi")
  env.reset()

- # wrap the environment with the naive wrapper or a custom gymcts wrapper
+ # wrap the environment with the wrapper or a custom gymcts wrapper
  env = ActionHistoryMCTSGymEnvWrapper(env)

  # create the agent
@@ -436,11 +439,11 @@ clone the repository in your favorite code editor (for example PyCharm, VSCode,

  using https:
  ```shell
- git clone https://github.com/Alexander-Nasuta/todo
+ git clone https://github.com/Alexander-Nasuta/gymcts.git
  ```
  or by using the GitHub CLI:
  ```shell
- gh repo clone Alexander-Nasuta/todo
+ gh repo clone Alexander-Nasuta/gymcts
  ```

  if you are using PyCharm, I recommend doing the following additional steps:
@@ -449,9 +452,6 @@ if you are using PyCharm, I recommend doing the following additional steps:
  - mark the `tests` folder as test root (by right-clicking on the folder and selecting `Mark Directory as` -> `Test Sources Root`)
  - mark the `resources` folder as resources root (by right-clicking on the folder and selecting `Mark Directory as` -> `Resources Root`)

- at the end your project structure should look like this:
-
- todo

  ### Create a Virtual Environment (optional)

@@ -517,12 +517,6 @@ For testing with `tox` run the following command:
  tox
  ```

- Here is a screenshot of what the output might look like:
-
- ![](https://github.com/Alexander-Nasuta/GraphMatrixJobShopEnv/raw/master/resources/tox-screenshot.png)
-
- Tox will run the tests in a separate environment and will also check if the requirements are installed correctly.
-
  ### Builing and Publishing the Project to PyPi

  In order to publish the project to PyPi, the project needs to be built and then uploaded to PyPi.
@@ -561,7 +555,6 @@ sphinx-autobuild ./docs/source/ ./docs/build/html/
  This project features most of the extensions featured in this Tutorial: [Document Your Scientific Project With Markdown, Sphinx, and Read the Docs | PyData Global 2021](https://www.youtube.com/watch?v=qRSb299awB0).


-
  ## Contact

  If you have any questions or feedback, feel free to contact me via [email](mailto:alexander.nasuta@wzl-iqs.rwth-aachen.de) or open an issue on repository.
{gymcts-1.2.0 → gymcts-1.2.1}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "gymcts"
- version = "1.2.0"
+ version = "1.2.1"
  description = "A minimalistic implementation of the Monte Carlo Tree Search algorithm for planning problems fomulated as gymnaisum reinforcement learning environments."
  readme = "README.md"
  authors = [{ name = "Alexander Nasuta", email = "alexander.nasuta@wzl-iqs.rwth-aachen.de" }]
@@ -32,7 +32,7 @@ examples = [
  ]
  dev = [
      "jsp-instance-utils",
-     "graph-matrix-jsp-env",
+     "graph-matrix-jsp-env>=0.3.0",
      "graph-jsp-env",
      "JSSEnv",

@@ -49,13 +49,16 @@ dev = [
      "myst-parser", # .md support for sphinx
      "sphinx-autobuild",
      #
+     "sphinx-copybutton", # for code copy buttons
      "furo", # cool theme
      "twine",
      "sphinx-copybutton", # for code copy buttons
      "nbsphinx", # for jupyter notebook support in sphinx
+     "pandoc",

      "jupytext", # converting .py examples to jupyter notebook jupytext --to notebook *.py
      "jupyter", # for jupyter notebook kernel
+     "typing_extensions>=4.12.0",
  ]

  [project.urls]
{gymcts-1.2.0 → gymcts-1.2.1}/setup.cfg

@@ -25,9 +25,6 @@ testing =
      flake8>=3.9
      tox>=3.24

- [options.package_data]
- phantomderopfa = py.typed
-
  [flake8]
  max-line-length = 160

{gymcts-1.2.0 → gymcts-1.2.1}/src/gymcts/colorful_console_utils.py

@@ -106,6 +106,18 @@ def wrap_with_color_codes(s: object, /, r: int | float, g: int | float, b: int |


  def wrap_evenly_spaced_color(s: Any, n_of_item: int, n_classes: int, c_map="rainbow") -> str:
+     """
+     Wraps a string with a color scale (a matplotlib c_map) based on the n_of_item and n_classes.
+     This function is used to color code the available actions in the MCTS tree visualisation.
+     The children of the MCTS tree are colored based on their action for a clearer visualisation.
+
+     :param s: the string (or object) to be wrapped. objects are converted to string (using the __str__ function).
+     :param n_of_item: the index of the item to be colored. In a mcts tree, this is the (parent-)action of the node.
+     :param n_classes: the number of classes (or items) to be colored. In a mcts tree, this is the number of available actions.
+     :param c_map: the colormap to be used (default is 'rainbow').
+         The colormap can be any matplotlib colormap, e.g. 'viridis', 'plasma', 'inferno', 'magma', 'cividis'.
+     :return: a string that contains the color-codes (prefix and suffix) and the string s in between.
+     """
      if s is None or n_of_item is None or n_classes is None:
          return s

@@ -119,6 +131,16 @@ def wrap_evenly_spaced_color(s: Any, n_of_item: int, n_classes: int, c_map="rain


  def wrap_with_color_scale(s: str, value: float, min_val: float, max_val: float, c_map=None) -> str:
+     """
+     Wraps a string with a color scale (a matplotlib c_map) based on the value, min_val, and max_val.
+
+     :param s: the string to be wrapped
+     :param value: the value to be mapped to a color
+     :param min_val: the minimum value of the scale
+     :param max_val: the maximum value of the scale
+     :param c_map: the colormap to be used (default is 'rainbow')
+     :return:
+     """
      if s is None or min_val is None or max_val is None or min_val >= max_val:
          return s

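Note: a sketch of the value-to-color mapping the two docstrings describe, assuming truecolor ANSI escapes (the actual escape handling lives in `wrap_with_color_codes`):

```python
import matplotlib


def rgb_for(value: float, c_map: str = "rainbow") -> tuple[int, int, int]:
    # map a normalized value in [0, 1] to 8-bit RGB via a matplotlib colormap
    r, g, b, _alpha = matplotlib.colormaps[c_map](value)
    return int(r * 255), int(g * 255), int(b * 255)


r, g, b = rgb_for(0.5)
print(f"\x1b[38;2;{r};{g};{b}mcolored text\x1b[0m")  # truecolor foreground escape
```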
{gymcts-1.2.0 → gymcts-1.2.1}/src/gymcts/gymcts_action_history_wrapper.py

@@ -1,8 +1,7 @@
  import random
- import copy

  import numpy as np
- from typing import TypeVar, Any, SupportsFloat, Callable
+ from typing import Any, SupportsFloat, Callable
  import gymnasium as gym
  from gymnasium.core import WrapperActType, WrapperObsType
  from gymnasium.wrappers import RecordEpisodeStatistics
@@ -13,6 +12,21 @@ from gymcts.logger import log


  class ActionHistoryMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
+     """
+     A wrapper for gym environments that implements the GymctsABC interface.
+     It uses the action history as state representation.
+     Please note that this is not the most efficient way to implement the state representation.
+     It is supposed to be used to see if your use-case works well with the MCTS algorithm.
+     If it does, you can consider implementing all GymctsABC methods in a more efficient way.
+     The action history is a list of actions taken in the environment.
+     The state is represented as a list of actions taken in the environment.
+     The state is used to restore the environment using the load_state method.
+
+     It is supposed to be used to see if your use-case works well with the MCTS algorithm.
+     If it does, you can consider implementing all GymctsABC methods in a more efficient way.
+     """
+
+     # helper attributes for the wrapper
      _terminal_flag: bool = False
      _last_reward: SupportsFloat = 0
      _step_tuple: tuple[WrapperObsType, SupportsFloat, bool, bool, dict[str, Any]] = None
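Note: a usage sketch of the state round-trip this docstring describes; `load_state([])` is used for the initial reset so the recorded history and the environment state stay in sync (an assumption about the cleanest way to reset this wrapper):

```python
import gymnasium as gym
from gymcts.gymcts_action_history_wrapper import ActionHistoryMCTSGymEnvWrapper

env = gym.make("FrozenLake-v1", is_slippery=False, render_mode="ansi")
env.reset()
env = ActionHistoryMCTSGymEnvWrapper(env)
env.load_state([])       # reset with an empty action history

env.step(1)
env.step(2)
state = env.get_state()  # -> [1, 2]: every action taken since the last reset
env.load_state(state)    # reset, then replay [1, 2] to reach the same state
```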
@@ -25,6 +39,17 @@ class ActionHistoryMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
          action_mask_fn: str | Callable[[gym.Env], np.ndarray] | None = None,
          buffer_length: int = 100,
      ):
+         """
+         A wrapper for gym environments that implements the GymctsABC interface.
+         It uses the action history as state representation.
+         Please note that this is not the most efficient way to implement the state representation.
+         It is supposed to be used to see if your use-case works well with the MCTS algorithm.
+         If it does, you can consider implementing all GymctsABC methods in a more efficient way.
+
+         :param env: the environment to wrap
+         :param action_mask_fn: a function that takes the environment as input and returns a mask of valid actions
+         :param buffer_length: the length of the buffer for recording episodes for determining their rollout returns
+         """
          # wrap with RecordEpisodeStatistics if it is not already wrapped
          env = RecordEpisodeStatistics(env, buffer_length=buffer_length)

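Note: a sketch of supplying an `action_mask_fn`; the all-ones mask is a stand-in for real masking logic that would inspect the environment state:

```python
import numpy as np
import gymnasium as gym
from gymcts.gymcts_action_history_wrapper import ActionHistoryMCTSGymEnvWrapper


def my_mask(env: gym.Env) -> np.ndarray:
    # stand-in: mark every action as valid (FrozenLake has 4 discrete actions)
    return np.ones(env.action_space.n, dtype=bool)


env = gym.make("FrozenLake-v1", is_slippery=False)
env.reset()
env = ActionHistoryMCTSGymEnvWrapper(env, action_mask_fn=my_mask)
print(env.get_valid_actions())  # -> [0, 1, 2, 3]
```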
@@ -48,6 +73,17 @@ class ActionHistoryMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
          self._action_mask_fn = action_mask_fn

      def load_state(self, state: list[int]) -> None:
+         """
+         Loads the state of the environment. The state is a list of actions taken in the environment.
+
+         The environment is reset and all actions in the state are performed in order to restore the environment to the
+         same state.
+
+         This works only for deterministic environments!
+
+         :param state: the state to load
+         :return: None
+         """
          self.env.reset()
          self._wrapper_action_history = []

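Note: the restore described above boils down to a replay loop. A minimal standalone sketch of the idea (the method body itself is truncated in this hunk):

```python
def replay(env, actions: list[int]) -> None:
    # restore a deterministic env by replaying its recorded action history
    env.reset()
    for action in actions:
        env.step(action)
```

For stochastic environments the replay diverges from the original trajectory, which is exactly why the deepcopy-based wrapper exists.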
@@ -56,15 +92,30 @@ class ActionHistoryMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
          self._wrapper_action_history.append(action)

      def is_terminal(self) -> bool:
+         """
+         Returns True if the environment is in a terminal state, False otherwise.
+
+         :return:
+         """
          if not len(self.get_valid_actions()):
              return True
          else:
              return self._terminal_flag

      def action_masks(self) -> np.ndarray | None:
+         """
+         Returns the action masks for the environment. If the action_mask_fn is not set, it returns None.
+
+         :return:
+         """
          return self._action_mask_fn(self.env) if self._action_mask_fn is not None else None

      def get_valid_actions(self) -> list[int]:
+         """
+         Returns a list of valid actions for the current state of the environment.
+
+         :return: a list of valid actions
+         """
          if self._action_mask_fn is None:
              action_space: gym.spaces.Discrete = self.env.action_space  # Type hinting
              return list(range(action_space.n))
@@ -72,6 +123,12 @@ class ActionHistoryMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
              return [i for i, mask in enumerate(self.action_masks()) if mask]

      def rollout(self) -> float:
+         """
+         Performs a random rollout from the current state of the environment and returns the return (sum of rewards)
+         of the rollout.
+
+         :return: the return of the rollout
+         """
          log.debug("performing rollout")
          # random rollout
          # perform random valid action util terminal
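Note: the rollout that the docstring and the comments describe, as a standalone sketch; the wrapper itself reads the episode return from the RecordEpisodeStatistics info instead of summing rewards manually:

```python
import random


def random_rollout(env) -> float:
    # take uniformly random valid actions until a terminal state is reached
    episode_return = 0.0
    while not env.is_terminal():
        action = random.choice(env.get_valid_actions())
        _obs, reward, terminated, truncated, _info = env.step(action)
        episode_return += float(reward)
        if terminated or truncated:
            break
    return episode_return
```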
@@ -92,11 +149,24 @@ class ActionHistoryMCTSGymEnvWrapper(GymctsABC, gym.Wrapper):
          return episode_return

      def get_state(self) -> list[int]:
+         """
+         Returns the current state of the environment. The state is a list of actions taken in the environment,
+         namely all action that have been taken in the environment so far (since the last reset).
+
+         :return: a list of actions taken in the environment
+         """
+
          return self._wrapper_action_history.copy()

      def step(
          self, action: WrapperActType
      ) -> tuple[WrapperObsType, SupportsFloat, bool, bool, dict[str, Any]]:
+         """
+         Performs a step in the environment. It adds the action to the action history and updates the terminal flag.
+
+         :param action: action to perform in the environment
+         :return: the step tuple of the environment (obs, reward, terminated, truncated, info)
+         """
          step_tuple = self.env.step(action)
          self._wrapper_action_history.append(action)
          obs, reward, terminated, truncated, info = step_tuple
{gymcts-1.2.0 → gymcts-1.2.1}/src/gymcts/gymcts_agent.py

@@ -1,4 +1,5 @@
  import copy
+ import random
  import gymnasium as gym

  from typing import TypeVar, Any, SupportsFloat, Callable
@@ -63,7 +64,10 @@ class GymctsAgent:
          # NAVIGATION STRATEGY
          # select child with highest UCB score
          while not temp_node.is_leaf():
-             temp_node = max(temp_node.children.values(), key=lambda child: child.ucb_score())
+             children = list(temp_node.children.values())
+             max_ucb_score = max(child.ucb_score() for child in children)
+             best_children = [child for child in children if child.ucb_score() == max_ucb_score]
+             temp_node = random.choice(best_children)
          log.debug(f"Selected leaf node: {temp_node}")
          return temp_node
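Note: the new selection step is a random argmax over the children's UCB scores. A generic sketch that also caches the scores, so `ucb_score()` is evaluated only once per child:

```python
import random


def random_argmax(items, key):
    # like max(items, key=key), but ties are broken uniformly at random
    scored = [(key(item), item) for item in items]
    best_score = max(score for score, _item in scored)
    return random.choice([item for score, item in scored if score == best_score])


# hypothetical usage in the selection loop shown above:
# temp_node = random_argmax(temp_node.children.values(), key=lambda c: c.ucb_score())
```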