kuzongaenv 0.2.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kuzongaenv-0.2.9/LICENSE +21 -0
- kuzongaenv-0.2.9/PKG-INFO +117 -0
- kuzongaenv-0.2.9/README.md +100 -0
- kuzongaenv-0.2.9/kuzongaenv/__init__.py +6 -0
- kuzongaenv-0.2.9/kuzongaenv/envs/__init__.py +1 -0
- kuzongaenv-0.2.9/kuzongaenv/envs/kuzonga_env.py +499 -0
- kuzongaenv-0.2.9/kuzongaenv/inspection/__init__.py +0 -0
- kuzongaenv-0.2.9/kuzongaenv/inspection/inspector.py +269 -0
- kuzongaenv-0.2.9/kuzongaenv/utils/__init__.py +0 -0
- kuzongaenv-0.2.9/kuzongaenv/utils/logger.py +40 -0
- kuzongaenv-0.2.9/kuzongaenv.egg-info/PKG-INFO +117 -0
- kuzongaenv-0.2.9/kuzongaenv.egg-info/SOURCES.txt +16 -0
- kuzongaenv-0.2.9/kuzongaenv.egg-info/dependency_links.txt +1 -0
- kuzongaenv-0.2.9/kuzongaenv.egg-info/requires.txt +2 -0
- kuzongaenv-0.2.9/kuzongaenv.egg-info/top_level.txt +1 -0
- kuzongaenv-0.2.9/pyproject.toml +21 -0
- kuzongaenv-0.2.9/setup.cfg +4 -0
- kuzongaenv-0.2.9/setup.py +22 -0
kuzongaenv-0.2.9/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Jacinto Jeje Matamba Quimua
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: kuzongaenv
|
|
3
|
+
Version: 0.2.9
|
|
4
|
+
Summary: Gymnasium environment for the game Kuzonga
|
|
5
|
+
Home-page: https://github.com/jaci-hub/kuzongaenv
|
|
6
|
+
Author: Jacinto Jeje Matamba Quimua
|
|
7
|
+
License: MIT
|
|
8
|
+
Project-URL: Source, https://github.com/jaci-hub/kuzongaenv
|
|
9
|
+
Requires-Python: >=3.11
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Requires-Dist: gymnasium>=0.30.0
|
|
13
|
+
Requires-Dist: numpy>=1.23
|
|
14
|
+
Dynamic: home-page
|
|
15
|
+
Dynamic: license-file
|
|
16
|
+
Dynamic: requires-python
|
|
17
|
+
|
|
18
|
+
# KuzongaEnv
|
|
19
|
+
|
|
20
|
+
A custom Gymnasium-compatible environment for the [Kuzonga game](https://www.kuzonga.com).
|
|
21
|
+
|
|
22
|
+
## Environment Details
|
|
23
|
+
### Action Space
|
|
24
|
+
|
|
25
|
+
The environment uses a dictionary action space with three components:
|
|
26
|
+
|
|
27
|
+
| Key | Value | Description |
|
|
28
|
+
|-----------|------------|-------------|
|
|
29
|
+
| v | 1 or 0 (or True and False, respectively) | Whether to attempt division (`1`) or change a digit (`0`). |
|
|
30
|
+
| g | 0–9 | If `v=1`, the divisor; if `v=0`, the new digit to set at `r`. |
|
|
31
|
+
| r | 0…digits-1 or `None` | Rindex (Right-to-left or reverse index) of the digit to overwrite (if `v=1`, it should be `None`). |
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
Example:
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
action = {"v": 1, "g": 3, "r": None} # attempt division by 3
|
|
38
|
+
action = {"v": 0, "g": 7, "r": 1} # set the second digit (from the right) to 7
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### Observation Space
|
|
42
|
+
|
|
43
|
+
The environment uses a dictionary observation space with the following keys:
|
|
44
|
+
|
|
45
|
+
| Key | Type | Description |
|
|
46
|
+
|----------------------------|-------------------------|-------------|
|
|
47
|
+
| s | np.int8 array (digits,) | The original number as an array of digits. |
|
|
48
|
+
| d | np.int8 array (digits,) | The current number as an array of digits. |
|
|
49
|
+
| a | np.int64 array (digits*10,) | Binary mask of which digits can be set at each position. Flattened from shape (digits, 10). |
|
|
50
|
+
| p | np.int64 array (num_players*3,) | Each player’s `[i, c, m]`, where `i` is the ID, `c` is the score and `m` tells if it is the player's turn (`m=1`) or not (`m=0`). Flattened array of all players. It has one player by default. |
|
|
51
|
+
| t | int | ID of the player whose turn it is. |
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
Example:
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
obs, info = env.reset()
|
|
58
|
+
print(obs["s"]) # [1, 7]
|
|
59
|
+
print(obs["d"]) # [4, 7]
|
|
60
|
+
print(obs["a"]) # array([1,1,0,...])
|
|
61
|
+
print(obs["p"]) # array([0,0,1,1,0,0]) # two players
|
|
62
|
+
print(obs["t"]) # 0
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
### Quick Notes
|
|
66
|
+
|
|
67
|
+
The *a* mask prevents illegal moves (e.g., setting a leading zero or turning the number into 0 or 1).
|
|
68
|
+
|
|
69
|
+
Rewards and penalties are automatically updated in the environment during *step()*.
|
|
70
|
+
|
|
71
|
+
The environment fully supports multiple players, and tracks turns via *t* and *m*.
|
|
72
|
+
|
|
73
|
+
The *options* parameter in *reset()* allows resetting the environment to a specific given state/obs, by setting it with the format: `options = {'obs': <state/obs dict>}`
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
## Usage Example
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
import gymnasium as gym
|
|
80
|
+
import kuzongaenv
|
|
81
|
+
|
|
82
|
+
env = gym.make("Kuzonga-v0")
|
|
83
|
+
obs, info = env.reset()
|
|
84
|
+
action = env.action_space.sample()
|
|
85
|
+
obs, reward, terminated, truncated, info = env.step(action)
|
|
86
|
+
|
|
87
|
+
print(f"Observation: {obs}")
|
|
88
|
+
print(f"Reward: {reward}, Terminated: {terminated}")
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Installation
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
pip install -e .
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Cite This Project
|
|
98
|
+
|
|
99
|
+
If you use **Kuzonga** in your research, projects, or publications, please cite it as:
|
|
100
|
+
|
|
101
|
+
Jacinto Jeje Matamba Quimua (2025). KuzongaEnv: Gym Environment for Reinforcement Learning Experiments. GitHub repository: https://github.com/jaci-hub/kuzongaenv
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
### BibTeX
|
|
105
|
+
|
|
106
|
+
```bibtex
|
|
107
|
+
@misc{kuzongaenv2025,
|
|
108
|
+
author = {Jacinto Jeje Matamba Quimua},
|
|
109
|
+
title = {KuzongaEnv: Gym Environment for Reinforcement Learning Experiments},
|
|
110
|
+
year = 2025,
|
|
111
|
+
howpublished = {\url{https://github.com/jaci-hub/kuzongaenv}},
|
|
112
|
+
}
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Play Kuzonga Online
|
|
116
|
+
|
|
117
|
+
[Kuzonga game](https://www.kuzonga.com)
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# KuzongaEnv
|
|
2
|
+
|
|
3
|
+
A custom Gymnasium-compatible environment for the [Kuzonga game](https://www.kuzonga.com).
|
|
4
|
+
|
|
5
|
+
## Environment Details
|
|
6
|
+
### Action Space
|
|
7
|
+
|
|
8
|
+
The environment uses a dictionary action space with three components:
|
|
9
|
+
|
|
10
|
+
| Key | Value | Description |
|
|
11
|
+
|-----------|------------|-------------|
|
|
12
|
+
| v | 1 or 0 (or True and False, respectively) | Whether to attempt division (`1`) or change a digit (`0`). |
|
|
13
|
+
| g | 0–9 | If `v=1`, the divisor; if `v=0`, the new digit to set at `r`. |
|
|
14
|
+
| r | 0…digits-1 or `None` | Rindex (Right-to-left or reverse index) of the digit to overwrite (if `v=1`, it should be `None`). |
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
Example:
|
|
18
|
+
|
|
19
|
+
```python
|
|
20
|
+
action = {"v": 1, "g": 3, "r": None} # attempt division by 3
|
|
21
|
+
action = {"v": 0, "g": 7, "r": 1} # set the second digit (from the right) to 7
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
### Observation Space
|
|
25
|
+
|
|
26
|
+
The environment uses a dictionary observation space with the following keys:
|
|
27
|
+
|
|
28
|
+
| Key | Type | Description |
|
|
29
|
+
|----------------------------|-------------------------|-------------|
|
|
30
|
+
| s | np.int8 array (digits,) | The original number as an array of digits. |
|
|
31
|
+
| d | np.int8 array (digits,) | The current number as an array of digits. |
|
|
32
|
+
| a | np.int64 array (digits*10,) | Binary mask of which digits can be set at each position. Flattened from shape (digits, 10). |
|
|
33
|
+
| p | np.int64 array (num_players*3,) | Each player’s `[i, c, m]`, where `i` is the ID, `c` is the score and `m` tells if it is the player's turn (`m=1`) or not (`m=0`). Flattened array of all players. It has one player by default. |
|
|
34
|
+
| t | int | ID of the player whose turn it is. |
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
Example:
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
obs, info = env.reset()
|
|
41
|
+
print(obs["s"]) # [1, 7]
|
|
42
|
+
print(obs["d"]) # [4, 7]
|
|
43
|
+
print(obs["a"]) # array([1,1,0,...])
|
|
44
|
+
print(obs["p"]) # array([0,0,1,1,0,0]) # two players
|
|
45
|
+
print(obs["t"]) # 0
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### Quick Notes
|
|
49
|
+
|
|
50
|
+
The *a* mask prevents illegal moves (e.g., setting a leading zero or turning the number into 0 or 1).
|
|
51
|
+
|
|
52
|
+
Rewards and penalties are automatically updated in the environment during *step()*.
|
|
53
|
+
|
|
54
|
+
The environment fully supports multiple players, and tracks turns via *t* and *m*.
|
|
55
|
+
|
|
56
|
+
The *options* parameter in *reset()* allows resetting the environment to a specific given state/obs, by setting it with the format: `options = {'obs': <state/obs dict>}`
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
## Usage Example
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
import gymnasium as gym
|
|
63
|
+
import kuzongaenv
|
|
64
|
+
|
|
65
|
+
env = gym.make("Kuzonga-v0")
|
|
66
|
+
obs, info = env.reset()
|
|
67
|
+
action = env.action_space.sample()
|
|
68
|
+
obs, reward, terminated, truncated, info = env.step(action)
|
|
69
|
+
|
|
70
|
+
print(f"Observation: {obs}")
|
|
71
|
+
print(f"Reward: {reward}, Terminated: {terminated}")
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Installation
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
pip install -e .
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## Cite This Project
|
|
81
|
+
|
|
82
|
+
If you use **Kuzonga** in your research, projects, or publications, please cite it as:
|
|
83
|
+
|
|
84
|
+
Jacinto Jeje Matamba Quimua (2025). KuzongaEnv: Gym Environment for Reinforcement Learning Experiments. GitHub repository: https://github.com/jaci-hub/kuzongaenv
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
### BibTeX
|
|
88
|
+
|
|
89
|
+
```bibtex
|
|
90
|
+
@misc{kuzongaenv2025,
|
|
91
|
+
author = {Jacinto Jeje Matamba Quimua},
|
|
92
|
+
title = {KuzongaEnv: Gym Environment for Reinforcement Learning Experiments},
|
|
93
|
+
year = 2025,
|
|
94
|
+
howpublished = {\url{https://github.com/jaci-hub/kuzongaenv}},
|
|
95
|
+
}
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## Play Kuzonga Online
|
|
99
|
+
|
|
100
|
+
[Kuzonga game](https://www.kuzonga.com)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from kuzongaenv.envs.kuzonga_env import KuzongaEnv
|
|
@@ -0,0 +1,499 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Name: Jacinto Jeje Matamba Quimua
|
|
3
|
+
Date: 10/28/2025
|
|
4
|
+
|
|
5
|
+
This is the python gym-style API for my game Kuzonga
|
|
6
|
+
'''
|
|
7
|
+
|
|
8
|
+
import math
|
|
9
|
+
import random
|
|
10
|
+
import gymnasium as gym
|
|
11
|
+
from gymnasium import spaces
|
|
12
|
+
import numpy as np
|
|
13
|
+
import warnings
|
|
14
|
+
from kuzongaenv.inspection.inspector import Inspector
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class KuzongaEnv(gym.Env):
|
|
19
|
+
metadata = {"render_modes": ["human"]}
|
|
20
|
+
|
|
21
|
+
def __init__(self, digits=2, players=1, render_mode=None, auto_render=False):
    """Create the game state containers and the action/observation spaces.

    Args:
        digits: Number of digits of the number being played.
        players: Number of players; the player with id 0 starts with the turn.
        render_mode: Stored as-is; ``render()`` prints only when it equals ``"human"``.
        auto_render: Stored as-is; ``step()`` calls ``render()`` after each step
            when this is truthy and ``render_mode`` is ``"human"``.
    """
    super().__init__()

    # Rendering configuration.
    self.render_mode = render_mode
    self.auto_render = auto_render

    # Game state. Both numbers stay None until reset() assigns them.
    self.digits = digits
    self.maxScore = 9 * digits
    self.static_number = None
    self.dynamic_number = None
    self.player_turn = 0
    self.players = [
        {"i": player_id, "c": 0, "m": 1 if player_id == 0 else 0}
        for player_id in range(players)
    ]
    self.available_digits_per_rindex = {
        rindex: list(range(10)) for rindex in range(digits)
    }

    warnings.filterwarnings(
        "ignore",
        message=".*Box observation space maximum and minimum values are equal.*"
    )

    # (1) Action space: a dictionary with the keys v, g and r.
    #   v (division flag): 1 to attempt a division, 0 to change a digit
    #   g (digit): the divisor when v=1, otherwise the new digit to write
    #   r (rindex): the right-to-left position to overwrite when v=0
    self.action_space = spaces.Dict({
        "v": spaces.Discrete(2),
        "g": spaces.Discrete(10),
        "r": spaces.Discrete(digits)
    })

    # (2) Observation space: a dictionary with the keys s, d, a, p and t.
    #   s (static number): digits of the originally generated number
    #   d (dynamic number): digits of the number currently being manipulated
    #   a (available digits): flattened (digits, 10) binary mask
    #   p (players): flattened [i, c, m] triple for every player
    #   t (player turn): id of the player holding the turn
    def digit_box():
        return spaces.Box(low=0, high=9, shape=(digits,), dtype=np.int8)

    number_of_players = len(self.players)
    score_limit = self.maxScore + 8
    lowest_triple = [0, -score_limit, 0]
    highest_triple = [number_of_players - 1, score_limit, 1]

    self.observation_space = spaces.Dict({
        "s": digit_box(),
        "d": digit_box(),
        "a": spaces.MultiBinary(10 * digits),
        "p": spaces.Box(
            low=np.array(lowest_triple * number_of_players, dtype=np.int64),
            high=np.array(highest_triple * number_of_players, dtype=np.int64),
            shape=(number_of_players * 3,),
            dtype=np.int64
        ),
        "t": spaces.Discrete(number_of_players)
    })
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _encode_players(self, given_players=None):
|
|
82
|
+
'''
|
|
83
|
+
Encodes player info numerically:
|
|
84
|
+
Each player has attributes: i, c, m
|
|
85
|
+
Note: if m=1, it is the player's turn to play, else, it is not
|
|
86
|
+
'''
|
|
87
|
+
if given_players != None:
|
|
88
|
+
self.players = given_players
|
|
89
|
+
|
|
90
|
+
if not self.players:
|
|
91
|
+
# create a default single-player representation
|
|
92
|
+
encoded = np.zeros((1, 3), dtype=np.int64)
|
|
93
|
+
encoded[0] = [0, 0, 1] # i=0, c=0, m=1
|
|
94
|
+
return encoded.flatten()
|
|
95
|
+
|
|
96
|
+
num_players = len(self.players)
|
|
97
|
+
encoded = np.zeros((num_players, 3), dtype=np.int64)
|
|
98
|
+
for i, p in enumerate(self.players):
|
|
99
|
+
encoded[i, 0] = p.get("i", i)
|
|
100
|
+
encoded[i, 1] = p.get("c", 0)
|
|
101
|
+
encoded[i, 2] = 1 if i == self.player_turn else 0
|
|
102
|
+
return encoded.flatten()
|
|
103
|
+
|
|
104
|
+
def _has_one_digit_divisor(self, num):
|
|
105
|
+
res = False
|
|
106
|
+
for i in range(2, 10):
|
|
107
|
+
if num % i == 0:
|
|
108
|
+
res = True
|
|
109
|
+
break
|
|
110
|
+
return res
|
|
111
|
+
|
|
112
|
+
def _create_dynamic_number(self):
|
|
113
|
+
min_val = 10 ** (self.digits - 1)
|
|
114
|
+
max_val = (10 ** self.digits) - 1
|
|
115
|
+
|
|
116
|
+
num = random.randint(min_val, max_val)
|
|
117
|
+
|
|
118
|
+
while self._has_one_digit_divisor(num):
|
|
119
|
+
num = random.randint(min_val, max_val)
|
|
120
|
+
|
|
121
|
+
return int(num)
|
|
122
|
+
|
|
123
|
+
def _get_prohibited_digit_list_at_rindex(self, rindex):
|
|
124
|
+
prohibited = set()
|
|
125
|
+
# no leading zero
|
|
126
|
+
if rindex == self.digits-1:
|
|
127
|
+
prohibited.add(0)
|
|
128
|
+
|
|
129
|
+
# can’t make number 0 or 1
|
|
130
|
+
for d in [0, 1]:
|
|
131
|
+
modified = str(self.dynamic_number)
|
|
132
|
+
modified = list(modified)
|
|
133
|
+
if rindex > 0:
|
|
134
|
+
modified[-rindex-1] = str(d)
|
|
135
|
+
else:
|
|
136
|
+
modified[self.digits-1] = str(d)
|
|
137
|
+
modified = ''.join(modified)
|
|
138
|
+
|
|
139
|
+
if int(modified) in (0, 1):
|
|
140
|
+
prohibited.add(d)
|
|
141
|
+
|
|
142
|
+
return list(prohibited)
|
|
143
|
+
|
|
144
|
+
def _remove_all_prohibited_digits_at_given_rindex_from_given_list(self, rindex, digit_list):
|
|
145
|
+
prohibited_digits = set(self._get_prohibited_digit_list_at_rindex(rindex))
|
|
146
|
+
return [d for d in digit_list if d not in prohibited_digits]
|
|
147
|
+
|
|
148
|
+
def _setup_available_digits_per_rindex(self):
|
|
149
|
+
available_digits_per_rindex = {}
|
|
150
|
+
|
|
151
|
+
for i in range(self.digits):
|
|
152
|
+
current_digit = int(str(self.dynamic_number)[self.digits-i-1])
|
|
153
|
+
all_digits = [d for d in range(10) if d != current_digit]
|
|
154
|
+
filtered_digits = self._remove_all_prohibited_digits_at_given_rindex_from_given_list(i, all_digits)
|
|
155
|
+
available_digits_per_rindex[i] = filtered_digits
|
|
156
|
+
|
|
157
|
+
return available_digits_per_rindex
|
|
158
|
+
|
|
159
|
+
def _encode_available_digits(self, given_available_digits_per_rindex=None):
|
|
160
|
+
if given_available_digits_per_rindex is not None:
|
|
161
|
+
# convert string keys to int if necessary
|
|
162
|
+
self.available_digits_per_rindex = {
|
|
163
|
+
int(k): v for k, v in given_available_digits_per_rindex.items()
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
mask = np.zeros((self.digits, 10), dtype=np.int64)
|
|
167
|
+
for idx, available in self.available_digits_per_rindex.items():
|
|
168
|
+
mask[int(idx), available] = 1
|
|
169
|
+
return mask.flatten()
|
|
170
|
+
|
|
171
|
+
def reset(self, *, seed = None, options = None):
    """Start a new episode and return the initial observation.

    Args:
        seed: Forwarded to ``gym.Env.reset`` and echoed back in ``info``.
        options: Optional dict. When it is truthy, ``options.get('obs')`` is
            run through ``Inspector``; if the inspection passes, the
            environment is restored to that state via ``_manual_reset``.
            Otherwise the default reset below is performed.

    Returns:
        ``(obs, info)`` where ``obs`` has the keys ``s``, ``d``, ``a``,
        ``p`` and ``t`` and ``info`` is ``{"seed": seed}``.
    """
    if options:
        manual_obs = options.get('obs', None)
        inspector = Inspector(state=manual_obs)
        inspector.inspect_state()

        if inspector.state_passed():
            return self._manual_reset(seed=seed, options=options)

    super().reset(seed=seed)

    # step() shrinks self.digits after a successful division; restore the
    # width recorded in the observation space so the new number is not
    # generated with the previous episode's reduced digit count.
    self.digits = self.observation_space["s"].shape[0]

    # A new episode starts with a clean scoreboard and player 0 on turn;
    # stale scores would make _game_over() fire on the very first step.
    for position, player in enumerate(self.players):
        player["c"] = 0
        player["m"] = 1 if position == 0 else 0
    self.player_turn = 0

    original_number = self._create_dynamic_number()
    self.static_number = original_number
    self.dynamic_number = original_number
    self.available_digits_per_rindex = self._setup_available_digits_per_rindex()

    obs = {
        "s": np.array([int(d) for d in str(self.static_number)], dtype=np.int8),
        "d": np.array([int(d) for d in str(self.dynamic_number)], dtype=np.int8),
        "a": self._encode_available_digits(),
        "p": self._encode_players(),
        "t": np.int64(self.player_turn)
    }

    info = {"seed": seed}
    return obs, info
|
|
197
|
+
|
|
198
|
+
def _manual_reset(self, *, seed = None, options = None):
    """Reset the environment to the state supplied in ``options['obs']``.

    The state dict is read as follows: ``s`` and ``d`` are the static and
    dynamic numbers (their ``str()`` form is split into digits), ``a`` maps
    each rindex (int or string key) to its list of available digits, ``p``
    is the list of player dicts and ``t`` is the id of the player on turn.
    The observation space, ``maxScore`` and ``digits`` are rebuilt to match
    the supplied numbers.

    Args:
        seed: Forwarded to ``gym.Env.reset`` and echoed back in ``info``.
        options: Dict carrying the state under the key ``'obs'``.

    Returns:
        ``(obs, info)`` with ``info`` holding ``"seed"`` and
        ``"manual_reset": True``. When no state is supplied the default
        ``reset()`` result is returned instead.

    NOTE(review): ``self.action_space`` is not rebuilt here, so its ``r``
    component keeps the digit count given to the constructor; confirm
    whether that is intended.
    """
    obs = options.get('obs', None) if options else None
    if obs is None:
        # Nothing to restore from: honor the (obs, info) contract by
        # performing the default reset instead of returning None.
        return self.reset(seed=seed)

    super().reset(seed=seed)

    static_width = len(str(obs["s"]))
    dynamic_width = len(str(obs["d"]))

    # Rebuild the observation space for the supplied state.
    number_of_players = len(obs["p"])
    self.maxScore = 9 * static_width
    self.observation_space = spaces.Dict({
        "s": spaces.Box(
            low=0,
            high=9,
            shape=(static_width,),
            dtype=np.int8
        ),
        "d": spaces.Box(
            low=0,
            high=9,
            shape=(dynamic_width,),
            dtype=np.int8
        ),
        "a": spaces.MultiBinary(10 * dynamic_width),
        "p": spaces.Box(
            low=np.array([0, -self.maxScore-8, 0] * number_of_players, dtype=np.int64),
            high=np.array([number_of_players - 1, self.maxScore+8, 1] * number_of_players, dtype=np.int64),
            shape=(number_of_players * 3,),
            dtype=np.int64
        ),
        "t": spaces.Discrete(number_of_players)
    })

    # The digit count follows the dynamic number.
    self.digits = dynamic_width

    self.static_number = obs["s"]
    self.dynamic_number = obs["d"]
    # Keys may arrive as strings; normalize them to int.
    self.available_digits_per_rindex = {
        int(k): v for k, v in obs["a"].items()
    }
    self.players = obs["p"]
    self.player_turn = obs["t"]

    new_obs = {
        "s": np.array([int(d) for d in str(obs["s"])], dtype=np.int8),
        "d": np.array([int(d) for d in str(obs["d"])], dtype=np.int8),
        "a": self._encode_available_digits(obs["a"]),
        "p": self._encode_players(obs["p"]),
        "t": np.int64(obs["t"])
    }

    # Keep the seed alongside the manual-reset marker; previously the seed
    # entry was overwritten by a second assignment.
    info = {"seed": seed, "manual_reset": True}
    return new_obs, info
|
|
260
|
+
|
|
261
|
+
def _rindex_available_digit_list_is_empty(self, rindex):
|
|
262
|
+
if not self.available_digits_per_rindex:
|
|
263
|
+
return True
|
|
264
|
+
|
|
265
|
+
rindex_available_digit_list = self.available_digits_per_rindex[rindex]
|
|
266
|
+
return len(rindex_available_digit_list) == 0
|
|
267
|
+
|
|
268
|
+
def _update_available_digits_per_rindex(self, rindex=None):
|
|
269
|
+
if rindex is None: # division was performed
|
|
270
|
+
for i in range(self.digits):
|
|
271
|
+
current_digit = int(str(self.dynamic_number)[self.digits-i-1])
|
|
272
|
+
rindex_available_digit_list = [d for d in self.available_digits_per_rindex[i] if d != current_digit]
|
|
273
|
+
rindex_available_digit_list = self._remove_all_prohibited_digits_at_given_rindex_from_given_list(i, rindex_available_digit_list)
|
|
274
|
+
|
|
275
|
+
if rindex_available_digit_list:
|
|
276
|
+
self.available_digits_per_rindex[i] = rindex_available_digit_list
|
|
277
|
+
else:
|
|
278
|
+
all_digits = [d for d in range(10) if d != current_digit]
|
|
279
|
+
all_digits = self._remove_all_prohibited_digits_at_given_rindex_from_given_list(i, all_digits)
|
|
280
|
+
self.available_digits_per_rindex[i] = all_digits
|
|
281
|
+
else: # division was not performed
|
|
282
|
+
if not self._rindex_available_digit_list_is_empty(rindex):
|
|
283
|
+
return
|
|
284
|
+
|
|
285
|
+
current_digit = None
|
|
286
|
+
if rindex>0:
|
|
287
|
+
current_digit = int(str(self.dynamic_number)[-rindex-1])
|
|
288
|
+
else:
|
|
289
|
+
current_digit = int(str(self.dynamic_number)[self.digits-1])
|
|
290
|
+
all_digits = [d for d in range(10) if d != current_digit]
|
|
291
|
+
all_digits = self._remove_all_prohibited_digits_at_given_rindex_from_given_list(rindex, all_digits)
|
|
292
|
+
self.available_digits_per_rindex[rindex] = all_digits
|
|
293
|
+
|
|
294
|
+
def _remove_each_quotient_digit_from_available_digits_per_rindex(self, quotient_string):
|
|
295
|
+
if not self.available_digits_per_rindex:
|
|
296
|
+
return
|
|
297
|
+
|
|
298
|
+
for i in range(len(quotient_string)):
|
|
299
|
+
rindex_available_digit_list = self.available_digits_per_rindex[i]
|
|
300
|
+
if not rindex_available_digit_list:
|
|
301
|
+
continue
|
|
302
|
+
|
|
303
|
+
digit_to_remove = int(quotient_string[self.digits-i-1])
|
|
304
|
+
rindex_available_digit_list = [d for d in rindex_available_digit_list if d != digit_to_remove]
|
|
305
|
+
self.available_digits_per_rindex[i] = rindex_available_digit_list
|
|
306
|
+
|
|
307
|
+
def _remove_digit_from_rindex_available_digits(self, rindex, digit_to_remove):
|
|
308
|
+
if not self.available_digits_per_rindex:
|
|
309
|
+
return
|
|
310
|
+
|
|
311
|
+
rindex_available_digit_list = self.available_digits_per_rindex[rindex]
|
|
312
|
+
if not rindex_available_digit_list:
|
|
313
|
+
return
|
|
314
|
+
|
|
315
|
+
rindex_available_digit_list = [d for d in rindex_available_digit_list if d != digit_to_remove]
|
|
316
|
+
self.available_digits_per_rindex[rindex] = rindex_available_digit_list
|
|
317
|
+
|
|
318
|
+
def _game_over(self):
|
|
319
|
+
# (1) quotient 1
|
|
320
|
+
if self.dynamic_number == 1:
|
|
321
|
+
return True
|
|
322
|
+
# (2) max points
|
|
323
|
+
for player in self.players:
|
|
324
|
+
if player["c"] >= self.maxScore:
|
|
325
|
+
return True
|
|
326
|
+
# (3) only one player left without -max points or less
|
|
327
|
+
count = 0
|
|
328
|
+
for player in self.players:
|
|
329
|
+
if player["c"] <= -self.maxScore:
|
|
330
|
+
if len(self.players) > 1:
|
|
331
|
+
count += 1
|
|
332
|
+
else:
|
|
333
|
+
return True
|
|
334
|
+
if len(self.players) > 1 and count == len(self.players) - 1:
|
|
335
|
+
return True
|
|
336
|
+
|
|
337
|
+
return False
|
|
338
|
+
|
|
339
|
+
def _update_player_turn(self):
|
|
340
|
+
if self.players:
|
|
341
|
+
self.player_turn = (self.player_turn + 1) % len(self.players)
|
|
342
|
+
if len(self.players) > 1:
|
|
343
|
+
while self.players[self.player_turn]["c"] <= -self.maxScore:
|
|
344
|
+
self.player_turn = (self.player_turn + 1) % len(self.players)
|
|
345
|
+
self.players[self.player_turn]["m"] = 1
|
|
346
|
+
for player in self.players:
|
|
347
|
+
if player["i"] != self.players[self.player_turn]["i"]:
|
|
348
|
+
player["m"] = 0
|
|
349
|
+
|
|
350
|
+
def step(self, action):
    """Execute one step of the Kuzonga environment.

    Args:
        action (dict): must have exactly the keys
            "v": 0/1 (or False/True), 1 to attempt a division,
            "g": digit 0-9, the divisor when v=1, else the new digit,
            "r": non-negative int rindex when v=0, else None.

    Returns:
        obs, reward, terminated, truncated, info

        Malformed actions cost 5 points and set ``info["critical"]``.
        A successful division or digit change earns 1 point, a failed
        division costs 1, a rejected digit change costs 2, and passing a
        rindex together with a division costs an extra 2. When the game
        is over, 10 is added to a positive reward and subtracted otherwise.
    """
    reward = 0
    terminated = False
    truncated = False
    info = {}

    # check action
    expected_keys = {"v", "g", "r"}
    if not isinstance(action, dict):
        reward -= 5
        info["critical"] = "Action must be a Python dictionary."
    elif set(action.keys()) != expected_keys:
        reward -= 5
        # sorted() keeps the message stable; set order is arbitrary.
        info["critical"] = f"Action dictionary must have exactly these keys: {', '.join(sorted(expected_keys))}."
    else:
        # get attributes (None marks an invalid value)
        division = bool(action["v"]) if action["v"] in [0, 1, True, False] else None
        digit = int(action["g"]) if action["g"] in range(0, 10) else None
        rindex = int(action["r"]) if (isinstance(action["r"], (int, np.integer)) and action["r"] >= 0) else None

        # check division
        if division is None:
            reward -= 5
            info["critical"] = "The value for the division key, v, must be either True or False, or 1 or 0."
        # check digit
        elif digit is None:
            reward -= 5
            info["critical"] = "Digit must be between 0-9."
        # check rindex: it is mandatory only for a digit change. The former
        # condition also required `division is None`, which the first branch
        # already handles, so this check could never fire.
        elif rindex is None and not division:
            reward -= 5
            info["critical"] = "Rindex, r, must be an integer greater than or equal to 0."

        # (1) Division attempt
        elif division:
            # deduct points if rindex was provided
            if rindex is not None:
                reward -= 2
                info["warning"] = "Rindex, r, should have not been provided!"

            if digit in [0, 1]:  # not allowed to divide by 0 or 1
                reward -= 5
                info["critical"] = "Division by 0 or 1 is not allowed!"
            elif self.dynamic_number % digit == 0:
                self.dynamic_number = self.dynamic_number // digit
                # when the quotient has fewer digits than before, drop the
                # rindex keys that no longer exist in the quotient
                for j in range(len(str(self.dynamic_number)), self.digits):
                    self.available_digits_per_rindex.pop(j, None)
                # update the number of digits
                self.digits = len(str(self.dynamic_number))
                reward += 1
                # update the list of available digits per rindex
                # (1) remove each quotient digit from available digits per rindex
                self._remove_each_quotient_digit_from_available_digits_per_rindex(str(self.dynamic_number))
                # (2) refresh every position (no rindex: a division was performed)
                self._update_available_digits_per_rindex()
                # update player score; the turn stays with the same player
                if self.players:
                    self.players[self.player_turn]["c"] += digit
                info["note"] = f"Divided by {digit}."
            else:
                reward -= 1
                # update player score
                if self.players:
                    self.players[self.player_turn]["c"] -= digit
                    # a player knocked out by the penalty loses the turn
                    if self.players[self.player_turn]["c"] <= -self.maxScore:
                        self._update_player_turn()
                info["note"] = f"Careful, {digit} is not a factor of {self.dynamic_number}."

        # (2) Digit change
        else:
            if rindex in self.available_digits_per_rindex and digit in self.available_digits_per_rindex[rindex]:
                num_str = list(str(self.dynamic_number))
                # rindex counts from the right, so -rindex-1 addresses it directly
                num_str[-rindex - 1] = str(digit)
                self.dynamic_number = int("".join(num_str))
                reward += 1
                # update the list of available digits per rindex
                # (1) remove digit from rindex available digits
                self._remove_digit_from_rindex_available_digits(rindex, digit)
                # (2) update available digits per rindex
                self._update_available_digits_per_rindex(rindex)
                # update player turn
                self._update_player_turn()
                info["note"] = f"Updated digit at rindex r={rindex} to {digit}."
            else:
                reward -= 2
                info["warning"] = f"Cannot update the digit at rindex r={rindex} to {digit}."

    # Check if game is over
    if self._game_over():
        terminated = True

        if reward > 0:
            reward += 10
        else:
            reward -= 10

        info["concluded"] = True

    # Create Observation
    obs = {
        "s": np.array([int(d) for d in str(self.static_number)], dtype=np.int8),
        "d": np.array([int(d) for d in str(self.dynamic_number)], dtype=np.int8),
        "a": self._encode_available_digits(),
        "p": self._encode_players(),
        "t": np.int64(self.player_turn)
    }

    # Render to see output
    if self.render_mode == "human" and getattr(self, "auto_render", True):
        self.render()

    return obs, float(reward), terminated, truncated, info
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
def render(self):
    '''
    Print the current game state to stdout when render_mode is "human".

    Prints the static number, the dynamic number, the available digits per
    rindex, the player whose turn it is, and a scoreboard with one line per
    entry of self.players (using each entry's "i" and "c" keys).
    Does nothing for any other render mode.
    '''
    if self.render_mode != "human":
        return

    print()
    print(f"Static Number: {self.static_number}")
    print(f"Dynamic Number: {self.dynamic_number}")
    print(f"Available digits per rindex: {self.available_digits_per_rindex}")
    print(f"Turn: Player{self.player_turn}")
    print('*** Scoreboard ***')
    for player in self.players:
        # Named player_id rather than id so the builtin id() is not shadowed.
        player_id = player["i"]
        score = player["c"]
        print(f"Player{player_id}: {score} pts")
    print('******************')
|
|
495
|
+
|
|
496
|
+
|
|
497
|
+
def close(self):
    # Delegates to the parent class's close(); this class adds no cleanup
    # of its own here.
    return super().close()
|
|
499
|
+
|
|
File without changes
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
import kuzongaenv
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
import numpy as np
|
|
5
|
+
from kuzongaenv.utils.logger import EpisodeLogger
|
|
6
|
+
|
|
7
|
+
# base dir
BASE_DIR = './kuzongax/inspection/logs'
# categories
ACTION = 'action'
STATE = 'state'
# types
CRITICAL = 'critical'
WARNING = 'warning'
NOTE = 'note'
SCORE = 'score'


class Inspector():
    '''
    Validate the format of a Kuzonga action and/or state and score the result.

    Each inspection starts from a full score (10 for the action, 40 for the
    state) and deducts points for every problem found; problems are reported
    through the logger's add_info(category, type, message). An inspection
    "passes" only when its score still equals the corresponding passing score.

    Parameters:
        action: the action to inspect; expected to be a dict with exactly the
            keys "v" (division flag), "g" (digit) and "r" (rindex).
        state: the state to inspect; expected to be a dict with exactly the
            keys "s", "d", "a", "p" and "t".
        logger: optional object providing add_info(), save_episode() and the
            attributes info / episode_log. When omitted, an
            EpisodeLogger(BASE_DIR) is created, as before.
    '''

    def __init__(self, action=None, state=None, logger=None):
        self.action = action
        # action keys
        self.division = None
        self.digit = None
        self.rindex = None

        self.state = state
        # state keys
        self.static_number = None
        self.dynamic_number = None
        self.available_digits_per_rindex = None
        self.players = None
        self.player_turn = None

        # scores
        self.action_score = 10
        self.state_score = 40
        self.overall_score = self.action_score + self.state_score
        self.action_passing_score = 10
        self.state_passing_score = 40
        self.overall_passing_score = self.action_passing_score + self.state_passing_score

        # Logging
        self.logger = logger if logger is not None else EpisodeLogger(BASE_DIR)

    def get_action(self):
        return self.action

    def get_state(self):
        return self.state

    def _record_score(self, category, score):
        '''Log the final score of one inspection and refresh the overall score.'''
        self.logger.add_info(category, SCORE, score)
        if self.logger.info not in self.logger.episode_log:
            self.logger.episode_log.append(self.logger.info)
        # overall_score used to be computed only in __init__, so it never
        # reflected any deduction; keep it in sync with the two partial scores.
        self.overall_score = self.action_score + self.state_score

    def inspect_action(self):
        '''
        inspect the action, to ensure it follows the format of the game Kuzonga
        '''
        expected_keys = {"v", "g", "r"}
        if not isinstance(self.action, dict):
            self.action_score -= 10
            message = "Action must be a Python dictionary."
            self.logger.add_info(ACTION, CRITICAL, message)
        elif set(self.action.keys()) != expected_keys:
            self.action_score -= 9
            # sorted() keeps the message stable across runs (set order is not).
            message = f"Action dictionary must have exactly these keys: {', '.join(sorted(expected_keys))}."
            self.logger.add_info(ACTION, CRITICAL, message)
        else:
            # get key values (each stays None when its raw value is invalid)
            if self.action["v"] in [0, 1, True, False]:
                self.division = bool(self.action["v"])
            if self.action["g"] in range(0, 10):
                self.digit = int(self.action["g"])
            if (isinstance(self.action["r"], (int, np.integer)) and self.action["r"] >= 0):
                self.rindex = int(self.action["r"])

            # Only the first failing check is penalised.
            if self.division is None:
                self.action_score -= 7
                message = "The value for the division attribute must be either True or False, or 1 or 0."
                self.logger.add_info(ACTION, CRITICAL, message)
            elif self.digit is None:
                self.action_score -= 7
                message = "Digit must be between 0-9."
                self.logger.add_info(ACTION, CRITICAL, message)
            elif self.rindex is None and not self.division:
                # A digit change needs a valid rindex. The previous condition
                # also required division to be None, which is impossible in
                # this branch, so a missing rindex was never reported.
                self.action_score -= 7
                message = "*Rindex, r, must be an integer greater than or equal to 0."
                self.logger.add_info(ACTION, CRITICAL, message)
            elif self.division:
                # Division attempt: deduct points if rindex was provided
                if self.rindex is not None:
                    self.action_score -= 2
                    message = "Rindex, r, should have not been provided!"
                    self.logger.add_info(ACTION, WARNING, message)

        self._record_score(ACTION, self.action_score)

    def _check_available_digits(self, value):
        '''
        Validate the "a" field (available digits per rindex).

        Returns False when the field is not a non-empty dict, in which case
        the remaining state fields are not inspected (as before); True otherwise.
        '''
        field = "a"
        # (2.1) Must be a dictionary
        if not isinstance(value, dict):
            self.state_score -= 7
            message = f"'{field}' must be a Python dictionary."
            self.logger.add_info(STATE, CRITICAL, message)
            return False
        # (2.2) Must not be empty
        if len(value) == 0:
            self.state_score -= 6
            message = f"'{field}' dictionary must not be empty."
            self.logger.add_info(STATE, CRITICAL, message)
            return False

        # Upper bound for keys; unknown when the static number itself is invalid.
        num_digits = None
        if self.static_number is not None:
            num_digits = len(str(self.static_number))

        # (2.3) Validate keys and values
        for k, v in value.items():
            # Key must be an integer >= 0 and below the number of digits.
            # The original wrote `0 <= k < len(...) == False`, a chained
            # comparison that can never be true, so this check never fired.
            key_ok = (
                isinstance(k, (int, np.integer))
                and k >= 0
                and (num_digits is None or k < num_digits)
            )
            if not key_ok:
                self.state_score -= 5
                message = (
                    f"Key '{k}' in '{field}' must be a non-negative integer "
                    "less than the number of digits of the static number."
                )
                self.logger.add_info(STATE, CRITICAL, message)
                break
            # Value must be a list
            if not isinstance(v, list):
                self.state_score -= 5
                message = f"Value for key '{k}' in '{field}' must be a Python list."
                self.logger.add_info(STATE, CRITICAL, message)
                break
            # Each element must be an integer digit between 0-9
            if not all(isinstance(d, (int, np.integer)) and 0 <= d <= 9 for d in v):
                self.state_score -= 4
                message = f"All elements in '{field}[{k}]' must be digits between 0 and 9."
                self.logger.add_info(STATE, CRITICAL, message)
                break
            # Duplicates are only a warning, so keep checking the other keys.
            if len(v) != len(set(v)):
                self.state_score -= 3
                message = f"Duplicate digits found in '{field}[{k}]'."
                self.logger.add_info(STATE, WARNING, message)
        # (2.4) Assign value
        # NOTE(review): assigned even when a check above failed; kept as is
        # because the source's indentation is not visible here. Confirm.
        self.available_digits_per_rindex = value
        return True

    def _check_players(self, players):
        '''Validate the "p" field (list of player dicts with keys i, c, m).'''
        if not isinstance(players, list):
            self.state_score -= 7
            message = "players must be a Python list."
            self.logger.add_info(STATE, CRITICAL, message)
            return
        if len(players) == 0:
            self.state_score -= 6
            message = "players list must have at least one player."
            self.logger.add_info(STATE, CRITICAL, message)
            return

        expected_players_key = {"i", "c", "m"}
        # Same bounds as before: +/- (9 * digits + 8), digits taken from str().
        score_bound = 9 * len(str(self.static_number)) + 8
        for player in players:
            # A non-dict entry is reported like a key mismatch instead of
            # raising AttributeError on .keys().
            if not isinstance(player, dict) or set(player.keys()) != expected_players_key:
                self.state_score -= 5
                message = f"player must have exactly these keys: {', '.join(sorted(expected_players_key))}."
                self.logger.add_info(STATE, CRITICAL, message)
                break
            # id
            if not (isinstance(player["i"], (int, np.integer)) and 0 <= player["i"] < len(players)):
                self.state_score -= 4
                message = "The player id, i, must be a non-negative integer less than the number of players."
                self.logger.add_info(STATE, CRITICAL, message)
                break
            # score
            if not (isinstance(player["c"], (int, np.integer)) and -score_bound <= player["c"] <= score_bound):
                self.state_score -= 4
                message = "The player score, c, must satisfy: -9*(the original number of digits) - 8 <= c <= 9*(the original number of digits) + 8."
                self.logger.add_info(STATE, CRITICAL, message)
                break
            # is_current_turn
            if not (isinstance(player["m"], (int, np.integer)) and 0 <= player["m"] <= 1):
                self.state_score -= 4
                message = "m must be 1 or 0, which means that it is the player's turn or not, respectivelly."
                self.logger.add_info(STATE, CRITICAL, message)
                break
        # NOTE(review): assigned for any non-empty list, even after a failed
        # entry check; kept so the turn check below still runs. Confirm.
        self.players = players

    def _check_player_turn(self, turn):
        '''Validate the "t" field against the number of players.'''
        if self.players is not None:
            if (isinstance(turn, (int, np.integer)) and 0 <= turn < len(self.players)):
                self.player_turn = int(turn)
        if self.player_turn is None:
            self.state_score -= 7
            message = "The player turn must be a non-negative integer less than the number of players."
            self.logger.add_info(STATE, CRITICAL, message)

    def inspect_state(self):
        '''
        inspect the state, to ensure it follows the format of the game Kuzonga
        '''
        expected_keys = {"s", "d", "a", "p", "t"}
        if not isinstance(self.state, dict):
            self.state_score -= 40
            message = "State must be a Python dictionary."
            self.logger.add_info(STATE, CRITICAL, message)
        elif set(self.state.keys()) != expected_keys:
            self.state_score -= 38
            message = f"State dictionary must have exactly these keys: {', '.join(sorted(expected_keys))}."
            self.logger.add_info(STATE, CRITICAL, message)
        else:
            # (0) check static_number
            # NOTE(review): the check requires > 0 while the message says
            # "non-negative"; one of the two is probably off. Confirm.
            if (isinstance(self.state["s"], (int, np.integer)) and self.state["s"] > 0):
                self.static_number = self.state["s"]
            else:
                self.state_score -= 7
                message = "The static number, s, must be a non-negative integer."
                self.logger.add_info(STATE, CRITICAL, message)

            # (1) check dynamic_number
            if (isinstance(self.state["d"], (int, np.integer)) and self.state["d"] > 0):
                self.dynamic_number = self.state["d"]
            else:
                self.state_score -= 7
                message = "The dynamic number, d, must be a non-negative integer."
                self.logger.add_info(STATE, CRITICAL, message)

            # (2) check available digits per rindex; (3) players; (4) turn.
            # A structurally invalid 'a' stops the inspection here, but the
            # score is still recorded below (the old early return skipped it).
            if self._check_available_digits(self.state["a"]):
                self._check_players(self.state["p"])
                self._check_player_turn(self.state["t"])

        self._record_score(STATE, self.state_score)

    def inspect_all(self):
        '''
        inspect both the action and the state, to ensure they follow the format of the game Kuzonga
        '''
        self.inspect_action()
        self.inspect_state()
        self.logger.save_episode()

    def get_action_score(self):
        return self.action_score

    def action_passed(self):
        return self.action_passing_score == self.action_score

    def get_state_score(self):
        return self.state_score

    def state_passed(self):
        return self.state_passing_score == self.state_score

    def get_overall_score(self):
        # Recomputed on demand so it always matches the two partial scores.
        self.overall_score = self.action_score + self.state_score
        return self.overall_score

    def all_passed(self):
        return self.get_overall_score() == self.overall_passing_score

    def get_action_passing_score(self):
        return self.action_passing_score

    def get_state_passing_score(self):
        return self.state_passing_score

    def get_overall_passing_score(self):
        return self.overall_passing_score
|
|
File without changes
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
|
|
5
|
+
SCORE = 'score'


class EpisodeLogger:
    '''
    Collect inspection messages in memory and dump episode logs as JSON files.

    Attributes set in __init__:
        base_dir: directory the JSON files are written to (created if missing).
        info: nested dict {category: {type: value}}.
        episode: counter used in the output file name, incremented per save.
        episode_log: list that save_episode() serialises.
    '''

    def __init__(self, base_dir="./logs"):
        os.makedirs(base_dir, exist_ok=True)
        self.base_dir = base_dir
        self.info = {}
        self.episode = 0
        self.episode_log = []

    def add_info(self, category=None, type=None, message=None):
        '''
        Store a message under info[category][type].

        The SCORE type holds a single value that is overwritten on each call;
        every other type accumulates its messages in a list.
        '''
        per_category = self.info.setdefault(category, {})
        if type == SCORE:
            per_category[type] = message
            return
        per_category.setdefault(type, []).append(message)

    def save_episode(self):
        '''
        Write episode_log to a timestamped JSON file in base_dir.

        Returns the path of the written file, or None (writing nothing) when
        episode_log is empty.
        '''
        if not self.episode_log:
            return

        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        file_name = f"episode_{self.episode}_{stamp}.json"
        path = os.path.join(self.base_dir, file_name)
        with open(path, "w") as f:
            json.dump(self.episode_log, f, indent=2)
        self.episode += 1
        return path
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: kuzongaenv
|
|
3
|
+
Version: 0.2.9
|
|
4
|
+
Summary: Gymnasium environment for the game Kuzonga
|
|
5
|
+
Home-page: https://github.com/jaci-hub/kuzongaenv
|
|
6
|
+
Author: Jacinto Jeje Matamba Quimua
|
|
7
|
+
License: MIT
|
|
8
|
+
Project-URL: Source, https://github.com/jaci-hub/kuzongaenv
|
|
9
|
+
Requires-Python: >=3.11
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Requires-Dist: gymnasium>=0.30.0
|
|
13
|
+
Requires-Dist: numpy>=1.23
|
|
14
|
+
Dynamic: home-page
|
|
15
|
+
Dynamic: license-file
|
|
16
|
+
Dynamic: requires-python
|
|
17
|
+
|
|
18
|
+
# KuzongaEnv
|
|
19
|
+
|
|
20
|
+
A custom Gymnasium-compatible environment for the [Kuzonga game](https://www.kuzonga.com).
|
|
21
|
+
|
|
22
|
+
## Environment Details
|
|
23
|
+
### Action Space
|
|
24
|
+
|
|
25
|
+
The environment uses a dictionary action space with three components:
|
|
26
|
+
|
|
27
|
+
| Key | Value | Description |
|
|
28
|
+
|-----------|------------|-------------|
|
|
29
|
+
| v | 1 or 0 (or True and False, respectively) | Whether to attempt division (`1`) or change a digit (`0`). |
|
|
30
|
+
| g | 0–9 | If `v=1`, the divisor; if `v=0`, the new digit to set at `r`. |
|
|
31
|
+
| r | 0…digits-1 or `None` | Rindex (Right-to-left or reverse index) of the digit to overwrite (if `v=1`, it should be `None`). |
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
Example:
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
action = {"v": 1, "g": 3, "r": None} # attempt division by 3
|
|
38
|
+
action = {"v": 0, "g": 7, "r": 1} # set the second digit (from the right) to 7
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### Observation Space
|
|
42
|
+
|
|
43
|
+
The environment uses a dictionary observation space with the following keys:
|
|
44
|
+
|
|
45
|
+
| Key | Type | Description |
|
|
46
|
+
|----------------------------|-------------------------|-------------|
|
|
47
|
+
| s | np.int8 array (digits,) | The original number as an array of digits. |
|
|
48
|
+
| d | np.int8 array (digits,) | The current number as an array of digits. |
|
|
49
|
+
| a | np.int64 array (digits*10,) | Binary mask of which digits can be set at each position. Flattened from shape (digits, 10). |
|
|
50
|
+
| p | np.int64 array (num_players*3,) | Each player’s `[i, c, m]`, where `i` is the ID, `c` is the score and `m` tells if it is the player's turn (`m=1`) or not (`m=0`). Flattened array of all players. It has one player by default. |
|
|
51
|
+
| t | int | ID of the player whose turn it is. |
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
Example:
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
obs, info = env.reset()
|
|
58
|
+
print(obs["s"]) # [1, 7]
|
|
59
|
+
print(obs["d"]) # [4, 7]
|
|
60
|
+
print(obs["a"]) # array([1,1,0,...])
|
|
61
|
+
print(obs["p"]) # array([0,0,1,1,0,0]) # two players
|
|
62
|
+
print(obs["t"]) # 0
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
### Quick Notes
|
|
66
|
+
|
|
67
|
+
The *a* mask prevents illegal moves (e.g., setting a leading zero or creating the number 0 or 1).
|
|
68
|
+
|
|
69
|
+
Rewards and penalties are automatically updated in the environment during *step()*.
|
|
70
|
+
|
|
71
|
+
The environment fully supports multiple players, and tracks turns via *t* and *m*.
|
|
72
|
+
|
|
73
|
+
The *options* parameter of *reset()* lets you reset the environment to a specific state/observation by passing it in the format: `options = {'obs': <state/obs dict>}`
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
## Usage Example
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
import gymnasium as gym
|
|
80
|
+
import kuzongaenv
|
|
81
|
+
|
|
82
|
+
env = gym.make("Kuzonga-v0")
|
|
83
|
+
obs, info = env.reset()
|
|
84
|
+
action = env.action_space.sample()
|
|
85
|
+
obs, reward, terminated, truncated, info = env.step(action)
|
|
86
|
+
|
|
87
|
+
print(f"Observation: {obs}")
|
|
88
|
+
print(f"Reward: {reward}, Terminated: {terminated}")
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Installation
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
pip install -e .
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Cite This Project
|
|
98
|
+
|
|
99
|
+
If you use **Kuzonga** in your research, projects, or publications, please cite it as:
|
|
100
|
+
|
|
101
|
+
Jacinto Jeje Matamba Quimua (2025). KuzongaEnv: Gym Environment for Reinforcement Learning Experiments. GitHub repository: https://github.com/jaci-hub/kuzongaenv
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
### BibTeX
|
|
105
|
+
|
|
106
|
+
```bibtex
|
|
107
|
+
@misc{kuzongaenv2025,
|
|
108
|
+
author = {Jacinto Jeje Matamba Quimua},
|
|
109
|
+
title = {KuzongaEnv: Gym Environment for Reinforcement Learning Experiments},
|
|
110
|
+
year = 2025,
|
|
111
|
+
howpublished = {\url{https://github.com/jaci-hub/kuzongaenv}},
|
|
112
|
+
}
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Play Kuzonga Online
|
|
116
|
+
|
|
117
|
+
[Kuzonga game](https://www.kuzonga.com)
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
setup.py
|
|
5
|
+
kuzongaenv/__init__.py
|
|
6
|
+
kuzongaenv.egg-info/PKG-INFO
|
|
7
|
+
kuzongaenv.egg-info/SOURCES.txt
|
|
8
|
+
kuzongaenv.egg-info/dependency_links.txt
|
|
9
|
+
kuzongaenv.egg-info/requires.txt
|
|
10
|
+
kuzongaenv.egg-info/top_level.txt
|
|
11
|
+
kuzongaenv/envs/__init__.py
|
|
12
|
+
kuzongaenv/envs/kuzonga_env.py
|
|
13
|
+
kuzongaenv/inspection/__init__.py
|
|
14
|
+
kuzongaenv/inspection/inspector.py
|
|
15
|
+
kuzongaenv/utils/__init__.py
|
|
16
|
+
kuzongaenv/utils/logger.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
kuzongaenv
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=65", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "kuzongaenv"
|
|
7
|
+
version = "0.2.9"
|
|
8
|
+
description = "Gymnasium environment for the game Kuzonga"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [
|
|
13
|
+
{ name="Jacinto Jeje Matamba Quimua" }
|
|
14
|
+
]
|
|
15
|
+
dependencies = [
|
|
16
|
+
"gymnasium>=0.30.0",
|
|
17
|
+
"numpy>=1.23"
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
[project.urls]
|
|
21
|
+
"Source" = "https://github.com/jaci-hub/kuzongaenv"
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from pathlib import Path

from setuptools import setup, find_packages

# Read the long description via pathlib so the file handle is closed
# (the bare open(...).read() left it open until garbage collection).
long_description = Path("README.md").read_text(encoding="utf-8")

setup(
    name="kuzongaenv",
    version="0.2.9",
    author="Jacinto Jeje Matamba Quimua",
    description="A custom Gymnasium-compatible environment for the Kuzonga game",
    long_description=long_description,
    long_description_content_type="text/markdown",
    packages=find_packages(),
    install_requires=[
        "gymnasium>=0.30.0",
        "numpy>=1.23",
    ],
    classifiers=[
        "Programming Language :: Python :: 3",
        "Operating System :: OS Independent",
    ],
    url="https://github.com/jaci-hub/kuzongaenv",
    license="MIT",
    python_requires=">=3.11",
)
|