gymnasium-sudoku 0.3.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gymnasium_sudoku-0.3.4/LICENSE +21 -0
- gymnasium_sudoku-0.3.4/PKG-INFO +91 -0
- gymnasium_sudoku-0.3.4/README.md +43 -0
- gymnasium_sudoku-0.3.4/gymnasium_sudoku/__init__.py +33 -0
- gymnasium_sudoku-0.3.4/gymnasium_sudoku/datasets/v0_biased/test_boards.csv +50 -0
- gymnasium_sudoku-0.3.4/gymnasium_sudoku/datasets/v0_biased/train_boards.csv +50 -0
- gymnasium_sudoku-0.3.4/gymnasium_sudoku/datasets/v1_easy/test_boards.csv +50 -0
- gymnasium_sudoku-0.3.4/gymnasium_sudoku/datasets/v1_easy/train_boards.csv +50 -0
- gymnasium_sudoku-0.3.4/gymnasium_sudoku/environment.py +210 -0
- gymnasium_sudoku-0.3.4/gymnasium_sudoku/puzzle.py +56 -0
- gymnasium_sudoku-0.3.4/gymnasium_sudoku/rendering.py +168 -0
- gymnasium_sudoku-0.3.4/gymnasium_sudoku.egg-info/PKG-INFO +91 -0
- gymnasium_sudoku-0.3.4/gymnasium_sudoku.egg-info/SOURCES.txt +18 -0
- gymnasium_sudoku-0.3.4/gymnasium_sudoku.egg-info/dependency_links.txt +1 -0
- gymnasium_sudoku-0.3.4/gymnasium_sudoku.egg-info/requires.txt +7 -0
- gymnasium_sudoku-0.3.4/gymnasium_sudoku.egg-info/top_level.txt +1 -0
- gymnasium_sudoku-0.3.4/pyproject.toml +48 -0
- gymnasium_sudoku-0.3.4/setup.cfg +4 -0
- gymnasium_sudoku-0.3.4/tests/test_1.py +59 -0
- gymnasium_sudoku-0.3.4/tests/test_2.py +111 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Author(s)
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: gymnasium_sudoku
|
|
3
|
+
Version: 0.3.4
|
|
4
|
+
Summary: A Sudoku environment for Reinforcement Learning research
|
|
5
|
+
Author-email: adeottidev@gmail.com
|
|
6
|
+
License: The MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2025 Author(s)
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in
|
|
18
|
+
all copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
26
|
+
THE SOFTWARE.
|
|
27
|
+
Project-URL: Homepage, https://github.com/adeotti/Gymnasium-Sudoku
|
|
28
|
+
Project-URL: Repository, https://github.com/adeotti/Gymnasium-Sudoku
|
|
29
|
+
Keywords: Reinforcement Learning,game,RL,AI,gymnasium,Sudoku
|
|
30
|
+
Classifier: Development Status :: 3 - Alpha
|
|
31
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
32
|
+
Classifier: Programming Language :: Python :: 3
|
|
33
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
34
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
35
|
+
Classifier: Intended Audience :: Science/Research
|
|
36
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
37
|
+
Requires-Python: >=3.10
|
|
38
|
+
Description-Content-Type: text/markdown
|
|
39
|
+
License-File: LICENSE
|
|
40
|
+
Requires-Dist: gymnasium>=1.1.1
|
|
41
|
+
Requires-Dist: numpy>=1.25.2
|
|
42
|
+
Requires-Dist: PySide6>=6.7.2
|
|
43
|
+
Requires-Dist: typing-extensions>=4.14.0
|
|
44
|
+
Requires-Dist: cloudpickle>=3.1.1
|
|
45
|
+
Requires-Dist: tqdm>=4.67.1
|
|
46
|
+
Requires-Dist: pathlib>=1.0.1
|
|
47
|
+
Dynamic: license-file
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
pip install gymnasium_sudoku
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
**Observation space:** The state returned after each `.reset()` or `.step()` is a raw Sudoku board of shape `[9,9]`. This observation can be converted into an image.
|
|
54
|
+
|
|
55
|
+
**Action space:** The action space is shaped `[x,y,z]`, where x = row position of the cell, y = column position of the cell, and z = value that should go into that cell. When vectorizing, the current version of the environment does not handle action reshaping, so for n environments the action's shape should be: `[[x0...xn],[y0...yn],[z0...zn]]`
|
|
56
|
+
|
|
57
|
+
**Horizon:** This parameter controls the number of steps after which `Truncated` is set to `True` and the environment is reset. Without such a limit, early in training (when the policy is still mostly random and the exploration incentive is high), the policy may corrupt the board and either make it unsolvable or push it into a local minimum. The default value for this parameter is set to 400 for no specific reason and should probably be adjusted during initialization.
|
|
58
|
+
|
|
59
|
+
**Eval mode/Training mode:** By default, `eval_mode` in the `__init__` method is set to `False`. This is used for training, where the environment is reset with one of 50 different boards after each `.reset()` call. During testing, `eval_mode` should be set to `True` in order to evaluate the agent on boards that were never seen during the training phase.
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
### Sudoku-v0 (biased version)
|
|
63
|
+
```python
|
|
64
|
+
import gymnasium as gym
|
|
65
|
+
|
|
66
|
+
env = gym.make("sudoku-v0",mode="biased",render_mode="human",horizon=600,eval_mode=True)
|
|
67
|
+
env.reset()
|
|
68
|
+
|
|
69
|
+
for n in range(int(6e3)):
|
|
70
|
+
env.step(env.action_space.sample())
|
|
71
|
+
env.render()
|
|
72
|
+
```
|
|
73
|
+
**Bias :**
|
|
74
|
+
Among the induced biases that immensely help guide that learning is the fact that the policy cannot modify a cell that was already correctly filled, on top of the existing untouchable cells present in the beginning.
|
|
75
|
+
|
|
76
|
+
**Measuring learning for this version of the environment:** The current structure of the environment allows a completely random policy to solve it (this is true for easy boards in the current version of the environment), so a good way to measure learning might be to use the number of steps over N episodes under a random policy as a `baseline`. This implies that a policy able to consistently solve the test boards in fewer steps over the same N episodes used to run a random policy is, in theory, displaying some sort of learning.
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
### Sudoku-v1
|
|
80
|
+
```python
|
|
81
|
+
import gymnasium as gym
|
|
82
|
+
|
|
83
|
+
env = gym.make("sudoku-v1",mode="easy",render_mode="human",horizon=600,eval_mode=True)
|
|
84
|
+
env.reset()
|
|
85
|
+
|
|
86
|
+
for n in range(int(6e3)):
|
|
87
|
+
env.step(env.action_space.sample())
|
|
88
|
+
env.render()
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
```
|
|
2
|
+
pip install gymnasium_sudoku
|
|
3
|
+
```
|
|
4
|
+
|
|
5
|
+
**Observation space:** The state returned after each `.reset()` or `.step()` is a raw Sudoku board of shape `[9,9]`. This observation can be converted into an image.
|
|
6
|
+
|
|
7
|
+
**Action space:** The action space is shaped `[x,y,z]`, where x = row position of the cell, y = column position of the cell, and z = value that should go into that cell. When vectorizing, the current version of the environment does not handle action reshaping, so for n environments the action's shape should be: `[[x0...xn],[y0...yn],[z0...zn]]`
|
|
8
|
+
|
|
9
|
+
**Horizon:** This parameter controls the number of steps after which `Truncated` is set to `True` and the environment is reset. Without such a limit, early in training (when the policy is still mostly random and the exploration incentive is high), the policy may corrupt the board and either make it unsolvable or push it into a local minimum. The default value for this parameter is set to 400 for no specific reason and should probably be adjusted during initialization.
|
|
10
|
+
|
|
11
|
+
**Eval mode/Training mode:** By default, `eval_mode` in the `__init__` method is set to `False`. This is used for training, where the environment is reset with one of 50 different boards after each `.reset()` call. During testing, `eval_mode` should be set to `True` in order to evaluate the agent on boards that were never seen during the training phase.
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
### Sudoku-v0 (biased version)
|
|
15
|
+
```python
|
|
16
|
+
import gymnasium as gym
|
|
17
|
+
|
|
18
|
+
env = gym.make("sudoku-v0",mode="biased",render_mode="human",horizon=600,eval_mode=True)
|
|
19
|
+
env.reset()
|
|
20
|
+
|
|
21
|
+
for n in range(int(6e3)):
|
|
22
|
+
env.step(env.action_space.sample())
|
|
23
|
+
env.render()
|
|
24
|
+
```
|
|
25
|
+
**Bias :**
|
|
26
|
+
Among the induced biases that immensely help guide that learning is the fact that the policy cannot modify a cell that was already correctly filled, on top of the existing untouchable cells present in the beginning.
|
|
27
|
+
|
|
28
|
+
**Measuring learning for this version of the environment:** The current structure of the environment allows a completely random policy to solve it (this is true for easy boards in the current version of the environment), so a good way to measure learning might be to use the number of steps over N episodes under a random policy as a `baseline`. This implies that a policy able to consistently solve the test boards in fewer steps over the same N episodes used to run a random policy is, in theory, displaying some sort of learning.
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
### Sudoku-v1
|
|
32
|
+
```python
|
|
33
|
+
import gymnasium as gym
|
|
34
|
+
|
|
35
|
+
env = gym.make("sudoku-v1",mode="easy",render_mode="human",horizon=600,eval_mode=True)
|
|
36
|
+
env.reset()
|
|
37
|
+
|
|
38
|
+
for n in range(int(6e3)):
|
|
39
|
+
env.step(env.action_space.sample())
|
|
40
|
+
env.render()
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from gymnasium.envs.registration import register
|
|
2
|
+
from gymnasium_sudoku.environment import Gym_env,V0_MODES,V1_MODES
|
|
3
|
+
|
|
4
|
+
__all__ = ["Gym_env"]
|
|
5
|
+
__version__ = "0.3.4"
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _make_v0(**kwargs):
    """Entry point used by the ``sudoku-v0`` registration.

    Every keyword argument is forwarded unchanged to ``Gym_env``.

    Raises:
        ValueError: if the ``mode`` keyword (``None`` when omitted) is not
            contained in ``V0_MODES``.
    """
    requested_mode = kwargs.get("mode")
    if requested_mode in V0_MODES:
        return Gym_env(**kwargs)
    # Unsupported mode for this environment id: report the accepted ones.
    raise ValueError(f"sudoku-v0 requires mode {[*V0_MODES]}")
|
|
12
|
+
|
|
13
|
+
def _make_v1(**kwargs):
    """Entry point used by the ``sudoku-v1`` registration.

    Every keyword argument is forwarded unchanged to ``Gym_env``.

    Raises:
        ValueError: if the ``mode`` keyword (``None`` when omitted) is not
            contained in ``V1_MODES``.
    """
    requested_mode = kwargs.get("mode")
    if requested_mode in V1_MODES:
        return Gym_env(**kwargs)
    # Unsupported mode for this environment id: report the accepted ones.
    raise ValueError(f"sudoku-v1 availables modes are : {[*V1_MODES]}")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Register both environment ids with gymnasium at import time. Each id is
# routed through its own factory, which validates ``mode`` before building
# ``Gym_env``; the ``kwargs`` dict supplies the mode used when the caller
# of ``gym.make`` does not pass one.
register(id="sudoku-v0", entry_point="gymnasium_sudoku:_make_v0", kwargs={"mode": "biased"})

register(id="sudoku-v1", entry_point="gymnasium_sudoku:_make_v1", kwargs={"mode": "easy"})
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
083695020572104800960782500097401200254300980000928400009870000000240018000000794,483695127572134869961782543897451236254367981316928475149876352735249618628513794
|
|
2
|
+
300000070002306010600890003000209560000570100500610934004000090006080701805001206,398124675742356819651897423183249567469573182527618934214765398936482751875931246
|
|
3
|
+
080350972751200003032008510060904001194827005300000000209583000610470389000609207,486351972751296843932748516568934721194827635327165498279583164615472389843619257
|
|
4
|
+
078021963029500000136790045650139087397284500800070392085010624003942051241056730,578421963429563178136798245652139487397284516814675392985317624763942851241856739
|
|
5
|
+
478369102005781000630240008543096800020803006867004900010658070754902080006010005,478369152295781364631245798543196827129873546867524913312658479754932681986417235
|
|
6
|
+
600730800025481006701000000490057001260000000510364098100540000030029407050876020,649735812325481976781692534498257361263918745517364298172543689836129457954876123
|
|
7
|
+
078143000645970801910000240096000015500019060801405900784501000060830000059700184,278143596645972831913658247496287315527319468831465972784591623162834759359726184
|
|
8
|
+
070029341609000520301570968804760293130890005000000016267050184413200009080107030,578629341649318527321574968854761293136892475792435816267953184413286759985147632
|
|
9
|
+
560000003008690000092045007040002080010409020270000496400000500089510002657008000,564827913738691254192345867946752381813469725275183496421976538389514672657238149
|
|
10
|
+
009001070143907208760824093030000001097410300620008000804065009306000000000080040,289631574143957268765824193438576921597412386621398457814265739356749812972183645
|
|
11
|
+
000063470600040891004080003720614059415030000096270004940000000060000745807001000,589163472632547891174982563728614359415839627396275184943756218261398745857421936
|
|
12
|
+
100400320003500090049317685837601250950000740410705036090000400374060502628150900,165489327783526194249317685837641259956832741412795836591273468374968512628154973
|
|
13
|
+
004001200790400100031905784003610005062307000000002000300050040020094061006030000,654871293798423156231965784473619825562387419189542637317256948825794361946138572
|
|
14
|
+
007000200310705406600040100000430527200607900000800010700004000190500860003080740,947168235318725496652349178869431527231657984475892613786214359194573862523986741
|
|
15
|
+
070586000000700530000140000809257100001000020560831074007000400000408359080029006,973586241614792538258143697849257163731964825562831974397615482126478359485329716
|
|
16
|
+
042639005300071600069208403034000000001000347000500100403015869596080000010062030,142639785385471692769258413634127958251896347978543126423715869596384271817962534
|
|
17
|
+
300905074090008001608004050764000019030009087000100430247590068000780000016000005,321965874495378621678214953764853219132649587589127436247591368953786142816432795
|
|
18
|
+
000000000300750960000800541030070100070490385045103600103245790000960013029001050,452619837318754962697832541936578124271496385845123679163245798584967213729381456
|
|
19
|
+
610025430248001960030000200080604009001300652005010008056193004900060003170080596,619825437248731965537946281382654179491378652765219348856193724924567813173482596
|
|
20
|
+
963710000000040903000000670502000030000050009007308154370200000209801300040030098,963715482785642913421983675512479836834156729697328154378294561259861347146537298
|
|
21
|
+
800000600326070000041090038408960572060701940970000186000400300002030860504010020,859342617326178495741695238418963572265781943973254186687429351192537864534816729
|
|
22
|
+
000300000032900781000410029093800200850060000004179050970500402281700530345620900,719382645432956781568417329193845267857263194624179853976538412281794536345621978
|
|
23
|
+
400051000573062900000000507800009153130600804000003000926800430054090208000024609,498751362573462981612938547867249153139675824245183796926817435754396218381524679
|
|
24
|
+
000039208812006004409812750243050060100000000500328490706180000051794680084205100,675439218812576934439812756243951867198647325567328491726183549351794682984265173
|
|
25
|
+
001065079003000060629030800816047503930010007270000601108306050560491738307028000,481265379753189264629734815816947523935612487274853691148376952562491738397528146
|
|
26
|
+
240109380639400051851300290090003502308000400560940130410785903925630040083200010,247159386639428751851376294194863572378512469562947138416785923925631847783294615
|
|
27
|
+
109805020640109750285040691800000000756010004000200000090081000007306800300402019,179865423643129758285743691821634975756918234934257186492581367517396842368472519
|
|
28
|
+
025046000460037900037810042800100463006480520054703890541000270003624009692570000,125946738468237915937815642879152463316489527254763891541398276783624159692571384
|
|
29
|
+
000895100390410080160000950250980010410263895039050400902037041603109078071648239,724895163395416782168372954256984317417263895839751426982537641643129578571648239
|
|
30
|
+
674301059020079438039002100900200864216045000040967510465798301790104080183000007,674381259521679438839452176957213864216845793348967512465798321792134685183526947
|
|
31
|
+
208035009050400603000179802090000000506814007704093060007001006000700080460000021,278635419951482673643179852192567348536814297784293165827941536315726984469358721
|
|
32
|
+
470100000619800003302907816521793004806420001030610290247300560060009030003064007,478136952619852743352947816521793684896425371734618295247381569165279438983564127
|
|
33
|
+
080000051004089030020406900108002000007810523000070080703021845050008002812300600,986237451574189236321456978138592764697814523245673189763921845459768312812345697
|
|
34
|
+
587600090012049800004007210905400102100900070240370000821004006703208001059160708,587612493312549867694837215975486132138925674246371589821794356763258941459163728
|
|
35
|
+
008009000502700090300040000000600054250007130091800000020004010800902700600000049,148259376562783491379146582783621954256497138491835627927564813834912765615378249
|
|
36
|
+
010509002085070000032410008206037005891054060070000000360705421400021673027040050,614589732985372146732416598246137985891254367573968214368795421459821673127643859
|
|
37
|
+
630502100400000736900070405290800004001000200500200003109005870000000000050029000,637542198425198736918673425296837514381954267574216983149365872762481359853729641
|
|
38
|
+
985000000400008610000025049100300462096500000040010007028450000000690070619802534,985164723472938615361725849157389462296547381843216957728453196534691278619872534
|
|
39
|
+
040017008892603107001098300100009850358024060009180000910000030087500019030040000,543217698892653147761498325174369852358724961629185473916872534487536219235941786
|
|
40
|
+
746000009308400070129600000000000002932000617004010090090080004607094203400200060,746351829358429176129678435871963542932845617564712398293186754617594283485237961
|
|
41
|
+
010307805305009740004080030047010003000003400100058206009031000038000900201806050,912347865385169742764285139847612593526973481193458276459731628638524917271896354
|
|
42
|
+
043927681010030920000408000724090006035000007900000030471000060050600019060070300,543927681817536924692418573724893156135264897986751432471389265358642719269175348
|
|
43
|
+
000701053040005010000800200530108000962504000000209340406000009103000520200000634,628791453349625718751843296534178962962534871817269345476352189193486527285917634
|
|
44
|
+
600930000000005306200008491100003904803600700924087560407500030302890605000070208,641932857798415326235768491176253984853649712924187563487526139312894675569371248
|
|
45
|
+
200006700653709810098100036005008961076093000982401007000002579069080100427915680,214836795653729814798154236345278961176593428982461357831642579569387142427915683
|
|
46
|
+
003005264609802000010000900096000730180600095000050000960030000005080019030209546,873195264659842173214367958596428731182673495347951682961534827425786319738219546
|
|
47
|
+
305060071810407050200150840968510320000086500150000000690040030480305062030021408,345862971819437256276159843968514327723986514154273689692748135481395762537621498
|
|
48
|
+
080021734026479050400083200000147390090208017010390582030910048100734920500062003,985621734326479851471583269852147396693258417714396582237915648168734925549862173
|
|
49
|
+
500000201170000640900010008050070084040009072017348009826090010095100036431850920,584637291172985643963412758659271384348569172217348569826793415795124836431856927
|
|
50
|
+
020000100050000080800435006600927400002000007907040021000298000290061040178000902,326879154754612389819435276681927435542183697937546821465298713293761548178354962
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
085230070140809000070010008709005003000160000502300010001748059650903000890600702,985234176146879325273516948719485263438162597562397814321748659657923481894651732
|
|
2
|
+
600000040000362905305478062013507620700104098082009000860043000001006000040290836,628915743174362985395478162913587624756124398482639517869743251231856479547291836
|
|
3
|
+
000103609010000000250000310001200083004500201002641000006000030320400970905307020,487123659613895742259764318561279483794538261832641597176952834328416975945387126
|
|
4
|
+
560020308000081000108000290486170930315009080020036400750090020090007843040000000,569724318234981765178653294486175932315249687927836451753498126692517843841362579
|
|
5
|
+
249561070073290100816730205000170006100050300090083701030900082050026419004805630,249561873573298164816734295385172946167459328492683751631947582758326419924815637
|
|
6
|
+
002105030070206058003000007040000670601027509720650300090513004007908000304000800,482175936179236458563894217945381672631427589728659341896513724257948163314762895
|
|
7
|
+
204010003000304629963500740092450170840931005500728390430060000009145030681203400,274619583158374629963582741392456178847931265516728394435867912729145836681293457
|
|
8
|
+
023001950000000020890050004900170230040036000002005040258607490704903800019008070,423761958576489321891352764985174236147236589632895147258617493764923815319548672
|
|
9
|
+
879004160000001809601209570900130700150002438003405600000826314000013000008940250,879354162532761849641289573964138725157692438283475691795826314426513987318947256
|
|
10
|
+
012697000000130000603500000000000035006080900097053681860300510070016248001040306,412697853759138462683524197148269735536781924297453681864372519375916248921845376
|
|
11
|
+
076004920195600000004950008009061002000040570000230080950006010001000006620008057,876314925195682734234957168589761342362849571417235689958476213741523896623198457
|
|
12
|
+
500607290420089501309001684034596800982400756651020400897062305243900008000030907,518647293426389571379251684734596812982413756651728439897162345243975168165834927
|
|
13
|
+
908070060700358902030900007080000530600100000009000000860430050100080674495607003,948271365716358942532946817281794536654123798379865421867432159123589674495617283
|
|
14
|
+
642010039300070100719042600090030000004000910036289040005090070061004098000108000,642815739358976124719342685597431862824567913136289547485693271261754398973128456
|
|
15
|
+
005010400601030897090084000078326509053097640900045070809251004034900200502073000,285719463641532897397684125478326519153897642926145378869251734734968251512473986
|
|
16
|
+
010574280028019746400800090090750408701008009004001072000900004349007800085000020,916574283528319746473862591692753418731248659854691372267985134349127865185436927
|
|
17
|
+
012803740394120000750490200541078320206310497073042015030060954020084603405739100,612853749394127568758496231541978326286315497973642815837261954129584673465739182
|
|
18
|
+
000562081000000050602318074001009820390080500008007409920703008000820095080000003,749562381813974652652318974571439826394286517268157439925743168436821795187695243
|
|
19
|
+
082000071710040080049718000000950003063004095007860014000637020078105346230480050,382596471715342689649718532824951763163274895597863214451637928978125346236489157
|
|
20
|
+
097800513032000006608075020700430005340526078256007100000618050005790000870000001,497862513532149786618375429789431265341526978256987134924618357165793842873254691
|
|
21
|
+
082006013000400090090000640825079000000200951014305007709081004003002800008730000,482956713631427598597813642825179436376248951914365287769581324143692875258734169
|
|
22
|
+
000050000900100000852000001001730650760805342300042107070026083600501200008300000,416257839937168425852493761241739658769815342385642197174926583693581274528374916
|
|
23
|
+
025000000703065000000900000200009106000748205009610730051420008800090400004580300,425831679793265841618974523247359186136748295589612734351427968862193457974586312
|
|
24
|
+
701580000352100064008000051106492500079308420000657030600905080427036910895721000,761584392352179864948263751136492578579318426284657139613945287427836915895721643
|
|
25
|
+
604031009071009200859260004000054600406903005010682000207006850068010702045008006,624831579371549268859267314792154683486973125513682497237496851968315742145728936
|
|
26
|
+
104052967690804052200600814976143080000260030325900041530080006069531028801726000,184352967697814352253697814976143285418265739325978641532489176769531428841726593
|
|
27
|
+
000100070090060003102050006509014300004530100000209000058703061023001940000900730,635198472897462513142357896569814327274536189381279654958743261723681945416925738
|
|
28
|
+
040902150000145060500376092000000601170000045604521730027003519013054006005200374,746982153239145867581376492358497621172638945694521738427863519913754286865219374
|
|
29
|
+
140003000000000384008460070700030006200690100000080903067049500030000409004350000,142873695675912384398465271759231846283694157416587923867149532531728469924356718
|
|
30
|
+
700030105030000002050100340523800074091000000867025031080340009370019000910060053,748932165136754892259186347523891674491673528867425931685347219372519486914268753
|
|
31
|
+
945700032000230000000000108000100050057800903602354000060520010070000564000007800,945781632781236495326495178834179256157862943692354781469528317278913564513647829
|
|
32
|
+
000050042000346090095000010019003000000482900000000007800000306657200109000801504,763159842128346795495728613219673458576482931384915267841597326657234189932861574
|
|
33
|
+
000000500630042001250600004000251807710006003000000605000920700300000109008137206,984713562637542981251689374463251897715896423829374615146925738372468159598137246
|
|
34
|
+
084002000069081000250600000700096008900510600000007450472960030590008062816003000,184752396369481527257639184745396218928514673631827459472965831593148762816273945
|
|
35
|
+
003276040650004371784053690030027000000681030061309200920030000007460500048910000,193276845652894371784153692439527186275681934861349257926735418317468529548912763
|
|
36
|
+
000090007008205001604000932000021093000004006700053008480009005100080000006140200,213496857978235641654718932845621793329874516761953428482369175197582364536147289
|
|
37
|
+
020190003031482000609070200380500742007060300050004080208941537000800169010007020,825196473731482695649375218386519742497268351152734986268941537574823169913657824
|
|
38
|
+
260094081078012039039708400023485000000007942600020850580170004006243010010050206,265394781478512639139768425923485167851637942647921853582176394796243518314859276
|
|
39
|
+
009532180300074000052801300807690403096200000240007590070000002100009630938000051,769532184381974265452861379817695423596243817243187596675318942124759638938426751
|
|
40
|
+
000073006090165800365000790002009403639720005540300009750208014900040028200091300,821973546497165832365482791172859463639724185548316279756238914913547628284691357
|
|
41
|
+
109782003087354600040091000490200060832069000071548320304006807900073016720015934,169782543287354691543691782495237168832169475671548329314926857958473216726815934
|
|
42
|
+
060200000310060578740000602807002105236800900154003020005170400000000709473056000,569287314312469578748531692897642135236815947154793826925178463681324759473956281
|
|
43
|
+
900008016000930200021400390690103402053800609000000100060509700200047050070300900,934278516586931247721465398697153482153824679842796135368519724219647853475382961
|
|
44
|
+
081506000500294000000810050328740010007001048000089732000000609036907020102460375,281576493573294861649813257328745916967321548415689732754132689836957124192468375
|
|
45
|
+
054068000800920050000450000005010900700300000480000006060002014008030007003107600,354768291876921453291453768635814972712396845489275136967582314148639527523147689
|
|
46
|
+
710600000000012090008007631000700204900103000604080000007091002500028900400500000,719634825356812497248957631183769254925143768674285319867391542531428976492576183
|
|
47
|
+
020507000045269001790841600000300816000194003012786005107000000200075068408610007,621537489845269371793841652974352816586194723312786945167928534239475168458613297
|
|
48
|
+
000000003000200001000430020204008136510300008003016042905000200001900305030045070,427861953369257481158439627274598136516324798893716542985673214741982365632145879
|
|
49
|
+
570310892148926357920058160809002036000160000060089501000800003201690008004037005,576314892148926357923758164819572436457163289362489571795841623231695748684237915
|
|
50
|
+
005001000807000914040080630204007083058123460360050700000016290000000146090074300,635941872827365914941782635214697583758123469369458721483516297572839146196274358
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
010082690709100000040060000000010000490730050003840026500000102070304900026570380,315482697769153248248967513687215439492736851153849726534698172871324965926571384
|
|
2
|
+
900000500000071002764800000503207010000050804471009003020030091008090320000400760,912364587835971642764825139583247916296153874471689253627538491148796325359412768
|
|
3
|
+
091507000040060003000028097004000029720000805003190060000053400070600030306710082,291537648847961253635428197164875329729346815583192764918253476472689531356714982
|
|
4
|
+
000859100006070090500000040010006072403001000069230080007020503002004018301067900,274859136136472895598613247815946372423781659769235481947128563652394718381567924
|
|
5
|
+
090040702400980001058060300030005020206000000570100804000408507980070010041309060,693541782427983651158267349834795126216834975579126834362418597985672413741359268
|
|
6
|
+
051003029060800400003100700080026000010940038902000060000680007700050090040031052,851473629267895413493162785384526971516947238972318564125689347738254196649731852
|
|
7
|
+
040902006001470308000580079006000050023058040780103000050320067000009200009760100,847932516591476328362581479416297853923658741785143692158324967674819235239765184
|
|
8
|
+
701004003040009060000508027003800109025690004008100070800023900004050030060700045,781264593542379861936518427673842159125697384498135672857423916214956738369781245
|
|
9
|
+
356019020204000000000806005010970500000680090708030410503240009007003600900005280,356419827284357961179826345612974538435681792798532416563248179827193654941765283
|
|
10
|
+
000010006030820009504300070000050400600009130002000087100580060005043020097102800,289715346736824519514396278371658492648279135952431687123587964865943721497162853
|
|
11
|
+
000006120800409000560000080000205000302014005090830060140052003679001800000900704,934586127821479536567123489716295348382614975495837261148752693679341852253968714
|
|
12
|
+
070080000150004020090005637067003009910500403804910000000607800620030501005001040,276389154153764928498125637567843219912576483834912765341657892629438571785291346
|
|
13
|
+
904002700006100000580069000050030280008670001091004005405200016030400090000350028,914582763726143859583769142657931284248675931391824675475298316832416597169357428
|
|
14
|
+
100030082090008056000050340410700000283094700050006008072900004960071030800020000,145637982397248156628159347416782593283594761759316428572963814964871235831425679
|
|
15
|
+
700069050849150300003740120005007600078000903100600002052014700000000098400030000,721369854849152367563748129235497681678521943194683572952814736316275498487936215
|
|
16
|
+
809400006210006030000710000102050007070060309060380042590003024004120500600800900,859432716217596438346718295132954687478261359965387142591673824784129563623845971
|
|
17
|
+
900200040001006035350900008080005009070060010400390006130870024004002750006041800,968253147241786935357914268683125479579468312412397586135879624894632751726541893
|
|
18
|
+
694002010003640820007500000100005060930020700020800100000000439050071002800306000,694782315513649827287513946178935264936124758425867193761258439359471682842396571
|
|
19
|
+
007800003039600542040509700706490000084025030000000091063010080000004350002000006,657842913839671542241539768716493825984125637325786491563917284178264359492358176
|
|
20
|
+
800104360000908705040063091000009678497200050500000000056030004002017000010000080,879154362631928745245763891123549678497286153568371429756832914982417536314695287
|
|
21
|
+
090005204300061700840903065009000048050006300180007090430200010072000800500070006,691785234325461789847923165769132548254896371183547692436258917972614853518379426
|
|
22
|
+
037400960200005000800060003400020050001703026080109304340902008096080000000600405,537418962264395781819267543473826159951743826682159374345972618196584237728631495
|
|
23
|
+
050010009000809300062074008700002153090560840200083060004008000070640035001020700,358216479417859326962374518786492153193567842245183967524738691879641235631925784
|
|
24
|
+
060000900003100254004020107000400006107308000029050038085270600700039005096800002,261547983973186254854923167538492716147368529629751438385274691712639845496815372
|
|
25
|
+
100300059020670080894000030500700390400910000000082701006000410001005006003020508,167348259325679184894251637518764392472913865639582741256897413781435926943126578
|
|
26
|
+
000000030068470000004139007206000050100804000000657000010000529082560074070300180,791286435368475912524139867246913758157824693839657241613748529982561374475392186
|
|
27
|
+
408060700009030006150002008000000800007300049502409010200901307043080021070500060,438165792729834156156792438394216875617358249582479613265941387943687521871523964
|
|
28
|
+
003500007091000003025098600000470980007080026350060070062009500800200139000041000,683514297491627853725398614216473985947185326358962471162739548874256139539841762
|
|
29
|
+
005048270002030609010070043000019007004000800960800050701000026680290010050007430,395648271472531689816972543528419367134756892967823154741385926683294715259167438
|
|
30
|
+
900007005140000080002900070004780000230500010805001046790806120000403069080100300,968217435147635982352948671614782593239564718875391246793856124521473869486129357
|
|
31
|
+
090070300800960010600014507009005706184020050000300100002036000570280400040009020,491572368857963214623814597239145786184627953765398142912436875576281439348759621
|
|
32
|
+
408090003000000600090251080010004000649030100027060308060720000001300590700008460,458697213172483659396251784813574926649832175527169348964725831281346597735918462
|
|
33
|
+
080001007107054208094000030048020070605730810003500600360000000000900500000470961,286391457137654298594287136948126375625739814713548629369815742471962583852473961
|
|
34
|
+
502090080030702050600001004705000620006008149040030005201900007004610300000007008,512394786438762951697851234785149623326578149149236875251983467874615392963427518
|
|
35
|
+
010000900306500008004080120400300060501870004602005000190403580000092030000000700,718234956326519478954687123479321865531876294682945317197463582845792631263158749
|
|
36
|
+
400300005000007301008250907210780506509003000000069103090018700000400000182000600,471396285925847361638251947213784596569123478847569123394618752756432819182975634
|
|
37
|
+
820060900945000000001000275510904000060001002304007000000830701008200090003015640,827563914945172836631498275512984367769351482384627159456839721178246593293715648
|
|
38
|
+
035107000600000020040026801070230010803701540500908600004000089007503000902004700,235187496618459327749326851476235918893761542521948673354672189187593264962814735
|
|
39
|
+
003054078400209010002070009057630000090000100000700023080015600004003080006800001,963154278475289316812376459257631894398542167641798523789415632124963785536827941
|
|
40
|
+
301042906680000500090001270000109020150600034200350007004000708006038010007095000,371542986682973541495861273743189625159627834268354197934216758526738419817495362
|
|
41
|
+
000085070064070800000091236001900007000000080738200094006704051040800302509063000,213685479964372815875491236451938627692547183738216594386724951147859362529163748
|
|
42
|
+
003590461600007900800100005481020090200080006050300020049700008300400102060053000,723598461615247983894136275481625397237984516956371824549712638378469152162853749
|
|
43
|
+
004500000670010000120700094000080700750200609098000520086051002000094350003602910,834529167679418235125736894362985741751243689498167523986351472217894356543672918
|
|
44
|
+
003067105100840600090002040205000907700000380048001000906200050000100798001450006,483967125152843679697512843265384917719625384348791562936278451524136798871459236
|
|
45
|
+
104702008300560009062380500040270000201600735000009080503000092018000403000000670,154792368387564129962381547845273916291648735736159284573416892618927453429835671
|
|
46
|
+
079000400602907005100008060751030006008004590000062000900243000300005720006000804,879356412642917385135428967751839246268174593493562178987243651314685729526791834
|
|
47
|
+
320600900400030805007900010000040106830201000005079400006800090009000358500107020,328615947491732865657984213972548136834261579165379482246853791719426358583197624
|
|
48
|
+
030000400790000260000015008002750040400620980673008000820040095000060000507100603,138276459795834261264915738982751346451623987673498512826347195319562874547189623
|
|
49
|
+
000400000798000004003902106000028603021060008050040920480050009009076050100300700,612437895798615234543982176974528613321769548856143927487251369239876451165394782
|
|
50
|
+
078090056100600040040013027007000690001408200250000001090000500480706003500032008,378294156125687349649513827837125694961478235254369781793841562482756913516932478
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
500007109130096020004030050000008942001003600020750080903040060007001008800200503,562487139138596724794132856375618942481923675629754381913845267257361498846279513
|
|
2
|
+
060010080830005207000400053027050906900200041000340000340600002005807090079000068,562713489834965217791482653427158936953276841186349725348691572615827394279534168
|
|
3
|
+
050720380000603900100050000040008061081006400900500270310890750006040002290000003,659721384427683915138459627742938561581276439963514278314892756876345192295167843
|
|
4
|
+
600700900002064000080009053040600570037208400005000201400070300098510006070043010,613785924952364187784129653249631578137258469865497231421876395398512746576943812
|
|
5
|
+
208000300064308007000071006090063240003040570007105090005024109040009030000600800,278496315164358927359271486591763248683942571427185693735824169846519732912637854
|
|
6
|
+
934510000020000590006008070790006120000070080605100400058002003300040700002950016,934517268827634591516298374793486125241375689685129437158762943369841752472953816
|
|
7
|
+
000000000709420100645800007108009030060140902000030580310006008007010290000205700,231567849789423165645891327128759634563148972974632581312976458857314296496285713
|
|
8
|
+
007000000000805200321000600600070300930120078000509004700301020250600019068700050,587263941496815237321947685615478392934126578872539164749351826253684719168792453
|
|
9
|
+
000003200000500980008046001760010000030205007159600403010002570400800090200730064,697183245341527986528946731762314859834295617159678423913462578476851392285739164
|
|
10
|
+
016700200020000057400309100068003000501240090000008735004000600090150080300970010,916785243823461957475329168768593421531247896249618735154832679697154382382976514
|
|
11
|
+
006009805408170902070200040900020008010700060063050007004538600002400059031000000,126349875458176932379285146947623518815794263263851497794538621682417359531962784
|
|
12
|
+
005400012000009300019800004000040028100027590096001070400760000080130650037005800,865473912724619385319852764573946128148327596296581473451768239982134657637295841
|
|
13
|
+
009300050045010062000020008001500070400060890807203600603048000020907100700000340,289376451345819762176425938961584273432761895857293614693148527524937186718652349
|
|
14
|
+
000307090009050081600000205900008043850610700020049100710000456002064030000080900,185327694279456381643891275961278543854613729327549168718932456592164837436785912
|
|
15
|
+
005004002100000400040768000020980051300405706807030200070510008080200937496000000,735194862168352479942768513624987351319425786857631294273519648581246937496873125
|
|
16
|
+
080040320269700504000001000540020800800175030000800607005080700070913000402600009,781549326269738514354261978547326891896175432123894657915482763678913245432657189
|
|
17
|
+
000620578000000400100870092003008750607410000902060130009100023030005600840700000,394621578728359461165874392413298756657413289982567134579146823231985647846732915
|
|
18
|
+
001806035540107900000000001006700204090005000173900000200090060780402300300501080,921846735548137926637259841856713294492685173173924658214398567785462319369571482
|
|
19
|
+
094000020000001008760850430043008010009006500002179006000207801010300694050600000,894763125325941768761852439643528917179436582582179346436297851217385694958614273
|
|
20
|
+
300800067001020400040003059003000906029010540080704000098602730600900020005078001,352849167961527483847163259413285976729316548586794312198652734674931825235478691
|
|
21
|
+
410007960000010000208005070500900000070060038006351400192000004300002650040780001,413827965759613842268495173534978216971264538826351497192536784387142659645789321
|
|
22
|
+
078000003020000106500930200000082009396510004000400000104060058200305460700800930,678124593923758146541936287457682319396517824812493675134269758289375461765841932
|
|
23
|
+
080495017100003009050208040035064008200039406007000000000080090074002300006107050,683495217142673589759218643935764128218539476467821935521386794874952361396147852
|
|
24
|
+
000000570708002000240503010000209000130048090800030604000300006402907305500014700,391486572758192463246573918674259831135648297829731654987325146412967385563814729
|
|
25
|
+
075004000108320000600050901004019072000000506300705180001008230907002000800030690,275194863198326754643857921584619372719283546326745189461978235937562418852431697
|
|
26
|
+
000700512040060903000928000200005000100080276386010400030000090408000305051046700,869734512742561983513928647274695831195483276386217459637852194428179365951346728
|
|
27
|
+
063000102400209700005700000902075800840000060001008203506091080070802035000400001,763584192418239756295716348932675814847123569651948273526391487174862935389457621
|
|
28
|
+
003604108800003206400020070720000004390001000005400903000106090682075400001830500,273694158859713246416528379728359614394261785165487923537146892682975431941832567
|
|
29
|
+
010680903400052000008709520050000730043070051706000000000960000502801000007003846,215684973479352168638719524851496732943278651726135489384967215562841397197523846
|
|
30
|
+
070000000004038000200901075008305609006000200520017304090050703105400806400200091,671542938954738162283961475748325619316894257529617384892156743135479826467283591
|
|
31
|
+
006010009009204070075000340014097800060300057020000006050060920800540060000809000,346715289189234675275986341514697832968321457723458196457163928891542763632879514
|
|
32
|
+
900167000234009000000400850002000040100086003069205007720000010005023600800510300,958167234234859761671432859582371946147986523369245187723698415415723698896514372
|
|
33
|
+
030401020084053106015620000000085090021090403906002007007009080002060035040030001,639471528284953176715628349473185692821796453956342817367519284192864735548237961
|
|
34
|
+
020600003907800051000090480073049000196730008005100000060070200500001034014053009,428615973937824651651397482873549126196732548245186397369478215582961734714253869
|
|
35
|
+
034006500000040097120080000009600000310500740700100082000002001040709053875400600,934276518568341297127985364489627135312598746756134982693852471241769853875413629
|
|
36
|
+
005430070800205000020000609000090160409006300730810200601000500980003040500780020,165439872897265413324178659258397164419526387736814295671942538982653741543781926
|
|
37
|
+
000410090300000204160950000702000501590800400000600007030090050620108700040007832,278413695359786214164952378782349561596871423413625987837294156625138749941567832
|
|
38
|
+
690000010000004050080107402007810000035092000806400007140005030702300068000080504,694528713271934856583167492927816345435792681816453927148675239752349168369281574
|
|
39
|
+
703080600060003500200009043040710059001900826029500000008050031000001200050046007,713485692964123578285679143846712359571934826329568714698257431437891265152346987
|
|
40
|
+
257000608000000702360090000035401070009008501070020000090010000640085300001072906,257143698914856732368297154835461279429738561176529483793614825642985317581372946
|
|
41
|
+
000407210002900605000010089800305900470620000300094107780560001010002800056008000,698457213142983675537216489861375942479621538325894167784569321913742856256138794
|
|
42
|
+
010000075000350604000070390400201000300049002809700060200010800086003950794060003,613894275978352614542176398467281539351649782829735461235917846186423957794568123
|
|
43
|
+
006000004010030002007004930000009473204600081750108060109005008040300095300201000,936752814418936752527814936861529473294673581753148269179465328642387195385291647
|
|
44
|
+
080005009902070000007601203070026000304700008600103040053000860490000100001040057,186235479932874516547691283875426391314759628629183745753912864498567132261348957
|
|
45
|
+
060050070020740008005201030081063020002007800940000503300000094074002010050610007,468359172123746958795281436581463729632597841947128563316875294874932615259614387
|
|
46
|
+
023057908064800000900030167002000300450000009008471506000063080000500001510090402,123657948764819253985234167672985314451326879398471526249163785837542691516798432
|
|
47
|
+
653007200000000086000100340700600000286003000401902507070008001140309600000020058,653847219914235786827196345795614823286573194431982567572468931148359672369721458
|
|
48
|
+
006090501020035870008400000040703000800010030079650080100009324093021005600007900,736298541924135876518476293241783659865912437379654182187569324493821765652347918
|
|
49
|
+
506097030020001006004002080900000350073000104000620000805070420010840079600009005,586497231729381546134562987968714352273958164451623798895176423312845679647239815
|
|
50
|
+
000907006004250080075063014067500090912300400000002063000010835041008200009700000,183947526694251387275863914367584192912376458458192763726419835541638279839725641
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
import csv,random,sys
|
|
2
|
+
import numpy as np
|
|
3
|
+
import gymnasium as gym
|
|
4
|
+
import gymnasium.spaces as spaces
|
|
5
|
+
|
|
6
|
+
from PySide6.QtWidgets import QApplication
|
|
7
|
+
from gymnasium_sudoku.rendering import Gui
|
|
8
|
+
from copy import deepcopy
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
def _get_region(x,y,board,n = 3):
    """Return the peers of cell ``(x, y)`` as three separate arrays.

    Args:
        x: Row index of the target cell.
        y: Column index of the target cell.
        board: 2-D NumPy array holding the board; it is not modified.
        n: Side length of one block (3 for a standard Sudoku).

    Returns:
        ``(row, column, block)`` where each array holds the values of that
        unit with the target cell's own entry removed.
    """
    grid = board.copy()

    # Row x and column y, each without the entry that belongs to the target.
    row_peers = np.delete(grid[x], y)
    col_peers = np.delete(grid[:, y], x)

    # Top-left corner of the n x n block that contains the target cell.
    top, left = n * (x // n), n * (y // n)
    block_cells = grid[top:top + n, left:left + n].ravel()

    # Position of the target inside the flattened (row-major) block.
    offset = (x - top) * n + (y - left)
    block_peers = np.delete(block_cells, offset)

    return row_peers, col_peers, block_peers
|
|
28
|
+
|
|
29
|
+
def _is_row_complete(board,x):
    """Tell whether row ``x`` of ``board`` has no empty (zero) cell left."""
    filled = board[x] != 0
    return filled.all()
|
|
32
|
+
|
|
33
|
+
def _is_col_complete(board,y):
    """Tell whether column ``y`` of ``board`` has no empty (zero) cell left."""
    filled = board[:, y] != 0
    return filled.all()
|
|
36
|
+
|
|
37
|
+
def _is_region_complete(board,x,y,n=3):
    """Tell whether the ``n`` x ``n`` block containing ``(x, y)`` has no zero cell."""
    # Snap the coordinates to the top-left corner of their block.
    top = n * (x // n)
    left = n * (y // n)
    return (board[top:top + n, left:left + n] != 0).all()
|
|
41
|
+
|
|
42
|
+
def _sudoku_board(csv_path,line_pick):
    """Load one puzzle/solution pair from a CSV file of boards.

    Every CSV row must hold exactly two fields, the puzzle and its solution,
    each written as 81 digits in row-major order (0 marks an empty cell).

    Args:
        csv_path: Path of the CSV file to read.
        line_pick: Zero-based index of the row to load.

    Returns:
        ``(board, solution)`` as two 9x9 ``int32`` arrays.

    Raises:
        IndexError: If the file has no row with index ``line_pick``.
    """
    # newline="" is what the csv module asks for when handing it a file object.
    with open(csv_path, newline="") as file:
        for n, row in enumerate(csv.reader(file)):
            if n == line_pick:
                chosen_line = row
                break  # no need to scan the rest of the file
        else:
            # Previously this fell through and failed on an unbound local.
            raise IndexError(
                f"{csv_path} has no row with index {line_pick}"
            )

    board, solution = chosen_line
    board, solution = (
        np.fromiter(map(int, digits), dtype=np.int32).reshape(9, 9)
        for digits in (board, solution)
    )
    return board, solution
|
|
53
|
+
|
|
54
|
+
def _gen_board(env_mode,eval_mode):
    """Pick a random board for the given mode from the bundled datasets.

    Args:
        env_mode: ``"biased"`` (datasets/v0_biased) or ``"easy"``
            (datasets/v1_easy).
        eval_mode: When truthy the board comes from ``test_boards.csv``,
            otherwise from ``train_boards.csv``.

    Returns:
        ``(state, solution)`` as returned by ``_sudoku_board``.

    Raises:
        ValueError: If ``env_mode`` is not one of the supported modes.
    """
    dataset_dirs = {"biased": "v0_biased", "easy": "v1_easy"}
    if env_mode not in dataset_dirs:
        # Previously an unknown mode surfaced later as an unbound-local error.
        raise ValueError(
            f"unknown env mode {env_mode!r}; expected one of {sorted(dataset_dirs)}"
        )

    dataset_dir = Path(__file__).parent / "datasets" / dataset_dirs[env_mode]

    # Row indices 0..49 are drawn, i.e. each CSV is expected to hold 50 boards.
    line_pick = random.randint(0,49)

    file_name = "test_boards.csv" if eval_mode else "train_boards.csv"
    # _sudoku_board builds fresh arrays on every call, so no copy is needed.
    state,solution = _sudoku_board(dataset_dir / file_name,line_pick)
    return state,solution
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# Mode names grouped by dataset generation: "biased" boards are read from
# datasets/v0_biased and "easy" boards from datasets/v1_easy.
V0_MODES = ["biased"]
V1_MODES = ["easy"]
|
|
75
|
+
|
|
76
|
+
class Gym_env(gym.Env):
    """Sudoku environment exposing the Gymnasium ``reset``/``step``/``render`` API.

    An action is a ``(row, column, value)`` triple and the observation is the
    current 9x9 board as ``int32``, where 0 marks an empty cell.  ``mode``
    selects the reward scheme:

    * ``"biased"`` -- a value is written only when it equals the stored
      solution for that cell.
    * ``"easy"`` -- any value may be written into a cell that was empty when
      the board was loaded; the reward depends on conflicts with its peers.
    """

    metadata = {"render_modes":["human"],"render_fps":60,"rendering_attention":False}

    def __init__(self,
                 mode,
                 render_mode=None,
                 horizon=400,
                 eval_mode:bool=False,
                 rendering_attention=False
                 ):
        """Load a first board and, for ``render_mode="human"``, create the GUI.

        Args:
            mode: Reward/dataset mode, ``"biased"`` or ``"easy"``.
            render_mode: ``"human"`` to open the Qt window, ``None`` otherwise.
            horizon: Number of steps after which an episode is truncated.
            eval_mode: Draw boards from the test split instead of the train split.
            rendering_attention: Forwarded to ``Gui`` to add the attention grid.
        """
        super().__init__()

        self.env_mode = mode
        self.render_mode = render_mode
        self.horizon = horizon
        self.eval_mode = eval_mode
        self.rendering_attention = rendering_attention
        self.env_steps = 0
        self.action = None
        self.true_action = False

        # (row, column, value): rows and columns start at 0, values start at 1.
        self.action_space = spaces.Tuple(
            (
                spaces.Discrete(9,None,0),
                spaces.Discrete(9,None,0),
                spaces.Discrete(9,None,1)
            )
        )
        self.observation_space = spaces.Box(0,9,(9,9),dtype=np.int32)

        self.state,self.solution = _gen_board(self.env_mode,self.eval_mode)
        # True for every cell that was empty when the board was loaded.
        self.mask = (self.state==0)
        # Number of empty cells at load time; not read elsewhere in this class.
        self.conflicts = (self.state==0).sum()

        # init gui
        self.app = None
        if self.render_mode=="human":
            # Reuse a running QApplication if the host process already has one.
            self.app = QApplication.instance()
            if self.app is None:
                self.app = QApplication([])

            self.gui = Gui(deepcopy(self.state),self.env_mode,self.rendering_attention)

    def reset(self,seed=None,options=None):
        """Start a new episode on a freshly drawn board.

        Args:
            seed: Optional seed; when given it also seeds the global ``random``
                and ``numpy.random`` generators used to pick the board.
            options: Accepted for API compatibility; not used.

        Returns:
            ``(observation, info)`` with the new board and an empty dict.
        """
        super().reset(seed=seed)
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)

        self.state,self.solution = _gen_board(self.env_mode,self.eval_mode)
        self.env_steps = 0
        self.mask = (self.state==0)
        self.conflicts = (self.state==0).sum()

        # Forget the previous episode's last move; otherwise a render() issued
        # before the next step() would repaint it onto the new board.
        self.action = None
        self.true_action = False

        if self.render_mode =="human":
            self.gui.reset(deepcopy(self.state))
        return np.array(self.state,dtype=np.int32),{}

    def _get_reward(self,env_mode,action,state):
        """Apply ``action`` to ``state`` and score it.

        Args:
            env_mode: ``"biased"`` or ``"easy"``.
            action: ``(row, column, value)``.
            state: Board to update; it is modified in place.

        Returns:
            ``(reward, true_action, state)`` where ``true_action`` tells
            whether the board was changed.

        Raises:
            ValueError: If ``env_mode`` is not a supported mode.
        """
        x,y,value = action

        if env_mode=="biased":
            if not self.mask[x,y]:
                # Cell is a given clue or was already solved.
                reward = -0.1
                true_action = False
            elif value == self.solution[x,y]:
                state[x,y] = value
                self.mask[x,y] = False
                assert action[-1] in range(1,10)
                true_action = True
                reward = 0.2

                # Bonus for every unit that this move completes.
                if _is_row_complete(state,x):
                    reward+= 0.2*9
                if _is_col_complete(state,y):
                    reward+= 0.2*9
                if _is_region_complete(state,x,y):
                    reward+= 0.2*9
            else:
                reward = -0.1
                true_action = False
            return reward,true_action,state

        if env_mode=="easy":
            if not self.mask[x][y]:
                # Given clues can never be overwritten.
                return -0.1,False,state

            state[x][y] = value
            xlist,ylist,block = _get_region(x,y,state)

            # Peers of the target cell with the empty cells dropped.
            row = xlist[xlist!=0]
            col = ylist[ylist!=0]
            block = block[block!=0]

            # Full reward when the value clashes with none of its peers.
            if not value in np.concatenate((xlist,ylist,block)):
                return 0.2*3,True,state

            # Otherwise 0.2 for each unit whose other cells hold no duplicates.
            reward = 0
            if len(row) == len(np.unique(row)):
                reward += 0.2
            if len(col) == len(np.unique(col)):
                reward += 0.2
            if len(block) == len(np.unique(block)):
                reward += 0.2
            return reward,True,state

        raise ValueError(f"unknown env mode {env_mode!r}")

    def step(self,action):
        """Play one ``(row, column, value)`` action.

        Returns:
            ``(observation, reward, terminated, truncated, info)``; the reward
            is rounded to one decimal and gets a ``0.2*81`` bonus when the
            board equals the solution.
        """
        # Each coordinate is checked on its own; `(a and b) in range(9)` would
        # only ever test one of the two.
        assert action[0] in range(9) and action[1] in range(9), (
            "row and column must both be in 0..8"
        )
        self.env_steps+=1
        self.action = action

        reward,true_action,obs = self._get_reward(self.env_mode,self.action,self.state)
        self.true_action = true_action
        self.state = obs

        truncated = (self.env_steps>=self.horizon)
        done = np.array_equal(self.state,self.solution)
        if done:
            reward+=0.2*81
        info = {}
        return np.array(self.state,dtype=np.int32),round(reward,1),done,truncated,info

    def render(self):
        """Show the window and paint the latest action; no-op without a GUI."""
        if self.render_mode!="human":
            # No QApplication/Gui was created in __init__.
            return
        self.gui.show()
        self.gui.updated(self.action,self.true_action)
        self.app.processEvents()
|
|
210
|
+
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
# tb = test board
|
|
4
|
+
# ts = test solution
|
|
5
|
+
|
|
6
|
+
# Fixture 1: puzzle (0 = empty cell) and its solution.
tb_1 = np.array([
    [8, 0, 0, 5, 3, 1, 0, 0, 0],
    [0, 0, 0, 0, 4, 0, 3, 0, 1],
    [1, 0, 0, 8, 0, 0, 0, 0, 0],
    [0, 0, 4, 0, 0, 5, 6, 0, 0],
    [0, 0, 3, 9, 0, 2, 1, 4, 0],
    [6, 1, 5, 0, 7, 0, 0, 9, 8],
    [0, 2, 0, 0, 9, 6, 0, 1, 0],
    [0, 5, 7, 2, 0, 8, 0, 0, 6],
    [9, 6, 1, 7, 5, 3, 0, 2, 4]],dtype=np.int32)

ts_1 = np.array([
    [8, 4, 9, 5, 3, 1, 7, 6, 2],
    [5, 7, 2, 6, 4, 9, 3, 8, 1],
    [1, 3, 6, 8, 2, 7, 4, 5, 9],
    [2, 9, 4, 1, 8, 5, 6, 7, 3],
    [7, 8, 3, 9, 6, 2, 1, 4, 5],
    [6, 1, 5, 3, 7, 4, 2, 9, 8],
    [3, 2, 8, 4, 9, 6, 5, 1, 7],
    [4, 5, 7, 2, 1, 8, 9, 3, 6],
    [9, 6, 1, 7, 5, 3, 8, 2, 4]],dtype=np.int32)


# Fixture 2: puzzle (0 = empty cell) and its solution.
# NOTE(review): the third row used to carry its 1 in column 6, which
# contradicted ts_2 (8 there) and duplicated the 1 of the next row in the same
# column. It is now in column 7, where ts_2 has its 1 -- confirm that no test
# relied on the invalid board.
tb_2 = np.array([
    [5, 9, 1, 8, 7, 0, 0, 2, 4],
    [8, 4, 0, 2, 3, 1, 0, 5, 6],
    [0, 0, 0, 0, 0, 9, 0, 1, 7],
    [0, 6, 0, 0, 0, 0, 1, 0, 0],
    [9, 0, 8, 3, 0, 4, 0, 0, 0],
    [0, 0, 0, 6, 8, 0, 0, 4, 0],
    [0, 0, 0, 0, 0, 3, 0, 8, 0],
    [1, 0, 6, 0, 2, 0, 4, 0, 0],
    [7, 8, 0, 0, 0, 0, 2, 9, 0]],dtype=np.int32)

ts_2 = np.array([
    [5, 9, 1, 8, 7, 6, 3, 2, 4],
    [8, 4, 7, 2, 3, 1, 9, 5, 6],
    [6, 2, 3, 4, 5, 9, 8, 1, 7],
    [4, 6, 2, 5, 9, 7, 1, 3, 8],
    [9, 7, 8, 3, 1, 4, 5, 6, 2],
    [3, 1, 5, 6, 8, 2, 7, 4, 9],
    [2, 5, 9, 7, 4, 3, 6, 8, 1],
    [1, 3, 6, 9, 2, 8, 4, 7, 5],
    [7, 8, 4, 1, 6, 5, 2, 9, 3]],dtype=np.int32)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
from PySide6 import QtCore,QtGui
|
|
2
|
+
from PySide6.QtWidgets import QWidget,QGridLayout,QLineEdit,QHBoxLayout
|
|
3
|
+
from PySide6.QtCore import QTimer
|
|
4
|
+
from PySide6.QtGui import QIcon
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Gui(QWidget):
|
|
8
|
+
def __init__(self,board,mode,rendering_attention=False):
|
|
9
|
+
super().__init__()
|
|
10
|
+
self.setWindowTitle("Sudoku")
|
|
11
|
+
self.setMaximumSize(40,40)
|
|
12
|
+
self.setWindowIcon(QIcon("game.png"))
|
|
13
|
+
self.game = board
|
|
14
|
+
self.mode = mode
|
|
15
|
+
|
|
16
|
+
self.size = 9
|
|
17
|
+
self.rendering_attention = rendering_attention
|
|
18
|
+
|
|
19
|
+
self.main_layout = QHBoxLayout()
|
|
20
|
+
|
|
21
|
+
# Sudoku grid
|
|
22
|
+
self.grid = QGridLayout()
|
|
23
|
+
self.sudoku_widget = QWidget()
|
|
24
|
+
self.sudoku_widget.setLayout(self.grid)
|
|
25
|
+
self.main_layout.addWidget(self.sudoku_widget)
|
|
26
|
+
self.grid.setVerticalSpacing(0)
|
|
27
|
+
self.grid.setHorizontalSpacing(0)
|
|
28
|
+
self.grid.setContentsMargins(0,0,0,0)
|
|
29
|
+
|
|
30
|
+
self.cells = [[QLineEdit(self) for _ in range(self.size)] for _ in range (self.size)]
|
|
31
|
+
for line in self.game :
|
|
32
|
+
for x in range(self.size):
|
|
33
|
+
for y in range(self.size):
|
|
34
|
+
self.cells[x][y].setFixedSize(40,40)
|
|
35
|
+
self.cells[x][y].setReadOnly(True)
|
|
36
|
+
number = str(board[x][y])
|
|
37
|
+
self.cells[x][y].setText(number)
|
|
38
|
+
self.bl = (3 if (y%3 == 0 and y!= 0) else 0.5) # what is bl,bt ?
|
|
39
|
+
self.bt = (3 if (x%3 == 0 and x!= 0) else 0.5)
|
|
40
|
+
self.color =("transparent" if int(self.cells[x][y].text()) == 0 else "white")
|
|
41
|
+
self.cellStyle = [
|
|
42
|
+
"background-color:grey;"
|
|
43
|
+
f"border-left:{self.bl}px solid black;"
|
|
44
|
+
f"border-top: {self.bt}px solid black;"
|
|
45
|
+
"border-right: 1px solid black;"
|
|
46
|
+
"border-bottom: 1px solid black;"
|
|
47
|
+
f"color: {self.color};"
|
|
48
|
+
"font-weight: None;"
|
|
49
|
+
"font-size: 20px"
|
|
50
|
+
]
|
|
51
|
+
self.cells[x][y].setStyleSheet("".join(self.cellStyle))
|
|
52
|
+
self.cells[x][y].setAlignment(QtCore.Qt.AlignCenter)
|
|
53
|
+
self.grid.addWidget(self.cells[x][y],x,y)
|
|
54
|
+
|
|
55
|
+
if self.rendering_attention:
|
|
56
|
+
# Attention grid
|
|
57
|
+
self.attn_grid = QGridLayout()
|
|
58
|
+
self.attn_widget = QWidget()
|
|
59
|
+
self.attn_widget.setLayout(self.attn_grid)
|
|
60
|
+
self.main_layout.addWidget(self.attn_widget)
|
|
61
|
+
self.attn_grid.setVerticalSpacing(0)
|
|
62
|
+
self.attn_grid.setHorizontalSpacing(0)
|
|
63
|
+
self.attn_grid.setContentsMargins(0,0,0,0)
|
|
64
|
+
|
|
65
|
+
self.attn_cells = [[QLineEdit(self) for _ in range(self.size)] for _ in range(self.size)]
|
|
66
|
+
for x in range(self.size):
|
|
67
|
+
for y in range(self.size):
|
|
68
|
+
cell = self.attn_cells[x][y]
|
|
69
|
+
cell.setFixedSize(40,40)
|
|
70
|
+
cell.setAlignment(QtCore.Qt.AlignCenter)
|
|
71
|
+
cell.setStyleSheet(
|
|
72
|
+
"background-color: black;"
|
|
73
|
+
"border:none;"
|
|
74
|
+
)
|
|
75
|
+
self.attn_grid.addWidget(cell, x, y)
|
|
76
|
+
|
|
77
|
+
self.setLayout(self.main_layout)
|
|
78
|
+
|
|
79
|
+
def updated(self,action:[int,int,int],true_value:bool=False,attention_weights=None):
|
|
80
|
+
|
|
81
|
+
if action is not None:
|
|
82
|
+
assert len(action) == 3
|
|
83
|
+
row,column,value = action
|
|
84
|
+
styleList = self.cells[row][column].styleSheet().split(";")
|
|
85
|
+
if len(styleList) != 8 :
|
|
86
|
+
del styleList[-1]
|
|
87
|
+
styleDict = {k.strip() : v.strip() for k,v in (element.split(":") for element in styleList)}
|
|
88
|
+
cellColor = styleDict["color"]
|
|
89
|
+
|
|
90
|
+
if self.mode == "biased": # v0 version----------
|
|
91
|
+
if cellColor not in ("white","black") and value in range(1,10):
|
|
92
|
+
if true_value:
|
|
93
|
+
self.cells[row][column].setText(str(value))
|
|
94
|
+
assert self.cells[row][column].text() != str(0)
|
|
95
|
+
self.game[row][column] = value
|
|
96
|
+
color = "black"
|
|
97
|
+
else:
|
|
98
|
+
color = cellColor
|
|
99
|
+
|
|
100
|
+
self.update_style(action,color)
|
|
101
|
+
|
|
102
|
+
else: # v1 version-----------
|
|
103
|
+
if not cellColor=="white":
|
|
104
|
+
self.cells[row][column].setText(str(value))
|
|
105
|
+
color = "black"
|
|
106
|
+
else:
|
|
107
|
+
color = cellColor
|
|
108
|
+
|
|
109
|
+
self.update_style(action,color)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def update_style(self,action,color):
|
|
113
|
+
row,column,value = action
|
|
114
|
+
ubl = (3 if (column % 3 == 0 and column!= 0) else 0.5)
|
|
115
|
+
ubt = (3 if (row % 3 == 0 and row!= 0) else 0.5)
|
|
116
|
+
if color=="black":
|
|
117
|
+
background="orange"
|
|
118
|
+
else:
|
|
119
|
+
background="grey"
|
|
120
|
+
|
|
121
|
+
updatedStyle = [
|
|
122
|
+
f"background-color:{background};"
|
|
123
|
+
f"border-left:{ubl}px solid black;"
|
|
124
|
+
f"border-top: {ubt}px solid black;"
|
|
125
|
+
"border-right: 1px solid black;"
|
|
126
|
+
"border-bottom: 1px solid black;"
|
|
127
|
+
f"color: {color};"
|
|
128
|
+
"font-weight: None;"
|
|
129
|
+
"font-size: 20px"
|
|
130
|
+
]
|
|
131
|
+
self.cells[row][column].setStyleSheet("".join(updatedStyle)) # Update the cell color
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def reset(self, board):
    """Load ``board`` into the grid and restore every cell's initial style.

    Each cell is fixed to 40x40, made read-only, filled with the board value
    and given a grey background. Cells holding ``0`` get transparent text;
    all other cells get white text.

    The original implementation wrapped the cell loop in an extra
    ``for line in self.game`` loop, which repeated the whole pass once per
    board row. Each cell is now updated exactly once; the final widget state
    is the same.

    Args:
        board: 2-D indexable (``board[x][y]``) with at least
            ``self.size`` x ``self.size`` entries whose string form parses
            as an integer.
    """
    self.game = board
    for x in range(self.size):
        for y in range(self.size):
            cell = self.cells[x][y]
            cell.setFixedSize(40, 40)
            cell.setReadOnly(True)
            cell.setText(str(board[x][y]))

            # These stay instance attributes, as in the original, in case
            # other code reads them; after the loop they describe the last cell.
            # Every third row/column (other than index 0) gets a 3px separator.
            self.bl = (3 if (y % 3 == 0 and y != 0) else 0.5)
            self.bt = (3 if (x % 3 == 0 and x != 0) else 0.5)
            self.color = ("transparent" if int(cell.text()) == 0 else "white")
            self.cellStyle = [
                "background-color:grey;"
                f"border-left:{self.bl}px solid black;"
                f"border-top: {self.bt}px solid black;"
                "border-right: 1px solid black;"
                "border-bottom: 1px solid black;"
                f"color: {self.color};"
                "font-weight: None;"
                "font-size: 20px"
            ]
            cell.setStyleSheet("".join(self.cellStyle))
|
|
157
|
+
|
|
158
|
+
def render_attention(self, attn):
    """Colour the attention grid from a matrix of attention weights.

    Each weight ``attn[i, j]`` is scaled by 255 and used as the red and
    green components of the cell background, with blue fixed at 255.

    The scaled value is clamped to ``0..255`` so that weights outside
    ``[0, 1]`` cannot produce an out-of-range ``rgb()`` component. Weights
    already in ``[0, 1]`` render exactly as before.

    Args:
        attn: Object indexable as ``attn[i, j]`` for ``i, j`` in
            ``range(self.size)``, yielding numbers.
    """
    for i in range(self.size):
        for j in range(self.size):
            v = attn[i, j]
            intensity = min(max(int(255 * v), 0), 255)
            self.attn_cells[i][j].setStyleSheet(
                f"""
                background-color: rgb({intensity}, {intensity}, 255);
                """
            )
|
|
168
|
+
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: gymnasium_sudoku
|
|
3
|
+
Version: 0.3.4
|
|
4
|
+
Summary: A Sudoku environment for Reinforcement Learning research
|
|
5
|
+
Author-email: adeottidev@gmail.com
|
|
6
|
+
License: The MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2025 Author(s)
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in
|
|
18
|
+
all copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
26
|
+
THE SOFTWARE.
|
|
27
|
+
Project-URL: Homepage, https://github.com/adeotti/Gymnasium-Sudoku
|
|
28
|
+
Project-URL: Repository, https://github.com/adeotti/Gymnasium-Sudoku
|
|
29
|
+
Keywords: Reinforcement Learning,game,RL,AI,gymnasium,Sudoku
|
|
30
|
+
Classifier: Development Status :: 3 - Alpha
|
|
31
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
32
|
+
Classifier: Programming Language :: Python :: 3
|
|
33
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
34
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
35
|
+
Classifier: Intended Audience :: Science/Research
|
|
36
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
37
|
+
Requires-Python: >=3.10
|
|
38
|
+
Description-Content-Type: text/markdown
|
|
39
|
+
License-File: LICENSE
|
|
40
|
+
Requires-Dist: gymnasium>=1.1.1
|
|
41
|
+
Requires-Dist: numpy>=1.25.2
|
|
42
|
+
Requires-Dist: PySide6>=6.7.2
|
|
43
|
+
Requires-Dist: typing-extensions>=4.14.0
|
|
44
|
+
Requires-Dist: cloudpickle>=3.1.1
|
|
45
|
+
Requires-Dist: tqdm>=4.67.1
|
|
46
|
+
Requires-Dist: pathlib>=1.0.1
|
|
47
|
+
Dynamic: license-file
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
pip install gymnasium_sudoku
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
**Observation space:** The state returned after each `.reset()` or `.step()` is a raw Sudoku board of shape `[9,9]`. This observation can be converted into an image.
|
|
54
|
+
|
|
55
|
+
**Action space:** The action space is shaped `[x,y,z]`, where x = row position of the cell, y = column position of the cell and z = value that should go into that cell. When vectorizing, the current version of the environment does not handle action reshaping, so for n environments the action's shape should be: `[[x0...xn],[y0...yn],[z0...zn]]`
|
|
56
|
+
|
|
57
|
+
**Horizon:** This parameter controls the number of steps after which `Truncated` is set to `True` and the environment is reset. Without this limit, early in training (when the policy is still mostly random and the exploration incentive is high), the policy may corrupt the board and either make it unsolvable or push it into a local minimum. The default value for this parameter is set to 400 for no specific reason and should probably be adjusted during initialization.
|
|
58
|
+
|
|
59
|
+
**Eval mode/Training mode:** By default, `eval_mode` in the `__init__` method is set to `False`. This is used for training, where the environment is reset with one of 50 different boards after each `.reset()` call. During testing, `eval_mode` should be set to `True` in order to evaluate the agent on boards that were never seen during the training phase.
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
### Sudoku-v0 (biased version)
|
|
63
|
+
```python
|
|
64
|
+
import gymnasium as gym
|
|
65
|
+
|
|
66
|
+
env = gym.make("sudoku-v0",mode="biased",render_mode="human",horizon=600,eval_mode=True)
|
|
67
|
+
env.reset()
|
|
68
|
+
|
|
69
|
+
for n in range(int(6e3)):
|
|
70
|
+
env.step(env.action_space.sample())
|
|
71
|
+
env.render()
|
|
72
|
+
```
|
|
73
|
+
**Bias :**
|
|
74
|
+
One of the induced biases that greatly helps guide learning is that the policy cannot modify a cell that has already been filled correctly, in addition to the untouchable cells that are present at the beginning.
|
|
75
|
+
|
|
76
|
+
**Measuring learning for this version of the environment:** The current structure of the environment allows a completely random policy to solve it (this is true for easy boards in the current version of the environment), so a good way to measure learning might be to use the number of steps over N episodes under a random policy as a `baseline`. This implies that a policy able to consistently solve the test boards in fewer steps, over the same N episodes used to run the random policy, is in theory displaying some sort of learning.
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
### Sudoku-v1
|
|
80
|
+
```python
|
|
81
|
+
import gymnasium as gym
|
|
82
|
+
|
|
83
|
+
env = gym.make("sudoku-v1",mode="easy",render_mode="human",horizon=600,eval_mode=True)
|
|
84
|
+
env.reset()
|
|
85
|
+
|
|
86
|
+
for n in range(int(6e3)):
|
|
87
|
+
env.step(env.action_space.sample())
|
|
88
|
+
env.render()
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
gymnasium_sudoku/__init__.py
|
|
5
|
+
gymnasium_sudoku/environment.py
|
|
6
|
+
gymnasium_sudoku/puzzle.py
|
|
7
|
+
gymnasium_sudoku/rendering.py
|
|
8
|
+
gymnasium_sudoku.egg-info/PKG-INFO
|
|
9
|
+
gymnasium_sudoku.egg-info/SOURCES.txt
|
|
10
|
+
gymnasium_sudoku.egg-info/dependency_links.txt
|
|
11
|
+
gymnasium_sudoku.egg-info/requires.txt
|
|
12
|
+
gymnasium_sudoku.egg-info/top_level.txt
|
|
13
|
+
gymnasium_sudoku/datasets/v0_biased/test_boards.csv
|
|
14
|
+
gymnasium_sudoku/datasets/v0_biased/train_boards.csv
|
|
15
|
+
gymnasium_sudoku/datasets/v1_easy/test_boards.csv
|
|
16
|
+
gymnasium_sudoku/datasets/v1_easy/train_boards.csv
|
|
17
|
+
tests/test_1.py
|
|
18
|
+
tests/test_2.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
gymnasium_sudoku
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=77.0.3"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "gymnasium_sudoku"
|
|
7
|
+
authors = [{ name = "", email = "adeottidev@gmail.com" }]
|
|
8
|
+
description = "A Sudoku environment for Reinforcement Learning research"
|
|
9
|
+
|
|
10
|
+
version = "0.3.4"
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
license = { file = "LICENSE" }
|
|
13
|
+
readme = "README.md"
|
|
14
|
+
keywords = ["Reinforcement Learning", "game", "RL", "AI", "gymnasium","Sudoku"]
|
|
15
|
+
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 3 - Alpha",
|
|
18
|
+
"License :: OSI Approved :: MIT License",
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"Programming Language :: Python :: 3.10",
|
|
21
|
+
"Programming Language :: Python :: 3.11",
|
|
22
|
+
"Intended Audience :: Science/Research",
|
|
23
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
dependencies = [
|
|
27
|
+
"gymnasium>=1.1.1",
|
|
28
|
+
"numpy>=1.25.2",
|
|
29
|
+
"PySide6>=6.7.2",
|
|
30
|
+
"typing-extensions>=4.14.0",
|
|
31
|
+
"cloudpickle>=3.1.1",
|
|
32
|
+
"tqdm>=4.67.1",
|
|
33
|
+
"pathlib>=1.0.1"
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
[project.urls]
|
|
37
|
+
Homepage = "https://github.com/adeotti/Gymnasium-Sudoku"
|
|
38
|
+
Repository = "https://github.com/adeotti/Gymnasium-Sudoku"
|
|
39
|
+
|
|
40
|
+
[tool.setuptools.packages.find]
|
|
41
|
+
where = ["."]
|
|
42
|
+
exclude = ["venv*", "test*", "tests*", "*.egg-info", "dist", "build"]
|
|
43
|
+
|
|
44
|
+
[tool.setuptools.package-data]
|
|
45
|
+
gymnasium_sudoku = [
|
|
46
|
+
"datasets/v0_biased/*.csv",
|
|
47
|
+
"datasets/v1_easy/*.csv"
|
|
48
|
+
]
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
import gymnasium as gym
|
|
3
|
+
import numpy as np
|
|
4
|
+
from gymnasium.utils.env_checker import check_env
|
|
5
|
+
import gymnasium_sudoku
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_env_creation():
    """Check which (environment id, mode) pairs can be constructed.

    ``sudoku-v0`` must accept ``mode="biased"`` and ``sudoku-v1`` must accept
    ``mode="easy"``; the swapped pairings are expected to raise ``ValueError``.

    The previous version wrapped every case in ``try/except`` and only
    printed the outcome, so the test passed whatever the environment did.
    Failures now propagate to pytest.
    """
    # Supported pairings: construction must not raise.
    for env_id, mode in (("sudoku-v0", "biased"), ("sudoku-v1", "easy")):
        env = gym.make(env_id, mode=mode)
        env.close()

    # Mismatched pairings: construction must be rejected with ValueError.
    for env_id, mode in (("sudoku-v0", "easy"), ("sudoku-v1", "biased")):
        with pytest.raises(ValueError):
            gym.make(env_id, mode=mode)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def test_env_checker():
    """Run Gymnasium's ``check_env`` against both registered environments.

    Any exception raised by ``check_env`` now fails the test; the previous
    version caught it and printed a message, so problems went unnoticed.
    Each environment is closed even when the check fails.
    """
    for env_id, mode in (("sudoku-v0", "biased"), ("sudoku-v1", "easy")):
        env = gym.make(env_id, mode=mode)
        try:
            check_env(env.unwrapped)
        finally:
            env.close()
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# Allow running this test module directly: invokes pytest on this file with
# verbose output and short tracebacks.
if __name__ == "__main__":
    pytest.main([__file__, "-v", "--tb=short"])
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import gymnasium_sudoku,torch,sys
|
|
2
|
+
import gymnasium as gym
|
|
3
|
+
import torch.nn as nn
|
|
4
|
+
import torch.nn.functional as F
|
|
5
|
+
from torch.distributions import Categorical
|
|
6
|
+
import numpy as np
|
|
7
|
+
import time
|
|
8
|
+
from tqdm import tqdm
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def process_obs(x):
    """Encode a batch of integer boards as an 11-channel float tensor.

    Channels 0-9 are the one-hot encoding of each cell value (10 classes),
    moved in front of the two board dimensions; channel 10 is 1.0 where the
    cell equals 0 and 0.0 elsewhere.

    Args:
        x: Integer tensor of boards; the ``permute`` call requires it to be
           3-D, i.e. ``(batch, rows, cols)``.

    Returns:
        Float tensor of shape ``(batch, 11, rows, cols)``.
    """
    empty_plane = x.eq(0).float().unsqueeze(1)
    digit_planes = F.one_hot(x, num_classes=10).float().permute(0, 3, 1, 2)
    return torch.cat((digit_planes, empty_plane), dim=1)
|
|
15
|
+
|
|
16
|
+
class p_net(nn.Module):
    """Policy network that samples a cell position and a digit for a board.

    The input is the 11-channel encoding produced by ``process_obs``. It goes
    through three convolutions, a learned per-cell embedding, one 4-head
    self-attention layer and two linear layers. A position is then sampled
    among the empty cells, followed by a digit (1-9) for that position.

    Changes from the original: the per-head attention mask is tiled across
    the batch so ``forward`` works for batch sizes other than 1, and the
    gather index is created on the same device as the activations.
    """

    def __init__(self):
        super().__init__()
        self.c1 = nn.LazyConv2d(64, 1, 1)
        self.c2 = nn.LazyConv2d(128, 3, 1, padding=1)
        self.c3 = nn.LazyConv2d(128, 3, 1, padding=1)
        # One learned 128-d embedding per cell (81 cells), broadcast over the batch.
        self.emb = nn.Parameter(torch.randn(1, 81, 128) * 0.02)
        self.attn = nn.MultiheadAttention(128, 4, batch_first=True)
        self.norm = nn.LayerNorm(128)
        self.l1 = nn.LazyLinear(128)
        self.l2 = nn.LazyLinear(128)
        self.pos = nn.LazyLinear(1)
        self.num = nn.LazyLinear(10)
        self.v_aux = nn.LazyLinear(1)  # created but not used by forward()
        self.register_buffer("attn_mask", self.attn_masks())

    def forward(self, s):
        """Sample an action for each board in the batch.

        Args:
            s: Float tensor ``(batch, channels, 9, 9)`` whose channel-wise
               argmax is 0 for empty cells (see ``pos_mask``).

        Returns:
            Tuple ``(pos, num, asc)``: the sampled flat cell index per board,
            the sampled digit per board, and the head-averaged attention
            weights.
        """
        x = self.c1(s)
        x = F.silu(self.c2(x))
        x = F.silu(self.c3(x))
        # (B, 128, H, W) -> (B, H*W, 128); H*W must be 81 to match self.emb.
        x = x.flatten(2).transpose(-1, 1)
        x = x + self.emb

        # A 3-D attn_mask must have batch * num_heads entries along dim 0.
        # The buffer holds one mask per head, so tile it once per batch item
        # (layout is [b0h0..b0h3, b1h0..b1h3, ...]).
        # NOTE(review): this is a float mask, which PyTorch adds to the
        # attention logits rather than using it to block positions — confirm
        # that a +1 bias (not hard masking) is what is intended.
        batch = x.size(0)
        attn_mask = self.attn_mask.repeat(batch, 1, 1)
        x, asc = self.attn(x, x, x, attn_mask=attn_mask, average_attn_weights=True)

        x = self.norm(x)
        x = F.silu(self.l1(x))
        x = F.silu(self.l2(x))

        # Position head: one logit per cell, filled cells masked out.
        pos = self.pos(x).squeeze(-1)
        pos = self.pos_mask(s, pos)
        pos = F.softmax(pos, -1)
        pos = Categorical(probs=pos).sample()

        # Digit head: take the logits of the sampled cell, forbid digit 0.
        num_logits = self.num(x)
        idx = torch.arange(batch, device=x.device)
        o = num_logits[idx, pos]
        o = self.action_mask(o)
        o = F.softmax(o, -1)
        num = Categorical(probs=o).sample()
        return pos, num, asc

    def pos_mask(self, s, x):
        """Set the logits of non-empty cells to -1e9.

        A cell counts as non-empty when the channel-wise argmax of ``s`` is
        not 0.
        """
        s = s.argmax(1)
        mask = (s != 0).flatten(1)
        value = -1e9
        return torch.masked_fill(x, mask, value)

    def action_mask(self, x):
        """Set the logit of class 0 to -inf so digit 0 is never sampled."""
        mask = torch.zeros_like(x, dtype=torch.bool)
        mask[:, 0] = True
        value = -float("inf")
        return torch.masked_fill(x, mask, value)

    def attn_masks(self, N=81):
        """Build the four (N, N) float masks, stacked as ``(4, N, N)``.

        In order: 1.0 where two cells share a row, a column, a 3x3 box, and
        an all-ones mask. The row/column/box arithmetic assumes a 9x9 grid.
        """
        indices = torch.arange(N)

        rows = indices // 9
        cols = indices % 9
        boxes = (rows // 3) * 3 + (cols // 3)  # shape [N]

        row_mask = (rows.unsqueeze(0) == rows.unsqueeze(1)).float()
        col_mask = (cols.unsqueeze(0) == cols.unsqueeze(1)).float()
        box_mask = (boxes.unsqueeze(0) == boxes.unsqueeze(1)).float()
        global_mask = torch.ones(N, N)
        return torch.stack([row_mask, col_mask, box_mask, global_mask], dim=0)
|
|
78
|
+
|
|
79
|
+
def main():
    """Run a rendered random-action rollout on ``sudoku-v0``.

    This used to be module-level code, so importing the file (which pytest
    does when collecting ``test_*.py``) created the environment and ran the
    whole 30 000-step loop. It now runs only when the file is executed as a
    script. The redundant first ``env.reset()`` and duplicate ``steps = 0``
    were dropped.
    """
    env = gym.make(
        "sudoku-v0",
        mode="biased",
        render_mode="human",
        horizon=800,
        eval_mode=True,
    )
    total_steps = int(6e3 * 5)

    policy = p_net()
    # One dummy forward pass so the lazy layers infer their input sizes.
    policy(process_obs(torch.randint(0, 9, (1, 9, 9))))
    #t_policy = torch.load("./model_test",map_location="cpu")["policy state"]
    #policy.load_state_dict(t_policy,strict=False)

    obs = env.reset()[0]
    steps = r = 0

    for _ in range(total_steps):
        # To act with the policy instead of random sampling:
        #pos,num,attn = policy(process_obs(torch.tensor(obs,dtype=torch.int64).unsqueeze(0)))
        #xpos = pos // 9 ; ypos = pos % 9
        #action = np.stack((xpos,ypos,num),axis=-1).reshape(3)
        obs, reward, done, trunc, _ = env.step(env.action_space.sample())
        steps += 1
        r += reward
        print(reward)
        env.render()
        if done:
            print(f"\n{obs} | steps : {steps} | reward {r:.2f}")
            time.sleep(5)  # pause before the board is reset
            steps = r = 0
            obs = env.reset()[0]


if __name__ == "__main__":
    main()
|