gymnasium-sudoku 0.3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2025 Author(s)
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,91 @@
1
+ Metadata-Version: 2.4
2
+ Name: gymnasium_sudoku
3
+ Version: 0.3.4
4
+ Summary: A Sudoku environment for Reinforcement Learning research
5
+ Author-email: adeottidev@gmail.com
6
+ License: The MIT License
7
+
8
+ Copyright (c) 2025 Author(s)
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in
18
+ all copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26
+ THE SOFTWARE.
27
+ Project-URL: Homepage, https://github.com/adeotti/Gymnasium-Sudoku
28
+ Project-URL: Repository, https://github.com/adeotti/Gymnasium-Sudoku
29
+ Keywords: Reinforcement Learning,game,RL,AI,gymnasium,Sudoku
30
+ Classifier: Development Status :: 3 - Alpha
31
+ Classifier: License :: OSI Approved :: MIT License
32
+ Classifier: Programming Language :: Python :: 3
33
+ Classifier: Programming Language :: Python :: 3.10
34
+ Classifier: Programming Language :: Python :: 3.11
35
+ Classifier: Intended Audience :: Science/Research
36
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
37
+ Requires-Python: >=3.10
38
+ Description-Content-Type: text/markdown
39
+ License-File: LICENSE
40
+ Requires-Dist: gymnasium>=1.1.1
41
+ Requires-Dist: numpy>=1.25.2
42
+ Requires-Dist: PySide6>=6.7.2
43
+ Requires-Dist: typing-extensions>=4.14.0
44
+ Requires-Dist: cloudpickle>=3.1.1
45
+ Requires-Dist: tqdm>=4.67.1
46
+ Requires-Dist: pathlib>=1.0.1
47
+ Dynamic: license-file
48
+
49
+ ```
50
+ pip install gymnasium_sudoku
51
+ ```
52
+
53
+ **Observation space:** The state returned after each `.reset()` or `.step()` is a raw Sudoku board of shape `[9,9]`. This observation can be converted into an image.
54
+
55
+ **Action space:** The action space is shaped `[x,y,z]`, where x = row position of the cell, y = column position of the cell, and z = value that should go into that cell. When vectorizing, the current version of the environment does not handle action reshaping, so for n environments, the action's shape should be: `[[x0...xn],[y0...yn],[z0...zn]]`
56
+
57
+ **Horizon:** This parameter controls the number of steps after which `Truncated` is set to `True` and the environment is reset. Without such a limit, early in training (when the policy is still mostly random and the exploration incentive is high), the policy may corrupt the board and either make it unsolvable or push it into a local minimum. The default value is 400, chosen for no specific reason, and should probably be adjusted during initialization.
58
+
59
+ **Eval mode/Training mode:** By default, `eval_mode` in the `__init__` method is set to `False`. This is used for training, where the environment is reset with one of 50 different boards after each `.reset()` call. During testing, `eval_mode` should be set to `True` in order to evaluate the agent on boards that were never seen during the training phase.
60
+
61
+
62
+ ### Sudoku-v0 (biased version)
63
+ ```python
64
+ import gymnasium as gym
65
+
66
+ env = gym.make("sudoku-v0",mode="biased",render_mode="human",horizon=600,eval_mode=True)
67
+ env.reset()
68
+
69
+ for n in range(int(6e3)):
70
+ env.step(env.action_space.sample())
71
+ env.render()
72
+ ```
73
+ **Bias :**
74
+ Among the induced biases that immensely help guide learning is the fact that the policy cannot modify a cell that was already correctly filled, on top of the untouchable cells present at the beginning.
75
+
76
+ **Measuring learning for this version of the environment:** The current structure of the environment allows a completely random policy to solve it (this is true for easy boards in the current version of the environment), so a good way to measure learning might be to use the number of steps over N episodes under a random policy as a `baseline`. This implies that a policy able to consistently solve the test boards in fewer steps over the same N episodes used to run a random policy is, in theory, displaying some sort of learning.
77
+
78
+
79
+ ### Sudoku-v1
80
+ ```python
81
+ import gymnasium as gym
82
+
83
+ env = gym.make("sudoku-v1",mode="easy",render_mode="human",horizon=600,eval_mode=True)
84
+ env.reset()
85
+
86
+ for n in range(int(6e3)):
87
+ env.step(env.action_space.sample())
88
+ env.render()
89
+ ```
90
+
91
+
@@ -0,0 +1,43 @@
1
+ ```
2
+ pip install gymnasium_sudoku
3
+ ```
4
+
5
+ **Observation space:** The state returned after each `.reset()` or `.step()` is a raw Sudoku board of shape `[9,9]`. This observation can be converted into an image.
6
+
7
+ **Action space:** The action space is shaped `[x,y,z]`, where x = row position of the cell, y = column position of the cell, and z = value that should go into that cell. When vectorizing, the current version of the environment does not handle action reshaping, so for n environments, the action's shape should be: `[[x0...xn],[y0...yn],[z0...zn]]`
8
+
9
+ **Horizon:** This parameter controls the number of steps after which `Truncated` is set to `True` and the environment is reset. Without such a limit, early in training (when the policy is still mostly random and the exploration incentive is high), the policy may corrupt the board and either make it unsolvable or push it into a local minimum. The default value is 400, chosen for no specific reason, and should probably be adjusted during initialization.
10
+
11
+ **Eval mode/Training mode:** By default, `eval_mode` in the `__init__` method is set to `False`. This is used for training, where the environment is reset with one of 50 different boards after each `.reset()` call. During testing, `eval_mode` should be set to `True` in order to evaluate the agent on boards that were never seen during the training phase.
12
+
13
+
14
+ ### Sudoku-v0 (biased version)
15
+ ```python
16
+ import gymnasium as gym
17
+
18
+ env = gym.make("sudoku-v0",mode="biased",render_mode="human",horizon=600,eval_mode=True)
19
+ env.reset()
20
+
21
+ for n in range(int(6e3)):
22
+ env.step(env.action_space.sample())
23
+ env.render()
24
+ ```
25
+ **Bias :**
26
+ Among the induced biases that immensely help guide learning is the fact that the policy cannot modify a cell that was already correctly filled, on top of the untouchable cells present at the beginning.
27
+
28
+ **Measuring learning for this version of the environment:** The current structure of the environment allows a completely random policy to solve it (this is true for easy boards in the current version of the environment), so a good way to measure learning might be to use the number of steps over N episodes under a random policy as a `baseline`. This implies that a policy able to consistently solve the test boards in fewer steps over the same N episodes used to run a random policy is, in theory, displaying some sort of learning.
29
+
30
+
31
+ ### Sudoku-v1
32
+ ```python
33
+ import gymnasium as gym
34
+
35
+ env = gym.make("sudoku-v1",mode="easy",render_mode="human",horizon=600,eval_mode=True)
36
+ env.reset()
37
+
38
+ for n in range(int(6e3)):
39
+ env.step(env.action_space.sample())
40
+ env.render()
41
+ ```
42
+
43
+
@@ -0,0 +1,33 @@
1
+ from gymnasium.envs.registration import register
2
+ from gymnasium_sudoku.environment import Gym_env,V0_MODES,V1_MODES
3
+
4
+ __all__ = ["Gym_env"]
5
+ __version__ = "0.3.4"
6
+
7
+
8
+ def _make_v0(**kwargs):
9
+ if not kwargs.get("mode") in V0_MODES:
10
+ raise ValueError(f"sudoku-v0 requires mode {[*V0_MODES]}")
11
+ return Gym_env(**kwargs)
12
+
13
+ def _make_v1(**kwargs):
14
+ if not kwargs.get("mode") in V1_MODES:
15
+ raise ValueError(f"sudoku-v1 availables modes are : {[*V1_MODES]}")
16
+ return Gym_env(**kwargs)
17
+
18
+
19
+ register(
20
+ id="sudoku-v0",
21
+ entry_point="gymnasium_sudoku:_make_v0",
22
+ kwargs={"mode":"biased"}
23
+ )
24
+
25
+ register(
26
+ id="sudoku-v1",
27
+ entry_point="gymnasium_sudoku:_make_v1",
28
+ kwargs={"mode":"easy"}
29
+ )
30
+
31
+
32
+
33
+
@@ -0,0 +1,50 @@
1
+ 083695020572104800960782500097401200254300980000928400009870000000240018000000794,483695127572134869961782543897451236254367981316928475149876352735249618628513794
2
+ 300000070002306010600890003000209560000570100500610934004000090006080701805001206,398124675742356819651897423183249567469573182527618934214765398936482751875931246
3
+ 080350972751200003032008510060904001194827005300000000209583000610470389000609207,486351972751296843932748516568934721194827635327165498279583164615472389843619257
4
+ 078021963029500000136790045650139087397284500800070392085010624003942051241056730,578421963429563178136798245652139487397284516814675392985317624763942851241856739
5
+ 478369102005781000630240008543096800020803006867004900010658070754902080006010005,478369152295781364631245798543196827129873546867524913312658479754932681986417235
6
+ 600730800025481006701000000490057001260000000510364098100540000030029407050876020,649735812325481976781692534498257361263918745517364298172543689836129457954876123
7
+ 078143000645970801910000240096000015500019060801405900784501000060830000059700184,278143596645972831913658247496287315527319468831465972784591623162834759359726184
8
+ 070029341609000520301570968804760293130890005000000016267050184413200009080107030,578629341649318527321574968854761293136892475792435816267953184413286759985147632
9
+ 560000003008690000092045007040002080010409020270000496400000500089510002657008000,564827913738691254192345867946752381813469725275183496421976538389514672657238149
10
+ 009001070143907208760824093030000001097410300620008000804065009306000000000080040,289631574143957268765824193438576921597412386621398457814265739356749812972183645
11
+ 000063470600040891004080003720614059415030000096270004940000000060000745807001000,589163472632547891174982563728614359415839627396275184943756218261398745857421936
12
+ 100400320003500090049317685837601250950000740410705036090000400374060502628150900,165489327783526194249317685837641259956832741412795836591273468374968512628154973
13
+ 004001200790400100031905784003610005062307000000002000300050040020094061006030000,654871293798423156231965784473619825562387419189542637317256948825794361946138572
14
+ 007000200310705406600040100000430527200607900000800010700004000190500860003080740,947168235318725496652349178869431527231657984475892613786214359194573862523986741
15
+ 070586000000700530000140000809257100001000020560831074007000400000408359080029006,973586241614792538258143697849257163731964825562831974397615482126478359485329716
16
+ 042639005300071600069208403034000000001000347000500100403015869596080000010062030,142639785385471692769258413634127958251896347978543126423715869596384271817962534
17
+ 300905074090008001608004050764000019030009087000100430247590068000780000016000005,321965874495378621678214953764853219132649587589127436247591368953786142816432795
18
+ 000000000300750960000800541030070100070490385045103600103245790000960013029001050,452619837318754962697832541936578124271496385845123679163245798584967213729381456
19
+ 610025430248001960030000200080604009001300652005010008056193004900060003170080596,619825437248731965537946281382654179491378652765219348856193724924567813173482596
20
+ 963710000000040903000000670502000030000050009007308154370200000209801300040030098,963715482785642913421983675512479836834156729697328154378294561259861347146537298
21
+ 800000600326070000041090038408960572060701940970000186000400300002030860504010020,859342617326178495741695238418963572265781943973254186687429351192537864534816729
22
+ 000300000032900781000410029093800200850060000004179050970500402281700530345620900,719382645432956781568417329193845267857263194624179853976538412281794536345621978
23
+ 400051000573062900000000507800009153130600804000003000926800430054090208000024609,498751362573462981612938547867249153139675824245183796926817435754396218381524679
24
+ 000039208812006004409812750243050060100000000500328490706180000051794680084205100,675439218812576934439812756243951867198647325567328491726183549351794682984265173
25
+ 001065079003000060629030800816047503930010007270000601108306050560491738307028000,481265379753189264629734815816947523935612487274853691148376952562491738397528146
26
+ 240109380639400051851300290090003502308000400560940130410785903925630040083200010,247159386639428751851376294194863572378512469562947138416785923925631847783294615
27
+ 109805020640109750285040691800000000756010004000200000090081000007306800300402019,179865423643129758285743691821634975756918234934257186492581367517396842368472519
28
+ 025046000460037900037810042800100463006480520054703890541000270003624009692570000,125946738468237915937815642879152463316489527254763891541398276783624159692571384
29
+ 000895100390410080160000950250980010410263895039050400902037041603109078071648239,724895163395416782168372954256984317417263895839751426982537641643129578571648239
30
+ 674301059020079438039002100900200864216045000040967510465798301790104080183000007,674381259521679438839452176957213864216845793348967512465798321792134685183526947
31
+ 208035009050400603000179802090000000506814007704093060007001006000700080460000021,278635419951482673643179852192567348536814297784293165827941536315726984469358721
32
+ 470100000619800003302907816521793004806420001030610290247300560060009030003064007,478136952619852743352947816521793684896425371734618295247381569165279438983564127
33
+ 080000051004089030020406900108002000007810523000070080703021845050008002812300600,986237451574189236321456978138592764697814523245673189763921845459768312812345697
34
+ 587600090012049800004007210905400102100900070240370000821004006703208001059160708,587612493312549867694837215975486132138925674246371589821794356763258941459163728
35
+ 008009000502700090300040000000600054250007130091800000020004010800902700600000049,148259376562783491379146582783621954256497138491835627927564813834912765615378249
36
+ 010509002085070000032410008206037005891054060070000000360705421400021673027040050,614589732985372146732416598246137985891254367573968214368795421459821673127643859
37
+ 630502100400000736900070405290800004001000200500200003109005870000000000050029000,637542198425198736918673425296837514381954267574216983149365872762481359853729641
38
+ 985000000400008610000025049100300462096500000040010007028450000000690070619802534,985164723472938615361725849157389462296547381843216957728453196534691278619872534
39
+ 040017008892603107001098300100009850358024060009180000910000030087500019030040000,543217698892653147761498325174369852358724961629185473916872534487536219235941786
40
+ 746000009308400070129600000000000002932000617004010090090080004607094203400200060,746351829358429176129678435871963542932845617564712398293186754617594283485237961
41
+ 010307805305009740004080030047010003000003400100058206009031000038000900201806050,912347865385169742764285139847612593526973481193458276459731628638524917271896354
42
+ 043927681010030920000408000724090006035000007900000030471000060050600019060070300,543927681817536924692418573724893156135264897986751432471389265358642719269175348
43
+ 000701053040005010000800200530108000962504000000209340406000009103000520200000634,628791453349625718751843296534178962962534871817269345476352189193486527285917634
44
+ 600930000000005306200008491100003904803600700924087560407500030302890605000070208,641932857798415326235768491176253984853649712924187563487526139312894675569371248
45
+ 200006700653709810098100036005008961076093000982401007000002579069080100427915680,214836795653729814798154236345278961176593428982461357831642579569387142427915683
46
+ 003005264609802000010000900096000730180600095000050000960030000005080019030209546,873195264659842173214367958596428731182673495347951682961534827425786319738219546
47
+ 305060071810407050200150840968510320000086500150000000690040030480305062030021408,345862971819437256276159843968514327723986514154273689692748135481395762537621498
48
+ 080021734026479050400083200000147390090208017010390582030910048100734920500062003,985621734326479851471583269852147396693258417714396582237915648168734925549862173
49
+ 500000201170000640900010008050070084040009072017348009826090010095100036431850920,584637291172985643963412758659271384348569172217348569826793415795124836431856927
50
+ 020000100050000080800435006600927400002000007907040021000298000290061040178000902,326879154754612389819435276681927435542183697937546821465298713293761548178354962
@@ -0,0 +1,50 @@
1
+ 085230070140809000070010008709005003000160000502300010001748059650903000890600702,985234176146879325273516948719485263438162597562397814321748659657923481894651732
2
+ 600000040000362905305478062013507620700104098082009000860043000001006000040290836,628915743174362985395478162913587624756124398482639517869743251231856479547291836
3
+ 000103609010000000250000310001200083004500201002641000006000030320400970905307020,487123659613895742259764318561279483794538261832641597176952834328416975945387126
4
+ 560020308000081000108000290486170930315009080020036400750090020090007843040000000,569724318234981765178653294486175932315249687927836451753498126692517843841362579
5
+ 249561070073290100816730205000170006100050300090083701030900082050026419004805630,249561873573298164816734295385172946167459328492683751631947582758326419924815637
6
+ 002105030070206058003000007040000670601027509720650300090513004007908000304000800,482175936179236458563894217945381672631427589728659341896513724257948163314762895
7
+ 204010003000304629963500740092450170840931005500728390430060000009145030681203400,274619583158374629963582741392456178847931265516728394435867912729145836681293457
8
+ 023001950000000020890050004900170230040036000002005040258607490704903800019008070,423761958576489321891352764985174236147236589632895147258617493764923815319548672
9
+ 879004160000001809601209570900130700150002438003405600000826314000013000008940250,879354162532761849641289573964138725157692438283475691795826314426513987318947256
10
+ 012697000000130000603500000000000035006080900097053681860300510070016248001040306,412697853759138462683524197148269735536781924297453681864372519375916248921845376
11
+ 076004920195600000004950008009061002000040570000230080950006010001000006620008057,876314925195682734234957168589761342362849571417235689958476213741523896623198457
12
+ 500607290420089501309001684034596800982400756651020400897062305243900008000030907,518647293426389571379251684734596812982413756651728439897162345243975168165834927
13
+ 908070060700358902030900007080000530600100000009000000860430050100080674495607003,948271365716358942532946817281794536654123798379865421867432159123589674495617283
14
+ 642010039300070100719042600090030000004000910036289040005090070061004098000108000,642815739358976124719342685597431862824567913136289547485693271261754398973128456
15
+ 005010400601030897090084000078326509053097640900045070809251004034900200502073000,285719463641532897397684125478326519153897642926145378869251734734968251512473986
16
+ 010574280028019746400800090090750408701008009004001072000900004349007800085000020,916574283528319746473862591692753418731248659854691372267985134349127865185436927
17
+ 012803740394120000750490200541078320206310497073042015030060954020084603405739100,612853749394127568758496231541978326286315497973642815837261954129584673465739182
18
+ 000562081000000050602318074001009820390080500008007409920703008000820095080000003,749562381813974652652318974571439826394286517268157439925743168436821795187695243
19
+ 082000071710040080049718000000950003063004095007860014000637020078105346230480050,382596471715342689649718532824951763163274895597863214451637928978125346236489157
20
+ 097800513032000006608075020700430005340526078256007100000618050005790000870000001,497862513532149786618375429789431265341526978256987134924618357165793842873254691
21
+ 082006013000400090090000640825079000000200951014305007709081004003002800008730000,482956713631427598597813642825179436376248951914365287769581324143692875258734169
22
+ 000050000900100000852000001001730650760805342300042107070026083600501200008300000,416257839937168425852493761241739658769815342385642197174926583693581274528374916
23
+ 025000000703065000000900000200009106000748205009610730051420008800090400004580300,425831679793265841618974523247359186136748295589612734351427968862193457974586312
24
+ 701580000352100064008000051106492500079308420000657030600905080427036910895721000,761584392352179864948263751136492578579318426284657139613945287427836915895721643
25
+ 604031009071009200859260004000054600406903005010682000207006850068010702045008006,624831579371549268859267314792154683486973125513682497237496851968315742145728936
26
+ 104052967690804052200600814976143080000260030325900041530080006069531028801726000,184352967697814352253697814976143285418265739325978641532489176769531428841726593
27
+ 000100070090060003102050006509014300004530100000209000058703061023001940000900730,635198472897462513142357896569814327274536189381279654958743261723681945416925738
28
+ 040902150000145060500376092000000601170000045604521730027003519013054006005200374,746982153239145867581376492358497621172638945694521738427863519913754286865219374
29
+ 140003000000000384008460070700030006200690100000080903067049500030000409004350000,142873695675912384398465271759231846283694157416587923867149532531728469924356718
30
+ 700030105030000002050100340523800074091000000867025031080340009370019000910060053,748932165136754892259186347523891674491673528867425931685347219372519486914268753
31
+ 945700032000230000000000108000100050057800903602354000060520010070000564000007800,945781632781236495326495178834179256157862943692354781469528317278913564513647829
32
+ 000050042000346090095000010019003000000482900000000007800000306657200109000801504,763159842128346795495728613219673458576482931384915267841597326657234189932861574
33
+ 000000500630042001250600004000251807710006003000000605000920700300000109008137206,984713562637542981251689374463251897715896423829374615146925738372468159598137246
34
+ 084002000069081000250600000700096008900510600000007450472960030590008062816003000,184752396369481527257639184745396218928514673631827459472965831593148762816273945
35
+ 003276040650004371784053690030027000000681030061309200920030000007460500048910000,193276845652894371784153692439527186275681934861349257926735418317468529548912763
36
+ 000090007008205001604000932000021093000004006700053008480009005100080000006140200,213496857978235641654718932845621793329874516761953428482369175197582364536147289
37
+ 020190003031482000609070200380500742007060300050004080208941537000800169010007020,825196473731482695649375218386519742497268351152734986268941537574823169913657824
38
+ 260094081078012039039708400023485000000007942600020850580170004006243010010050206,265394781478512639139768425923485167851637942647921853582176394796243518314859276
39
+ 009532180300074000052801300807690403096200000240007590070000002100009630938000051,769532184381974265452861379817695423596243817243187596675318942124759638938426751
40
+ 000073006090165800365000790002009403639720005540300009750208014900040028200091300,821973546497165832365482791172859463639724185548316279756238914913547628284691357
41
+ 109782003087354600040091000490200060832069000071548320304006807900073016720015934,169782543287354691543691782495237168832169475671548329314926857958473216726815934
42
+ 060200000310060578740000602807002105236800900154003020005170400000000709473056000,569287314312469578748531692897642135236815947154793826925178463681324759473956281
43
+ 900008016000930200021400390690103402053800609000000100060509700200047050070300900,934278516586931247721465398697153482153824679842796135368519724219647853475382961
44
+ 081506000500294000000810050328740010007001048000089732000000609036907020102460375,281576493573294861649813257328745916967321548415689732754132689836957124192468375
45
+ 054068000800920050000450000005010900700300000480000006060002014008030007003107600,354768291876921453291453768635814972712396845489275136967582314148639527523147689
46
+ 710600000000012090008007631000700204900103000604080000007091002500028900400500000,719634825356812497248957631183769254925143768674285319867391542531428976492576183
47
+ 020507000045269001790841600000300816000194003012786005107000000200075068408610007,621537489845269371793841652974352816586194723312786945167928534239475168458613297
48
+ 000000003000200001000430020204008136510300008003016042905000200001900305030045070,427861953369257481158439627274598136516324798893716542985673214741982365632145879
49
+ 570310892148926357920058160809002036000160000060089501000800003201690008004037005,576314892148926357923758164819572436457163289362489571795841623231695748684237915
50
+ 005001000807000914040080630204007083058123460360050700000016290000000146090074300,635941872827365914941782635214697583758123469369458721483516297572839146196274358
@@ -0,0 +1,50 @@
1
+ 010082690709100000040060000000010000490730050003840026500000102070304900026570380,315482697769153248248967513687215439492736851153849726534698172871324965926571384
2
+ 900000500000071002764800000503207010000050804471009003020030091008090320000400760,912364587835971642764825139583247916296153874471689253627538491148796325359412768
3
+ 091507000040060003000028097004000029720000805003190060000053400070600030306710082,291537648847961253635428197164875329729346815583192764918253476472689531356714982
4
+ 000859100006070090500000040010006072403001000069230080007020503002004018301067900,274859136136472895598613247815946372423781659769235481947128563652394718381567924
5
+ 090040702400980001058060300030005020206000000570100804000408507980070010041309060,693541782427983651158267349834795126216834975579126834362418597985672413741359268
6
+ 051003029060800400003100700080026000010940038902000060000680007700050090040031052,851473629267895413493162785384526971516947238972318564125689347738254196649731852
7
+ 040902006001470308000580079006000050023058040780103000050320067000009200009760100,847932516591476328362581479416297853923658741785143692158324967674819235239765184
8
+ 701004003040009060000508027003800109025690004008100070800023900004050030060700045,781264593542379861936518427673842159125697384498135672857423916214956738369781245
9
+ 356019020204000000000806005010970500000680090708030410503240009007003600900005280,356419827284357961179826345612974538435681792798532416563248179827193654941765283
10
+ 000010006030820009504300070000050400600009130002000087100580060005043020097102800,289715346736824519514396278371658492648279135952431687123587964865943721497162853
11
+ 000006120800409000560000080000205000302014005090830060140052003679001800000900704,934586127821479536567123489716295348382614975495837261148752693679341852253968714
12
+ 070080000150004020090005637067003009910500403804910000000607800620030501005001040,276389154153764928498125637567843219912576483834912765341657892629438571785291346
13
+ 904002700006100000580069000050030280008670001091004005405200016030400090000350028,914582763726143859583769142657931284248675931391824675475298316832416597169357428
14
+ 100030082090008056000050340410700000283094700050006008072900004960071030800020000,145637982397248156628159347416782593283594761759316428572963814964871235831425679
15
+ 700069050849150300003740120005007600078000903100600002052014700000000098400030000,721369854849152367563748129235497681678521943194683572952814736316275498487936215
16
+ 809400006210006030000710000102050007070060309060380042590003024004120500600800900,859432716217596438346718295132954687478261359965387142591673824784129563623845971
17
+ 900200040001006035350900008080005009070060010400390006130870024004002750006041800,968253147241786935357914268683125479579468312412397586135879624894632751726541893
18
+ 694002010003640820007500000100005060930020700020800100000000439050071002800306000,694782315513649827287513946178935264936124758425867193761258439359471682842396571
19
+ 007800003039600542040509700706490000084025030000000091063010080000004350002000006,657842913839671542241539768716493825984125637325786491563917284178264359492358176
20
+ 800104360000908705040063091000009678497200050500000000056030004002017000010000080,879154362631928745245763891123549678497286153568371429756832914982417536314695287
21
+ 090005204300061700840903065009000048050006300180007090430200010072000800500070006,691785234325461789847923165769132548254896371183547692436258917972614853518379426
22
+ 037400960200005000800060003400020050001703026080109304340902008096080000000600405,537418962264395781819267543473826159951743826682159374345972618196584237728631495
23
+ 050010009000809300062074008700002153090560840200083060004008000070640035001020700,358216479417859326962374518786492153193567842245183967524738691879641235631925784
24
+ 060000900003100254004020107000400006107308000029050038085270600700039005096800002,261547983973186254854923167538492716147368529629751438385274691712639845496815372
25
+ 100300059020670080894000030500700390400910000000082701006000410001005006003020508,167348259325679184894251637518764392472913865639582741256897413781435926943126578
26
+ 000000030068470000004139007206000050100804000000657000010000529082560074070300180,791286435368475912524139867246913758157824693839657241613748529982561374475392186
27
+ 408060700009030006150002008000000800007300049502409010200901307043080021070500060,438165792729834156156792438394216875617358249582479613265941387943687521871523964
28
+ 003500007091000003025098600000470980007080026350060070062009500800200139000041000,683514297491627853725398614216473985947185326358962471162739548874256139539841762
29
+ 005048270002030609010070043000019007004000800960800050701000026680290010050007430,395648271472531689816972543528419367134756892967823154741385926683294715259167438
30
+ 900007005140000080002900070004780000230500010805001046790806120000403069080100300,968217435147635982352948671614782593239564718875391246793856124521473869486129357
31
+ 090070300800960010600014507009005706184020050000300100002036000570280400040009020,491572368857963214623814597239145786184627953765398142912436875576281439348759621
32
+ 408090003000000600090251080010004000649030100027060308060720000001300590700008460,458697213172483659396251784813574926649832175527169348964725831281346597735918462
33
+ 080001007107054208094000030048020070605730810003500600360000000000900500000470961,286391457137654298594287136948126375625739814713548629369815742471962583852473961
34
+ 502090080030702050600001004705000620006008149040030005201900007004610300000007008,512394786438762951697851234785149623326578149149236875251983467874615392963427518
35
+ 010000900306500008004080120400300060501870004602005000190403580000092030000000700,718234956326519478954687123479321865531876294682945317197463582845792631263158749
36
+ 400300005000007301008250907210780506509003000000069103090018700000400000182000600,471396285925847361638251947213784596569123478847569123394618752756432819182975634
37
+ 820060900945000000001000275510904000060001002304007000000830701008200090003015640,827563914945172836631498275512984367769351482384627159456839721178246593293715648
38
+ 035107000600000020040026801070230010803701540500908600004000089007503000902004700,235187496618459327749326851476235918893761542521948673354672189187593264962814735
39
+ 003054078400209010002070009057630000090000100000700023080015600004003080006800001,963154278475289316812376459257631894398542167641798523789415632124963785536827941
40
+ 301042906680000500090001270000109020150600034200350007004000708006038010007095000,371542986682973541495861273743189625159627834268354197934216758526738419817495362
41
+ 000085070064070800000091236001900007000000080738200094006704051040800302509063000,213685479964372815875491236451938627692547183738216594386724951147859362529163748
42
+ 003590461600007900800100005481020090200080006050300020049700008300400102060053000,723598461615247983894136275481625397237984516956371824549712638378469152162853749
43
+ 004500000670010000120700094000080700750200609098000520086051002000094350003602910,834529167679418235125736894362985741751243689498167523986351472217894356543672918
44
+ 003067105100840600090002040205000907700000380048001000906200050000100798001450006,483967125152843679697512843265384917719625384348791562936278451524136798871459236
45
+ 104702008300560009062380500040270000201600735000009080503000092018000403000000670,154792368387564129962381547845273916291648735736159284573416892618927453429835671
46
+ 079000400602907005100008060751030006008004590000062000900243000300005720006000804,879356412642917385135428967751839246268174593493562178987243651314685729526791834
47
+ 320600900400030805007900010000040106830201000005079400006800090009000358500107020,328615947491732865657984213972548136834261579165379482246853791719426358583197624
48
+ 030000400790000260000015008002750040400620980673008000820040095000060000507100603,138276459795834261264915738982751346451623987673498512826347195319562874547189623
49
+ 000400000798000004003902106000028603021060008050040920480050009009076050100300700,612437895798615234543982176974528613321769548856143927487251369239876451165394782
50
+ 078090056100600040040013027007000690001408200250000001090000500480706003500032008,378294156125687349649513827837125694961478235254369781793841562482756913516932478
@@ -0,0 +1,50 @@
1
+ 500007109130096020004030050000008942001003600020750080903040060007001008800200503,562487139138596724794132856375618942481923675629754381913845267257361498846279513
2
+ 060010080830005207000400053027050906900200041000340000340600002005807090079000068,562713489834965217791482653427158936953276841186349725348691572615827394279534168
3
+ 050720380000603900100050000040008061081006400900500270310890750006040002290000003,659721384427683915138459627742938561581276439963514278314892756876345192295167843
4
+ 600700900002064000080009053040600570037208400005000201400070300098510006070043010,613785924952364187784129653249631578137258469865497231421876395398512746576943812
5
+ 208000300064308007000071006090063240003040570007105090005024109040009030000600800,278496315164358927359271486591763248683942571427185693735824169846519732912637854
6
+ 934510000020000590006008070790006120000070080605100400058002003300040700002950016,934517268827634591516298374793486125241375689685129437158762943369841752472953816
7
+ 000000000709420100645800007108009030060140902000030580310006008007010290000205700,231567849789423165645891327128759634563148972974632581312976458857314296496285713
8
+ 007000000000805200321000600600070300930120078000509004700301020250600019068700050,587263941496815237321947685615478392934126578872539164749351826253684719168792453
9
+ 000003200000500980008046001760010000030205007159600403010002570400800090200730064,697183245341527986528946731762314859834295617159678423913462578476851392285739164
10
+ 016700200020000057400309100068003000501240090000008735004000600090150080300970010,916785243823461957475329168768593421531247896249618735154832679697154382382976514
11
+ 006009805408170902070200040900020008010700060063050007004538600002400059031000000,126349875458176932379285146947623518815794263263851497794538621682417359531962784
12
+ 005400012000009300019800004000040028100027590096001070400760000080130650037005800,865473912724619385319852764573946128148327596296581473451768239982134657637295841
13
+ 009300050045010062000020008001500070400060890807203600603048000020907100700000340,289376451345819762176425938961584273432761895857293614693148527524937186718652349
14
+ 000307090009050081600000205900008043850610700020049100710000456002064030000080900,185327694279456381643891275961278543854613729327549168718932456592164837436785912
15
+ 005004002100000400040768000020980051300405706807030200070510008080200937496000000,735194862168352479942768513624987351319425786857631294273519648581246937496873125
16
+ 080040320269700504000001000540020800800175030000800607005080700070913000402600009,781549326269738514354261978547326891896175432123894657915482763678913245432657189
17
+ 000620578000000400100870092003008750607410000902060130009100023030005600840700000,394621578728359461165874392413298756657413289982567134579146823231985647846732915
18
+ 001806035540107900000000001006700204090005000173900000200090060780402300300501080,921846735548137926637259841856713294492685173173924658214398567785462319369571482
19
+ 094000020000001008760850430043008010009006500002179006000207801010300694050600000,894763125325941768761852439643528917179436582582179346436297851217385694958614273
20
+ 300800067001020400040003059003000906029010540080704000098602730600900020005078001,352849167961527483847163259413285976729316548586794312198652734674931825235478691
21
+ 410007960000010000208005070500900000070060038006351400192000004300002650040780001,413827965759613842268495173534978216971264538826351497192536784387142659645789321
22
+ 078000003020000106500930200000082009396510004000400000104060058200305460700800930,678124593923758146541936287457682319396517824812493675134269758289375461765841932
23
+ 080495017100003009050208040035064008200039406007000000000080090074002300006107050,683495217142673589759218643935764128218539476467821935521386794874952361396147852
24
+ 000000570708002000240503010000209000130048090800030604000300006402907305500014700,391486572758192463246573918674259831135648297829731654987325146412967385563814729
25
+ 075004000108320000600050901004019072000000506300705180001008230907002000800030690,275194863198326754643857921584619372719283546326745189461978235937562418852431697
26
+ 000700512040060903000928000200005000100080276386010400030000090408000305051046700,869734512742561983513928647274695831195483276386217459637852194428179365951346728
27
+ 063000102400209700005700000902075800840000060001008203506091080070802035000400001,763584192418239756295716348932675814847123569651948273526391487174862935389457621
28
+ 003604108800003206400020070720000004390001000005400903000106090682075400001830500,273694158859713246416528379728359614394261785165487923537146892682975431941832567
29
+ 010680903400052000008709520050000730043070051706000000000960000502801000007003846,215684973479352168638719524851496732943278651726135489384967215562841397197523846
30
+ 070000000004038000200901075008305609006000200520017304090050703105400806400200091,671542938954738162283961475748325619316894257529617384892156743135479826467283591
31
+ 006010009009204070075000340014097800060300057020000006050060920800540060000809000,346715289189234675275986341514697832968321457723458196457163928891542763632879514
32
+ 900167000234009000000400850002000040100086003069205007720000010005023600800510300,958167234234859761671432859582371946147986523369245187723698415415723698896514372
33
+ 030401020084053106015620000000085090021090403906002007007009080002060035040030001,639471528284953176715628349473185692821796453956342817367519284192864735548237961
34
+ 020600003907800051000090480073049000196730008005100000060070200500001034014053009,428615973937824651651397482873549126196732548245186397369478215582961734714253869
35
+ 034006500000040097120080000009600000310500740700100082000002001040709053875400600,934276518568341297127985364489627135312598746756134982693852471241769853875413629
36
+ 005430070800205000020000609000090160409006300730810200601000500980003040500780020,165439872897265413324178659258397164419526387736814295671942538982653741543781926
37
+ 000410090300000204160950000702000501590800400000600007030090050620108700040007832,278413695359786214164952378782349561596871423413625987837294156625138749941567832
38
+ 690000010000004050080107402007810000035092000806400007140005030702300068000080504,694528713271934856583167492927816345435792681816453927148675239752349168369281574
39
+ 703080600060003500200009043040710059001900826029500000008050031000001200050046007,713485692964123578285679143846712359571934826329568714698257431437891265152346987
40
+ 257000608000000702360090000035401070009008501070020000090010000640085300001072906,257143698914856732368297154835461279429738561176529483793614825642985317581372946
41
+ 000407210002900605000010089800305900470620000300094107780560001010002800056008000,698457213142983675537216489861375942479621538325894167784569321913742856256138794
42
+ 010000075000350604000070390400201000300049002809700060200010800086003950794060003,613894275978352614542176398467281539351649782829735461235917846186423957794568123
43
+ 006000004010030002007004930000009473204600081750108060109005008040300095300201000,936752814418936752527814936861529473294673581753148269179465328642387195385291647
44
+ 080005009902070000007601203070026000304700008600103040053000860490000100001040057,186235479932874516547691283875426391314759628629183745753912864498567132261348957
45
+ 060050070020740008005201030081063020002007800940000503300000094074002010050610007,468359172123746958795281436581463729632597841947128563316875294874932615259614387
46
+ 023057908064800000900030167002000300450000009008471506000063080000500001510090402,123657948764819253985234167672985314451326879398471526249163785837542691516798432
47
+ 653007200000000086000100340700600000286003000401902507070008001140309600000020058,653847219914235786827196345795614823286573194431982567572468931148359672369721458
48
+ 006090501020035870008400000040703000800010030079650080100009324093021005600007900,736298541924135876518476293241783659865912437379654182187569324493821765652347918
49
+ 506097030020001006004002080900000350073000104000620000805070420010840079600009005,586497231729381546134562987968714352273958164451623798895176423312845679647239815
50
+ 000907006004250080075063014067500090912300400000002063000010835041008200009700000,183947526694251387275863914367584192912376458458192763726419835541638279839725641
@@ -0,0 +1,210 @@
1
+ import csv,random,sys
2
+ import numpy as np
3
+ import gymnasium as gym
4
+ import gymnasium.spaces as spaces
5
+
6
+ from PySide6.QtWidgets import QApplication
7
+ from gymnasium_sudoku.rendering import Gui
8
+ from copy import deepcopy
9
+ from pathlib import Path
10
+
11
def _get_region(x,y,board,n = 3):
    """Return the peers of the target cell ``(x, y)`` as three 1-D arrays.

    The target cell itself is left out of every group, so the result is
    ``(row without T, column without T, n-by-n block without T)``.
    The input board is not modified.
    """
    def _drop(vector, index):
        # Everything before ``index`` followed by everything after it.
        return np.concatenate((vector[:index], vector[index + 1:]))

    row_peers = _drop(board[x], y)
    col_peers = _drop(board[:, y], x)

    # Top-left corner of the block that contains the target cell.
    top, left = x - x % n, y - y % n
    cells = board[top:top + n, left:left + n].ravel()
    # Position of the target inside the flattened (row-major) block.
    target = (x - top) * n + (y - left)
    block_peers = np.delete(cells, target)
    return row_peers, col_peers, block_peers
28
+
29
def _is_row_complete(board,x):
    """Tell whether row ``x`` of ``board`` has no empty (zero) cell left."""
    return np.all(board[x] != 0)
32
+
33
def _is_col_complete(board,y):
    """Tell whether column ``y`` of ``board`` has no empty (zero) cell left."""
    return np.all(board[:, y] != 0)
36
+
37
def _is_region_complete(board,x,y,n=3):
    """Tell whether the n-by-n block containing cell ``(x, y)`` has no zero cell."""
    # Top-left corner of the block that holds the cell.
    top, left = x - x % n, y - y % n
    return np.all(board[top:top + n, left:left + n] != 0)
41
+
42
def _sudoku_board(csv_path,line_pick):
    """Load one puzzle/solution pair from a boards CSV file.

    Every CSV row is expected to hold two columns of digits: the puzzle and
    its solution. Each column is reshaped to ``(9, 9)``.

    Args:
        csv_path: Path of the CSV file to read.
        line_pick: Zero-based index of the row to load.

    Returns:
        A ``(board, solution)`` tuple of ``(9, 9)`` ``np.int32`` arrays.

    Raises:
        IndexError: If the file has no row with index ``line_pick``.
        ValueError: If the selected row does not have exactly two columns
            or a column cannot be reshaped to ``(9, 9)``.
    """
    # newline="" is the opening mode recommended by the csv module.
    with open(csv_path, newline="") as file:
        for n, row in enumerate(csv.reader(file)):
            if n == line_pick:
                chosen_line = row
                break  # no need to read the rest of the file
        else:
            raise IndexError(
                f"row {line_pick} not found in {csv_path}"
            )

    puzzle, answer = chosen_line
    # Iterating a digit string yields one character per cell.
    board = np.fromiter(puzzle, dtype=np.int32).reshape(9, 9)
    solution = np.fromiter(answer, dtype=np.int32).reshape(9, 9)
    return board, solution
53
+
54
def _gen_board(env_mode,eval_mode):
    """Pick a random board for the given mode from the bundled datasets.

    Args:
        env_mode: ``"biased"`` or ``"easy"``; selects the dataset folder.
        eval_mode: When true the board is drawn from ``test_boards.csv``,
            otherwise from ``train_boards.csv``.

    Returns:
        A ``(state, solution)`` tuple as produced by ``_sudoku_board``.

    Raises:
        ValueError: If ``env_mode`` is not a known mode.
    """
    dataset_dirs = {"biased": "v0_biased", "easy": "v1_easy"}
    if env_mode not in dataset_dirs:
        raise ValueError(
            f"unknown mode {env_mode!r}; expected one of {sorted(dataset_dirs)}"
        )

    file_name = "test_boards.csv" if eval_mode else "train_boards.csv"
    csv_path = Path(__file__).parent / "datasets" / dataset_dirs[env_mode] / file_name
    # Rows 0..49 are sampled, i.e. the first 50 boards of the chosen file.
    line_pick = random.randint(0, 49)
    # _sudoku_board builds fresh arrays on every call, so no copy is needed.
    state, solution = _sudoku_board(csv_path, line_pick)
    return state, solution
71
+
72
+
73
+ V0_MODES = ["biased"]
74
+ V1_MODES = ["easy"]
75
+
76
class Gym_env(gym.Env):
    """Gymnasium environment in which an agent fills in a 9x9 Sudoku board.

    An action is a ``(row, column, value)`` triple and the observation is the
    current board as a ``(9, 9)`` int32 array in which 0 marks an empty cell.
    The reward shaping depends on ``mode`` (see ``_reward_biased`` and
    ``_reward_easy``).
    """
    metadata = {"render_modes":["human"],"render_fps":60,"rendering_attention":False}

    def __init__(self,
                 mode,
                 render_mode=None,
                 horizon=400,
                 eval_mode:bool=False,
                 rendering_attention=False
                 ):
        """Create the environment and draw a first board.

        Args:
            mode: ``"biased"`` or ``"easy"``; selects dataset and reward rule.
            render_mode: ``"human"`` opens the Qt window, ``None`` disables rendering.
            horizon: Number of steps after which ``truncated`` becomes true.
            eval_mode: Draw boards from the test set instead of the train set.
            rendering_attention: Forwarded to the GUI to add the attention grid.
        """
        super().__init__()

        self.env_mode = mode
        self.render_mode = render_mode
        self.horizon = horizon
        self.eval_mode = eval_mode
        self.rendering_attention = rendering_attention
        self.env_steps = 0
        self.action = None          # last action passed to step()
        self.true_action = False    # whether that action changed the board

        self.action_space = spaces.Tuple(
            (
                spaces.Discrete(9, start=0),   # row
                spaces.Discrete(9, start=0),   # column
                spaces.Discrete(9, start=1),   # value, 1..9
            )
        )
        self.observation_space = spaces.Box(0,9,(9,9),dtype=np.int32)

        self.state,self.solution = _gen_board(self.env_mode,self.eval_mode)
        self.mask = (self.state==0)            # True where the agent may write
        self.conflicts = (self.state==0).sum()

        # The GUI only exists in "human" mode; both stay None otherwise.
        self.app = None
        self.gui = None
        if self.render_mode=="human":
            self.app = QApplication.instance()
            if self.app is None:
                self.app = QApplication([])

            self.gui = Gui(deepcopy(self.state),self.env_mode,self.rendering_attention)

    def reset(self,seed=None,options=None):
        """Start a new episode on a freshly drawn board.

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        if seed is not None:
            # Board selection uses the global ``random`` module.
            random.seed(seed)
            np.random.seed(seed)

        self.state,self.solution = _gen_board(self.env_mode,self.eval_mode)
        self.env_steps = 0
        self.mask = (self.state==0)
        self.conflicts = self.mask.sum()
        # Forget the previous episode's move so render() cannot replay it
        # onto the new board.
        self.action = None
        self.true_action = False

        if self.render_mode =="human":
            self.gui.reset(deepcopy(self.state))
        return np.array(self.state,dtype=np.int32),{}

    def _reward_biased(self,action,state):
        """Reward rule of the biased mode: only correct digits are written.

        Returns ``(reward, true_action, state)``; ``state`` is modified in place.
        """
        x,y,value = action

        # Writing on a locked cell or a wrong digit leaves the board untouched.
        if not self.mask[x,y] or value != self.solution[x,y]:
            return -0.1,False,state

        state[x,y] = value
        self.mask[x,y] = False      # a correctly filled cell becomes locked
        reward = 0.2

        # Bonus for every unit (row / column / block) this move completes.
        if _is_row_complete(state,x):
            reward+= 0.2*9
        if _is_col_complete(state,y):
            reward+= 0.2*9
        if _is_region_complete(state,x,y):
            reward+= 0.2*9
        return reward,True,state

    def _reward_easy(self,action,state):
        """Reward rule of the easy mode: any digit may be written on a free cell.

        Returns ``(reward, true_action, state)``; ``state`` is modified in place.
        """
        x,y,value = action

        if not self.mask[x][y]:
            return -0.1,False,state

        state[x][y] = value
        xlist,ylist,block = _get_region(x,y,state)

        # Full reward when the digit clashes with none of its peers.
        if not value in np.concatenate((xlist,ylist,block)):
            return 0.2*3,True,state

        # Otherwise 0.2 per unit whose remaining filled peers are all distinct.
        reward = 0
        for peers in (xlist,ylist,block):
            filled = peers[peers!=0]
            if len(filled) == len(np.unique(filled)):
                reward += 0.2
        return reward,True,state

    def _get_reward(self,env_mode,action,state):
        """Apply ``action`` to ``state`` and return ``(reward, true_action, state)``.

        Raises:
            ValueError: If no reward rule matches the mode.
        """
        # The biased check deliberately reads the instance attribute first,
        # matching how this method has always dispatched.
        if self.env_mode=="biased":
            return self._reward_biased(action,state)
        if env_mode=="easy":
            return self._reward_easy(action,state)
        raise ValueError(f"unsupported mode {env_mode!r}")

    def step(self,action):
        """Play one ``(row, column, value)`` action.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.

        Raises:
            ValueError: If the row or the column is outside ``0..8``.
        """
        # Both coordinates must be checked individually.
        if action[0] not in range(9) or action[1] not in range(9):
            raise ValueError(
                f"row and column must be in 0..8, got ({action[0]}, {action[1]})"
            )
        self.env_steps+=1
        self.action = action

        reward,true_action,obs = self._get_reward(self.env_mode,self.action,self.state)
        self.true_action = true_action
        self.state = obs

        truncated = (self.env_steps>=self.horizon)
        done = np.array_equal(self.state,self.solution)
        if done:
            reward+=0.2*81
        info = {}
        # Rounding hides float accumulation noise; float() keeps the type stable.
        return np.array(self.state,dtype=np.int32),float(round(reward,1)),done,truncated,info

    def render(self):
        """Show the window and draw the last action; no-op unless in "human" mode."""
        if self.render_mode != "human" or self.gui is None:
            return None
        self.gui.show()
        self.gui.updated(self.action,self.true_action)
        self.app.processEvents()
210
+
@@ -0,0 +1,56 @@
1
+ import numpy as np
2
+
3
+ # tb = test board
4
+ # ts = test solution
5
+
6
+ tb_1 = np.array([
7
+ [8, 0, 0, 5, 3, 1, 0, 0, 0],
8
+ [0, 0, 0, 0, 4, 0, 3, 0, 1],
9
+ [1, 0, 0, 8, 0, 0, 0, 0, 0],
10
+ [0, 0, 4, 0, 0, 5, 6, 0, 0],
11
+ [0, 0, 3, 9, 0, 2, 1, 4, 0],
12
+ [6, 1, 5, 0, 7, 0, 0, 9, 8],
13
+ [0, 2, 0, 0, 9, 6, 0, 1, 0],
14
+ [0, 5, 7, 2, 0, 8, 0, 0, 6],
15
+ [9, 6, 1, 7, 5, 3, 0, 2, 4]],dtype=np.int32)
16
+
17
+ ts_1 = np.array([
18
+ [8, 4, 9, 5, 3, 1, 7, 6, 2],
19
+ [5, 7, 2, 6, 4, 9, 3, 8, 1],
20
+ [1, 3, 6, 8, 2, 7, 4, 5, 9],
21
+ [2, 9, 4, 1, 8, 5, 6, 7, 3],
22
+ [7, 8, 3, 9, 6, 2, 1, 4, 5],
23
+ [6, 1, 5, 3, 7, 4, 2, 9, 8],
24
+ [3, 2, 8, 4, 9, 6, 5, 1, 7],
25
+ [4, 5, 7, 2, 1, 8, 9, 3, 6],
26
+ [9, 6, 1, 7, 5, 3, 8, 2, 4]],dtype=np.int32)
27
+
28
+
29
+
30
+ tb_2 = np.array([
31
+ [5, 9, 1, 8, 7, 0, 0, 2, 4],
32
+ [8, 4, 0, 2, 3, 1, 0, 5, 6],
33
+ [0, 0, 0, 0, 0, 9, 1, 0, 7],
34
+ [0, 6, 0, 0, 0, 0, 1, 0, 0],
35
+ [9, 0, 8, 3, 0, 4, 0, 0, 0],
36
+ [0, 0, 0, 6, 8, 0, 0, 4, 0],
37
+ [0, 0, 0, 0, 0, 3, 0, 8, 0],
38
+ [1, 0, 6, 0, 2, 0, 4, 0, 0],
39
+ [7, 8, 0, 0, 0, 0, 2, 9, 0]],dtype=np.int32)
40
+
41
+ ts_2 = np.array([
42
+ [5, 9, 1, 8, 7, 6, 3, 2, 4],
43
+ [8, 4, 7, 2, 3, 1, 9, 5, 6],
44
+ [6, 2, 3, 4, 5, 9, 8, 1, 7],
45
+ [4, 6, 2, 5, 9, 7, 1, 3, 8],
46
+ [9, 7, 8, 3, 1, 4, 5, 6, 2],
47
+ [3, 1, 5, 6, 8, 2, 7, 4, 9],
48
+ [2, 5, 9, 7, 4, 3, 6, 8, 1],
49
+ [1, 3, 6, 9, 2, 8, 4, 7, 5],
50
+ [7, 8, 4, 1, 6, 5, 2, 9, 3]],dtype=np.int32)
51
+
52
+
53
+
54
+
55
+
56
+
@@ -0,0 +1,168 @@
1
+ from PySide6 import QtCore,QtGui
2
+ from PySide6.QtWidgets import QWidget,QGridLayout,QLineEdit,QHBoxLayout
3
+ from PySide6.QtCore import QTimer
4
+ from PySide6.QtGui import QIcon
5
+
6
+
7
class Gui(QWidget):
    """Qt widget drawing a 9x9 Sudoku board and, optionally, an attention grid.

    Text colour encodes the cell status: ``white`` for the clues of the
    initial board, ``transparent`` for empty cells and ``black`` for cells
    written through ``updated``.
    """

    def __init__(self,board,mode,rendering_attention=False):
        """Build the widgets and display ``board``.

        Args:
            board: 9x9 indexable board; 0 marks an empty cell.
            mode: Environment mode; ``"biased"`` selects the v0 update rule.
            rendering_attention: When true a second 9x9 grid is added for
                ``render_attention``.
        """
        super().__init__()
        self.setWindowTitle("Sudoku")
        self.setMaximumSize(40,40)
        self.setWindowIcon(QIcon("game.png"))
        self.game = board
        self.mode = mode

        # NOTE(review): this attribute shadows QWidget.size() on the instance;
        # kept under this name for backward compatibility.
        self.size = 9
        self.rendering_attention = rendering_attention

        self.main_layout = QHBoxLayout()

        # Sudoku grid
        self.grid = QGridLayout()
        self.sudoku_widget = QWidget()
        self.sudoku_widget.setLayout(self.grid)
        self.main_layout.addWidget(self.sudoku_widget)
        self.grid.setVerticalSpacing(0)
        self.grid.setHorizontalSpacing(0)
        self.grid.setContentsMargins(0,0,0,0)

        self.cells = [[QLineEdit(self) for _ in range(self.size)] for _ in range (self.size)]
        # One pass over the grid is enough to configure and place every cell.
        for x in range(self.size):
            for y in range(self.size):
                cell = self.cells[x][y]
                cell.setFixedSize(40,40)
                cell.setReadOnly(True)
                cell.setAlignment(QtCore.Qt.AlignCenter)
                self.grid.addWidget(cell,x,y)
        self._show_board(board)

        if self.rendering_attention:
            # Attention grid
            self.attn_grid = QGridLayout()
            self.attn_widget = QWidget()
            self.attn_widget.setLayout(self.attn_grid)
            self.main_layout.addWidget(self.attn_widget)
            self.attn_grid.setVerticalSpacing(0)
            self.attn_grid.setHorizontalSpacing(0)
            self.attn_grid.setContentsMargins(0,0,0,0)

            self.attn_cells = [[QLineEdit(self) for _ in range(self.size)] for _ in range(self.size)]
            for x in range(self.size):
                for y in range(self.size):
                    cell = self.attn_cells[x][y]
                    cell.setFixedSize(40,40)
                    cell.setAlignment(QtCore.Qt.AlignCenter)
                    cell.setStyleSheet(
                        "background-color: black;"
                        "border:none;"
                    )
                    self.attn_grid.addWidget(cell, x, y)

        self.setLayout(self.main_layout)

    @staticmethod
    def _cell_style(row,column,color,background="grey"):
        """Return the stylesheet of the cell at ``(row, column)``."""
        # Thick left/top borders mark the start of a 3x3 block (except at the edge).
        left = (3 if (column % 3 == 0 and column!= 0) else 0.5)
        top = (3 if (row % 3 == 0 and row!= 0) else 0.5)
        return (
            f"background-color:{background};"
            f"border-left:{left}px solid black;"
            f"border-top: {top}px solid black;"
            "border-right: 1px solid black;"
            "border-bottom: 1px solid black;"
            f"color: {color};"
            "font-weight: None;"
            "font-size: 20px"
        )

    def _show_board(self,board):
        """Write every value of ``board`` into its cell and restore the base style."""
        for x in range(self.size):
            for y in range(self.size):
                number = str(board[x][y])
                self.cells[x][y].setText(number)
                # Empty cells keep their "0" text but it is drawn transparent.
                color = ("transparent" if int(number) == 0 else "white")
                self.cells[x][y].setStyleSheet(self._cell_style(x,y,color))

    def _cell_color(self,row,column):
        """Read the current text colour back from the cell's stylesheet."""
        for declaration in self.cells[row][column].styleSheet().split(";"):
            key,sep,val = declaration.partition(":")
            # Exact match so that "background-color" is not picked up.
            if sep and key.strip() == "color":
                return val.strip()
        raise KeyError("color")

    def updated(self,action:"tuple[int,int,int] | None",true_value:bool=False,attention_weights=None):
        """Draw the effect of ``action`` (``(row, column, value)``) on the grid.

        Args:
            action: Last action played, or ``None`` to do nothing.
            true_value: In biased mode, whether the action was the correct
                digit; only then is the digit written.
            attention_weights: Accepted for API compatibility; currently unused.
        """
        if action is None:
            return
        row,column,value = action
        cellColor = self._cell_color(row,column)

        if self.mode == "biased": # v0 version----------
            # Clues (white) and already solved cells (black) are never touched.
            if cellColor not in ("white","black") and value in range(1,10):
                if true_value:
                    self.cells[row][column].setText(str(value))
                    self.game[row][column] = value
                    color = "black"
                else:
                    color = cellColor

                self.update_style(action,color)

        else: # v1 version-----------
            # Any non-clue cell may be overwritten.
            if not cellColor=="white":
                self.cells[row][column].setText(str(value))
                color = "black"
            else:
                color = cellColor

            self.update_style(action,color)

    def update_style(self,action,color):
        """Restyle the cell targeted by ``action`` with text colour ``color``.

        A ``black`` (agent-written) cell gets an orange background, any other
        colour a grey one.
        """
        row,column,value = action
        background = "orange" if color=="black" else "grey"
        self.cells[row][column].setStyleSheet(
            self._cell_style(row,column,color,background)
        )

    def reset(self,board):
        """Replace the displayed board with ``board`` and restore the base style."""
        self.game = board
        self._show_board(board)

    def render_attention(self,attn):
        """Colour the attention grid from ``attn`` (indexed as ``attn[i, j]``).

        Does nothing when the widget was built without the attention grid.
        Values are presumably in ``[0, 1]``; the derived channel intensity is
        clamped to ``0..255``.
        """
        if not self.rendering_attention:
            return
        for i in range(self.size):
            for j in range(self.size):
                v = attn[i, j]
                intensity = max(0,min(255,int(255 * v)))
                self.attn_cells[i][j].setStyleSheet(
                    f"""
                    background-color: rgb({intensity}, {intensity}, 255);
                    """
                )
168
+
@@ -0,0 +1,91 @@
1
+ Metadata-Version: 2.4
2
+ Name: gymnasium_sudoku
3
+ Version: 0.3.4
4
+ Summary: A Sudoku environment for Reinforcement Learning research
5
+ Author-email: adeottidev@gmail.com
6
+ License: The MIT License
7
+
8
+ Copyright (c) 2025 Author(s)
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in
18
+ all copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26
+ THE SOFTWARE.
27
+ Project-URL: Homepage, https://github.com/adeotti/Gymnasium-Sudoku
28
+ Project-URL: Repository, https://github.com/adeotti/Gymnasium-Sudoku
29
+ Keywords: Reinforcement Learning,game,RL,AI,gymnasium,Sudoku
30
+ Classifier: Development Status :: 3 - Alpha
31
+ Classifier: License :: OSI Approved :: MIT License
32
+ Classifier: Programming Language :: Python :: 3
33
+ Classifier: Programming Language :: Python :: 3.10
34
+ Classifier: Programming Language :: Python :: 3.11
35
+ Classifier: Intended Audience :: Science/Research
36
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
37
+ Requires-Python: >=3.10
38
+ Description-Content-Type: text/markdown
39
+ License-File: LICENSE
40
+ Requires-Dist: gymnasium>=1.1.1
41
+ Requires-Dist: numpy>=1.25.2
42
+ Requires-Dist: PySide6>=6.7.2
43
+ Requires-Dist: typing-extensions>=4.14.0
44
+ Requires-Dist: cloudpickle>=3.1.1
45
+ Requires-Dist: tqdm>=4.67.1
46
+ Requires-Dist: pathlib>=1.0.1
47
+ Dynamic: license-file
48
+
49
+ ```
50
+ pip install gymnasium_sudoku
51
+ ```
52
+
53
+ **Observation space:** The state returned after each `.reset()` or `.step()` is a raw Sudoku board of shape `[9,9]`. This observation can be converted into an image.
54
+
55
+ **Action space:** The action is shaped `[x,y,z]`, where x = the row position of the cell, y = the column position of the cell, and z = the value that should go into that cell. When vectorizing, the current version of the environment does not handle action reshaping, so for n environments the action's shape should be: `[[x0...xn],[y0...yn],[z0...zn]]`
56
+
57
+ **Horizon:** This parameter controls the number of steps after which `Truncated` is set to `True` and the environment is reset. Otherwise, early in training (when the policy is still mostly random and the exploration incentive is high), the policy may corrupt the board and either make it unsolvable or push it into a local minimum. The default value for this parameter is set to 400 for no specific reason and should probably be adjusted during initialization.
58
+
59
+ **Eval mode/Training mode:** By default, `eval_mode` in the `__init__` method is set to `False`. This is used for training, where the environment is reset with one of 50 different boards after each `.reset()` call. During testing, `eval_mode` should be set to `True` in order to evaluate the agent on boards that were never seen during the training phase.
60
+
61
+
62
+ ### Sudoku-v0 (biased version)
63
+ ```python
64
+ import gymnasium as gym
65
+
66
+ env = gym.make("sudoku-v0",mode="biased",render_mode="human",horizon=600,eval_mode=True)
67
+ env.reset()
68
+
69
+ for n in range(int(6e3)):
70
+ env.step(env.action_space.sample())
71
+ env.render()
72
+ ```
73
+ **Bias :**
74
+ Among the induced biases that immensely help guide the learning is the fact that the policy cannot modify a cell that was already correctly filled, in addition to the untouchable cells present from the beginning.
75
+
76
+ **Measuring learning for this version of the environment:** The current structure of the environment allows a completely random policy to solve it (this is true for easy boards in the current version of the environment), so a good way to measure learning might be to use the number of steps over N episodes under a random policy as a `baseline`. This implies that a policy able to consistently solve the test boards in fewer steps over the same N episodes used to run a random policy is, in theory, displaying some sort of learning.
77
+
78
+
79
+ ### Sudoku-v1
80
+ ```python
81
+ import gymnasium as gym
82
+
83
+ env = gym.make("sudoku-v1",mode="easy",render_mode="human",horizon=600,eval_mode=True)
84
+ env.reset()
85
+
86
+ for n in range(int(6e3)):
87
+ env.step(env.action_space.sample())
88
+ env.render()
89
+ ```
90
+
91
+
@@ -0,0 +1,18 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ gymnasium_sudoku/__init__.py
5
+ gymnasium_sudoku/environment.py
6
+ gymnasium_sudoku/puzzle.py
7
+ gymnasium_sudoku/rendering.py
8
+ gymnasium_sudoku.egg-info/PKG-INFO
9
+ gymnasium_sudoku.egg-info/SOURCES.txt
10
+ gymnasium_sudoku.egg-info/dependency_links.txt
11
+ gymnasium_sudoku.egg-info/requires.txt
12
+ gymnasium_sudoku.egg-info/top_level.txt
13
+ gymnasium_sudoku/datasets/v0_biased/test_boards.csv
14
+ gymnasium_sudoku/datasets/v0_biased/train_boards.csv
15
+ gymnasium_sudoku/datasets/v1_easy/test_boards.csv
16
+ gymnasium_sudoku/datasets/v1_easy/train_boards.csv
17
+ tests/test_1.py
18
+ tests/test_2.py
@@ -0,0 +1,7 @@
1
+ gymnasium>=1.1.1
2
+ numpy>=1.25.2
3
+ PySide6>=6.7.2
4
+ typing-extensions>=4.14.0
5
+ cloudpickle>=3.1.1
6
+ tqdm>=4.67.1
7
+ pathlib>=1.0.1
@@ -0,0 +1 @@
1
+ gymnasium_sudoku
@@ -0,0 +1,48 @@
1
+ [build-system]
2
+ requires = ["setuptools>=77.0.3"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "gymnasium_sudoku"
7
+ authors = [{ name = "", email = "adeottidev@gmail.com" }]
8
+ description = "A Sudoku environment for Reinforcement Learning research"
9
+
10
+ version = "0.3.4"
11
+ requires-python = ">=3.10"
12
+ license = { file = "LICENSE" }
13
+ readme = "README.md"
14
+ keywords = ["Reinforcement Learning", "game", "RL", "AI", "gymnasium","Sudoku"]
15
+
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.10",
21
+ "Programming Language :: Python :: 3.11",
22
+ "Intended Audience :: Science/Research",
23
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
24
+ ]
25
+
26
+ dependencies = [
27
+ "gymnasium>=1.1.1",
28
+ "numpy>=1.25.2",
29
+ "PySide6>=6.7.2",
30
+ "typing-extensions>=4.14.0",
31
+ "cloudpickle>=3.1.1",
32
+ "tqdm>=4.67.1",
33
+ "pathlib>=1.0.1"
34
+ ]
35
+
36
+ [project.urls]
37
+ Homepage = "https://github.com/adeotti/Gymnasium-Sudoku"
38
+ Repository = "https://github.com/adeotti/Gymnasium-Sudoku"
39
+
40
+ [tool.setuptools.packages.find]
41
+ where = ["."]
42
+ exclude = ["venv*", "test*", "tests*", "*.egg-info", "dist", "build"]
43
+
44
+ [tool.setuptools.package-data]
45
+ gymnasium_sudoku = [
46
+ "datasets/v0_biased/*.csv",
47
+ "datasets/v1_easy/*.csv"
48
+ ]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,59 @@
1
+ import pytest
2
+ import gymnasium as gym
3
+ import numpy as np
4
+ from gymnasium.utils.env_checker import check_env
5
+ import gymnasium_sudoku
6
+
7
+
8
def test_env_creation():
    """Each environment id accepts its own mode and rejects the other one."""
    # Matching id/mode pairs must build without raising.
    for env_id, mode in (("sudoku-v0", "biased"), ("sudoku-v1", "easy")):
        env = gym.make(env_id, mode=mode)
        env.close()

    # Mismatched pairs are expected to be rejected with ValueError.
    for env_id, mode in (("sudoku-v0", "easy"), ("sudoku-v1", "biased")):
        with pytest.raises(ValueError):
            gym.make(env_id, mode=mode)
35
+
36
+
37
+ def test_env_checker():
38
+ try: # test v0
39
+ env = gym.make("sudoku-v0",mode="biased")
40
+ check_env(env.unwrapped)
41
+ print("sudoku-v0 passes environment checker")
42
+ env.close()
43
+ except Exception as e:
44
+ print(f"sudoku-v0 failed environment checker:{e}")
45
+
46
+ try: # test v1
47
+ env = gym.make("sudoku-v1",mode="easy")
48
+ check_env(env.unwrapped)
49
+ print("sudoku-v1 passes environment checker")
50
+ env.close()
51
+ except Exception as e:
52
+ print(f"sudoku-v1 failed environment checker:{e}")
53
+
54
+
55
+ if __name__ == "__main__":
56
+ pytest.main([__file__, "-v", "--tb=short"])
57
+
58
+
59
+
@@ -0,0 +1,111 @@
1
+ import gymnasium_sudoku,torch,sys
2
+ import gymnasium as gym
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+ from torch.distributions import Categorical
6
+ import numpy as np
7
+ import time
8
+ from tqdm import tqdm
9
+
10
+
11
def process_obs(x):
    """Encode an integer board tensor as one-hot planes plus an empty-cell plane.

    Args:
        x: Integer tensor of rank 3 with values in ``0..9``; the one-hot
            result is permuted as a rank-4 tensor, so the input is indexed
            as ``(batch, rows, cols)``.

    Returns:
        Float tensor of shape ``(batch, 11, rows, cols)``: channels ``0..9``
        are the one-hot encoding of each cell's value and channel ``10`` is
        ``1.0`` wherever the cell value is ``0``.
    """
    # One-hot puts the class axis last; move it in front of the spatial axes.
    digit_planes = F.one_hot(x, num_classes=10).permute(0, 3, 1, 2).float()
    # Extra plane that flags the cells whose value is 0.
    empty_plane = (x == 0).unsqueeze(1).float()
    return torch.cat([digit_planes, empty_plane], dim=1)
15
+
16
class p_net(nn.Module):
    """Policy network that samples a board cell and a digit for that cell.

    The input is a float tensor of shape ``(B, C, H, W)`` with ``H * W == 81``
    (the learned positional embedding and the attention masks are built for
    81 cells). Channel 0..9 are expected to be a one-hot encoding of the cell
    value, as produced by ``process_obs``; the channel count itself is
    inferred lazily on the first forward pass.
    """

    def __init__(self):
        super().__init__()
        # Convolutional trunk; input channel counts are inferred lazily.
        self.c1 = nn.LazyConv2d(64, 1, 1)
        self.c2 = nn.LazyConv2d(128, 3, 1, padding=1)
        self.c3 = nn.LazyConv2d(128, 3, 1, padding=1)
        # Learned per-cell embedding added to the 81 cell tokens.
        self.emb = nn.Parameter(torch.randn(1, 81, 128) * 0.02)
        self.attn = nn.MultiheadAttention(128, 4, batch_first=True)
        self.norm = nn.LayerNorm(128)
        self.l1 = nn.LazyLinear(128)
        self.l2 = nn.LazyLinear(128)
        # Heads: one logit per cell, and ten digit logits per cell.
        self.pos = nn.LazyLinear(1)
        self.num = nn.LazyLinear(10)
        # Not used by forward() in this file.
        self.v_aux = nn.LazyLinear(1)
        # Buffer (not a parameter) so it follows the module across devices.
        self.register_buffer("attn_mask", self.attn_masks())

    def forward(self, s):
        """Sample an action for every board in the batch.

        Args:
            s: Float tensor of shape ``(B, C, H, W)`` with ``H * W == 81``.

        Returns:
            Tuple ``(pos, num, asc)``: ``pos`` is a ``(B,)`` tensor of sampled
            flat cell indices, ``num`` a ``(B,)`` tensor of sampled digit
            indices (index 0 is masked out), and ``asc`` the head-averaged
            attention weights of shape ``(B, 81, 81)``.
        """
        x = self.c1(s)
        x = F.silu(self.c2(x))
        x = F.silu(self.c3(x))
        # (B, 128, H, W) -> (B, 81, 128): one token per cell.
        x = x.flatten(2).transpose(-1, 1)
        x = x + self.emb
        # MultiheadAttention expects a 3D mask of shape (B * num_heads, L, S)
        # laid out batch-major, so the four per-head masks are tiled once per
        # batch element. For B == 1 this equals the stored buffer.
        attn_mask = self.attn_mask.repeat(x.size(0), 1, 1)
        x, asc = self.attn(
            x, x, x, attn_mask=attn_mask, average_attn_weights=True
        )
        x = self.norm(x)
        x = F.silu(self.l1(x))
        x = F.silu(self.l2(x))

        # Choose a cell among those that pos_mask leaves unmasked.
        pos = self.pos(x).squeeze(-1)
        pos = self.pos_mask(s, pos)
        pos = F.softmax(pos, -1)
        pos = Categorical(probs=pos).sample()

        # Choose a digit from the logits of the selected cell.
        num_logits = self.num(x)
        # Index tensor must live on the same device as the logits.
        idx = torch.arange(x.size(0), device=x.device)
        o = num_logits[idx, pos]
        o = self.action_mask(o)
        o = F.softmax(o, -1)
        num = Categorical(probs=o).sample()
        return pos, num, asc

    def pos_mask(self, s, x):
        """Suppress the logits of cells whose channel-argmax is not 0.

        Args:
            s: Network input of shape ``(B, C, H, W)``.
            x: Per-cell logits of shape ``(B, H * W)``.

        Returns:
            ``x`` with ``-1e9`` written wherever ``s.argmax(1) != 0``. With
            the ``process_obs`` encoding those are the already-filled cells.
            A large finite value (rather than ``-inf``) keeps the following
            softmax finite even if every cell is masked.
        """
        s = s.argmax(1)
        mask = (s != 0).flatten(1)
        value = -1e9
        return torch.masked_fill(x, mask, value)

    def action_mask(self, x):
        """Set the logit of digit index 0 to ``-inf`` so it is never sampled.

        Args:
            x: Digit logits of shape ``(B, 10)``.

        Returns:
            A tensor like ``x`` whose first column is ``-inf``.
        """
        mask = torch.zeros_like(x, dtype=torch.bool)
        mask[:, 0] = True
        value = -float("inf")
        return torch.masked_fill(x, mask, value)

    def attn_masks(self, N=81):
        """Build one ``N x N`` float mask per attention head.

        Args:
            N: Number of cells; row/column/box indices assume a 9-wide board.

        Returns:
            Float tensor of shape ``(4, N, N)`` stacking, in order, a same-row
            mask, a same-column mask, a same-3x3-box mask and an all-ones
            mask. Entries are ``1.0`` where the two cells share the group and
            ``0.0`` otherwise.

        NOTE(review): PyTorch adds a float ``attn_mask`` to the attention
        scores instead of blocking positions, so these 0/1 values act as a
        +1 bias for same-group pairs rather than a hard mask. Confirm this is
        intended before changing it; trained weights depend on it.
        """
        indices = torch.arange(N)

        rows = indices // 9
        cols = indices % 9
        boxes = (rows // 3) * 3 + (cols // 3)  # shape [N]

        row_mask = (rows.unsqueeze(0) == rows.unsqueeze(1)).float()
        col_mask = (cols.unsqueeze(0) == cols.unsqueeze(1)).float()
        box_mask = (boxes.unsqueeze(0) == boxes.unsqueeze(1)).float()
        global_mask = torch.ones(N, N)
        return torch.stack([row_mask, col_mask, box_mask, global_mask], dim=0)
78
+
79
# Demo rollout: step the biased Sudoku environment with random actions,
# rendering every step and printing a summary whenever an episode ends.
env = gym.make(
    "sudoku-v0",
    mode="biased",
    render_mode="human",
    horizon=800,
    eval_mode=True,
)

total_steps = int(6e3 * 5)

# Dummy forward pass: the Lazy* layers in p_net infer their input sizes on
# the first call.
policy = p_net()
policy(process_obs(torch.randint(0, 9, (1, 9, 9))))
#t_policy = torch.load("./model_test",map_location="cpu")["policy state"]
#policy.load_state_dict(t_policy,strict=False)

# Single reset before the loop (the earlier extra reset and the duplicate
# `steps = 0` initialisation were redundant).
obs = env.reset()[0]
steps = r = 0

try:
    for n in range(total_steps):
        #pos,num,attn = policy(process_obs(torch.tensor(obs,dtype=torch.int64).unsqueeze(0)))
        #xpos = pos // 9 ; ypos = pos % 9
        #action = np.stack((xpos,ypos,num),axis=-1).reshape(3)
        obs, reward, done, trunc, _ = env.step(env.action_space.sample())
        steps += 1
        r += reward
        print(reward)
        env.render()
        # An episode ends on termination OR truncation (e.g. the horizon);
        # stepping past either without a reset is outside the Gymnasium API.
        if done or trunc:
            print(f"\n{obs} | steps : {steps} | reward {r:.2f}")
            time.sleep(5)
            steps = r = 0
            obs = env.reset()[0]
finally:
    # Release the render window/resources even if the loop is interrupted.
    env.close()