pyrlutils 0.0.2__tar.gz → 0.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyrlutils might be problematic; see the registry's advisory page for more details.

Files changed (38)
  1. pyrlutils-0.0.4/.circleci/config.yml +76 -0
  2. pyrlutils-0.0.4/.gitignore +252 -0
  3. pyrlutils-0.0.4/.pyup.yml +5 -0
  4. pyrlutils-0.0.4/MANIFEST.in +3 -0
  5. {pyrlutils-0.0.2/pyrlutils.egg-info → pyrlutils-0.0.4}/PKG-INFO +17 -9
  6. {pyrlutils-0.0.2 → pyrlutils-0.0.4}/README.md +3 -0
  7. pyrlutils-0.0.4/pyproject.toml +42 -0
  8. pyrlutils-0.0.4/pyrlutils/bandit/algo.py +128 -0
  9. pyrlutils-0.0.4/pyrlutils/bandit/reward.py +11 -0
  10. pyrlutils-0.0.4/pyrlutils/openai/__init__.py +0 -0
  11. pyrlutils-0.0.4/pyrlutils/openai/utils.py +31 -0
  12. {pyrlutils-0.0.2 → pyrlutils-0.0.4}/pyrlutils/reward.py +1 -1
  13. {pyrlutils-0.0.2 → pyrlutils-0.0.4}/pyrlutils/state.py +31 -2
  14. {pyrlutils-0.0.2 → pyrlutils-0.0.4}/pyrlutils/transition.py +0 -19
  15. {pyrlutils-0.0.2 → pyrlutils-0.0.4}/pyrlutils/valuefcns.py +1 -1
  16. {pyrlutils-0.0.2 → pyrlutils-0.0.4/pyrlutils.egg-info}/PKG-INFO +17 -9
  17. {pyrlutils-0.0.2 → pyrlutils-0.0.4}/pyrlutils.egg-info/SOURCES.txt +11 -1
  18. pyrlutils-0.0.4/pyrlutils.egg-info/requires.txt +7 -0
  19. pyrlutils-0.0.4/test/__init__.py +0 -0
  20. pyrlutils-0.0.4/test/test_bandits.py +82 -0
  21. {pyrlutils-0.0.2 → pyrlutils-0.0.4}/test/test_frozenlake.py +2 -1
  22. pyrlutils-0.0.2/pyrlutils.egg-info/requires.txt +0 -2
  23. pyrlutils-0.0.2/setup.py +0 -53
  24. {pyrlutils-0.0.2 → pyrlutils-0.0.4}/LICENSE +0 -0
  25. {pyrlutils-0.0.2 → pyrlutils-0.0.4}/pyrlutils/__init__.py +0 -0
  26. {pyrlutils-0.0.2 → pyrlutils-0.0.4}/pyrlutils/action.py +0 -0
  27. /pyrlutils-0.0.2/MANIFEST.in → /pyrlutils-0.0.4/pyrlutils/bandit/__init__.py +0 -0
  28. {pyrlutils-0.0.2 → pyrlutils-0.0.4}/pyrlutils/policy.py +0 -0
  29. {pyrlutils-0.0.2 → pyrlutils-0.0.4}/pyrlutils.egg-info/dependency_links.txt +0 -0
  30. {pyrlutils-0.0.2 → pyrlutils-0.0.4}/pyrlutils.egg-info/not-zip-safe +0 -0
  31. {pyrlutils-0.0.2 → pyrlutils-0.0.4}/pyrlutils.egg-info/top_level.txt +0 -0
  32. {pyrlutils-0.0.2 → pyrlutils-0.0.4}/setup.cfg +0 -0
  33. {pyrlutils-0.0.2 → pyrlutils-0.0.4}/test/test_2ddiscrete.py +0 -0
  34. {pyrlutils-0.0.2 → pyrlutils-0.0.4}/test/test_2dmaze.py +0 -0
  35. {pyrlutils-0.0.2 → pyrlutils-0.0.4}/test/test_action.py +0 -0
  36. {pyrlutils-0.0.2 → pyrlutils-0.0.4}/test/test_continous_state_actions.py +0 -0
  37. {pyrlutils-0.0.2 → pyrlutils-0.0.4}/test/test_state.py +0 -0
  38. {pyrlutils-0.0.2 → pyrlutils-0.0.4}/test/test_transprobs.py +0 -0
@@ -0,0 +1,76 @@
1
+ version: 2
2
+
3
+ shared: &shared
4
+ working_directory: ~/pyrlutils
5
+
6
+ steps:
7
+ - checkout
8
+
9
+ - run:
10
+ name: Apt Install
11
+ command: |
12
+ sudo apt-get update
13
+ sudo apt-get install -y libc6
14
+ sudo apt-get install -y g++
15
+
16
+ - run:
17
+ name: Installing Packages
18
+ command: |
19
+ pip install --upgrade --user pip
20
+ pip install --upgrade --user .
21
+ pip install --upgrade --user .[openaigym]
22
+
23
+ - run:
24
+ name: Run Unit Tests
25
+ command: |
26
+ python -m unittest
27
+
28
+
29
+
30
+ jobs:
31
+ py37:
32
+ <<: *shared
33
+ docker:
34
+ - image: cimg/python:3.7
35
+
36
+ py38:
37
+ <<: *shared
38
+ docker:
39
+ - image: cimg/python:3.8
40
+
41
+ py39:
42
+ <<: *shared
43
+ docker:
44
+ - image: cimg/python:3.9
45
+
46
+ py310:
47
+ <<: *shared
48
+ docker:
49
+ - image: cimg/python:3.10
50
+
51
+ py311:
52
+ <<: *shared
53
+ docker:
54
+ - image: cimg/python:3.11
55
+
56
+ py312:
57
+ <<: *shared
58
+ docker:
59
+ - image: cimg/python:3.12
60
+
61
+ py313:
62
+ <<: *shared
63
+ docker:
64
+ - image: cimg/python:3.13
65
+
66
+ workflows:
67
+ version: 2
68
+ build:
69
+ jobs:
70
+ - py37
71
+ - py38
72
+ - py39
73
+ - py310
74
+ - py311
75
+ - py312
76
+ - py313
@@ -0,0 +1,252 @@
1
+ ### VisualStudioCode template
2
+ .vscode/*
3
+ !.vscode/settings.json
4
+ !.vscode/tasks.json
5
+ !.vscode/launch.json
6
+ !.vscode/extensions.json
7
+ *.code-workspace
8
+
9
+ # Local History for Visual Studio Code
10
+ .history/
11
+
12
+ ### JetBrains template
13
+ # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
14
+ # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
15
+
16
+ # User-specific stuff
17
+ .idea/**/workspace.xml
18
+ .idea/**/tasks.xml
19
+ .idea/**/usage.statistics.xml
20
+ .idea/**/dictionaries
21
+ .idea/**/shelf
22
+
23
+ # Generated files
24
+ .idea/**/contentModel.xml
25
+
26
+ # Sensitive or high-churn files
27
+ .idea/**/dataSources/
28
+ .idea/**/dataSources.ids
29
+ .idea/**/dataSources.local.xml
30
+ .idea/**/sqlDataSources.xml
31
+ .idea/**/dynamic.xml
32
+ .idea/**/uiDesigner.xml
33
+ .idea/**/dbnavigator.xml
34
+
35
+ # Gradle
36
+ .idea/**/gradle.xml
37
+ .idea/**/libraries
38
+
39
+ # Gradle and Maven with auto-import
40
+ # When using Gradle or Maven with auto-import, you should exclude module files,
41
+ # since they will be recreated, and may cause churn. Uncomment if using
42
+ # auto-import.
43
+ # .idea/artifacts
44
+ # .idea/compiler.xml
45
+ # .idea/jarRepositories.xml
46
+ # .idea/modules.xml
47
+ # .idea/*.iml
48
+ # .idea/modules
49
+ # *.iml
50
+ # *.ipr
51
+
52
+ # CMake
53
+ cmake-build-*/
54
+
55
+ # Mongo Explorer plugin
56
+ .idea/**/mongoSettings.xml
57
+
58
+ # File-based project format
59
+ *.iws
60
+
61
+ # IntelliJ
62
+ out/
63
+
64
+ # mpeltonen/sbt-idea plugin
65
+ .idea_modules/
66
+
67
+ # JIRA plugin
68
+ atlassian-ide-plugin.xml
69
+
70
+ # Cursive Clojure plugin
71
+ .idea/replstate.xml
72
+
73
+ # Crashlytics plugin (for Android Studio and IntelliJ)
74
+ com_crashlytics_export_strings.xml
75
+ crashlytics.properties
76
+ crashlytics-build.properties
77
+ fabric.properties
78
+
79
+ # Editor-based Rest Client
80
+ .idea/httpRequests
81
+
82
+ # Android studio 3.1+ serialized cache file
83
+ .idea/caches/build_file_checksums.ser
84
+
85
+ ### VirtualEnv template
86
+ # Virtualenv
87
+ # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
88
+ .Python
89
+ [Bb]in
90
+ [Ii]nclude
91
+ [Ll]ib
92
+ [Ll]ib64
93
+ [Ll]ocal
94
+ [Ss]cripts
95
+ pyvenv.cfg
96
+ .venv
97
+ pip-selfcheck.json
98
+
99
+ ### JupyterNotebooks template
100
+ # gitignore template for Jupyter Notebooks
101
+ # website: http://jupyter.org/
102
+
103
+ .ipynb_checkpoints
104
+ */.ipynb_checkpoints/*
105
+
106
+ # IPython
107
+ profile_default/
108
+ ipython_config.py
109
+
110
+ # Remove previous ipynb_checkpoints
111
+ # git rm -r .ipynb_checkpoints/
112
+
113
+ ### Python template
114
+ # Byte-compiled / optimized / DLL files
115
+ __pycache__/
116
+ *.py[cod]
117
+ *$py.class
118
+
119
+ # C extensions
120
+ *.so
121
+
122
+ # Distribution / packaging
123
+ .Python
124
+ build/
125
+ develop-eggs/
126
+ dist/
127
+ downloads/
128
+ eggs/
129
+ .eggs/
130
+ lib/
131
+ lib64/
132
+ parts/
133
+ sdist/
134
+ var/
135
+ wheels/
136
+ share/python-wheels/
137
+ *.egg-info/
138
+ .installed.cfg
139
+ *.egg
140
+ MANIFEST
141
+
142
+ # PyInstaller
143
+ # Usually these files are written by a python script from a template
144
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
145
+ *.manifest
146
+ *.spec
147
+
148
+ # Installer logs
149
+ pip-log.txt
150
+ pip-delete-this-directory.txt
151
+
152
+ # Unit test / coverage reports
153
+ htmlcov/
154
+ .tox/
155
+ .nox/
156
+ .coverage
157
+ .coverage.*
158
+ .cache
159
+ nosetests.xml
160
+ coverage.xml
161
+ *.cover
162
+ *.py,cover
163
+ .hypothesis/
164
+ .pytest_cache/
165
+ cover/
166
+
167
+ # Translations
168
+ *.mo
169
+ *.pot
170
+
171
+ # Django stuff:
172
+ *.log
173
+ local_settings.py
174
+ db.sqlite3
175
+ db.sqlite3-journal
176
+
177
+ # Flask stuff:
178
+ instance/
179
+ .webassets-cache
180
+
181
+ # Scrapy stuff:
182
+ .scrapy
183
+
184
+ # Sphinx documentation
185
+ docs/_build/
186
+
187
+ # PyBuilder
188
+ .pybuilder/
189
+ target/
190
+
191
+ # Jupyter Notebook
192
+ .ipynb_checkpoints
193
+
194
+ # IPython
195
+ profile_default/
196
+ ipython_config.py
197
+
198
+ # pyenv
199
+ # For a library or package, you might want to ignore these files since the code is
200
+ # intended to run in multiple environments; otherwise, check them in:
201
+ # .python-version
202
+
203
+ # pipenv
204
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
205
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
206
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
207
+ # install all needed dependencies.
208
+ #Pipfile.lock
209
+
210
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
211
+ __pypackages__/
212
+
213
+ # Celery stuff
214
+ celerybeat-schedule
215
+ celerybeat.pid
216
+
217
+ # SageMath parsed files
218
+ *.sage.py
219
+
220
+ # Environments
221
+ .env
222
+ .venv
223
+ env/
224
+ venv/
225
+ ENV/
226
+ env.bak/
227
+ venv.bak/
228
+
229
+ # Spyder project settings
230
+ .spyderproject
231
+ .spyproject
232
+
233
+ # Rope project settings
234
+ .ropeproject
235
+
236
+ # mkdocs documentation
237
+ /site
238
+
239
+ # mypy
240
+ .mypy_cache/
241
+ .dmypy.json
242
+ dmypy.json
243
+
244
+ # Pyre type checker
245
+ .pyre/
246
+
247
+ # pytype static type analyzer
248
+ .pytype/
249
+
250
+ # Cython debug symbols
251
+ cython_debug/
252
+
@@ -0,0 +1,5 @@
1
+ # autogenerated pyup.io config file
2
+ # see https://pyup.io/docs/configuration/ for all available options
3
+
4
+ schedule: ''
5
+ update: false
@@ -0,0 +1,3 @@
1
+ include README.md
2
+ include pyproject.toml
3
+ include LICENSE
@@ -1,14 +1,14 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: pyrlutils
3
- Version: 0.0.2
3
+ Version: 0.0.4
4
4
  Summary: Utility and Helpers for Reinformcement Learning
5
- Home-page: https://github.com/stephenhky/PyRLUtils
6
- Author: Kwan-Yuet Ho
7
- Author-email: stephenhky@yahoo.com.hk
5
+ Author-email: Kwan Yuet Stephen Ho <stephenhky@yahoo.com.hk>
8
6
  License: MIT
9
- Keywords: machine learning,reinforcement leaning,artifiial intelligence
10
- Platform: UNKNOWN
7
+ Project-URL: Repository, https://github.com/stephenhky/PyRLUtils
8
+ Project-URL: Issues, https://github.com/stephenhky/PyRLUtils/issues
9
+ Keywords: machine learning,reinforcement leaning,artificial intelligence
11
10
  Classifier: Topic :: Scientific/Engineering :: Mathematics
11
+ Classifier: License :: OSI Approved :: MIT License
12
12
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
13
13
  Classifier: Topic :: Software Development :: Version Control :: Git
14
14
  Classifier: Programming Language :: Python :: 3.7
@@ -16,10 +16,17 @@ Classifier: Programming Language :: Python :: 3.8
16
16
  Classifier: Programming Language :: Python :: 3.9
17
17
  Classifier: Programming Language :: Python :: 3.10
18
18
  Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
19
20
  Classifier: Intended Audience :: Science/Research
20
21
  Classifier: Intended Audience :: Developers
22
+ Requires-Python: >=3.7
21
23
  Description-Content-Type: text/markdown
22
24
  License-File: LICENSE
25
+ Requires-Dist: numpy
26
+ Provides-Extra: openaigym
27
+ Requires-Dist: gymnasium; extra == "openaigym"
28
+ Provides-Extra: test
29
+ Requires-Dist: unittest; extra == "test"
23
30
 
24
31
  # PyRLUtils
25
32
 
@@ -27,8 +34,9 @@ License-File: LICENSE
27
34
  [![GitHub release](https://img.shields.io/github/release/stephenhky/PyRLUtils.svg?maxAge=3600)](https://github.com/stephenhky/pyqentangle/PyRLUtils)
28
35
  [![pypi](https://img.shields.io/pypi/v/PyRLUtils.svg?maxAge=3600)](https://pypi.org/project/pyqentangle/)
29
36
  [![download](https://img.shields.io/pypi/dm/PyRLUtils.svg?maxAge=2592000&label=installs&color=%2327B1FF)](https://pypi.org/project/PyRLUtils/)
37
+ [![Updates](https://pyup.io/repos/github/stephenhky/PyRLUtils/shield.svg)](https://pyup.io/repos/github/stephenhky/PyRLUtils/)
38
+ [![Python 3](https://pyup.io/repos/github/stephenhky/PyRLUtils/python-3-shield.svg)](https://pyup.io/repos/github/stephenhky/PyRLUtils/)
39
+
30
40
 
31
41
  This is a Python package with utility classes and helper functions for
32
42
  that facilitates the development of any reinformecement learning projects.
33
-
34
-
@@ -4,6 +4,9 @@
4
4
  [![GitHub release](https://img.shields.io/github/release/stephenhky/PyRLUtils.svg?maxAge=3600)](https://github.com/stephenhky/pyqentangle/PyRLUtils)
5
5
  [![pypi](https://img.shields.io/pypi/v/PyRLUtils.svg?maxAge=3600)](https://pypi.org/project/pyqentangle/)
6
6
  [![download](https://img.shields.io/pypi/dm/PyRLUtils.svg?maxAge=2592000&label=installs&color=%2327B1FF)](https://pypi.org/project/PyRLUtils/)
7
+ [![Updates](https://pyup.io/repos/github/stephenhky/PyRLUtils/shield.svg)](https://pyup.io/repos/github/stephenhky/PyRLUtils/)
8
+ [![Python 3](https://pyup.io/repos/github/stephenhky/PyRLUtils/python-3-shield.svg)](https://pyup.io/repos/github/stephenhky/PyRLUtils/)
9
+
7
10
 
8
11
  This is a Python package with utility classes and helper functions for
9
12
  that facilitates the development of any reinformecement learning projects.
@@ -0,0 +1,42 @@
1
+ [build-system]
2
+ requires = ["setuptools", "setuptools-scm", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "pyrlutils"
7
+ version = "0.0.4"
8
+ authors = [
9
+ {name = "Kwan Yuet Stephen Ho", email = "stephenhky@yahoo.com.hk"}
10
+ ]
11
+ description = "Utility and Helpers for Reinformcement Learning"
12
+ readme = {file = "README.md", content-type = "text/markdown"}
13
+ license = {text = "MIT"}
14
+ keywords = ["machine learning", "reinforcement leaning", "artificial intelligence"]
15
+ requires-python = ">=3.7"
16
+ classifiers = [
17
+ "Topic :: Scientific/Engineering :: Mathematics",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Topic :: Software Development :: Libraries :: Python Modules",
20
+ "Topic :: Software Development :: Version Control :: Git",
21
+ "Programming Language :: Python :: 3.7",
22
+ "Programming Language :: Python :: 3.8",
23
+ "Programming Language :: Python :: 3.9",
24
+ "Programming Language :: Python :: 3.10",
25
+ "Programming Language :: Python :: 3.11",
26
+ "Programming Language :: Python :: 3.12",
27
+ "Intended Audience :: Science/Research",
28
+ "Intended Audience :: Developers",
29
+ ]
30
+ dependencies = ["numpy"]
31
+
32
+ [project.urls]
33
+ Repository = "https://github.com/stephenhky/PyRLUtils"
34
+ Issues = "https://github.com/stephenhky/PyRLUtils/issues"
35
+
36
+ [tool.setuptools]
37
+ packages = ["pyrlutils", "pyrlutils.bandit", "pyrlutils.openai"]
38
+ zip-safe = false
39
+
40
+ [project.optional-dependencies]
41
+ openaigym = ["gymnasium"]
42
+ test = ["unittest"]
@@ -0,0 +1,128 @@
1
+
2
+ from abc import ABC, abstractmethod
3
+
4
+ import numpy as np
5
+
6
+ from .reward import IndividualBanditRewardFunction
7
+
8
+
9
class BanditAlgorithm(ABC):
    """Abstract base class for multi-armed bandit algorithms.

    Stores the candidate action values and the reward function; concrete
    subclasses supply the per-iteration learning step and action selection.
    """

    def __init__(self, action_values: list, reward_function: IndividualBanditRewardFunction):
        self._action_values = action_values
        self._reward_function = reward_function

    @abstractmethod
    def _go_one_loop(self):
        """Perform a single learning iteration (subclass responsibility)."""

    def loop(self, nbiterations: int):
        """Run ``nbiterations`` learning iterations."""
        for _iteration in range(nbiterations):
            self._go_one_loop()

    def reward(self, action_value) -> float:
        """Evaluate the reward function for a single action value."""
        return self._reward_function(action_value)

    @abstractmethod
    def get_action(self):
        """Return the currently preferred action value (subclass responsibility)."""

    @property
    def action_values(self):
        # the candidate arms this bandit chooses among
        return self._action_values

    @property
    def reward_function(self) -> IndividualBanditRewardFunction:
        # the reward function used to score pulled arms
        return self._reward_function
36
+
37
+
38
class SimpleBandit(BanditAlgorithm):
    """Epsilon-greedy bandit with incremental sample-average value estimates."""

    def __init__(
            self,
            action_values: list,
            reward_function: IndividualBanditRewardFunction,
            epsilon: float=0.05
    ):
        super().__init__(action_values, reward_function)
        self._epsilon = epsilon
        self._initialize()

    def _initialize(self):
        # _Q: estimated value of each arm; _N: number of pulls per arm
        self._Q = np.zeros(len(self._action_values))
        self._N = np.zeros(len(self._action_values), dtype=np.int32)

    def _go_one_loop(self):
        """One epsilon-greedy step: pick an arm, observe reward, update Q.

        Bug fix: the original exploited (argmax) with probability epsilon and
        explored with probability 1 - epsilon, i.e. the inverse of
        epsilon-greedy.  We now explore a uniformly random arm with
        probability epsilon and exploit the greedy arm otherwise.
        """
        if np.random.uniform() < self.epsilon:
            # explore: uniformly random arm
            selected_action_idx = int(np.random.choice(len(self._action_values)))
        else:
            # exploit: greedy arm under the current estimates
            selected_action_idx = int(np.argmax(self._Q))
        reward = self._reward_function(self._action_values[selected_action_idx])
        self._N[selected_action_idx] += 1
        # incremental sample-average update: Q += (r - Q) / N
        self._Q[selected_action_idx] += (reward - self._Q[selected_action_idx]) / self._N[selected_action_idx]

    def get_action(self):
        """Return the greedy (highest-Q) action value."""
        return self._action_values[int(np.argmax(self._Q))]

    @property
    def epsilon(self) -> float:
        """Exploration probability."""
        return self._epsilon

    @epsilon.setter
    def epsilon(self, val: float):
        self._epsilon = val
74
+
75
+
76
class GradientBandit(BanditAlgorithm):
    """Gradient bandit: softmax over learned preferences with a reward baseline."""

    def __init__(self, action_values: list, reward_function: IndividualBanditRewardFunction, temperature: float=1.0, alpha: float=0.1):
        super().__init__(action_values, reward_function)
        self._T = temperature   # softmax temperature
        self._alpha = alpha     # preference step size
        self._initialize()

    def _initialize(self):
        self._preferences = np.zeros(len(self._action_values))
        self._rewards_over_time = []
        # running sum of rewards: the original recomputed np.mean over the
        # whole history each step (O(t) per step, O(t^2) overall)
        self._total_reward = 0.

    def _get_probs(self) -> np.ndarray:
        # action probabilities: softmax of preferences at temperature T
        exp_preferences = np.exp(self._preferences / self.T)
        return exp_preferences / np.sum(exp_preferences)

    def get_action(self):
        """Return the action value with the highest preference."""
        return self._action_values[int(np.argmax(self._preferences))]

    def _go_one_loop(self):
        """Sample an action from the softmax, then apply the gradient update."""
        probs = self._get_probs()
        selected_action_idx = np.random.choice(self._preferences.shape[0], p=probs)
        reward = self._reward_function(self._action_values[selected_action_idx])
        self._rewards_over_time.append(reward)
        self._total_reward += reward
        # baseline: average of all rewards observed so far (incl. the current one)
        average_reward = self._total_reward / len(self._rewards_over_time)
        step = self.alpha * (reward - average_reward)
        # vectorized update, equivalent to the per-arm loop:
        # every arm moves down by step*prob; the selected arm then gets +step,
        # for a net move of step*(1 - prob)
        self._preferences -= step * probs
        self._preferences[selected_action_idx] += step

    @property
    def alpha(self) -> float:
        """Preference step size."""
        return self._alpha

    @alpha.setter
    def alpha(self, val: float):
        self._alpha = val

    @property
    def T(self) -> float:
        """Softmax temperature."""
        return self._T

    @T.setter
    def T(self, val: float):
        self._T = val

    @property
    def temperature(self) -> float:
        """Softmax temperature (alias of ``T``)."""
        return self._T

    @temperature.setter
    def temperature(self, val: float):
        # setter added so the alias is symmetric with T (was read-only before)
        self._T = val
@@ -0,0 +1,11 @@
1
+
2
+ from abc import ABC, abstractmethod
3
+
4
+
5
class IndividualBanditRewardFunction(ABC):
    """Abstract reward function for a single bandit action.

    Subclasses implement :meth:`reward`; instances are also directly
    callable, which simply delegates to :meth:`reward`.
    """

    @abstractmethod
    def reward(self, action_value) -> float:
        """Return the reward obtained by taking ``action_value``."""

    def __call__(self, action_value) -> float:
        """Callable alias for :meth:`reward`."""
        return self.reward(action_value)
File without changes
@@ -0,0 +1,31 @@
1
+
2
+ import gymnasium as gym
3
+
4
+ from ..transition import TransitionProbabilityFactory, NextStateTuple
5
+
6
+
7
class OpenAIGymDiscreteEnvironmentTransitionProbabilityFactory(TransitionProbabilityFactory):
    """Build a TransitionProbabilityFactory from a Gymnasium discrete environment.

    Instantiates the named Gymnasium environment and copies its transition
    table ``P`` into this factory via ``add_state_transitions``.
    """

    def __init__(self, envname):
        super().__init__()
        self._envname = envname
        self._gymenv = gym.make(envname)
        self._convert_openai_gymenv_to_transprob()

    def _convert_openai_gymenv_to_transprob(self):
        # NOTE(review): the triple ``.env.env.env`` unwrap assumes exactly three
        # wrapper layers around the raw environment holding ``P`` — verify this
        # against the installed gymnasium version.
        P = self._gymenv.env.env.env.P
        for state_value, trans_dict in P.items():
            new_trans_dict = {}
            for action_value, next_state_list in trans_dict.items():
                # Reorders each gym transition tuple into NextStateTuple order;
                # presumably gym entries are (prob, next_state, reward, done),
                # giving (next_state, prob, reward, done) here — TODO confirm.
                new_trans_dict[action_value] = [
                    NextStateTuple(next_state[1], next_state[0], next_state[2], next_state[3])
                    for next_state in next_state_list
                ]
            self.add_state_transitions(state_value, new_trans_dict)

    @property
    def envname(self):
        # name string passed to gym.make
        return self._envname

    @property
    def gymenv(self):
        # the wrapped Gymnasium environment instance
        return self._gymenv
@@ -21,7 +21,7 @@ class RewardFunction(ABC):
21
21
  return self._discount_factor
22
22
 
23
23
  @discount_factor.setter
24
- def discount_factor(self, discount_factor):
24
+ def discount_factor(self, discount_factor: float):
25
25
  self._discount_factor = discount_factor
26
26
 
27
27
  def individual_reward(self, state_value, action_value, next_state_value) -> float:
@@ -1,10 +1,39 @@
1
1
 
2
2
  from abc import ABC, abstractmethod
3
+ from enum import Enum
4
+ from dataclasses import dataclass
3
5
  from typing import Tuple, List, Optional, Union
4
6
 
5
7
  import numpy as np
6
8
 
7
9
 
10
class StateValue(ABC):
    """Abstract wrapper for a state's value.

    Concrete subclasses must expose the wrapped value through the
    read-only :attr:`value` property.
    """

    @property
    @abstractmethod
    def value(self):
        """The wrapped state value."""
15
+
16
+
17
@dataclass
class DiscreteStateValue(StateValue):
    """State value backed by a single Enum member."""

    # the wrapped Enum member supplying both value and name
    enum: Enum

    @property
    def value(self):
        """Return the underlying enum member's value."""
        return self.enum.value

    def name(self):
        # NOTE(review): this is a method, unlike the `value` property —
        # callers must use name(); changing it to a property would break them.
        return self.enum.name
27
+
28
+
29
@dataclass
class ContinuousStateValue(StateValue):
    """State value holding a single scalar float.

    Bug fix: the original declared ``_value: float`` as a bare class
    annotation with no ``__init__`` and no ``@dataclass``, so ``_value``
    was never assigned and every access to ``.value`` raised
    AttributeError.  Making it a dataclass (mirroring DiscreteStateValue)
    generates a constructor that actually stores the value; the 0.0
    default keeps the previous zero-argument construction working.
    """

    _value: float = 0.0

    @property
    def value(self) -> float:
        """Return the stored scalar state value."""
        return self._value
35
+
36
+
8
37
  class State(ABC):
9
38
  @property
10
39
  def state_value(self):
@@ -23,7 +52,7 @@ class State(ABC):
23
52
  self.set_state_value(new_state_value)
24
53
 
25
54
 
26
- DiscreteStateValueType = Union[float, str, Tuple[int]]
55
+ DiscreteStateValueType = Union[float, str, Tuple[int], Enum]
27
56
 
28
57
 
29
58
  class DiscreteState(State):
@@ -182,7 +211,7 @@ class Discrete2DCartesianState(DiscreteState):
182
211
  self._county = self._y_hilim - self._y_lowlim + 1
183
212
  if initial_coordinate is None:
184
213
  initial_coordinate = [self._x_lowlim, self._y_lowlim]
185
- initial_value = (initial_coordinate[1] - self._y_lowlim) * self._countx + (initial_coordinate[0] - self._x_lowlim)
214
+ initial_value = (initial_coordinate[1] - self._y_lowlim) * self._countx + (initial_coordinate[0] - self._x_lowlim)
186
215
  super().__init__(list(range(self._countx*self._county)), initial_values=initial_value)
187
216
 
188
217
  def _encode_coordinates(self, x, y) -> int:
@@ -3,7 +3,6 @@ from types import LambdaType
3
3
  from typing import Tuple, Dict
4
4
 
5
5
  import numpy as np
6
- import gym
7
6
 
8
7
  from .state import DiscreteState, DiscreteStateValueType
9
8
  from .reward import IndividualRewardFunction
@@ -145,21 +144,3 @@ class TransitionProbabilityFactory:
145
144
  @property
146
145
  def objects_generated(self) -> bool:
147
146
  return self._objects_generated
148
-
149
-
150
- class OpenAIGymDiscreteEnvironmentTransitionProbabilityFactory(TransitionProbabilityFactory):
151
- def __init__(self, envname):
152
- super().__init__()
153
- self.gymenv = gym.make(envname)
154
- self._convert_openai_gymenv_to_transprob()
155
-
156
- def _convert_openai_gymenv_to_transprob(self):
157
- P = self.gymenv.env.P
158
- for state_value, trans_dict in P.items():
159
- new_trans_dict = {}
160
- for action_value, next_state_list in trans_dict.items():
161
- new_trans_dict[action_value] = [
162
- NextStateTuple(next_state[1], next_state[0], next_state[2], next_state[3])
163
- for next_state in next_state_list
164
- ]
165
- self.add_state_transitions(state_value, new_trans_dict)
@@ -14,7 +14,7 @@ from .policy import DiscreteDeterminsticPolicy
14
14
  class OptimalPolicyOnValueFunctions:
15
15
  def __init__(self, discount_factor: float, transprobfac: TransitionProbabilityFactory):
16
16
  try:
17
- assert discount_factor >= 0. and discount_factor <= 1.
17
+ assert 0. <= discount_factor <= 1.
18
18
  except AssertionError:
19
19
  raise ValueError('Discount factor must be between 0 and 1.')
20
20
  self._gamma = discount_factor
@@ -1,14 +1,14 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: pyrlutils
3
- Version: 0.0.2
3
+ Version: 0.0.4
4
4
  Summary: Utility and Helpers for Reinformcement Learning
5
- Home-page: https://github.com/stephenhky/PyRLUtils
6
- Author: Kwan-Yuet Ho
7
- Author-email: stephenhky@yahoo.com.hk
5
+ Author-email: Kwan Yuet Stephen Ho <stephenhky@yahoo.com.hk>
8
6
  License: MIT
9
- Keywords: machine learning,reinforcement leaning,artifiial intelligence
10
- Platform: UNKNOWN
7
+ Project-URL: Repository, https://github.com/stephenhky/PyRLUtils
8
+ Project-URL: Issues, https://github.com/stephenhky/PyRLUtils/issues
9
+ Keywords: machine learning,reinforcement leaning,artificial intelligence
11
10
  Classifier: Topic :: Scientific/Engineering :: Mathematics
11
+ Classifier: License :: OSI Approved :: MIT License
12
12
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
13
13
  Classifier: Topic :: Software Development :: Version Control :: Git
14
14
  Classifier: Programming Language :: Python :: 3.7
@@ -16,10 +16,17 @@ Classifier: Programming Language :: Python :: 3.8
16
16
  Classifier: Programming Language :: Python :: 3.9
17
17
  Classifier: Programming Language :: Python :: 3.10
18
18
  Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
19
20
  Classifier: Intended Audience :: Science/Research
20
21
  Classifier: Intended Audience :: Developers
22
+ Requires-Python: >=3.7
21
23
  Description-Content-Type: text/markdown
22
24
  License-File: LICENSE
25
+ Requires-Dist: numpy
26
+ Provides-Extra: openaigym
27
+ Requires-Dist: gymnasium; extra == "openaigym"
28
+ Provides-Extra: test
29
+ Requires-Dist: unittest; extra == "test"
23
30
 
24
31
  # PyRLUtils
25
32
 
@@ -27,8 +34,9 @@ License-File: LICENSE
27
34
  [![GitHub release](https://img.shields.io/github/release/stephenhky/PyRLUtils.svg?maxAge=3600)](https://github.com/stephenhky/pyqentangle/PyRLUtils)
28
35
  [![pypi](https://img.shields.io/pypi/v/PyRLUtils.svg?maxAge=3600)](https://pypi.org/project/pyqentangle/)
29
36
  [![download](https://img.shields.io/pypi/dm/PyRLUtils.svg?maxAge=2592000&label=installs&color=%2327B1FF)](https://pypi.org/project/PyRLUtils/)
37
+ [![Updates](https://pyup.io/repos/github/stephenhky/PyRLUtils/shield.svg)](https://pyup.io/repos/github/stephenhky/PyRLUtils/)
38
+ [![Python 3](https://pyup.io/repos/github/stephenhky/PyRLUtils/python-3-shield.svg)](https://pyup.io/repos/github/stephenhky/PyRLUtils/)
39
+
30
40
 
31
41
  This is a Python package with utility classes and helper functions for
32
42
  that facilitates the development of any reinformecement learning projects.
33
-
34
-
@@ -1,7 +1,10 @@
1
+ .gitignore
2
+ .pyup.yml
1
3
  LICENSE
2
4
  MANIFEST.in
3
5
  README.md
4
- setup.py
6
+ pyproject.toml
7
+ .circleci/config.yml
5
8
  pyrlutils/__init__.py
6
9
  pyrlutils/action.py
7
10
  pyrlutils/policy.py
@@ -15,9 +18,16 @@ pyrlutils.egg-info/dependency_links.txt
15
18
  pyrlutils.egg-info/not-zip-safe
16
19
  pyrlutils.egg-info/requires.txt
17
20
  pyrlutils.egg-info/top_level.txt
21
+ pyrlutils/bandit/__init__.py
22
+ pyrlutils/bandit/algo.py
23
+ pyrlutils/bandit/reward.py
24
+ pyrlutils/openai/__init__.py
25
+ pyrlutils/openai/utils.py
26
+ test/__init__.py
18
27
  test/test_2ddiscrete.py
19
28
  test/test_2dmaze.py
20
29
  test/test_action.py
30
+ test/test_bandits.py
21
31
  test/test_continous_state_actions.py
22
32
  test/test_frozenlake.py
23
33
  test/test_state.py
@@ -0,0 +1,7 @@
1
+ numpy
2
+
3
+ [openaigym]
4
+ gymnasium
5
+
6
+ [test]
7
+ unittest
File without changes
@@ -0,0 +1,82 @@
1
+
2
+ import unittest
3
+ from enum import Enum
4
+ import random
5
+
6
+ import numpy as np
7
+
8
+ from pyrlutils.bandit.reward import IndividualBanditRewardFunction
9
+ from pyrlutils.bandit.algo import SimpleBandit, GradientBandit
10
+
11
+
12
class BanditWalk(Enum):
    """Two-armed action space used by the bandit unit tests."""
    LEFT = 0
    RIGHT = 1
15
+
16
+
17
class BanditWalkReward(IndividualBanditRewardFunction):
    """Deterministic reward: LEFT pays 0, anything else pays 1."""

    def reward(self, action_value: BanditWalk) -> float:
        if action_value == BanditWalk.LEFT:
            return 0.
        return 1.
20
+
21
+
22
class BanditSlipperyWalkReward(IndividualBanditRewardFunction):
    """Stochastic reward: LEFT pays 1 with prob 0.2, others pay 1 with prob 0.8."""

    def reward(self, action_value: BanditWalk) -> float:
        draw = random.uniform(0, 1)
        # threshold below which the pull yields no reward
        zero_threshold = 0.8 if action_value == BanditWalk.LEFT else 0.2
        return 0. if draw <= zero_threshold else 1.
29
+
30
+
31
class TestBandits(unittest.TestCase):
    """End-to-end checks that both bandit algorithms learn to pick RIGHT."""

    def test_simple_bandit_BW(self):
        bandit = SimpleBandit(list(BanditWalk), BanditWalkReward())

        nb_arms = len(list(BanditWalk))
        self.assertEqual(bandit._Q.shape[0], nb_arms)
        self.assertEqual(len(bandit.action_values), nb_arms)

        # run one hundred learning iterations
        bandit.loop(100)

        self.assertEqual(bandit.get_action(), BanditWalk.RIGHT)

    def test_simple_bandit_BSW(self):
        bandit = SimpleBandit(list(BanditWalk), BanditSlipperyWalkReward())

        nb_arms = len(list(BanditWalk))
        self.assertEqual(bandit._Q.shape[0], nb_arms)
        self.assertEqual(len(bandit.action_values), nb_arms)

        # run one hundred learning iterations
        bandit.loop(100)

        self.assertEqual(bandit.get_action(), BanditWalk.RIGHT)

    def test_gradient_bandit_BW(self):
        bandit = GradientBandit(list(BanditWalk), BanditWalkReward())

        self.assertEqual(bandit._preferences.shape[0], len(list(BanditWalk)))
        # with zero preferences the softmax must be uniform
        probs = bandit._get_probs()
        self.assertAlmostEqual(probs[0], 0.5)
        self.assertAlmostEqual(probs[1], 0.5)

        # run one hundred learning iterations
        bandit.loop(100)

        self.assertEqual(bandit.get_action(), BanditWalk.RIGHT)

    def test_gradient_bandit_BSW(self):
        bandit = GradientBandit(list(BanditWalk), BanditSlipperyWalkReward())

        self.assertEqual(bandit._preferences.shape[0], len(list(BanditWalk)))
        # with zero preferences the softmax must be uniform
        probs = bandit._get_probs()
        self.assertAlmostEqual(probs[0], 0.5)
        self.assertAlmostEqual(probs[1], 0.5)

        # run one hundred learning iterations
        bandit.loop(100)

        self.assertEqual(bandit.get_action(), BanditWalk.RIGHT)


if __name__ == '__main__':
    unittest.main()
@@ -1,7 +1,8 @@
1
1
 
2
2
  import unittest
3
3
 
4
- from pyrlutils.transition import OpenAIGymDiscreteEnvironmentTransitionProbabilityFactory
4
+ from pyrlutils.openai.utils import OpenAIGymDiscreteEnvironmentTransitionProbabilityFactory
5
+
5
6
 
6
7
  class TestFrozenLake(unittest.TestCase):
7
8
  def test_factory(self):
@@ -1,2 +0,0 @@
1
- numpy
2
- gym
pyrlutils-0.0.2/setup.py DELETED
@@ -1,53 +0,0 @@
1
-
2
- from setuptools import setup
3
-
4
-
5
- def readme():
6
- with open('README.md') as f:
7
- return f.read()
8
-
9
-
10
- def install_requirements():
11
- return [package_string.strip() for package_string in open('requirements.txt', 'r')]
12
-
13
-
14
- def package_description():
15
- text = open('README.md', 'r').read()
16
- return text
17
-
18
-
19
- setup(
20
- name='pyrlutils',
21
- version="0.0.2",
22
- description="Utility and Helpers for Reinformcement Learning",
23
- long_description=package_description(),
24
- long_description_content_type='text/markdown',
25
- classifiers=[
26
- "Topic :: Scientific/Engineering :: Mathematics",
27
- "Topic :: Software Development :: Libraries :: Python Modules",
28
- "Topic :: Software Development :: Version Control :: Git",
29
- "Programming Language :: Python :: 3.7",
30
- "Programming Language :: Python :: 3.8",
31
- "Programming Language :: Python :: 3.9",
32
- "Programming Language :: Python :: 3.10",
33
- "Programming Language :: Python :: 3.11",
34
- "Intended Audience :: Science/Research",
35
- "Intended Audience :: Developers",
36
- ],
37
- keywords="machine learning, reinforcement leaning, artifiial intelligence",
38
- url="https://github.com/stephenhky/PyRLUtils",
39
- author="Kwan-Yuet Ho",
40
- author_email="stephenhky@yahoo.com.hk",
41
- license='MIT',
42
- packages=[
43
- 'pyrlutils'
44
- ],
45
- install_requires=install_requirements(),
46
- tests_require=[
47
- 'unittest'
48
- ],
49
- # scripts=[],
50
- include_package_data=True,
51
- test_suite="test",
52
- zip_safe=False
53
- )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes