pyrlutils 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyrlutils/state.py +4 -1
- pyrlutils/td/qlearn.py +86 -0
- pyrlutils/td/sarsa.py +86 -0
- pyrlutils/td/{td.py → state_td.py} +38 -28
- pyrlutils/td/utils.py +149 -10
- {pyrlutils-0.1.0.dist-info → pyrlutils-0.1.1.dist-info}/METADATA +2 -1
- {pyrlutils-0.1.0.dist-info → pyrlutils-0.1.1.dist-info}/RECORD +10 -8
- {pyrlutils-0.1.0.dist-info → pyrlutils-0.1.1.dist-info}/WHEEL +0 -0
- {pyrlutils-0.1.0.dist-info → pyrlutils-0.1.1.dist-info}/licenses/LICENSE +0 -0
- {pyrlutils-0.1.0.dist-info → pyrlutils-0.1.1.dist-info}/top_level.txt +0 -0
pyrlutils/state.py
CHANGED
@@ -66,6 +66,9 @@ class DiscreteState(State):
     def get_all_possible_state_values(self) -> list[DiscreteStateValueType]:
         return self._all_state_values
 
+    def query_state_index_from_value(self, value: DiscreteStateValueType) -> int:
+        return self._state_values_to_indices[value]
+
     @property
     def state_index(self) -> int:
         return self._current_index
@@ -73,7 +76,7 @@ class DiscreteState(State):
     @state_index.setter
     def state_index(self, new_index: int) -> None:
         if new_index >= len(self._all_state_values):
-            raise ValueError(f"Invalid index {new_index}; it must be less than {
+            raise ValueError(f"Invalid index {new_index}; it must be less than {self.nb_state_values}.")
         self._current_index = new_index
 
     @property
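The new query_state_index_from_value method is the reverse lookup of the existing index-based accessors: it maps a state value back to its integer index. A minimal sketch of how it can pair with the state_index setter; the DiscreteState instance named state and the helper jump_to_value are hypothetical, and construction of the state object is not part of this diff:

    from pyrlutils.state import DiscreteState

    def jump_to_value(state: DiscreteState, value) -> None:
        # new in 0.1.1: state value -> integer index
        idx = state.query_state_index_from_value(value)
        # existing setter; raises ValueError for an out-of-range index
        state.state_index = idx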
pyrlutils/td/qlearn.py
ADDED
@@ -0,0 +1,86 @@
+
+from typing import Annotated
+
+import numpy as np
+from npdict import NumpyNDArrayWrappedDict
+
+from .utils import AbstractStateActionValueFunctionTemporalDifferenceLearner, decay_schedule, select_action
+from ..policy import DiscreteDeterminsticPolicy
+
+
+class QLearner(AbstractStateActionValueFunctionTemporalDifferenceLearner):
+    def learn(
+        self,
+        episodes: int
+    ) -> tuple[
+        Annotated[NumpyNDArrayWrappedDict, "2D array"],
+        Annotated[NumpyNDArrayWrappedDict, "1D array"],
+        DiscreteDeterminsticPolicy,
+        Annotated[NumpyNDArrayWrappedDict, "3D array"],
+        list[DiscreteDeterminsticPolicy]
+    ]:
+        Q = NumpyNDArrayWrappedDict(
+            [
+                self._state.get_all_possible_state_values(),
+                self._action_names
+            ],
+            default_initial_value=0.0
+        )
+        Q_track = NumpyNDArrayWrappedDict(
+            [
+                list(range(episodes)),
+                self._state.get_all_possible_state_values(),
+                self._action_names
+            ],
+            default_initial_value=0.0
+        )
+        pi_track = []
+
+        Q_array, Q_track_array = Q.to_numpy(), Q_track.to_numpy()
+        alphas = decay_schedule(
+            self.init_alpha, self.min_alpha, self.alpha_decay_ratio, episodes
+        )
+        epsilons = decay_schedule(
+            self.init_epsilon, self.min_epsilon, self.epsilon_decay_ratio, episodes
+        )
+
+        for i in range(episodes):
+            self._state.state_index = self.initial_state_index
+            done = False
+            action_value = select_action(self._state.state_value, Q, epsilons[i])
+            while not done:
+                old_state_value = self._state.state_value
+                new_action_value = select_action(self._state.state_value, Q, epsilons[i])
+                new_action_func = self._actions_dict[new_action_value]
+                self._state = new_action_func(self._state)
+                new_state_value = self._state.state_value
+                reward = self._indrewardfcn(old_state_value, action_value, new_state_value)
+                done = self._state.is_terminal
+
+                new_state_index = Q.get_key_index(0, new_state_value)
+                max_Q_given_state = Q.to_numpy()[new_state_index, :].max()
+                td_target = reward + self.gamma * max_Q_given_state * (not done)
+                td_error = td_target - Q[old_state_value, action_value]
+                Q[old_state_value, action_value] = Q[old_state_value, action_value] + alphas[i] * td_error
+
+            Q_track_array[i, :, :] = Q_array
+            pi_track.append(DiscreteDeterminsticPolicy(
+                {
+                    state_value: select_action(state_value, Q, epsilon=0.0)
+                    for state_value in self._state.get_all_possible_state_values()
+                }
+            ))
+
+        V_array = np.max(Q_array, axis=1)
+        V = NumpyNDArrayWrappedDict.from_numpyarray_given_keywords(
+            [self._state.get_all_possible_state_values()],
+            V_array
+        )
+        pi = DiscreteDeterminsticPolicy(
+            {
+                state_value: select_action(state_value, Q, epsilon=0.0)
+                for state_value in self._state.get_all_possible_state_values()
+            }
+        )
+
+        return Q, V, pi, Q_track, pi_track
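A hedged usage sketch of the new class. Only learn() and its five return values appear in this file; the constructor keywords are those of AbstractStateActionValueFunctionTemporalDifferenceLearner defined in pyrlutils/td/utils.py below, the variable transprobfac is assumed to be a TransitionProbabilityFactory built elsewhere, and the numeric values are illustrative only:

    from pyrlutils.td.qlearn import QLearner

    learner = QLearner(
        transprobfac,                  # pre-built TransitionProbabilityFactory (assumed)
        gamma=0.99,
        init_alpha=0.5, min_alpha=0.01, alpha_decay_ratio=0.3,
        init_epsilon=1.0, min_epsilon=0.1, epsilon_decay_ratio=0.9,
        initial_state_index=0
    )
    # Q: state-action values, V: greedy state values, pi: greedy policy,
    # plus per-episode histories Q_track and pi_track.
    Q, V, pi, Q_track, pi_track = learner.learn(episodes=1000)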
pyrlutils/td/sarsa.py
ADDED
@@ -0,0 +1,86 @@
+
+from typing import Annotated
+
+import numpy as np
+from npdict import NumpyNDArrayWrappedDict
+
+from .utils import AbstractStateActionValueFunctionTemporalDifferenceLearner, decay_schedule, select_action
+from ..policy import DiscreteDeterminsticPolicy
+
+
+class SARSALearner(AbstractStateActionValueFunctionTemporalDifferenceLearner):
+    def learn(
+        self,
+        episodes: int
+    ) -> tuple[
+        Annotated[NumpyNDArrayWrappedDict, "2D array"],
+        Annotated[NumpyNDArrayWrappedDict, "1D array"],
+        DiscreteDeterminsticPolicy,
+        Annotated[NumpyNDArrayWrappedDict, "3D array"],
+        list[DiscreteDeterminsticPolicy]
+    ]:
+        Q = NumpyNDArrayWrappedDict(
+            [
+                self._state.get_all_possible_state_values(),
+                self._action_names
+            ],
+            default_initial_value=0.0
+        )
+        Q_track = NumpyNDArrayWrappedDict(
+            [
+                list(range(episodes)),
+                self._state.get_all_possible_state_values(),
+                self._action_names
+            ],
+            default_initial_value=0.0
+        )
+        pi_track = []
+
+        Q_array, Q_track_array = Q.to_numpy(), Q_track.to_numpy()
+        alphas = decay_schedule(
+            self.init_alpha, self.min_alpha, self.alpha_decay_ratio, episodes
+        )
+        epsilons = decay_schedule(
+            self.init_epsilon, self.min_epsilon, self.epsilon_decay_ratio, episodes
+        )
+
+        for i in range(episodes):
+            self._state.state_index = self.initial_state_index
+            done = False
+            action_value = select_action(self._state.state_value, Q, epsilons[i])
+            while not done:
+                old_state_value = self._state.state_value
+                action_func = self._actions_dict[action_value]
+                self._state = action_func(self._state)
+                new_state_value = self._state.state_value
+                reward = self._indrewardfcn(old_state_value, action_value, new_state_value)
+                done = self._state.is_terminal
+                new_action_value = select_action(new_state_value, Q, epsilons[i])
+
+                td_target = reward + self.gamma * Q[new_state_value, new_action_value] * (not done)
+                td_error = td_target - Q[old_state_value, action_value]
+                Q[old_state_value, action_value] = Q[old_state_value, action_value] + alphas[i] * td_error
+
+                action_value = new_action_value
+
+            Q_track_array[i, :, :] = Q_array
+            pi_track.append(DiscreteDeterminsticPolicy(
+                {
+                    state_value: select_action(state_value, Q, epsilon=0.0)
+                    for state_value in self._state.get_all_possible_state_values()
+                }
+            ))
+
+        V_array = np.max(Q_array, axis=1)
+        V = NumpyNDArrayWrappedDict.from_numpyarray_given_keywords(
+            [self._state.get_all_possible_state_values()],
+            V_array
+        )
+        pi = DiscreteDeterminsticPolicy(
+            {
+                state_value: select_action(state_value, Q, epsilon=0.0)
+                for state_value in self._state.get_all_possible_state_values()
+            }
+        )
+
+        return Q, V, pi, Q_track, pi_track
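The only substantive difference from QLearner.learn above is the temporal-difference target: SARSA bootstraps from the action it actually selects next (on-policy), while Q-learning bootstraps from the best next action (off-policy). A self-contained sketch of the two targets; q_row and the two helper functions are illustrative stand-ins for one row of the Q table (the action values of the next state):

    import numpy as np

    def q_learning_target(reward: float, gamma: float, q_row: np.ndarray, done: bool) -> float:
        # off-policy: max over next-state action values
        return reward + gamma * q_row.max() * (not done)

    def sarsa_target(reward: float, gamma: float, q_row: np.ndarray, next_action_index: int, done: bool) -> float:
        # on-policy: the value of the action actually chosen next
        return reward + gamma * q_row[next_action_index] * (not done)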
pyrlutils/td/{td.py → state_td.py}
@@ -2,83 +2,93 @@
 from typing import Annotated
 
 import numpy as np
-from
+from npdict import NumpyNDArrayWrappedDict
 
-from .utils import decay_schedule,
+from .utils import decay_schedule, TimeDifferencePathElements, AbstractStateValueFunctionTemporalDifferenceLearner
 
 
-class SingleStepTemporalDifferenceLearner(
+class SingleStepTemporalDifferenceLearner(AbstractStateValueFunctionTemporalDifferenceLearner):
     def learn(
         self,
         episodes: int
-    ) -> tuple[Annotated[
-        V =
-
+    ) -> tuple[Annotated[NumpyNDArrayWrappedDict, "1D Array"], Annotated[NumpyNDArrayWrappedDict, "2D Array"]]:
+        V = NumpyNDArrayWrappedDict(
+            [self._state.get_all_possible_state_values()],
+            default_initial_value=0.0
+        )
+        V_track = NumpyNDArrayWrappedDict(
+            [list(range(episodes)), self._state.get_all_possible_state_values()],
+            default_initial_value=0.0
+        )
+        V_array, V_track_array = V.to_numpy(), V_track.to_numpy()
         alphas = decay_schedule(
             self.init_alpha, self.min_alpha, self.alpha_decay_ratio, episodes
         )
 
         for i in range(episodes):
-            self._state.
+            self._state.state_index = self.initial_state_index
             done = False
             while not done:
-                old_state_index = self._state.state_index
                 old_state_value = self._state.state_value
                 action_value = self._policy.get_action_value(self._state.state_value)
                 action_func = self._actions_dict[action_value]
                 self._state = action_func(self._state)
-                new_state_index = self._state.state_index
                 new_state_value = self._state.state_value
                 reward = self._indrewardfcn(old_state_value, action_value, new_state_value)
                 done = self._state.is_terminal
 
-                td_target = reward + self.gamma * V[
-                td_error = td_target - V[
-                V[
+                td_target = reward + self.gamma * V[new_state_value] * (not done)
+                td_error = td_target - V[old_state_value]
+                V[old_state_value] = V[old_state_value] + alphas[i] * td_error
 
-
+            V_track_array[i, :] = V_array
 
         return V, V_track
 
 
-class MultipleStepTemporalDifferenceLearner(
+class MultipleStepTemporalDifferenceLearner(AbstractStateValueFunctionTemporalDifferenceLearner):
     def learn(
         self,
         episodes: int,
         n_steps: int=3
-    ) -> tuple[Annotated[
-        V =
-
+    ) -> tuple[Annotated[NumpyNDArrayWrappedDict, "1D Array"], Annotated[NumpyNDArrayWrappedDict, "2D Array"]]:
+        V = NumpyNDArrayWrappedDict(
+            [self._state.get_all_possible_state_values()],
+            default_initial_value=0.0
+        )
+        V_track = NumpyNDArrayWrappedDict(
+            [list(range(episodes)), self._state.get_all_possible_state_values()],
+            default_initial_value=0.0
+        )
+        V_array, V_track_array = V.to_numpy(), V_track.to_numpy()
         alphas = decay_schedule(
             self.init_alpha, self.min_alpha, self.alpha_decay_ratio, episodes
         )
         discounts = np.logspace(0, n_steps-1, num=n_steps+1, base=self.gamma, endpoint=False)
 
         for i in range(episodes):
-            self._state.
+            self._state.state_index = self.initial_state_index
             done = False
             path = []
 
             while not done or path is not None:
                 path = path[1:]  # worth revisiting this line
 
-
+                new_state_value = self._state._get_state_value_from_index(self._state.nb_state_values-1)
                 while not done and len(path) < n_steps:
-                    old_state_index = self._state.state_index
                     old_state_value = self._state.state_value
                     action_value = self._policy.get_action_value(self._state.state_value)
                     action_func = self._actions_dict[action_value]
                     self._state = action_func(self._state)
-                    new_state_index = self._state.state_index
                     new_state_value = self._state.state_value
                     reward = self._indrewardfcn(old_state_value, action_value, new_state_value)
                     done = self._state.is_terminal
 
                     path.append(
                         TimeDifferencePathElements(
-
+                            this_state_value=old_state_value,
                             reward=reward,
-
+                            next_state_value=new_state_value,
                             done=done
                         )
                     )
@@ -86,16 +96,16 @@ class MultipleStepTemporalDifferenceLearner(AbstractTemporalDifferenceLearner):
                         break
 
                 n = len(path)
-
+                estimated_state_value = path[0].this_state_value
                 rewards = np.array([this_moment.reward for this_moment in path])
                 partial_return = discounts[n:] * rewards
-                bs_val = discounts[-1] * V[
+                bs_val = discounts[-1] * V[new_state_value] * (not done)
                 ntd_target = np.sum(np.append(partial_return, bs_val))
-                ntd_error = ntd_target - V[
-                V[
+                ntd_error = ntd_target - V[estimated_state_value]
+                V[(estimated_state_value,)] = V[estimated_state_value] + alphas[i] * ntd_error
                 if len(path) == 1 and path[0].done:
                     path = None
 
-
+            V_track_array[i, :] = V_array
 
         return V, V_track
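The n-step learner pre-computes its per-step discount factors with np.logspace. A quick standalone check of what that exact expression produces, evaluated here for gamma = 0.9 and n_steps = 3; with endpoint=False, numpy spreads num=n_steps+1 exponents over [0, n_steps-1), giving 0, 0.5, 1.0 and 1.5:

    import numpy as np

    gamma, n_steps = 0.9, 3
    # same expression as in MultipleStepTemporalDifferenceLearner.learn above
    discounts = np.logspace(0, n_steps - 1, num=n_steps + 1, base=gamma, endpoint=False)
    print(discounts)  # gamma ** [0, 0.5, 1.0, 1.5] ~= [1.0, 0.9487, 0.9, 0.8538]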
pyrlutils/td/utils.py
CHANGED
@@ -1,11 +1,14 @@
 
-from
-from typing import Optional, Annotated
+from typing import Annotated, Union, Optional
 from dataclasses import dataclass
+from abc import ABC, abstractmethod
 
 import numpy as np
 from numpy.typing import NDArray
+from npdict import NumpyNDArrayWrappedDict
 
+from ..state import DiscreteStateValueType
+from ..action import DiscreteActionValueType
 from ..policy import DiscretePolicy
 from ..transition import TransitionProbabilityFactory
 
@@ -28,7 +31,36 @@ def decay_schedule(
     return values
 
 
-
+def select_action(
+        state_value: DiscreteStateValueType,
+        Q: Union[Annotated[NDArray[np.float64], "2D Array"], NumpyNDArrayWrappedDict],
+        epsilon: float,
+) -> Union[DiscreteActionValueType, int]:
+    if np.random.random() <= epsilon:
+        if isinstance(Q, NumpyNDArrayWrappedDict):
+            return np.random.choice(Q._lists_keystrings[1])
+        else:
+            return np.random.choice(np.arange(Q.shape[1]))
+
+    q_matrix = Q.to_numpy() if isinstance(Q, NumpyNDArrayWrappedDict) else Q
+    state_index = Q.get_key_index(0, state_value) if isinstance(Q, NumpyNDArrayWrappedDict) else state_value
+    max_index = np.argmax(q_matrix[state_index, :])
+
+    if isinstance(Q, NumpyNDArrayWrappedDict):
+        return Q._lists_keystrings[1][max_index]
+    else:
+        return max_index
+
+
+@dataclass
+class TimeDifferencePathElements:
+    this_state_value: DiscreteStateValueType
+    reward: float
+    next_state_value: DiscreteStateValueType
+    done: bool
+
+
+class AbstractStateValueFunctionTemporalDifferenceLearner(ABC):
     def __init__(
         self,
         transprobfac: TransitionProbabilityFactory,
@@ -55,7 +87,7 @@ class AbstractTemporalDifferenceLearner(ABC):
         try:
             assert 0 <= initial_state_index < self._state.nb_state_values
         except AssertionError:
-            raise ValueError("Initial state index must be between 0 and {}"
+            raise ValueError(f"Initial state index must be between 0 and {self._state.nb_state_values}")
         self._init_state_index = initial_state_index
 
     @abstractmethod
@@ -111,9 +143,116 @@ class AbstractTemporalDifferenceLearner(ABC):
         self._init_state_index = val
 
 
-
-class
-
-
-
-
+
+class AbstractStateActionValueFunctionTemporalDifferenceLearner(ABC):
+    def __init__(
+        self,
+        transprobfac: TransitionProbabilityFactory,
+        gamma: float=1.0,
+        init_alpha: float=0.5,
+        min_alpha: float=0.01,
+        alpha_decay_ratio: float=0.3,
+        init_epsilon: float=1.0,
+        min_epsilon: float=0.1,
+        epsilon_decay_ratio: float=0.9,
+        policy: Optional[DiscretePolicy]=None,
+        initial_state_index: int=0
+    ):
+        self._gamma = gamma
+        self._init_alpha = init_alpha
+        self._min_alpha = min_alpha
+        try:
+            assert 0.0 <= alpha_decay_ratio <= 1.0
+        except AssertionError:
+            raise ValueError("alpha_decay_ratio must be between 0 and 1!")
+        self._alpha_decay_ratio = alpha_decay_ratio
+        self._init_epsilon = init_epsilon
+        self._min_epsilon = min_epsilon
+        self._epsilon_decay_ratio = epsilon_decay_ratio
+
+        self._transprobfac = transprobfac
+        self._state, self._actions_dict, self._indrewardfcn = self._transprobfac.generate_mdp_objects()
+        self._action_names = list(self._actions_dict.keys())
+        self._actions_to_indices = {action_value: idx for idx, action_value in enumerate(self._action_names)}
+        self._policy = policy
+        try:
+            assert 0 <= initial_state_index < self._state.nb_state_values
+        except AssertionError:
+            raise ValueError(f"Initial state index must be between 0 and {self._state.nb_state_values}")
+        self._init_state_index = initial_state_index
+
+    @abstractmethod
+    def learn(self, *args, **kwargs) -> tuple[Annotated[NDArray[np.float64], "1D Array"], Annotated[NDArray[np.float64], "2D Array"]]:
+        raise NotImplementedError()
+
+    @property
+    def nb_states(self) -> int:
+        return self._state.nb_state_values
+
+    @property
+    def policy(self) -> DiscretePolicy:
+        return self._policy
+
+    @policy.setter
+    def policy(self, val: DiscretePolicy):
+        self._policy = val
+
+    @property
+    def gamma(self) -> float:
+        return self._gamma
+
+    @gamma.setter
+    def gamma(self, val: float):
+        self._gamma = val
+
+    @property
+    def init_alpha(self) -> float:
+        return self._init_alpha
+
+    @init_alpha.setter
+    def init_alpha(self, val: float):
+        self._init_alpha = val
+
+    @property
+    def min_alpha(self) -> float:
+        return self._min_alpha
+
+    @min_alpha.setter
+    def min_alpha(self, val: float):
+        self._min_alpha = val
+
+    @property
+    def alpha_decay_ratio(self) -> float:
+        return self._alpha_decay_ratio
+
+    @property
+    def init_epsilon(self) -> float:
+        return self._init_epsilon
+
+    @init_epsilon.setter
+    def init_epsilon(self, val: float):
+        self._init_epsilon = val
+
+    @property
+    def min_epsilon(self) -> float:
+        return self._min_epsilon
+
+    @min_epsilon.setter
+    def min_epsilon(self, val: float):
+        self._min_epsilon = val
+
+    @property
+    def epsilon_decay_ratio(self) -> float:
+        return self._epsilon_decay_ratio
+
+    @epsilon_decay_ratio.setter
+    def epsilon_decay_ratio(self, val: float):
+        self._epsilon_decay_ratio = val
+
+    @property
+    def initial_state_index(self) -> int:
+        return self._init_state_index
+
+    @initial_state_index.setter
+    def initial_state_index(self, val: int):
+        self._init_state_index = val
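select_action implements epsilon-greedy selection and accepts either a NumpyNDArrayWrappedDict or a plain 2-D NumPy array whose rows index states and columns index actions. A small sketch exercising the plain-array branch; the q values are made up, and the decay_schedule call mirrors the positional usage in the learners above:

    import numpy as np
    from pyrlutils.td.utils import decay_schedule, select_action

    q = np.array([[0.0, 1.0, 0.2],
                  [0.5, 0.1, 0.9]])

    greedy_action = select_action(state_value=0, Q=q, epsilon=0.0)   # argmax of row 0 -> 1
    random_action = select_action(state_value=0, Q=q, epsilon=1.0)   # uniform over column indices

    alphas = decay_schedule(0.5, 0.01, 0.3, 100)   # 100 per-episode learning rates decaying from 0.5 toward 0.01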
{pyrlutils-0.1.0.dist-info → pyrlutils-0.1.1.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pyrlutils
-Version: 0.1.0
+Version: 0.1.1
 Summary: Utility and Helpers for Reinformcement Learning
 Author-email: Kwan Yuet Stephen Ho <stephenhky@yahoo.com.hk>
 License: MIT
@@ -21,6 +21,7 @@ Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: numpy
+Requires-Dist: npdict>=0.0.7
 Requires-Dist: typing-extensions
 Provides-Extra: openaigym
 Requires-Dist: gymnasium; extra == "openaigym"
{pyrlutils-0.1.0.dist-info → pyrlutils-0.1.1.dist-info}/RECORD
@@ -2,7 +2,7 @@ pyrlutils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pyrlutils/action.py,sha256=QoBdtcGtK_EkYAjb50bruhoB_XIz0agLpQjdGFnGbRQ,732
 pyrlutils/policy.py,sha256=A9bj2eVd6XjNNkClSYVJDoxoGuGkyoYVr1DpVdI0wzs,5120
 pyrlutils/reward.py,sha256=are0swsobMqI1IbrBVBaPMYXWpJnp6lZwAyfgBEm2zg,1211
-pyrlutils/state.py,sha256=
+pyrlutils/state.py,sha256=h-OGrezt0fWfVdM9-BTfqdhx1Ert_utG0ORpIwHwXCw,11902
 pyrlutils/transition.py,sha256=_32jxeYbsiKyaHR9Y2XceUQYbb1jslLCQO2AWL61_EU,6260
 pyrlutils/bandit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pyrlutils/bandit/algo.py,sha256=X2Pn4DOi-RXWz5CNg1h0RJCoV3VlAwEGHRMjkfbckfw,3969
@@ -14,10 +14,12 @@ pyrlutils/helpers/exceptions.py,sha256=4fPGW839BChfap-Gd7b-75Dz-Ed3foqbJQ1lg15TZ
 pyrlutils/openai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pyrlutils/openai/utils.py,sha256=PJc9WHZM8aM4Z9MlACUxUC8TO7VARp8taatba_ikhew,1056
 pyrlutils/td/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-pyrlutils/td/
-pyrlutils/td/
-pyrlutils
-pyrlutils
-pyrlutils-0.1.
-pyrlutils-0.1.
-pyrlutils-0.1.
+pyrlutils/td/qlearn.py,sha256=ZibW_fuB89ZAST5snNYLe5H_zUIMZ93vuJXguXpccyo,3374
+pyrlutils/td/sarsa.py,sha256=jtnfMdPHld9C8yzDMQd3xyLZ3BwGL6ShvDq5WpHfZEo,3281
+pyrlutils/td/state_td.py,sha256=gMX-RuSZQ-UIoTWnsmR7xLZvL2jndRknXTExWnhixpM,4778
+pyrlutils/td/utils.py,sha256=VM5MAfWLQIk6a_qENU-iWHglp1azlaP68qkHvl4jXro,8022
+pyrlutils-0.1.1.dist-info/licenses/LICENSE,sha256=bnQPjIcaeBdr2ZofX-_j-nELs8pAx5fQ4Cdfgeaspew,1063
+pyrlutils-0.1.1.dist-info/METADATA,sha256=1PmFggMx23mxdJlhG04qE7_lhAtEDz_qniaBBhTmiVI,2214
+pyrlutils-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+pyrlutils-0.1.1.dist-info/top_level.txt,sha256=gOBuxugE2MA4WDXlLhzkQh_rUonZU6nvJnMuomeHMCU,10
+pyrlutils-0.1.1.dist-info/RECORD,,
{pyrlutils-0.1.0.dist-info → pyrlutils-0.1.1.dist-info}/WHEEL
File without changes
{pyrlutils-0.1.0.dist-info → pyrlutils-0.1.1.dist-info}/licenses/LICENSE
File without changes
{pyrlutils-0.1.0.dist-info → pyrlutils-0.1.1.dist-info}/top_level.txt
File without changes