pyrlutils 0.0.4__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pyrlutils might be problematic.
- pyrlutils/action.py +10 -2
- pyrlutils/bandit/reward.py +3 -2
- pyrlutils/dp/__init__.py +0 -0
- pyrlutils/{valuefcns.py → dp/valuefcns.py} +16 -11
- pyrlutils/helpers/__init__.py +0 -0
- pyrlutils/helpers/exceptions.py +5 -0
- pyrlutils/openai/utils.py +3 -3
- pyrlutils/policy.py +79 -12
- pyrlutils/state.py +169 -74
- pyrlutils/td/__init__.py +0 -0
- pyrlutils/td/qlearn.py +86 -0
- pyrlutils/td/sarsa.py +86 -0
- pyrlutils/td/state_td.py +111 -0
- pyrlutils/td/utils.py +258 -0
- pyrlutils/transition.py +44 -35
- {pyrlutils-0.0.4.dist-info → pyrlutils-0.1.1.dist-info}/METADATA +7 -6
- pyrlutils-0.1.1.dist-info/RECORD +25 -0
- {pyrlutils-0.0.4.dist-info → pyrlutils-0.1.1.dist-info}/WHEEL +1 -1
- pyrlutils-0.0.4.dist-info/RECORD +0 -17
- {pyrlutils-0.0.4.dist-info → pyrlutils-0.1.1.dist-info/licenses}/LICENSE +0 -0
- {pyrlutils-0.0.4.dist-info → pyrlutils-0.1.1.dist-info}/top_level.txt +0 -0
pyrlutils/action.py CHANGED
@@ -1,5 +1,5 @@
 
-from types import LambdaType
+from types import LambdaType, FunctionType
 from typing import Union
 
 from .state import State
@@ -8,7 +8,7 @@ from .state import State
 DiscreteActionValueType = Union[float, str]
 
 class Action:
-    def __init__(self, actionfunc: LambdaType):
+    def __init__(self, actionfunc: Union[FunctionType, LambdaType]):
         self._actionfunc = actionfunc
 
     def act(self, state: State, *args, **kwargs) -> State:
@@ -17,3 +17,11 @@
 
     def __call__(self, state: State) -> State:
         return self.act(state)
+
+    @property
+    def action_function(self) -> Union[FunctionType, LambdaType]:
+        return self._actionfunc
+
+    @action_function.setter
+    def action_function(self, new_func: Union[FunctionType, LambdaType]) -> None:
+        self._actionfunc = new_func
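The upshot of these changes: `Action` now formally accepts plain named functions as well as lambdas, and exposes the wrapped callable through the new read/write `action_function` property. A minimal sketch of the new surface, assuming pyrlutils 0.1.1 is installed (the `flip` function and the state names are illustrative, not part of the package):

    from pyrlutils.action import Action
    from pyrlutils.state import DiscreteState

    # A toy two-value state starting at 'low'.
    state = DiscreteState(['low', 'high'], initial_value='low')

    def flip(s):
        # A named function now type-checks as an actionfunc, not only a lambda.
        s.set_state_value('high' if s.state_value == 'low' else 'low')
        return s

    action = Action(flip)
    action(state)
    print(state.state_value)                # 'high'

    # New in 0.1.1: inspect or replace the wrapped callable.
    print(action.action_function is flip)   # True
    action.action_function = lambda s: s    # swap in a no-op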
pyrlutils/bandit/reward.py CHANGED
@@ -1,11 +1,12 @@
 
 from abc import ABC, abstractmethod
+from typing import Any
 
 
 class IndividualBanditRewardFunction(ABC):
     @abstractmethod
-    def reward(self, action_value) -> float:
+    def reward(self, action_value: Any) -> float:
         pass
 
-    def __call__(self, action_value) -> float:
+    def __call__(self, action_value: Any) -> float:
         return self.reward(action_value)
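Only annotations changed here, but they spell out the subclassing contract. A hedged sketch (the two-arm reward and its arm names are invented for illustration):

    from pyrlutils.bandit.reward import IndividualBanditRewardFunction

    class TwoArmReward(IndividualBanditRewardFunction):
        # The action value is explicitly Any-typed as of 0.1.1.
        def reward(self, action_value) -> float:
            return 1.0 if action_value == 'good_arm' else 0.0

    reward_fn = TwoArmReward()
    print(reward_fn('good_arm'))   # 1.0, dispatched through __call__
    print(reward_fn('bad_arm'))    # 0.0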
pyrlutils/dp/__init__.py ADDED
File without changes
pyrlutils/{valuefcns.py → dp/valuefcns.py} RENAMED
@@ -1,18 +1,23 @@
 
 import random
 from copy import copy
-from typing import Tuple, Dict
 from itertools import product
+from typing import Annotated
 
 import numpy as np
+from numpy.typing import NDArray
 
-from .state import DiscreteStateValueType
-from .transition import TransitionProbabilityFactory
-from .policy import DiscreteDeterminsticPolicy
+from ..state import DiscreteStateValueType
+from ..transition import TransitionProbabilityFactory
+from ..policy import DiscreteDeterminsticPolicy
 
 
 class OptimalPolicyOnValueFunctions:
-    def __init__(self, discount_factor: float, transprobfac: TransitionProbabilityFactory):
+    def __init__(
+            self,
+            discount_factor: float,
+            transprobfac: TransitionProbabilityFactory
+    ):
         try:
             assert 0. <= discount_factor <= 1.
         except AssertionError:
@@ -31,7 +36,7 @@ class OptimalPolicyOnValueFunctions:
         self._theta = 1e-10
         self._policy_evaluation_maxiter = 10000
 
-    def _policy_evaluation(self, policy: DiscreteDeterminsticPolicy) -> np.ndarray:
+    def _policy_evaluation(self, policy: DiscreteDeterminsticPolicy) -> Annotated[NDArray[np.float64], "1D Array"]:
         prev_V = np.zeros(len(self._states_to_indices))
 
         for _ in range(self._policy_evaluation_maxiter):
@@ -55,7 +60,7 @@ class OptimalPolicyOnValueFunctions:
 
         return V
 
-    def _policy_improvement(self, V: np.ndarray) -> DiscreteDeterminsticPolicy:
+    def _policy_improvement(self, V: Annotated[NDArray[np.float64], "1D Array"]) -> DiscreteDeterminsticPolicy:
         Q = np.zeros((len(self._states_to_indices), len(self._actions_to_indices)))
 
         for state_value in self._state_names:
@@ -78,7 +83,7 @@ class OptimalPolicyOnValueFunctions:
             optimal_policy.add_deterministic_rule(state_value, action_value)
         return optimal_policy
 
-    def _policy_iteration(self) -> Tuple[np.ndarray, DiscreteDeterminsticPolicy]:
+    def _policy_iteration(self) -> tuple[Annotated[NDArray[np.float64], "1D Array"], DiscreteDeterminsticPolicy]:
         policy = DiscreteDeterminsticPolicy(self._actions_dict)
         for state_value in self._state_names:
             policy.add_deterministic_rule(state_value, random.choice(self._action_names))
@@ -97,7 +102,7 @@ class OptimalPolicyOnValueFunctions:
         return V, policy
 
 
-    def _value_iteration(self) -> Tuple[np.ndarray, DiscreteDeterminsticPolicy]:
+    def _value_iteration(self) -> tuple[Annotated[NDArray[np.float64], "1D Array"], DiscreteDeterminsticPolicy]:
         V = np.zeros(len(self._state_names))
 
         for _ in range(self._policy_evaluation_maxiter):
@@ -127,7 +132,7 @@ class OptimalPolicyOnValueFunctions:
 
         return V, policy
 
-    def policy_iteration(self) -> Tuple[Dict[DiscreteStateValueType, float], DiscreteDeterminsticPolicy]:
+    def policy_iteration(self) -> tuple[dict[DiscreteStateValueType, float], DiscreteDeterminsticPolicy]:
         V, policy = self._policy_iteration()
         state_values_dict = {
             self._state_names[i]: V[i]
@@ -135,7 +140,7 @@ class OptimalPolicyOnValueFunctions:
         }
         return state_values_dict, policy
 
-    def value_iteration(self) -> Tuple[Dict[DiscreteStateValueType, float], DiscreteDeterminsticPolicy]:
+    def value_iteration(self) -> tuple[dict[DiscreteStateValueType, float], DiscreteDeterminsticPolicy]:
         V, policy = self._value_iteration()
         state_values_dict = {
             self._state_names[i]: V[i]
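Two things happened to this module: it moved from the package root into the new dp subpackage (hence the `.` → `..` relative imports), and its NumPy annotations were tightened to `NDArray[np.float64]`. Downstream code only needs the new import path; a sketch of the migration, with the `TransitionProbabilityFactory` construction elided because transition.py is not reproduced in this diff:

    # 0.0.4
    # from pyrlutils.valuefcns import OptimalPolicyOnValueFunctions
    # 0.1.1: dynamic-programming solvers live under pyrlutils.dp
    from pyrlutils.dp.valuefcns import OptimalPolicyOnValueFunctions

    # Given a populated factory `transprobfac` (construction omitted here):
    # solver = OptimalPolicyOnValueFunctions(0.9, transprobfac)
    # state_values, policy = solver.policy_iteration()  # dict of V(s), greedy policy

For orientation, `_policy_evaluation` iterates the standard Bellman expectation backup $V_{k+1}(s) = \sum_{s',r} p(s',r \mid s, \pi(s))\,[r + \gamma V_k(s')]$, stopping once $\max_s |V_{k+1}(s) - V_k(s)| < \theta$; the unchanged context above fixes $\theta = 10^{-10}$ and caps the sweeps at 10000.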
pyrlutils/helpers/__init__.py ADDED
File without changes
pyrlutils/openai/utils.py CHANGED
@@ -5,7 +5,7 @@ from ..transition import TransitionProbabilityFactory, NextStateTuple
 
 
 class OpenAIGymDiscreteEnvironmentTransitionProbabilityFactory(TransitionProbabilityFactory):
-    def __init__(self, envname):
+    def __init__(self, envname: str):
         super().__init__()
         self._envname = envname
         self._gymenv = gym.make(envname)
@@ -23,9 +23,9 @@ class OpenAIGymDiscreteEnvironmentTransitionProbabi
         self.add_state_transitions(state_value, new_trans_dict)
 
     @property
-    def envname(self):
+    def envname(self) -> str:
         return self._envname
 
     @property
-    def gymenv(self):
+    def gymenv(self) -> gym.Env:
         return self._gymenv
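The factory's behavior is untouched; the new annotations just document its small public surface. A sketch, assuming a discrete Gym environment id such as 'FrozenLake-v1' exists in the installed gym version:

    from pyrlutils.openai.utils import OpenAIGymDiscreteEnvironmentTransitionProbabilityFactory

    factory = OpenAIGymDiscreteEnvironmentTransitionProbabilityFactory('FrozenLake-v1')
    print(factory.envname)   # 'FrozenLake-v1', now annotated as str
    print(factory.gymenv)    # the wrapped environment, now annotated as gym.Env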
pyrlutils/policy.py CHANGED
@@ -1,9 +1,10 @@
 
 from abc import ABC, abstractmethod
-from typing import Union,
+from typing import Union, Annotated
 from warnings import warn
 
 import numpy as np
+from numpy.typing import NDArray
 
 from .state import State, DiscreteState, DiscreteStateValueType
 from .action import Action, DiscreteActionValueType
@@ -12,7 +13,11 @@ from .action import Action, DiscreteActionValueType
 class Policy(ABC):
     @abstractmethod
     def get_action(self, state: State) -> Action:
-        pass
+        raise NotImplemented()
+
+    @abstractmethod
+    def get_action_value(self, state: State) -> DiscreteActionValueType:
+        raise NotImplemented()
 
     def __call__(self, state: State) -> Action:
         return self.get_action(state)
@@ -25,7 +30,7 @@
 class DeterministicPolicy(Policy):
     @abstractmethod
     def add_deterministic_rule(self, *args, **kwargs):
-        pass
+        raise NotImplemented()
 
     @property
     def is_stochastic(self) -> bool:
@@ -33,16 +38,23 @@
 
 
 class DiscreteDeterminsticPolicy(DeterministicPolicy):
-    def __init__(self, actions_dict:
+    def __init__(self, actions_dict: dict[DiscreteActionValueType, Action]):
         self._state_to_action = {}
         self._actions_dict = actions_dict
 
-    def add_deterministic_rule(
+    def add_deterministic_rule(
+            self,
+            state_value: DiscreteStateValueType,
+            action_value: DiscreteActionValueType
+    ) -> None:
         if state_value in self._state_to_action:
             warn('State value {} exists in rule; it will be replaced.'.format(state_value))
         self._state_to_action[state_value] = action_value
 
-    def get_action_value(
+    def get_action_value(
+            self,
+            state_value: DiscreteStateValueType
+    ) -> DiscreteActionValueType:
         return self._state_to_action.get(state_value)
 
     def get_action(self, state: DiscreteState) -> Action:
@@ -62,10 +74,16 @@ class DiscreteDeterminsticPolicy(DeterministicPolicy):
         return True
 
 
+class DiscreteContinuousPolicy(DeterministicPolicy):
+    @abstractmethod
+    def get_action(self, state: State) -> Action:
+        raise NotImplemented()
+
+
 class StochasticPolicy(Policy):
     @abstractmethod
     def get_probability(self, *args, **kwargs) -> float:
-        pass
+        raise NotImplemented()
 
     @property
     def is_stochastic(self) -> bool:
@@ -73,12 +91,61 @@ class StochasticPolicy(Policy):
 
 
 class DiscreteStochasticPolicy(StochasticPolicy):
-
-
-
+    def __init__(self, actions_dict: dict[DiscreteActionValueType, Action]):
+        self._state_to_action = {}
+        self._actions_dict = actions_dict
+
+    def add_stochastic_rule(
+            self,
+            state_value: DiscreteStateValueType,
+            action_values: list[DiscreteActionValueType],
+            probs: Union[list[float], Annotated[NDArray[np.float64], "1D Array"]] = None
+    ):
+        if probs is not None:
+            assert len(action_values) == len(probs)
+            probs = np.array(probs)
+        else:
+            probs = np.repeat(1./len(action_values), len(action_values))
+
+        if state_value in self._state_to_action:
+            warn('State value {} exists in rule; it will be replaced.'.format(state_value))
+        self._state_to_action[state_value] = {
+            action_value: prob
+            for action_value, prob in zip(action_values, probs)
+        }
+
+    def get_probability(
+            self,
+            state_value: DiscreteStateValueType,
+            action_value: DiscreteActionValueType
+    ) -> float:
+        if state_value not in self._state_to_action:
+            return 0.0
+        if action_value in self._state_to_action[state_value]:
+            return self._state_to_action[state_value][action_value]
+        else:
+            return 0.0
+
+    def get_action_value(self, state: State) -> DiscreteActionValueType:
+        allowed_actions = list(self._state_to_action[state].keys())
+        probs = np.array(list(self._state_to_action[state].values()))
+        sumprobs = np.sum(probs)
+        return np.random.choice(allowed_actions, p=probs/sumprobs)
+
+    def get_action(self, state: DiscreteState) -> Action:
+        return self._actions_dict[self.get_action_value(state.state_value)]
 
 
 class ContinuousStochasticPolicy(StochasticPolicy):
     @abstractmethod
-    def get_probability(
-
+    def get_probability(
+            self,
+            state_value: Union[float, Annotated[NDArray[np.float64], "1D Array"]],
+            action_value: DiscreteActionValueType,
+            value: Union[float, Annotated[NDArray[np.float64], "1D Array"]]
+    ) -> float:
+        raise NotImplemented()
+
+
+DiscretePolicy = Union[DiscreteDeterminsticPolicy, DiscreteStochasticPolicy]
+ContinuousPolicy = Union[ContinuousStochasticPolicy]
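`DiscreteStochasticPolicy` is the substantive addition: an empty stub in 0.0.4, it now keeps a per-state table of action probabilities, defaulting to a uniform distribution when `probs` is omitted. A sketch of the new API (state and action names invented; note that `get_action_value` indexes the table by state *value*, which is exactly what `get_action` passes it):

    from pyrlutils.action import Action
    from pyrlutils.policy import DiscreteStochasticPolicy

    actions = {'left': Action(lambda s: s), 'right': Action(lambda s: s)}
    policy = DiscreteStochasticPolicy(actions)

    # Explicit probabilities; omitting probs would give each action 0.5.
    policy.add_stochastic_rule('start', ['left', 'right'], probs=[0.25, 0.75])

    print(policy.get_probability('start', 'right'))   # 0.75
    print(policy.get_probability('start', 'jump'))    # 0.0 for unlisted actions
    print(policy.get_probability('goal', 'left'))     # 0.0 for unknown states
    print(policy.get_action_value('start'))           # samples 'left' or 'right'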
pyrlutils/state.py CHANGED
@@ -1,81 +1,87 @@
 
-from abc import ABC, abstractmethod
+import sys
+from abc import ABC
 from enum import Enum
-from dataclasses import dataclass
-from typing import Tuple, List, Optional, Union
+from typing import Optional, Union, Annotated, Literal
 
 import numpy as np
+from numpy.typing import NDArray
+if sys.version_info < (3, 11):
+    from typing_extensions import Self
+else:
+    from typing import Self
 
-
-class StateValue(ABC):
-    @property
-    @abstractmethod
-    def value(self):
-        pass
-
-
-@dataclass
-class DiscreteStateValue(StateValue):
-    enum: Enum
-
-    @property
-    def value(self):
-        return self.enum.value
-
-    def name(self):
-        return self.enum.name
-
-
-class ContinuousStateValue(StateValue):
-    _value: float
-
-    @property
-    def value(self) -> float:
-        return self._value
+from .helpers.exceptions import InvalidRangeError
 
 
 class State(ABC):
     @property
     def state_value(self):
-        pass
-
-    @abstractmethod
-    def set_state_value(self, state_value):
-        pass
+        raise NotImplemented()
 
-    @abstractmethod
-    def get_state_value(self):
-        pass
-
-    @state_value.setter
-    def state_value(self, new_state_value):
-        self.set_state_value(new_state_value)
 
-
-DiscreteStateValueType = Union[float, str, Tuple[int], Enum]
+DiscreteStateValueType = Union[str, int, tuple[int], Enum]
 
 
 class DiscreteState(State):
-    def __init__(
+    def __init__(
+            self,
+            all_state_values: list[DiscreteStateValueType],
+            initial_value: Optional[DiscreteStateValueType] = None,
+            terminals: Optional[dict[DiscreteStateValueType, bool]]=None
+    ):
         super().__init__()
         self._all_state_values = all_state_values
-        self.
+        self._state_values_to_indices = {
+            state_value: idx
+            for idx, state_value in enumerate(self._all_state_values)
+        }
+        if initial_value is not None:
+            self._current_index = self._state_values_to_indices[initial_value]
+        else:
+            self._current_index = 0
+        if terminals is None:
+            self._terminal_dict = {
+                state_value: False
+                for state_value in self._all_state_values
+            }
+        else:
+            self._terminal_dict = terminals.copy()
+            for state_value in self._all_state_values:
+                if self._terminal_dict.get(state_value) is None:
+                    self._terminal_dict[state_value] = False
+
+    def _get_state_value_from_index(self, index: int) -> DiscreteStateValueType:
+        return self._all_state_values[index]
 
     def get_state_value(self) -> DiscreteStateValueType:
-        return self.
+        return self._get_state_value_from_index(self._current_index)
 
-    def set_state_value(self, state_value: DiscreteStateValueType):
+    def set_state_value(self, state_value: DiscreteStateValueType) -> None:
         if state_value in self._all_state_values:
-            self.
+            self._current_index = self._state_values_to_indices[state_value]
         else:
             raise ValueError('State value {} is invalid.'.format(state_value))
 
-    def get_all_possible_state_values(self) -> List[DiscreteStateValueType]:
+    def get_all_possible_state_values(self) -> list[DiscreteStateValueType]:
         return self._all_state_values
 
+    def query_state_index_from_value(self, value: DiscreteStateValueType) -> int:
+        return self._state_values_to_indices[value]
+
+    @property
+    def state_index(self) -> int:
+        return self._current_index
+
+    @state_index.setter
+    def state_index(self, new_index: int) -> None:
+        if new_index >= len(self._all_state_values):
+            raise ValueError(f"Invalid index {new_index}; it must be less than {self.nb_state_values}.")
+        self._current_index = new_index
+
     @property
     def state_value(self) -> DiscreteStateValueType:
-        return self.
+        return self._all_state_values[self._current_index]
 
     @state_value.setter
     def state_value(self, new_state_value: DiscreteStateValueType):
@@ -85,22 +91,53 @@ class DiscreteState(State):
     def state_space_size(self):
         return len(self._all_state_values)
 
+    @property
+    def nb_state_values(self) -> int:
+        return len(self._all_state_values)
 
-
-    def
-        self.
-
+    @property
+    def is_terminal(self) -> bool:
+        return self._terminal_dict[self._all_state_values[self._current_index]]
+
+    def __hash__(self):
+        return self._current_index
+
+    def __eq__(self, other: Self) -> bool:
+        return self._current_index == other._current_index
 
 
 class ContinuousState(State):
-    def __init__(
+    def __init__(
+            self,
+            nbdims: int,
+            ranges: Union[Annotated[NDArray[np.float64], Literal["2"]], Annotated[NDArray[np.float64], Literal["*", "2"]]],
+            init_value: Optional[Union[float, Annotated[NDArray[np.float64], "1D Array"]]] = None
+    ):
+        super().__init__()
         self._nbdims = nbdims
 
+        try:
+            assert isinstance(ranges, np.ndarray)
+        except AssertionError:
+            raise TypeError('Range must be a numpy array.')
+
         try:
             assert (ranges.dtype == np.float64) or (ranges.dtype == np.float32) or (ranges.dtype == np.float16)
         except AssertionError:
             raise TypeError('It has to be floating type numpy.ndarray.')
 
+        try:
+            assert ranges.ndim == 1 or ranges.ndim == 2
+            match ranges.ndim:
+                case 1:
+                    assert ranges.shape[0] == 2
+                case 2:
+                    assert ranges.shape[1] == 2
+                case _:
+                    raise ValueError("Ranges must be of shape (2, ) or (*, 2).")
+        except AssertionError:
+            raise ValueError("Ranges must be of shape (2, ) or (*, 2).")
+
         try:
             assert self._nbdims > 0
         except AssertionError:
@@ -146,50 +183,53 @@ class ContinuousState(State):
             raise ValueError('Initialized value does not have the right dimension.')
             for i in range(self._nbdims):
                 try:
-                    assert
+                    assert self._ranges[i, 0] <= init_value[i] <= self.ranges[i, 1]
                 except AssertionError:
                     raise InvalidRangeError('Initialized value at dimension {} (value: {}) is not within the permitted range ({} -> {})!'.format(i, init_value[i], self._ranges[i, 0], self._ranges[i, 1]))
         else:
             try:
-                assert
+                assert self._ranges[0, 0] <= init_value <= self.ranges[0, 1]
             except AssertionError:
                 raise InvalidRangeError('Initialized value is out of range.')
         self._state_value = init_value
 
-    def set_state_value(self, state_value: Union[float, np.ndarray]):
-        if self.
+    def set_state_value(self, state_value: Union[float, Annotated[NDArray[np.float64], "1D Array"]]):
+        if self._nbdims > 1:
             try:
                 assert state_value.shape[0] == self._nbdims
             except AssertionError:
                 raise ValueError('Given value does not have the right dimension.')
-            for i in range(self.
+            for i in range(self._nbdims):
                 try:
-                    assert
+                    assert self.ranges[i, 0] <= state_value[i] <= self.ranges[i, 1]
                 except AssertionError:
                     raise InvalidRangeError()
         else:
             try:
-                assert
+                assert self.ranges[0, 0] <= state_value <= self.ranges[0, 1]
            except AssertionError:
                 raise InvalidRangeError()
 
         self._state_value = state_value
 
-    def get_state_value(self) -> np.ndarray:
+    def get_state_value(self) -> Annotated[NDArray[np.float64], "1D Array"]:
        return self._state_value
 
-    def get_state_value_ranges(self) -> np.ndarray:
+    def get_state_value_ranges(self) -> Union[Annotated[NDArray[np.float64], Literal["2"]], Annotated[NDArray[np.float64], Literal["*", "2"]]]:
         return self._ranges
 
-    def get_state_value_range_at_dimension(self, dimension: int) -> np.ndarray:
-        return self._ranges[dimension]
+    def get_state_value_range_at_dimension(self, dimension: int) -> Annotated[NDArray[np.float64], Literal["2"]]:
+        if dimension < self._nbdims:
+            return self._ranges[dimension]
+        else:
+            raise ValueError(f"There are only {self._nbdims} dimensions!")
 
     @property
-    def ranges(self) -> np.ndarray:
+    def ranges(self) -> Union[Annotated[NDArray[np.float64], Literal["2"]], Annotated[NDArray[np.float64], Literal["*", "2"]]]:
         return self.get_state_value_ranges()
 
     @property
-    def state_value(self) -> Union[float, np.ndarray]:
+    def state_value(self) -> Union[float, NDArray[np.float64]]:
         return self.get_state_value()
 
     @state_value.setter
@@ -200,9 +240,28 @@ class ContinuousState(State):
     def nbdims(self) -> int:
         return self._nbdims
 
+    def __hash__(self):
+        return hash(tuple(self._state_value))
+
+    def __eq__(self, other: Self):
+        if self.nbdims != other.nbdims:
+            raise ValueError(f"The two states have two different dimensions. ({self.nbdims} vs. {other.nbdims})")
+        for i in range(self.nbdims):
+            if self.state_value[i] != other.state_value[i]:
+                return False
+        return True
+
 
 class Discrete2DCartesianState(DiscreteState):
-    def __init__(
+    def __init__(
+            self,
+            x_lowlim: int,
+            x_hilim: int,
+            y_lowlim: int,
+            y_hilim: int,
+            initial_coordinate: list[int]=None,
+            terminals: Optional[dict[DiscreteStateValueType, bool]] = None
+    ):
         self._x_lowlim = x_lowlim
         self._x_hilim = x_hilim
         self._y_lowlim = y_lowlim
@@ -212,14 +271,50 @@ class Discrete2DCartesianState(DiscreteState):
         if initial_coordinate is None:
             initial_coordinate = [self._x_lowlim, self._y_lowlim]
         initial_value = (initial_coordinate[1] - self._y_lowlim) * self._countx + (initial_coordinate[0] - self._x_lowlim)
-        super().__init__(list(range(self._countx*self._county)),
+        super().__init__(list(range(self._countx*self._county)), initial_value=initial_value, terminals=terminals)
 
     def _encode_coordinates(self, x, y) -> int:
         return (y - self._y_lowlim) * self._countx + (x - self._x_lowlim)
 
-    def encode_coordinates(self, coordinates:
-
+    def encode_coordinates(self, coordinates: Union[list[int], Annotated[NDArray[np.int64], Literal["2"]]]) -> int:
+        if isinstance(coordinates, list):
+            assert len(coordinates) == 2
         return self._encode_coordinates(coordinates[0], coordinates[1])
 
-    def decode_coordinates(self, hashcode) -> List[int]:
-        return [hashcode % self._countx, hashcode // self._countx]
+    def decode_coordinates(self, hashcode) -> list[int]:
+        return [hashcode % self._countx + self._x_lowlim, hashcode // self._countx + self._y_lowlim]
+
+    def get_whether_terminal_given_coordinates(
+            self,
+            coordinates: Union[list[int], Annotated[NDArray[np.int64], Literal["2"]]]
+    ) -> bool:
+        if isinstance(coordinates, list):
+            assert len(coordinates) == 2
+        hashcode = self._encode_coordinates(coordinates[0], coordinates[1])
+        return self._terminal_dict.get(hashcode, False)
+
+    def set_terminal_given_coordinates(
+            self,
+            coordinates: Union[list[int], Annotated[NDArray[np.int64], Literal["2"]]],
+            terminal_value: bool
+    ) -> None:
+        if isinstance(coordinates, list):
+            assert len(coordinates) == 2
+        hashcode = self._encode_coordinates(coordinates[0], coordinates[1])
+        self._terminal_dict[hashcode] = terminal_value
+
+    @property
+    def x_lowlim(self) -> int:
+        return self._x_lowlim
+
+    @property
+    def x_hilim(self) -> int:
+        return self._x_hilim
+
+    @property
+    def y_lowlim(self) -> int:
+        return self._y_lowlim
+
+    @property
+    def y_hilim(self) -> int:
+        return self._y_hilim
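Beyond the typing overhaul, `DiscreteState` gains index bookkeeping, hashing, and terminal-state flags, and the grid world gains terminal management plus a `decode_coordinates` that adds the lower limits back (restoring symmetry with `encode_coordinates`). A sketch on a 3x2 grid, assuming the internal `_countx` spans the inclusive x-range as the encoding formula implies:

    from pyrlutils.state import Discrete2DCartesianState

    # x in [0, 2], y in [0, 1]: six states, starting at (0, 0).
    grid = Discrete2DCartesianState(0, 2, 0, 1, initial_coordinate=[0, 0])
    grid.set_terminal_given_coordinates([2, 1], True)

    code = grid.encode_coordinates([2, 1])
    print(code)                            # (1 - 0) * 3 + (2 - 0) = 5
    print(grid.decode_coordinates(code))   # [2, 1]; 0.1.1 adds the x/y lower limits back
    print(grid.get_whether_terminal_given_coordinates([2, 1]))   # True
    print(grid.is_terminal)                # False: the current state is still (0, 0)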
pyrlutils/td/__init__.py ADDED
File without changes