PyPI - pyRDDLGym-jax - Versions diffs - 0.3__py3-none-any.whl → 0.5__py3-none-any.whl - Mend

pyRDDLGym-jax 0.3__py3-none-any.whl → 0.5__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (18)

pyRDDLGym_jax/__init__.py +1 -1
pyRDDLGym_jax/core/compiler.py +90 -67
pyRDDLGym_jax/core/logic.py +286 -82
pyRDDLGym_jax/core/planner.py +191 -97
pyRDDLGym_jax/core/simulator.py +2 -1
pyRDDLGym_jax/core/tuning.py +58 -63
pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_replan.cfg +2 -1
pyRDDLGym_jax/examples/configs/PowerGen_Continuous_replan.cfg +2 -1
pyRDDLGym_jax/examples/configs/Reservoir_Continuous_replan.cfg +2 -1
pyRDDLGym_jax/examples/configs/Wildfire_MDP_ippc2014_replan.cfg +4 -3
pyRDDLGym_jax/examples/configs/default_replan.cfg +2 -1
pyRDDLGym_jax/examples/run_tune.py +1 -3
pyRDDLGym_jax-0.5.dist-info/METADATA +278 -0
{pyRDDLGym_jax-0.3.dist-info → pyRDDLGym_jax-0.5.dist-info}/RECORD +17 -17
{pyRDDLGym_jax-0.3.dist-info → pyRDDLGym_jax-0.5.dist-info}/WHEEL +1 -1
pyRDDLGym_jax-0.3.dist-info/METADATA +0 -26
{pyRDDLGym_jax-0.3.dist-info → pyRDDLGym_jax-0.5.dist-info}/LICENSE +0 -0
{pyRDDLGym_jax-0.3.dist-info → pyRDDLGym_jax-0.5.dist-info}/top_level.txt +0 -0

pyRDDLGym_jax/core/logic.py CHANGED Viewed

@@ -1,24 +1,18 @@
+from typing import Optional, Set
 import jax
 import jax.numpy as jnp
 import jax.random as random
-from typing import Optional, Set
 from pyRDDLGym.core.debug.exception import raise_warning
-class Complement:
-    '''Base class for approximate logical complement operations.'''
-    def __call__(self, x):
-        raise NotImplementedError
-class StandardComplement(Complement):
-    '''The standard approximate logical complement given by x -> 1 - x.'''
-    def __call__(self, x):
-        return 1.0 - x
+# ===========================================================================
+# RELATIONAL OPERATIONS
+# - abstract class
+# - sigmoid comparison
+#
+# ===========================================================================
 class Comparison:
     '''Base class for approximate comparison operations.'''
@@ -32,10 +26,14 @@ class Comparison:
     def equal(self, x, y, param):
         raise NotImplementedError
+    def sgn(self, x, param):
+        raise NotImplementedError
 class SigmoidComparison(Comparison):
     '''Comparison operations approximated using sigmoid functions.'''
+    # https://arxiv.org/abs/2110.05651
     def greater_equal(self, x, y, param):
         return jax.nn.sigmoid(param * (x - y))
@@ -44,7 +42,75 @@ class SigmoidComparison(Comparison):
     def equal(self, x, y, param):
         return 1.0 - jnp.square(jnp.tanh(param * (y - x)))
+    def sgn(self, x, param):
+        return jnp.tanh(param * x)
+# ===========================================================================
+# ROUNDING OPERATIONS
+# - abstract class
+# - soft rounding
+#
+# ===========================================================================
+class Rounding:
+    '''Base class for approximate rounding operations.'''
+    def floor(self, x, param):
+        raise NotImplementedError
+    def round(self, x, param):
+        raise NotImplementedError
+class SoftRounding(Rounding):
+    '''Rounding operations approximated using soft operations.'''
+    # https://www.tensorflow.org/probability/api_docs/python/tfp/substrates/jax/bijectors/Softfloor
+    def floor(self, x, param):
+        denom = jnp.tanh(param / 4.0)
+        return (jax.nn.sigmoid(param * (x - jnp.floor(x) - 1.0)) -
+                jax.nn.sigmoid(-param / 2.0)) / denom + jnp.floor(x)
+    # https://arxiv.org/abs/2006.09952
+    def round(self, x, param):
+        m = jnp.floor(x) + 0.5
+        return m + 0.5 * jnp.tanh(param * (x - m)) / jnp.tanh(param / 2.0)
+# ===========================================================================
+# LOGICAL COMPLEMENT
+# - abstract class
+# - standard complement
+#
+# ===========================================================================
+class Complement:
+    '''Base class for approximate logical complement operations.'''
+    def __call__(self, x):
+        raise NotImplementedError
+class StandardComplement(Complement):
+    '''The standard approximate logical complement given by x -> 1 - x.'''
+    # https://www.sciencedirect.com/science/article/abs/pii/016501149190171L
+    def __call__(self, x):
+        return 1.0 - x
+# ===========================================================================
+# TNORMS
+# - abstract tnorm
+# - product tnorm
+# - Godel tnorm
+# - Lukasiewicz tnorm
+# - Yager(p) tnorm
+#
+# https://www.sciencedirect.com/science/article/abs/pii/016501149190171L
+# ===========================================================================
 class TNorm:
     '''Base class for fuzzy differentiable t-norms.'''
@@ -86,8 +152,134 @@ class LukasiewiczTNorm(TNorm):
     def norms(self, x, axis):
         return jax.nn.relu(jnp.sum(x - 1.0, axis=axis) + 1.0)
+class YagerTNorm(TNorm):
+    '''Yager t-norm given by the expression
+    (x, y) -> max(1 - ((1 - x)^p + (1 - y)^p)^(1/p)).'''
+    def __init__(self, p=2.0):
+        self.p = float(p)
+    def norm(self, x, y):
+        base = jax.nn.relu(1.0 - jnp.stack([x, y], axis=0))
+        arg = jnp.linalg.norm(base, ord=self.p, axis=0)
+        return jax.nn.relu(1.0 - arg)
+    def norms(self, x, axis):
+        arg = jax.nn.relu(1.0 - x)
+        for ax in sorted(axis, reverse=True):
+            arg = jnp.linalg.norm(arg, ord=self.p, axis=ax)
+        return jax.nn.relu(1.0 - arg)
+# ===========================================================================
+# RANDOM SAMPLING
+# - abstract sampler
+# - Gumbel-softmax sampler
+# - determinization
+#
+# ===========================================================================
+class RandomSampling:
+    '''An abstract class that describes how discrete and non-reparameterizable
+    random variables are sampled.'''
+    def discrete(self, logic):
+        raise NotImplementedError
+    def bernoulli(self, logic):
+        jax_discrete, jax_param = self.discrete(logic)
+        def _jax_wrapped_calc_bernoulli_approx(key, prob, param):
+            prob = jnp.stack([1.0 - prob, prob], axis=-1)
+            sample = jax_discrete(key, prob, param)
+            return sample
+        return _jax_wrapped_calc_bernoulli_approx, jax_param
+    def poisson(self, logic):
+        def _jax_wrapped_calc_poisson_exact(key, rate, param):
+            return random.poisson(key=key, lam=rate, dtype=logic.INT)
+        return _jax_wrapped_calc_poisson_exact, None
+    def geometric(self, logic):
+        if logic.verbose:
+            raise_warning('Using the replacement rule: '
+                          'Geometric(p) --> floor(log(U) / log(1 - p)) + 1')
+        jax_floor, jax_param = logic.floor()
+        def _jax_wrapped_calc_geometric_approx(key, prob, param):
+            U = random.uniform(key=key, shape=jnp.shape(prob), dtype=logic.REAL)
+            sample = jax_floor(jnp.log(U) / jnp.log(1.0 - prob), param) + 1
+            return sample
+        return _jax_wrapped_calc_geometric_approx, jax_param
+class GumbelSoftmax(RandomSampling):
+    '''Random sampling of discrete variables using Gumbel-softmax trick.'''
+    def discrete(self, logic):
+        if logic.verbose:
+            raise_warning('Using the replacement rule: '
+                          'Discrete(p) --> Gumbel-Softmax(p)')
+        jax_argmax, jax_param = logic.argmax()
+        # https://arxiv.org/pdf/1611.01144
+        def _jax_wrapped_calc_discrete_gumbel_softmax(key, prob, param):
+            Gumbel01 = random.gumbel(key=key, shape=prob.shape, dtype=logic.REAL)
+            sample = Gumbel01 + jnp.log(prob + logic.eps)
+            sample = jax_argmax(sample, axis=-1, param=param)
+            return sample
+        return _jax_wrapped_calc_discrete_gumbel_softmax, jax_param
+class Determinization(RandomSampling):
+    '''Random sampling of variables using their deterministic mean estimate.'''
+    def discrete(self, logic):
+        if logic.verbose:
+            raise_warning('Using the replacement rule: '
+                          'Discrete(p) --> sum(i * p[i])')
+        def _jax_wrapped_calc_discrete_determinized(key, prob, param):
+            literals = FuzzyLogic.enumerate_literals(prob.shape, axis=-1)
+            sample = jnp.sum(literals * prob, axis=-1)
+            return sample
+        return _jax_wrapped_calc_discrete_determinized, None
+    def poisson(self, logic):
+        if logic.verbose:
+            raise_warning('Using the replacement rule: Poisson(rate) --> rate')
+        def _jax_wrapped_calc_poisson_determinized(key, rate, param):
+            return rate
+        return _jax_wrapped_calc_poisson_determinized, None
+    def geometric(self, logic):
+        if logic.verbose:
+            raise_warning('Using the replacement rule: Geometric(p) --> 1 / p')
+        def _jax_wrapped_calc_geometric_determinized(key, prob, param):
+            sample = 1.0 / prob
+            return sample
+        return _jax_wrapped_calc_geometric_determinized, None
+# ===========================================================================
+# FUZZY LOGIC
+#
+# ===========================================================================
 class FuzzyLogic:
     '''A class representing fuzzy logic in JAX.
@@ -98,9 +290,11 @@ class FuzzyLogic:
     def __init__(self, tnorm: TNorm=ProductTNorm(),
                  complement: Complement=StandardComplement(),
                  comparison: Comparison=SigmoidComparison(),
+                 sampling: RandomSampling=GumbelSoftmax(),
+                 rounding: Rounding=SoftRounding(),
                  weight: float=10.0,
                  debias: Optional[Set[str]]=None,
-                 eps: float=1e-10,
+                 eps: float=1e-15,
                  verbose: bool=False,
                  use64bit: bool=False) -> None:
         '''Creates a new fuzzy logic in Jax.
@@ -108,8 +302,9 @@ class FuzzyLogic:
         :param tnorm: fuzzy operator for logical AND
         :param complement: fuzzy operator for logical NOT
         :param comparison: fuzzy operator for comparisons (>, >=, <, ==, ~=, ...)
+        :param sampling: random sampling of non-reparameterizable distributions
+        :param rounding: rounding floating values to integers
         :param weight: a sharpness parameter for sigmoid and softmax activations
-        :param error: an error parameter (e.g. floor) (smaller means better accuracy)
         :param debias: which functions to de-bias approximate on forward pass
         :param eps: small positive float to mitigate underflow
         :param verbose: whether to dump replacements and other info to console
@@ -118,6 +313,8 @@ class FuzzyLogic:
         self.tnorm = tnorm
         self.complement = complement
         self.comparison = comparison
+        self.sampling = sampling
+        self.rounding = rounding
         self.weight = float(weight)
         if debias is None:
             debias = set()
@@ -142,10 +339,12 @@ class FuzzyLogic:
               f'    tnorm         ={type(self.tnorm).__name__}\n'
               f'    complement    ={type(self.complement).__name__}\n'
               f'    comparison    ={type(self.comparison).__name__}\n'
+              f'    sampling      ={type(self.sampling).__name__}\n'
+              f'    rounding      ={type(self.rounding).__name__}\n'
               f'    sigmoid_weight={self.weight}\n'
               f'    cpfs_to_debias={self.debias}\n'
               f'    underflow_tol ={self.eps}\n'
-              f'    use64bit      ={self.use64bit}')
+              f'    use_64_bit    ={self.use64bit}')
     # ===========================================================================
     # logical operators
@@ -339,12 +538,14 @@ class FuzzyLogic:
     def sgn(self):
         if self.verbose:
-            raise_warning('Using the replacement rule: sgn(x) --> tanh(x)')
+            raise_warning('Using the replacement rule: '
+                          'sgn(x) --> comparison.sgn(x)')
+        sgn_op = self.comparison.sgn
         debias = 'sgn' in self.debias
         def _jax_wrapped_calc_sgn_approx(x, param):
-            sample = jnp.tanh(param * x)
+            sample = sgn_op(x, param)
             if debias:
                 hard_sample = jnp.sign(x)
                 sample += jax.lax.stop_gradient(hard_sample - sample)
@@ -357,37 +558,48 @@ class FuzzyLogic:
     def floor(self):
         if self.verbose:
             raise_warning('Using the replacement rule: '
-                          'floor(x) --> x - atan(-1.0 / tan(pi * x)) / pi - 0.5')
+                          'floor(x) --> rounding.floor(x)')
+        floor_op = self.rounding.floor
+        debias = 'floor' in self.debias
         def _jax_wrapped_calc_floor_approx(x, param):
-            sawtooth_part = jnp.arctan(-1.0 / jnp.tan(x * jnp.pi)) / jnp.pi + 0.5
-            sample = x - jax.lax.stop_gradient(sawtooth_part)
+            sample = floor_op(x, param)
+            if debias:
+                hard_sample = jnp.floor(x)
+                sample += jax.lax.stop_gradient(hard_sample - sample)
             return sample
-        return _jax_wrapped_calc_floor_approx, None
-    def ceil(self):
-        jax_floor, jax_param = self.floor()
-        def _jax_wrapped_calc_ceil_approx(x, param):
-            return -jax_floor(-x, param)
+        tags = ('weight', 'floor')
+        new_param = (tags, self.weight)
+        return _jax_wrapped_calc_floor_approx, new_param
-        return _jax_wrapped_calc_ceil_approx, jax_param
     def round(self):
         if self.verbose:
-            raise_warning('Using the replacement rule: round(x) --> x')
+            raise_warning('Using the replacement rule: '
+                          'round(x) --> rounding.round(x)')
+        round_op = self.rounding.round
         debias = 'round' in self.debias
         def _jax_wrapped_calc_round_approx(x, param):
-            sample = x
+            sample = round_op(x, param)
             if debias:
                 hard_sample = jnp.round(x)
                 sample += jax.lax.stop_gradient(hard_sample - sample)
             return sample
-        return _jax_wrapped_calc_round_approx, None
+        tags = ('weight', 'round')
+        new_param = (tags, self.weight)
+        return _jax_wrapped_calc_round_approx, new_param
+    def ceil(self):
+        jax_floor, jax_param = self.floor()
+        def _jax_wrapped_calc_ceil_approx(x, param):
+            return -jax_floor(-x, param)
+        return _jax_wrapped_calc_ceil_approx, jax_param
     def mod(self):
         jax_floor, jax_param = self.floor()
@@ -419,7 +631,7 @@ class FuzzyLogic:
     # ===========================================================================
     @staticmethod
-    def _literals(shape, axis):
+    def enumerate_literals(shape, axis):
         literals = jnp.arange(shape[axis])
         literals = literals[(...,) + (jnp.newaxis,) * (len(shape) - 1)]
         literals = jnp.moveaxis(literals, source=0, destination=axis)
@@ -433,8 +645,9 @@ class FuzzyLogic:
         debias = 'argmax' in self.debias
+        # https://arxiv.org/abs/2110.05651
         def _jax_wrapped_calc_argmax_approx(x, axis, param):
-            literals = FuzzyLogic._literals(x.shape, axis=axis)
+            literals = FuzzyLogic.enumerate_literals(x.shape, axis=axis)
             soft_max = jax.nn.softmax(param * x, axis=axis)
             sample = jnp.sum(literals * soft_max, axis=axis)
             if debias:
@@ -468,7 +681,7 @@ class FuzzyLogic:
         def _jax_wrapped_calc_if_approx(c, a, b, param):
             sample = c * a + (1.0 - c) * b
             if debias:
-                hard_sample = jnp.select([c, ~c], [a, b])
+                hard_sample = jnp.where(c > 0.5, a, b)
                 sample += jax.lax.stop_gradient(hard_sample - sample)
             return sample
@@ -478,14 +691,14 @@ class FuzzyLogic:
         if self.verbose:
             raise_warning('Using the replacement rule: '
                           'switch(pred) { cases } --> '
-                          'sum(cases[i] * softmax(-abs(pred - i)))')
+                          'sum(cases[i] * softmax(-(pred - i)^2))')
         debias = 'switch' in self.debias
         def _jax_wrapped_calc_switch_approx(pred, cases, param):
-            literals = FuzzyLogic._literals(cases.shape, axis=0)
+            literals = FuzzyLogic.enumerate_literals(cases.shape, axis=0)
             pred = jnp.broadcast_to(pred[jnp.newaxis, ...], shape=cases.shape)
-            proximity = -jnp.abs(pred - literals)
+            proximity = -jnp.square(pred - literals)
             soft_case = jax.nn.softmax(param * proximity, axis=0)
             sample = jnp.sum(cases * soft_case, axis=0)
             if debias:
@@ -502,46 +715,26 @@ class FuzzyLogic:
     # random variables
     # ===========================================================================
-    def _gumbel_softmax(self, key, prob):
-        Gumbel01 = random.gumbel(key=key, shape=prob.shape, dtype=self.REAL)
-        sample = Gumbel01 + jnp.log(prob + self.eps)
-        return sample
+    def discrete(self):
+        return self.sampling.discrete(self)
     def bernoulli(self):
-        if self.verbose:
-            raise_warning('Using the replacement rule: '
-                          'Bernoulli(p) --> Gumbel-softmax(p)')
-        jax_gs = self._gumbel_softmax
-        jax_argmax, jax_param = self.argmax()
-        def _jax_wrapped_calc_bernoulli_approx(key, prob, param):
-            prob = jnp.stack([1.0 - prob, prob], axis=-1)
-            sample = jax_gs(key, prob)
-            sample = jax_argmax(sample, axis=-1, param=param)
-            return sample
-        return _jax_wrapped_calc_bernoulli_approx, jax_param
+        return self.sampling.bernoulli(self)
-    def discrete(self):
-        if self.verbose:
-            raise_warning('Using the replacement rule: '
-                          'Discrete(p) --> Gumbel-softmax(p)')
-        jax_gs = self._gumbel_softmax
-        jax_argmax, jax_param = self.argmax()
-        def _jax_wrapped_calc_discrete_approx(key, prob, param):
-            sample = jax_gs(key, prob)
-            sample = jax_argmax(sample, axis=-1, param=param)
-            return sample
-        return _jax_wrapped_calc_discrete_approx, jax_param
+    def poisson(self):
+        return self.sampling.poisson(self)
+    def geometric(self):
+        return self.sampling.geometric(self)
+# ===========================================================================
 # UNIT TESTS
+#
+# ===========================================================================
 logic = FuzzyLogic()
-w = 100.0
+w = 1000.0
 def _test_logical():
@@ -568,7 +761,7 @@ def _test_logical():
 def _test_indexing():
     print('testing indexing')
     _argmax, _ = logic.argmax()
-    _argmin, _ = logic.argmax()
+    _argmin, _ = logic.argmin()
     def argmaxmin(x):
         amax = _argmax(x, 0, w)
@@ -598,13 +791,14 @@ def _test_random():
     key = random.PRNGKey(42)
     _bernoulli, _ = logic.bernoulli()
     _discrete, _ = logic.discrete()
+    _geometric, _ = logic.geometric()
     def bern(n):
         prob = jnp.asarray([0.3] * n)
         sample = _bernoulli(key, prob, w)
         return sample
-    samples = bern(5000)
+    samples = bern(50000)
     print(jnp.mean(samples))
     def disc(n):
@@ -613,20 +807,30 @@ def _test_random():
         sample = _discrete(key, prob, w)
         return sample
-    samples = disc(5000)
+    samples = disc(50000)
     samples = jnp.round(samples)
     print([jnp.mean(samples == i) for i in range(3)])
+    def geom(n):
+        prob = jnp.asarray([0.3] * n)
+        sample = _geometric(key, prob, w)
+        return sample
+    samples = geom(50000)
+    print(jnp.mean(samples))
 def _test_rounding():
     print('testing rounding')
     _floor, _ = logic.floor()
     _ceil, _ = logic.ceil()
+    _round, _ = logic.round()
     _mod, _ = logic.mod()
-    x = jnp.asarray([2.1, 0.5001, 1.99, -2.01, -3.2, -0.1, -1.01, 23.01, -101.99, 200.01])
+    x = jnp.asarray([2.1, 0.6, 1.99, -2.01, -3.2, -0.1, -1.01, 23.01, -101.99, 200.01])
     print(_floor(x, w))
     print(_ceil(x, w))
+    print(_round(x, w))
     print(_mod(x, 2.0, w))

pyRDDLGym-jax 0.3__py3-none-any.whl → 0.5__py3-none-any.whl

pyRDDLGym-jax 0.3__py3-none-any.whl → 0.5__py3-none-any.whl