pyRDDLGym-jax 0.3-py3-none-any.whl → 0.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyRDDLGym_jax/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = '0.3'
1
+ __version__ = '0.4'
@@ -1,22 +1,11 @@
1
1
  from functools import partial
2
+ import traceback
3
+ from typing import Any, Callable, Dict, List, Optional
4
+
2
5
  import jax
3
6
  import jax.numpy as jnp
4
7
  import jax.random as random
5
8
  import jax.scipy as scipy
6
- import traceback
7
- from typing import Any, Callable, Dict, List, Optional
8
-
9
- from pyRDDLGym.core.debug.exception import raise_warning
10
-
11
- # more robust approach - if user does not have this or broken try to continue
12
- try:
13
- from tensorflow_probability.substrates import jax as tfp
14
- except Exception:
15
- raise_warning('Failed to import tensorflow-probability: '
16
- 'compilation of some complex distributions '
17
- '(Binomial, Negative-Binomial, Multinomial) will fail.', 'red')
18
- traceback.print_exc()
19
- tfp = None
20
9
 
21
10
  from pyRDDLGym.core.compiler.initializer import RDDLValueInitializer
22
11
  from pyRDDLGym.core.compiler.levels import RDDLLevelAnalysis
@@ -25,12 +14,23 @@ from pyRDDLGym.core.compiler.tracer import RDDLObjectsTracer
25
14
  from pyRDDLGym.core.constraints import RDDLConstraints
26
15
  from pyRDDLGym.core.debug.exception import (
27
16
  print_stack_trace,
17
+ raise_warning,
28
18
  RDDLInvalidNumberOfArgumentsError,
29
19
  RDDLNotImplementedError
30
20
  )
31
21
  from pyRDDLGym.core.debug.logger import Logger
32
22
  from pyRDDLGym.core.simulator import RDDLSimulatorPrecompiled
33
23
 
24
+ # more robust approach - if user does not have this or broken try to continue
25
+ try:
26
+ from tensorflow_probability.substrates import jax as tfp
27
+ except Exception:
28
+ raise_warning('Failed to import tensorflow-probability: '
29
+ 'compilation of some complex distributions '
30
+ '(Binomial, Negative-Binomial, Multinomial) will fail.', 'red')
31
+ traceback.print_exc()
32
+ tfp = None
33
+
34
34
 
35
35
  # ===========================================================================
36
36
  # EXACT RDDL TO JAX COMPILATION RULES
@@ -87,7 +87,7 @@ def _function_aggregation_exact_named(op, name):
87
87
  def _function_if_exact_named():
88
88
 
89
89
  def _jax_wrapped_if_exact(c, a, b, param):
90
- return jnp.where(c, a, b)
90
+ return jnp.where(c > 0.5, a, b)
91
91
 
92
92
  return _jax_wrapped_if_exact
93
93
 
@@ -114,16 +114,27 @@ def _function_bernoulli_exact_named():
114
114
  def _function_discrete_exact_named():
115
115
 
116
116
  def _jax_wrapped_discrete_exact(key, prob, param):
117
- logits = jnp.log(prob)
118
- sample = random.categorical(key=key, logits=logits, axis=-1)
119
- out_of_bounds = jnp.logical_not(jnp.logical_and(
120
- jnp.all(prob >= 0),
121
- jnp.allclose(jnp.sum(prob, axis=-1), 1.0)))
122
- return sample, out_of_bounds
117
+ return random.categorical(key=key, logits=jnp.log(prob), axis=-1)
123
118
 
124
119
  return _jax_wrapped_discrete_exact
125
120
 
126
121
 
122
+ def _function_poisson_exact_named():
123
+
124
+ def _jax_wrapped_poisson_exact(key, rate, param):
125
+ return random.poisson(key=key, lam=rate, dtype=jnp.int64)
126
+
127
+ return _jax_wrapped_poisson_exact
128
+
129
+
130
+ def _function_geometric_exact_named():
131
+
132
+ def _jax_wrapped_geometric_exact(key, prob, param):
133
+ return random.geometric(key=key, p=prob, dtype=jnp.int64)
134
+
135
+ return _jax_wrapped_geometric_exact
136
+
137
+
127
138
  class JaxRDDLCompiler:
128
139
  '''Compiles a RDDL AST representation into an equivalent JAX representation.
129
140
  All operations are identical to their numpy equivalents.
@@ -210,12 +221,12 @@ class JaxRDDLCompiler:
210
221
  }
211
222
 
212
223
  EXACT_RDDL_TO_JAX_IF = _function_if_exact_named()
213
-
214
224
  EXACT_RDDL_TO_JAX_SWITCH = _function_switch_exact_named()
215
225
 
216
226
  EXACT_RDDL_TO_JAX_BERNOULLI = _function_bernoulli_exact_named()
217
-
218
227
  EXACT_RDDL_TO_JAX_DISCRETE = _function_discrete_exact_named()
228
+ EXACT_RDDL_TO_JAX_POISSON = _function_poisson_exact_named()
229
+ EXACT_RDDL_TO_JAX_GEOMETRIC = _function_geometric_exact_named()
219
230
 
220
231
  def __init__(self, rddl: RDDLLiftedModel,
221
232
  allow_synchronous_state: bool=True,
@@ -289,6 +300,8 @@ class JaxRDDLCompiler:
289
300
  self.SWITCH_HELPER = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_SWITCH
290
301
  self.BERNOULLI_HELPER = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_BERNOULLI
291
302
  self.DISCRETE_HELPER = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_DISCRETE
303
+ self.POISSON_HELPER = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_POISSON
304
+ self.GEOMETRIC_HELPER = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_GEOMETRIC
292
305
 
293
306
  # ===========================================================================
294
307
  # main compilation subroutines
@@ -996,13 +1009,14 @@ class JaxRDDLCompiler:
996
1009
  jax_op, jax_param = self._unwrap(negative_op, expr.id, info)
997
1010
  return self._jax_unary(jax_expr, jax_op, jax_param, at_least_int=True)
998
1011
 
999
- elif n == 2:
1000
- lhs, rhs = args
1001
- jax_lhs = self._jax(lhs, info)
1002
- jax_rhs = self._jax(rhs, info)
1012
+ elif n == 2 or (n >= 2 and op in {'*', '+'}):
1013
+ jax_exprs = [self._jax(arg, info) for arg in args]
1003
1014
  jax_op, jax_param = self._unwrap(valid_ops[op], expr.id, info)
1004
- return self._jax_binary(
1005
- jax_lhs, jax_rhs, jax_op, jax_param, at_least_int=True)
1015
+ result = jax_exprs[0]
1016
+ for jax_rhs in jax_exprs[1:]:
1017
+ result = self._jax_binary(
1018
+ result, jax_rhs, jax_op, jax_param, at_least_int=True)
1019
+ return result
1006
1020
 
1007
1021
  JaxRDDLCompiler._check_num_args(expr, 2)
1008
1022
 
@@ -1046,13 +1060,14 @@ class JaxRDDLCompiler:
1046
1060
  jax_op, jax_param = self._unwrap(logical_not_op, expr.id, info)
1047
1061
  return self._jax_unary(jax_expr, jax_op, jax_param, check_dtype=bool)
1048
1062
 
1049
- elif n == 2:
1050
- lhs, rhs = args
1051
- jax_lhs = self._jax(lhs, info)
1052
- jax_rhs = self._jax(rhs, info)
1063
+ elif n == 2 or (n >= 2 and op in {'^', '&', '|'}):
1064
+ jax_exprs = [self._jax(arg, info) for arg in args]
1053
1065
  jax_op, jax_param = self._unwrap(valid_ops[op], expr.id, info)
1054
- return self._jax_binary(
1055
- jax_lhs, jax_rhs, jax_op, jax_param, check_dtype=bool)
1066
+ result = jax_exprs[0]
1067
+ for jax_rhs in jax_exprs[1:]:
1068
+ result = self._jax_binary(
1069
+ result, jax_rhs, jax_op, jax_param, check_dtype=bool)
1070
+ return result
1056
1071
 
1057
1072
  JaxRDDLCompiler._check_num_args(expr, 2)
1058
1073
 
@@ -1165,16 +1180,17 @@ class JaxRDDLCompiler:
1165
1180
  return _jax_wrapped_if_then_else
1166
1181
 
1167
1182
  def _jax_switch(self, expr, info):
1183
+ pred, *_ = expr.args
1168
1184
 
1169
- # if expression is non-fluent, always use the exact operation
1170
- if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(expr):
1185
+ # if predicate is non-fluent, always use the exact operation
1186
+ # case conditions are currently only literals so they are non-fluent
1187
+ if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(pred):
1171
1188
  switch_op = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_SWITCH
1172
1189
  else:
1173
1190
  switch_op = self.SWITCH_HELPER
1174
1191
  jax_switch, jax_param = self._unwrap(switch_op, expr.id, info)
1175
1192
 
1176
1193
  # recursively compile predicate
1177
- pred, *_ = expr.args
1178
1194
  jax_pred = self._jax(pred, info)
1179
1195
 
1180
1196
  # recursively compile cases
@@ -1426,15 +1442,24 @@ class JaxRDDLCompiler:
1426
1442
  def _jax_poisson(self, expr, info):
1427
1443
  ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_POISSON']
1428
1444
  JaxRDDLCompiler._check_num_args(expr, 1)
1429
-
1430
1445
  arg_rate, = expr.args
1446
+
1447
+ # if rate is non-fluent, always use the exact operation
1448
+ if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(arg_rate):
1449
+ poisson_op = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_POISSON
1450
+ else:
1451
+ poisson_op = self.POISSON_HELPER
1452
+ jax_poisson, jax_param = self._unwrap(poisson_op, expr.id, info)
1453
+
1454
+ # recursively compile arguments
1431
1455
  jax_rate = self._jax(arg_rate, info)
1432
1456
 
1433
1457
  # uses the implicit JAX subroutine
1434
1458
  def _jax_wrapped_distribution_poisson(x, params, key):
1435
1459
  rate, key, err = jax_rate(x, params, key)
1436
1460
  key, subkey = random.split(key)
1437
- sample = random.poisson(key=subkey, lam=rate, dtype=self.INT)
1461
+ param = params.get(jax_param, None)
1462
+ sample = jax_poisson(subkey, rate, param).astype(self.INT)
1438
1463
  out_of_bounds = jnp.logical_not(jnp.all(rate >= 0))
1439
1464
  err |= (out_of_bounds * ERR)
1440
1465
  return sample, key, err
@@ -1535,33 +1560,25 @@ class JaxRDDLCompiler:
1535
1560
  ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_GEOMETRIC']
1536
1561
  JaxRDDLCompiler._check_num_args(expr, 1)
1537
1562
  arg_prob, = expr.args
1563
+
1564
+ # if prob is non-fluent, always use the exact operation
1565
+ if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(arg_prob):
1566
+ geom_op = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_GEOMETRIC
1567
+ else:
1568
+ geom_op = self.GEOMETRIC_HELPER
1569
+ jax_geom, jax_param = self._unwrap(geom_op, expr.id, info)
1570
+
1571
+ # recursively compile arguments
1538
1572
  jax_prob = self._jax(arg_prob, info)
1539
1573
 
1540
- if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(arg_prob):
1541
-
1542
- # prob is non-fluent: do not reparameterize
1543
- def _jax_wrapped_distribution_geometric(x, params, key):
1544
- prob, key, err = jax_prob(x, params, key)
1545
- key, subkey = random.split(key)
1546
- sample = random.geometric(key=subkey, p=prob, dtype=self.INT)
1547
- out_of_bounds = jnp.logical_not(jnp.all((prob >= 0) & (prob <= 1)))
1548
- err |= (out_of_bounds * ERR)
1549
- return sample, key, err
1550
-
1551
- else:
1552
- floor_op, jax_param = self._unwrap(
1553
- self.KNOWN_UNARY['floor'], expr.id, info)
1554
-
1555
- # reparameterization trick Geom(p) = floor(ln(U(0, 1)) / ln(p)) + 1
1556
- def _jax_wrapped_distribution_geometric(x, params, key):
1557
- prob, key, err = jax_prob(x, params, key)
1558
- key, subkey = random.split(key)
1559
- U = random.uniform(key=subkey, shape=jnp.shape(prob), dtype=self.REAL)
1560
- param = params.get(jax_param, None)
1561
- sample = floor_op(jnp.log(U) / jnp.log(1.0 - prob), param) + 1
1562
- out_of_bounds = jnp.logical_not(jnp.all((prob >= 0) & (prob <= 1)))
1563
- err |= (out_of_bounds * ERR)
1564
- return sample, key, err
1574
+ def _jax_wrapped_distribution_geometric(x, params, key):
1575
+ prob, key, err = jax_prob(x, params, key)
1576
+ key, subkey = random.split(key)
1577
+ param = params.get(jax_param, None)
1578
+ sample = jax_geom(subkey, prob, param).astype(self.INT)
1579
+ out_of_bounds = jnp.logical_not(jnp.all((prob >= 0) & (prob <= 1)))
1580
+ err |= (out_of_bounds * ERR)
1581
+ return sample, key, err
1565
1582
 
1566
1583
  return _jax_wrapped_distribution_geometric
1567
1584
 
@@ -1770,7 +1787,10 @@ class JaxRDDLCompiler:
1770
1787
  # dispatch to sampling subroutine
1771
1788
  key, subkey = random.split(key)
1772
1789
  param = params.get(jax_param, None)
1773
- sample, out_of_bounds = jax_discrete(subkey, prob, param)
1790
+ sample = jax_discrete(subkey, prob, param)
1791
+ out_of_bounds = jnp.logical_not(jnp.logical_and(
1792
+ jnp.all(prob >= 0),
1793
+ jnp.allclose(jnp.sum(prob, axis=-1), 1.0)))
1774
1794
  error |= (out_of_bounds * ERR)
1775
1795
  return sample, key, error
1776
1796
 
@@ -1803,7 +1823,10 @@ class JaxRDDLCompiler:
1803
1823
  # dispatch to sampling subroutine
1804
1824
  key, subkey = random.split(key)
1805
1825
  param = params.get(jax_param, None)
1806
- sample, out_of_bounds = jax_discrete(subkey, prob, param)
1826
+ sample = jax_discrete(subkey, prob, param)
1827
+ out_of_bounds = jnp.logical_not(jnp.logical_and(
1828
+ jnp.all(prob >= 0),
1829
+ jnp.allclose(jnp.sum(prob, axis=-1), 1.0)))
1807
1830
  error |= (out_of_bounds * ERR)
1808
1831
  return sample, key, error
1809
1832
 
@@ -1,11 +1,19 @@
1
+ from typing import Optional, Set
2
+
1
3
  import jax
2
4
  import jax.numpy as jnp
3
5
  import jax.random as random
4
- from typing import Optional, Set
5
6
 
6
7
  from pyRDDLGym.core.debug.exception import raise_warning
7
8
 
8
9
 
10
+ # ===========================================================================
11
+ # LOGICAL COMPLEMENT
12
+ # - abstract class
13
+ # - standard complement
14
+ #
15
+ # ===========================================================================
16
+
9
17
  class Complement:
10
18
  '''Base class for approximate logical complement operations.'''
11
19
 
@@ -20,6 +28,13 @@ class StandardComplement(Complement):
20
28
  return 1.0 - x
21
29
 
22
30
 
31
+ # ===========================================================================
32
+ # RELATIONAL OPERATIONS
33
+ # - abstract class
34
+ # - sigmoid comparison
35
+ #
36
+ # ===========================================================================
37
+
23
38
  class Comparison:
24
39
  '''Base class for approximate comparison operations.'''
25
40
 
@@ -44,7 +59,17 @@ class SigmoidComparison(Comparison):
44
59
 
45
60
  def equal(self, x, y, param):
46
61
  return 1.0 - jnp.square(jnp.tanh(param * (y - x)))
47
-
62
+
63
+
64
+ # ===========================================================================
65
+ # TNORMS
66
+ # - abstract tnorm
67
+ # - product tnorm
68
+ # - Godel tnorm
69
+ # - Lukasiewicz tnorm
70
+ # - Yager(p) tnorm
71
+ #
72
+ # ===========================================================================
48
73
 
49
74
  class TNorm:
50
75
  '''Base class for fuzzy differentiable t-norms.'''
@@ -86,8 +111,133 @@ class LukasiewiczTNorm(TNorm):
86
111
 
87
112
  def norms(self, x, axis):
88
113
  return jax.nn.relu(jnp.sum(x - 1.0, axis=axis) + 1.0)
114
+
115
+
116
+ class YagerTNorm(TNorm):
117
+ '''Yager t-norm given by the expression
118
+ (x, y) -> max(0, 1 - ((1 - x)^p + (1 - y)^p)^(1/p)).'''
119
+
120
+ def __init__(self, p=2.0):
121
+ self.p = p
122
+
123
+ def norm(self, x, y):
124
+ base_x = jax.nn.relu(1.0 - x)
125
+ base_y = jax.nn.relu(1.0 - y)
126
+ arg = jnp.power(base_x ** self.p + base_y ** self.p, 1.0 / self.p)
127
+ return jax.nn.relu(1.0 - arg)
128
+
129
+ def norms(self, x, axis):
130
+ base = jax.nn.relu(1.0 - x)
131
+ arg = jnp.power(jnp.sum(base ** self.p, axis=axis), 1.0 / self.p)
132
+ return jax.nn.relu(1.0 - arg)
133
+
134
+
135
+ # ===========================================================================
136
+ # RANDOM SAMPLING
137
+ # - abstract sampler
138
+ # - Gumbel-softmax sampler
139
+ # - determinization
140
+ #
141
+ # ===========================================================================
142
+
143
+ class RandomSampling:
144
+ '''An abstract class that describes how discrete and non-reparameterizable
145
+ random variables are sampled.'''
146
+
147
+ def discrete(self, logic):
148
+ raise NotImplementedError
149
+
150
+ def bernoulli(self, logic):
151
+ jax_discrete, jax_param = self.discrete(logic)
152
+
153
+ def _jax_wrapped_calc_bernoulli_approx(key, prob, param):
154
+ prob = jnp.stack([1.0 - prob, prob], axis=-1)
155
+ sample = jax_discrete(key, prob, param)
156
+ return sample
157
+
158
+ return _jax_wrapped_calc_bernoulli_approx, jax_param
159
+
160
+ def poisson(self, logic):
161
+
162
+ def _jax_wrapped_calc_poisson_exact(key, rate, param):
163
+ return random.poisson(key=key, lam=rate, dtype=logic.INT)
164
+
165
+ return _jax_wrapped_calc_poisson_exact, None
166
+
167
+ def geometric(self, logic):
168
+ if logic.verbose:
169
+ raise_warning('Using the replacement rule: '
170
+ 'Geometric(p) --> floor(log(U) / log(1 - p)) + 1')
171
+
172
+ jax_floor, jax_param = logic.floor()
173
+
174
+ def _jax_wrapped_calc_geometric_approx(key, prob, param):
175
+ U = random.uniform(key=key, shape=jnp.shape(prob), dtype=logic.REAL)
176
+ sample = jax_floor(jnp.log(U) / jnp.log(1.0 - prob), param) + 1
177
+ return sample
178
+
179
+ return _jax_wrapped_calc_geometric_approx, jax_param
180
+
181
+
182
+ class GumbelSoftmax(RandomSampling):
183
+ '''Random sampling of discrete variables using Gumbel-softmax trick.'''
89
184
 
185
+ def discrete(self, logic):
186
+ if logic.verbose:
187
+ raise_warning('Using the replacement rule: '
188
+ 'Discrete(p) --> Gumbel-softmax(p)')
189
+
190
+ jax_argmax, jax_param = logic.argmax()
191
+
192
+ def _jax_wrapped_calc_discrete_gumbel_softmax(key, prob, param):
193
+ Gumbel01 = random.gumbel(key=key, shape=prob.shape, dtype=logic.REAL)
194
+ sample = Gumbel01 + jnp.log(prob + logic.eps)
195
+ sample = jax_argmax(sample, axis=-1, param=param)
196
+ return sample
197
+
198
+ return _jax_wrapped_calc_discrete_gumbel_softmax, jax_param
199
+
200
+
201
+ class Determinization(RandomSampling):
202
+ '''Random sampling of variables using their deterministic mean estimate.'''
90
203
 
204
+ def discrete(self, logic):
205
+ if logic.verbose:
206
+ raise_warning('Using the replacement rule: '
207
+ 'Discrete(p) --> sum(i * p[i])')
208
+
209
+ def _jax_wrapped_calc_discrete_determinized(key, prob, param):
210
+ literals = FuzzyLogic.enumerate_literals(prob.shape, axis=-1)
211
+ sample = jnp.sum(literals * prob, axis=-1)
212
+ return sample
213
+
214
+ return _jax_wrapped_calc_discrete_determinized, None
215
+
216
+ def poisson(self, logic):
217
+ if logic.verbose:
218
+ raise_warning('Using the replacement rule: Poisson(rate) --> rate')
219
+
220
+ def _jax_wrapped_calc_poisson_determinized(key, rate, param):
221
+ return rate
222
+
223
+ return _jax_wrapped_calc_poisson_determinized, None
224
+
225
+ def geometric(self, logic):
226
+ if logic.verbose:
227
+ raise_warning('Using the replacement rule: Geometric(p) --> 1 / p')
228
+
229
+ def _jax_wrapped_calc_geometric_determinized(key, prob, param):
230
+ sample = 1.0 / prob
231
+ return sample
232
+
233
+ return _jax_wrapped_calc_geometric_determinized, None
234
+
235
+
236
+ # ===========================================================================
237
+ # FUZZY LOGIC
238
+ #
239
+ # ===========================================================================
240
+
91
241
  class FuzzyLogic:
92
242
  '''A class representing fuzzy logic in JAX.
93
243
 
@@ -98,9 +248,10 @@ class FuzzyLogic:
98
248
  def __init__(self, tnorm: TNorm=ProductTNorm(),
99
249
  complement: Complement=StandardComplement(),
100
250
  comparison: Comparison=SigmoidComparison(),
251
+ sampling: RandomSampling=GumbelSoftmax(),
101
252
  weight: float=10.0,
102
253
  debias: Optional[Set[str]]=None,
103
- eps: float=1e-10,
254
+ eps: float=1e-15,
104
255
  verbose: bool=False,
105
256
  use64bit: bool=False) -> None:
106
257
  '''Creates a new fuzzy logic in Jax.
@@ -108,8 +259,8 @@ class FuzzyLogic:
108
259
  :param tnorm: fuzzy operator for logical AND
109
260
  :param complement: fuzzy operator for logical NOT
110
261
  :param comparison: fuzzy operator for comparisons (>, >=, <, ==, ~=, ...)
262
+ :param sampling: random sampling of non-reparameterizable distributions
111
263
  :param weight: a sharpness parameter for sigmoid and softmax activations
112
- :param error: an error parameter (e.g. floor) (smaller means better accuracy)
113
264
  :param debias: which functions to de-bias approximate on forward pass
114
265
  :param eps: small positive float to mitigate underflow
115
266
  :param verbose: whether to dump replacements and other info to console
@@ -118,6 +269,7 @@ class FuzzyLogic:
118
269
  self.tnorm = tnorm
119
270
  self.complement = complement
120
271
  self.comparison = comparison
272
+ self.sampling = sampling
121
273
  self.weight = float(weight)
122
274
  if debias is None:
123
275
  debias = set()
@@ -142,10 +294,11 @@ class FuzzyLogic:
142
294
  f' tnorm ={type(self.tnorm).__name__}\n'
143
295
  f' complement ={type(self.complement).__name__}\n'
144
296
  f' comparison ={type(self.comparison).__name__}\n'
297
+ f' sampling ={type(self.sampling).__name__}\n'
145
298
  f' sigmoid_weight={self.weight}\n'
146
299
  f' cpfs_to_debias={self.debias}\n'
147
300
  f' underflow_tol ={self.eps}\n'
148
- f' use64bit ={self.use64bit}')
301
+ f' use_64_bit ={self.use64bit}')
149
302
 
150
303
  # ===========================================================================
151
304
  # logical operators
@@ -419,7 +572,7 @@ class FuzzyLogic:
419
572
  # ===========================================================================
420
573
 
421
574
  @staticmethod
422
- def _literals(shape, axis):
575
+ def enumerate_literals(shape, axis):
423
576
  literals = jnp.arange(shape[axis])
424
577
  literals = literals[(...,) + (jnp.newaxis,) * (len(shape) - 1)]
425
578
  literals = jnp.moveaxis(literals, source=0, destination=axis)
@@ -434,7 +587,7 @@ class FuzzyLogic:
434
587
  debias = 'argmax' in self.debias
435
588
 
436
589
  def _jax_wrapped_calc_argmax_approx(x, axis, param):
437
- literals = FuzzyLogic._literals(x.shape, axis=axis)
590
+ literals = FuzzyLogic.enumerate_literals(x.shape, axis=axis)
438
591
  soft_max = jax.nn.softmax(param * x, axis=axis)
439
592
  sample = jnp.sum(literals * soft_max, axis=axis)
440
593
  if debias:
@@ -468,7 +621,7 @@ class FuzzyLogic:
468
621
  def _jax_wrapped_calc_if_approx(c, a, b, param):
469
622
  sample = c * a + (1.0 - c) * b
470
623
  if debias:
471
- hard_sample = jnp.select([c, ~c], [a, b])
624
+ hard_sample = jnp.where(c > 0.5, a, b)
472
625
  sample += jax.lax.stop_gradient(hard_sample - sample)
473
626
  return sample
474
627
 
@@ -483,7 +636,7 @@ class FuzzyLogic:
483
636
  debias = 'switch' in self.debias
484
637
 
485
638
  def _jax_wrapped_calc_switch_approx(pred, cases, param):
486
- literals = FuzzyLogic._literals(cases.shape, axis=0)
639
+ literals = FuzzyLogic.enumerate_literals(cases.shape, axis=0)
487
640
  pred = jnp.broadcast_to(pred[jnp.newaxis, ...], shape=cases.shape)
488
641
  proximity = -jnp.abs(pred - literals)
489
642
  soft_case = jax.nn.softmax(param * proximity, axis=0)
@@ -502,44 +655,24 @@ class FuzzyLogic:
502
655
  # random variables
503
656
  # ===========================================================================
504
657
 
505
- def _gumbel_softmax(self, key, prob):
506
- Gumbel01 = random.gumbel(key=key, shape=prob.shape, dtype=self.REAL)
507
- sample = Gumbel01 + jnp.log(prob + self.eps)
508
- return sample
509
-
658
+ def discrete(self):
659
+ return self.sampling.discrete(self)
660
+
510
661
  def bernoulli(self):
511
- if self.verbose:
512
- raise_warning('Using the replacement rule: '
513
- 'Bernoulli(p) --> Gumbel-softmax(p)')
514
-
515
- jax_gs = self._gumbel_softmax
516
- jax_argmax, jax_param = self.argmax()
517
-
518
- def _jax_wrapped_calc_bernoulli_approx(key, prob, param):
519
- prob = jnp.stack([1.0 - prob, prob], axis=-1)
520
- sample = jax_gs(key, prob)
521
- sample = jax_argmax(sample, axis=-1, param=param)
522
- return sample
523
-
524
- return _jax_wrapped_calc_bernoulli_approx, jax_param
662
+ return self.sampling.bernoulli(self)
525
663
 
526
- def discrete(self):
527
- if self.verbose:
528
- raise_warning('Using the replacement rule: '
529
- 'Discrete(p) --> Gumbel-softmax(p)')
530
-
531
- jax_gs = self._gumbel_softmax
532
- jax_argmax, jax_param = self.argmax()
533
-
534
- def _jax_wrapped_calc_discrete_approx(key, prob, param):
535
- sample = jax_gs(key, prob)
536
- sample = jax_argmax(sample, axis=-1, param=param)
537
- return sample
538
-
539
- return _jax_wrapped_calc_discrete_approx, jax_param
540
-
664
+ def poisson(self):
665
+ return self.sampling.poisson(self)
666
+
667
+ def geometric(self):
668
+ return self.sampling.geometric(self)
669
+
541
670
 
671
+ # ===========================================================================
542
672
  # UNIT TESTS
673
+ #
674
+ # ===========================================================================
675
+
543
676
  logic = FuzzyLogic()
544
677
  w = 100.0
545
678
 
@@ -598,13 +731,14 @@ def _test_random():
598
731
  key = random.PRNGKey(42)
599
732
  _bernoulli, _ = logic.bernoulli()
600
733
  _discrete, _ = logic.discrete()
734
+ _geometric, _ = logic.geometric()
601
735
 
602
736
  def bern(n):
603
737
  prob = jnp.asarray([0.3] * n)
604
738
  sample = _bernoulli(key, prob, w)
605
739
  return sample
606
740
 
607
- samples = bern(5000)
741
+ samples = bern(50000)
608
742
  print(jnp.mean(samples))
609
743
 
610
744
  def disc(n):
@@ -613,10 +747,18 @@ def _test_random():
613
747
  sample = _discrete(key, prob, w)
614
748
  return sample
615
749
 
616
- samples = disc(5000)
750
+ samples = disc(50000)
617
751
  samples = jnp.round(samples)
618
752
  print([jnp.mean(samples == i) for i in range(3)])
619
-
753
+
754
+ def geom(n):
755
+ prob = jnp.asarray([0.3] * n)
756
+ sample = _geometric(key, prob, w)
757
+ return sample
758
+
759
+ samples = geom(50000)
760
+ print(jnp.mean(samples))
761
+
620
762
 
621
763
  def _test_rounding():
622
764
  print('testing rounding')
@@ -2,54 +2,51 @@ from ast import literal_eval
2
2
  from collections import deque
3
3
  import configparser
4
4
  from enum import Enum
5
+ import os
6
+ import sys
7
+ import time
8
+ import traceback
9
+ from typing import Any, Callable, Dict, Generator, Optional, Set, Sequence, Tuple, Union
10
+
5
11
  import haiku as hk
6
12
  import jax
13
+ import jax.nn.initializers as initializers
7
14
  import jax.numpy as jnp
8
15
  import jax.random as random
9
- import jax.nn.initializers as initializers
10
16
  import numpy as np
11
17
  import optax
12
- import os
13
- import sys
14
18
  import termcolor
15
- import time
16
- import traceback
17
19
  from tqdm import tqdm
18
- from typing import Any, Callable, Dict, Generator, Optional, Set, Sequence, Tuple, Union
19
20
 
20
- Activation = Callable[[jnp.ndarray], jnp.ndarray]
21
- Bounds = Dict[str, Tuple[np.ndarray, np.ndarray]]
22
- Kwargs = Dict[str, Any]
23
- Pytree = Any
24
-
25
- from pyRDDLGym.core.debug.exception import raise_warning
26
-
27
- from pyRDDLGym_jax import __version__
28
-
29
- # try to import matplotlib, if failed then skip plotting
30
- try:
31
- import matplotlib
32
- import matplotlib.pyplot as plt
33
- matplotlib.use('TkAgg')
34
- except Exception:
35
- raise_warning('failed to import matplotlib: '
36
- 'plotting functionality will be disabled.', 'red')
37
- traceback.print_exc()
38
- plt = None
39
-
40
21
  from pyRDDLGym.core.compiler.model import RDDLPlanningModel, RDDLLiftedModel
41
22
  from pyRDDLGym.core.debug.logger import Logger
42
23
  from pyRDDLGym.core.debug.exception import (
24
+ raise_warning,
43
25
  RDDLNotImplementedError,
44
26
  RDDLUndefinedVariableError,
45
27
  RDDLTypeError
46
28
  )
47
29
  from pyRDDLGym.core.policy import BaseAgent
48
30
 
49
- from pyRDDLGym_jax.core.compiler import JaxRDDLCompiler
31
+ from pyRDDLGym_jax import __version__
50
32
  from pyRDDLGym_jax.core import logic
33
+ from pyRDDLGym_jax.core.compiler import JaxRDDLCompiler
51
34
  from pyRDDLGym_jax.core.logic import FuzzyLogic
52
35
 
36
+ # try to import matplotlib, if failed then skip plotting
37
+ try:
38
+ import matplotlib.pyplot as plt
39
+ except Exception:
40
+ raise_warning('failed to import matplotlib: '
41
+ 'plotting functionality will be disabled.', 'red')
42
+ traceback.print_exc()
43
+ plt = None
44
+
45
+ Activation = Callable[[jnp.ndarray], jnp.ndarray]
46
+ Bounds = Dict[str, Tuple[np.ndarray, np.ndarray]]
47
+ Kwargs = Dict[str, Any]
48
+ Pytree = Any
49
+
53
50
 
54
51
  # ***********************************************************************
55
52
  # CONFIG FILE MANAGEMENT
@@ -104,9 +101,12 @@ def _load_config(config, args):
104
101
  comp_kwargs = model_args.get('complement_kwargs', {})
105
102
  compare_name = model_args.get('comparison', 'SigmoidComparison')
106
103
  compare_kwargs = model_args.get('comparison_kwargs', {})
104
+ sampling_name = model_args.get('sampling', 'GumbelSoftmax')
105
+ sampling_kwargs = model_args.get('sampling_kwargs', {})
107
106
  logic_kwargs['tnorm'] = getattr(logic, tnorm_name)(**tnorm_kwargs)
108
107
  logic_kwargs['complement'] = getattr(logic, comp_name)(**comp_kwargs)
109
108
  logic_kwargs['comparison'] = getattr(logic, compare_name)(**compare_kwargs)
109
+ logic_kwargs['sampling'] = getattr(logic, sampling_name)(**sampling_kwargs)
110
110
 
111
111
  # read the policy settings
112
112
  plan_method = planner_args.pop('method')
@@ -184,18 +184,6 @@ def load_config_from_string(value: str) -> Tuple[Kwargs, ...]:
184
184
  #
185
185
  # ***********************************************************************
186
186
 
187
- def _function_discrete_approx_named(logic):
188
- jax_discrete, jax_param = logic.discrete()
189
-
190
- def _jax_wrapped_discrete_calc_approx(key, prob, params):
191
- sample = jax_discrete(key, prob, params)
192
- out_of_bounds = jnp.logical_not(jnp.logical_and(
193
- jnp.all(prob >= 0),
194
- jnp.allclose(jnp.sum(prob, axis=-1), 1.0)))
195
- return sample, out_of_bounds
196
-
197
- return _jax_wrapped_discrete_calc_approx, jax_param
198
-
199
187
 
200
188
  class JaxRDDLCompilerWithGrad(JaxRDDLCompiler):
201
189
  '''Compiles a RDDL AST representation to an equivalent JAX representation.
@@ -271,7 +259,9 @@ class JaxRDDLCompilerWithGrad(JaxRDDLCompiler):
271
259
  self.IF_HELPER = logic.control_if()
272
260
  self.SWITCH_HELPER = logic.control_switch()
273
261
  self.BERNOULLI_HELPER = logic.bernoulli()
274
- self.DISCRETE_HELPER = _function_discrete_approx_named(logic)
262
+ self.DISCRETE_HELPER = logic.discrete()
263
+ self.POISSON_HELPER = logic.poisson()
264
+ self.GEOMETRIC_HELPER = logic.geometric()
275
265
 
276
266
  def _jax_stop_grad(self, jax_expr):
277
267
 
@@ -469,7 +459,8 @@ class JaxStraightLinePlan(JaxPlan):
469
459
  f' wrap_non_bool ={self._wrap_non_bool}\n'
470
460
  f'constraint-sat strategy (complex):\n'
471
461
  f' wrap_softmax ={self._wrap_softmax}\n'
472
- f' use_new_projection ={self._use_new_projection}')
462
+ f' use_new_projection ={self._use_new_projection}\n'
463
+ f' max_projection_iters ={self._max_constraint_iter}')
473
464
 
474
465
  def compile(self, compiled: JaxRDDLCompilerWithGrad,
475
466
  _bounds: Bounds,
@@ -1348,8 +1339,18 @@ class JaxBackpropPlanner:
1348
1339
  map(str, jax._src.xla_bridge.devices())).replace('\n', '')
1349
1340
  except Exception as _:
1350
1341
  devices_short = 'N/A'
1342
+ LOGO = \
1343
+ """
1344
+ __ ______ __ __ ______ __ ______ __ __
1345
+ /\ \ /\ __ \ /\_\_\_\ /\ == \/\ \ /\ __ \ /\ "-.\ \
1346
+ _\_\ \ \ \ __ \ \/_/\_\/_ \ \ _-/\ \ \____ \ \ __ \ \ \ \-. \
1347
+ /\_____\ \ \_\ \_\ /\_\/\_\ \ \_\ \ \_____\ \ \_\ \_\ \ \_\\"\_\
1348
+ \/_____/ \/_/\/_/ \/_/\/_/ \/_/ \/_____/ \/_/\/_/ \/_/ \/_/
1349
+ """
1350
+
1351
1351
  print('\n'
1352
- f'JAX Planner version {__version__}\n'
1352
+ f'{LOGO}\n'
1353
+ f'Version {__version__}\n'
1353
1354
  f'Python {sys.version}\n'
1354
1355
  f'jax {jax.version.__version__}, jaxlib {jaxlib_version}, '
1355
1356
  f'optax {optax.__version__}, haiku {hk.__version__}, '
@@ -1711,6 +1712,14 @@ class JaxBackpropPlanner:
1711
1712
  hyperparam_value = float(policy_hyperparams)
1712
1713
  policy_hyperparams = {action: hyperparam_value
1713
1714
  for action in self.rddl.action_fluents}
1715
+
1716
+ # fill in missing entries
1717
+ elif isinstance(policy_hyperparams, dict):
1718
+ for action in self.rddl.action_fluents:
1719
+ if action not in policy_hyperparams:
1720
+ raise_warning(f'policy_hyperparams[{action}] is not set, '
1721
+ 'setting 1.0 which could be suboptimal.')
1722
+ policy_hyperparams[action] = 1.0
1714
1723
 
1715
1724
  # print summary of parameters:
1716
1725
  if print_summary:
@@ -1772,6 +1781,7 @@ class JaxBackpropPlanner:
1772
1781
  rolling_test_loss = RollingMean(test_rolling_window)
1773
1782
  log = {}
1774
1783
  status = JaxPlannerStatus.NORMAL
1784
+ is_all_zero_fn = lambda x: np.allclose(x, 0)
1775
1785
 
1776
1786
  # initialize plot area
1777
1787
  if plot_step is None or plot_step <= 0 or plt is None:
@@ -1786,6 +1796,7 @@ class JaxBackpropPlanner:
1786
1796
  iters = range(epochs)
1787
1797
  if print_progress:
1788
1798
  iters = tqdm(iters, total=100, position=tqdm_position)
1799
+ position_str = '' if tqdm_position is None else f'[{tqdm_position}]'
1789
1800
 
1790
1801
  for it in iters:
1791
1802
  status = JaxPlannerStatus.NORMAL
@@ -1799,7 +1810,7 @@ class JaxBackpropPlanner:
1799
1810
 
1800
1811
  # no progress
1801
1812
  grad_norm_zero, _ = jax.tree_util.tree_flatten(
1802
- jax.tree_map(lambda x: np.allclose(x, 0), train_log['grad']))
1813
+ jax.tree_map(is_all_zero_fn, train_log['grad']))
1803
1814
  if np.all(grad_norm_zero):
1804
1815
  status = JaxPlannerStatus.NO_PROGRESS
1805
1816
 
@@ -1843,8 +1854,9 @@ class JaxBackpropPlanner:
1843
1854
  if print_progress:
1844
1855
  iters.n = int(100 * min(1, max(elapsed / train_seconds, it / epochs)))
1845
1856
  iters.set_description(
1846
- f'[{tqdm_position}] {it:6} it / {-train_loss:14.6f} train / '
1847
- f'{-test_loss:14.6f} test / {-best_loss:14.6f} best')
1857
+ f'{position_str} {it:6} it / {-train_loss:14.6f} train / '
1858
+ f'{-test_loss:14.6f} test / {-best_loss:14.6f} best / '
1859
+ f'{status.value} status')
1848
1860
 
1849
1861
  # reached computation budget
1850
1862
  if elapsed >= train_seconds:
@@ -1904,7 +1916,7 @@ class JaxBackpropPlanner:
1904
1916
  f' iterations ={it}\n'
1905
1917
  f' best_objective={-best_loss}\n'
1906
1918
  f' best_grad_norm={grad_norm}\n'
1907
- f'diagnosis: {diagnosis}\n')
1919
+ f' diagnosis: {diagnosis}\n')
1908
1920
 
1909
1921
  def _perform_diagnosis(self, last_iter_improve,
1910
1922
  train_return, test_return, best_return, grad_norm):
@@ -2116,7 +2128,7 @@ class JaxLineSearchPlanner(JaxBackpropPlanner):
2116
2128
  @jax.jit
2117
2129
  def entropic_utility(returns: jnp.ndarray, beta: float) -> float:
2118
2130
  return (-1.0 / beta) * jax.scipy.special.logsumexp(
2119
- -beta * returns, b=1.0 / returns.size)
2131
+ -beta * returns, b=1.0 / returns.size)
2120
2132
 
2121
2133
 
2122
2134
  @jax.jit
@@ -1,7 +1,8 @@
1
- import jax
2
1
  import time
3
2
  from typing import Dict, Optional
4
3
 
4
+ import jax
5
+
5
6
  from pyRDDLGym.core.compiler.model import RDDLLiftedModel
6
7
  from pyRDDLGym.core.debug.exception import (
7
8
  RDDLActionPreconditionNotSatisfiedError,
@@ -1,20 +1,18 @@
1
- from bayes_opt import BayesianOptimization
2
- from bayes_opt.util import UtilityFunction
3
1
  from copy import deepcopy
4
2
  import csv
5
3
  import datetime
6
- import jax
7
4
  from multiprocessing import get_context
8
- import numpy as np
9
5
  import os
10
6
  import time
11
7
  from typing import Any, Callable, Dict, Optional, Tuple
12
-
13
- Kwargs = Dict[str, Any]
14
-
15
8
  import warnings
16
9
  warnings.filterwarnings("ignore")
17
10
 
11
+ from bayes_opt import BayesianOptimization
12
+ from bayes_opt.util import UtilityFunction
13
+ import jax
14
+ import numpy as np
15
+
18
16
  from pyRDDLGym.core.debug.exception import raise_warning
19
17
  from pyRDDLGym.core.env import RDDLEnv
20
18
 
@@ -26,6 +24,8 @@ from pyRDDLGym_jax.core.planner import (
26
24
  JaxOnlineController
27
25
  )
28
26
 
27
+ Kwargs = Dict[str, Any]
28
+
29
29
 
30
30
  # ===============================================================================
31
31
  #
@@ -0,0 +1,276 @@
1
+ Metadata-Version: 2.1
2
+ Name: pyRDDLGym-jax
3
+ Version: 0.4
4
+ Summary: pyRDDLGym-jax: automatic differentiation for solving sequential planning problems in JAX.
5
+ Home-page: https://github.com/pyrddlgym-project/pyRDDLGym-jax
6
+ Author: Michael Gimelfarb, Ayal Taitler, Scott Sanner
7
+ Author-email: mike.gimelfarb@mail.utoronto.ca, ataitler@gmail.com, ssanner@mie.utoronto.ca
8
+ License: MIT License
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Science/Research
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Natural Language :: English
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
+ Requires-Python: >=3.8
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: pyRDDLGym >=2.0
20
+ Requires-Dist: tqdm >=4.66
21
+ Requires-Dist: bayesian-optimization >=1.4.3
22
+ Requires-Dist: jax >=0.4.12
23
+ Requires-Dist: optax >=0.1.9
24
+ Requires-Dist: dm-haiku >=0.0.10
25
+ Requires-Dist: tensorflow-probability >=0.21.0
26
+
27
+ # pyRDDLGym-jax
28
+
29
+ Author: [Mike Gimelfarb](https://mike-gimelfarb.github.io)
30
+
31
+ This directory provides:
32
+ 1. automated translation and compilation of RDDL description files into [JAX](https://github.com/google/jax), converting any RDDL domain to a differentiable simulator!
33
+ 2. powerful, fast and scalable gradient-based planning algorithms, with extendible and flexible policy class representations, automatic model relaxations for working in discrete and hybrid domains, and much more!
34
+
35
+ > [!NOTE]
36
+ > While the JAX planner can support some discrete state/action problems through model relaxations, it can perform poorly on some discrete problems (though there is an ongoing effort to remedy this!).
37
+ > If you find it is not making sufficient progress, check out the [PROST planner](https://github.com/pyrddlgym-project/pyRDDLGym-prost) (for discrete spaces) or the [deep reinforcement learning wrappers](https://github.com/pyrddlgym-project/pyRDDLGym-rl).
38
+
39
+ ## Contents
40
+
41
+ - [Installation](#installation)
42
+ - [Running from the Command Line](#running-from-the-command-line)
43
+ - [Running from within Python](#running-from-within-python)
44
+ - [Configuring the Planner](#configuring-the-planner)
45
+ - [Simulation](#simulation)
46
+ - [Manual Gradient Calculation](#manual-gradient-calculation)
47
+ - [Citing pyRDDLGym-jax](#citing-pyrddlgym-jax)
48
+
49
+ ## Installation
50
+
51
+ To use the compiler or planner without the automated hyper-parameter tuning, you will need the following packages installed:
52
+ - ``pyRDDLGym>=2.0``
53
+ - ``tqdm>=4.66``
54
+ - ``jax>=0.4.12``
55
+ - ``optax>=0.1.9``
56
+ - ``dm-haiku>=0.0.10``
57
+ - ``tensorflow-probability>=0.21.0``
58
+
59
+ Additionally, if you wish to run the examples, you need ``rddlrepository>=2``.
60
+ To run the automated tuning optimization, you will also need ``bayesian-optimization>=1.4.3``.
61
+
62
+ You can install this package, together with all of its requirements, via pip:
63
+
64
+ ```shell
65
+ pip install rddlrepository pyRDDLGym-jax
66
+ ```
67
+
68
+ ## Running from the Command Line
69
+
70
+ A basic run script is provided to run the Jax Planner on any domain in ``rddlrepository``, and can be launched in the command line from the install directory of pyRDDLGym-jax:
71
+
72
+ ```shell
73
+ python -m pyRDDLGym_jax.examples.run_plan <domain> <instance> <method> <episodes>
74
+ ```
75
+
76
+ where:
77
+ - ``domain`` is the domain identifier as specified in rddlrepository (i.e. Wildfire_MDP_ippc2014), or a path pointing to a valid ``domain.rddl`` file
78
+ - ``instance`` is the instance identifier (i.e. 1, 2, ... 10), or a path pointing to a valid ``instance.rddl`` file
79
+ - ``method`` is the planning method to use (i.e. drp, slp, replan)
80
+ - ``episodes`` is the (optional) number of episodes to evaluate the learned policy.
81
+
82
+ The ``method`` parameter supports three possible modes:
83
+ - ``slp`` is the basic straight line planner described [in this paper](https://proceedings.neurips.cc/paper_files/paper/2017/file/98b17f068d5d9b7668e19fb8ae470841-Paper.pdf)
84
+ - ``drp`` is the deep reactive policy network described [in this paper](https://ojs.aaai.org/index.php/AAAI/article/view/4744)
85
+ - ``replan`` is the same as ``slp`` except the plan is recalculated at every decision time step.
86
+
87
+ A basic run script is also provided to run the automatic hyper-parameter tuning:
88
+
89
+ ```shell
90
+ python -m pyRDDLGym_jax.examples.run_tune <domain> <instance> <method> <trials> <iters> <workers>
91
+ ```
92
+
93
+ where:
94
+ - ``domain`` is the domain identifier as specified in rddlrepository (i.e. Wildfire_MDP_ippc2014)
95
+ - ``instance`` is the instance identifier (i.e. 1, 2, ... 10)
96
+ - ``method`` is the planning method to use (i.e. drp, slp, replan)
97
+ - ``trials`` is the (optional) number of trials/episodes to average in evaluating each hyper-parameter setting
98
+ - ``iters`` is the (optional) maximum number of iterations/evaluations of Bayesian optimization to perform
99
+ - ``workers`` is the (optional) number of parallel evaluations to be done at each iteration, e.g. the total evaluations = ``iters * workers``.
100
+
101
+ For example, the following will train the Jax Planner on the Quadcopter domain with 4 drones:
102
+
103
+ ```shell
104
+ python -m pyRDDLGym_jax.examples.run_plan Quadcopter 1 slp
105
+ ```
106
+
107
+ After several minutes of optimization, you should get a visualization as follows:
108
+
109
+ <p align="center">
110
+ <img src="Images/quadcopter.gif" width="400" height="400" margin=1/>
111
+ </p>
112
+
113
+ ## Running from within Python
114
+
115
+ To run the Jax planner from within a Python application, refer to the following example:
116
+
117
+ ```python
118
+ import pyRDDLGym
119
+ from pyRDDLGym_jax.core.planner import JaxBackpropPlanner, JaxOfflineController
120
+
121
+ # set up the environment (note the vectorized option must be True)
122
+ env = pyRDDLGym.make("domain", "instance", vectorized=True)
123
+
124
+ # create the planning algorithm
125
+ planner = JaxBackpropPlanner(rddl=env.model, **planner_args)
126
+ controller = JaxOfflineController(planner, **train_args)
127
+
128
+ # evaluate the planner
129
+ controller.evaluate(env, episodes=1, verbose=True, render=True)
130
+ env.close()
131
+ ```
132
+
133
+ Here, we have used the straight-line controller, although you can configure the combination of planner and policy representation if you wish.
134
+ All controllers are instances of pyRDDLGym's ``BaseAgent`` class, so they provide the ``evaluate()`` function to streamline interaction with the environment.
135
+ The ``**planner_args`` and ``**train_args`` are keyword arguments passed during initialization, but we strongly recommend creating and loading a config file as discussed in the next section.
136
+
137
+ ## Configuring the Planner
138
+
139
+ The simplest way to configure the planner is to write and pass a configuration file with the necessary [hyper-parameters](https://pyrddlgym.readthedocs.io/en/latest/jax.html#configuring-pyrddlgym-jax).
140
+ The basic structure of a configuration file is provided below for a straight-line planner:
141
+
142
+ ```ini
143
+ [Model]
144
+ logic='FuzzyLogic'
145
+ logic_kwargs={'weight': 20}
146
+ tnorm='ProductTNorm'
147
+ tnorm_kwargs={}
148
+
149
+ [Optimizer]
150
+ method='JaxStraightLinePlan'
151
+ method_kwargs={}
152
+ optimizer='rmsprop'
153
+ optimizer_kwargs={'learning_rate': 0.001}
154
+ batch_size_train=1
155
+ batch_size_test=1
156
+
157
+ [Training]
158
+ key=42
159
+ epochs=5000
160
+ train_seconds=30
161
+ ```
162
+
163
+ The configuration file contains three sections:
164
+ - ``[Model]`` specifies the fuzzy logic operations used to relax discrete operations to differentiable approximations; the ``weight`` dictates the quality of the approximation,
165
+ and ``tnorm`` specifies the type of [fuzzy logic](https://en.wikipedia.org/wiki/T-norm_fuzzy_logics) for replacing logical operations in RDDL (e.g. ``ProductTNorm``, ``GodelTNorm``, ``LukasiewiczTNorm``)
166
+ - ``[Optimizer]`` specifies the optimizer and plan settings; the ``method`` specifies the plan/policy representation (e.g. ``JaxStraightLinePlan``, ``JaxDeepReactivePolicy``), the gradient descent settings, learning rate, batch size, etc.
167
+ - ``[Training]`` specifies computation limits, such as total training time and number of iterations, and options for printing or visualizing information from the planner.
168
+
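The ``[Model]`` section in version 0.4 can also select how non-reparameterizable random variables are relaxed (see the ``sampling`` option read in ``_load_config`` above). A minimal sketch of such a section, assuming the ``sampling``/``sampling_kwargs`` keys and the ``GumbelSoftmax`` and ``Determinization`` class names from ``pyRDDLGym_jax.core.logic``:

```ini
[Model]
logic='FuzzyLogic'
logic_kwargs={'weight': 20}
tnorm='ProductTNorm'
tnorm_kwargs={}
; sketch only: sampling relaxation added in 0.4, e.g. 'GumbelSoftmax' or 'Determinization'
sampling='GumbelSoftmax'
sampling_kwargs={}
```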
169
+ For a policy network approach, simply change the ``[Optimizer]`` settings like so:
170
+
171
+ ```ini
172
+ ...
173
+ [Optimizer]
174
+ method='JaxDeepReactivePolicy'
175
+ method_kwargs={'topology': [128, 64], 'activation': 'tanh'}
176
+ ...
177
+ ```
178
+
179
+ The configuration file must then be passed to the planner during initialization.
180
+ For example, the [previous script here](#running-from-within-python) can be modified to set parameters from a config file:
181
+
182
+ ```python
183
+ from pyRDDLGym_jax.core.planner import load_config
184
+
185
+ # load the config file with planner settings
186
+ planner_args, _, train_args = load_config("/path/to/config.cfg")
187
+
188
+ # create the planning algorithm
189
+ planner = JaxBackpropPlanner(rddl=env.model, **planner_args)
190
+ controller = JaxOfflineController(planner, **train_args)
191
+ ...
192
+ ```
193
+
194
+ ## Simulation
195
+
196
+ The JAX compiler can be used as a backend for simulating and evaluating RDDL environments:
197
+
198
+ ```python
199
+ import pyRDDLGym
200
+ from pyRDDLGym.core.policy import RandomAgent
201
+ from pyRDDLGym_jax.core.simulator import JaxRDDLSimulator
202
+
203
+ # create the environment
204
+ env = pyRDDLGym.make("domain", "instance", backend=JaxRDDLSimulator)
205
+
206
+ # evaluate the random policy
207
+ agent = RandomAgent(action_space=env.action_space,
208
+ num_actions=env.max_allowed_actions)
209
+ agent.evaluate(env, verbose=True, render=True)
210
+ ```
211
+
212
+ For some domains, the JAX backend could perform better than the numpy-based one, due to various compiler optimizations.
213
+ In any event, the simulation results using the JAX backend should (almost) always match the numpy backend.
214
+
215
+ ## Manual Gradient Calculation
216
+
217
+ For custom applications, it is often desirable to compute gradients of the model that can then be used for downstream optimization.
218
+ Fortunately, we provide a very convenient function for compiling the transition/step function ``P(s, a, s')`` of the environment into JAX.
219
+
220
+ ```python
221
+ import pyRDDLGym
222
+ from pyRDDLGym_jax.core.planner import JaxRDDLCompilerWithGrad
223
+
224
+ # set up the environment
225
+ env = pyRDDLGym.make("domain", "instance", vectorized=True)
226
+
227
+ # create the step function
228
+ compiled = JaxRDDLCompilerWithGrad(rddl=env.model)
229
+ compiled.compile()
230
+ step_fn = compiled.compile_transition()
231
+ ```
232
+
233
+ This will return a JAX compiled (pure) function requiring the following inputs:
234
+ - ``key`` is the ``jax.random.PRNGKey`` key for reproducible randomness
235
+ - ``actions`` is the dictionary of action fluent tensors
236
+ - ``subs`` is the dictionary of state-fluent and non-fluent tensors
237
+ - ``model_params`` are the parameters of the differentiable relaxations, such as ``weight``
238
+
239
+ The function returns a dictionary containing a variety of variables, such as the updated pvariables including next-state fluents (``pvar``), the reward obtained (``reward``), and error codes (``error``).
240
+ It is thus possible to apply any JAX transformation to the output of the function, such as computing gradient using ``jax.grad()`` or batched simulation using ``jax.vmap()``.
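As a minimal sketch of the ``jax.grad()`` route (assuming ``step_fn`` from the snippet above accepts its arguments in the order listed, and that ``key``, ``actions``, ``subs`` and ``model_params`` have been prepared as described; the scalar reduction is illustrative only):

```python
import jax

# sketch: differentiate the immediate reward with respect to the action tensors
def reward_fn(actions, key, subs, model_params):
    out = step_fn(key, actions, subs, model_params)   # argument order assumed
    return out['reward'].sum()                        # reduce to a scalar for jax.grad

grad_wrt_actions = jax.grad(reward_fn)(actions, key, subs, model_params)
```

Because ``actions`` is a dictionary (a pytree), the result is a dictionary of gradients with the same structure as the action tensors.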
241
+
242
+ Compilation of entire rollouts is also possible by calling the ``compile_rollouts`` function.
243
+ An [example is provided to illustrate how you can define your own policy class and compute the return gradient manually](https://github.com/pyrddlgym-project/pyRDDLGym-jax/blob/main/pyRDDLGym_jax/examples/run_gradient.py).
244
+
245
+ ## Citing pyRDDLGym-jax
246
+
247
+ The [following citation](https://ojs.aaai.org/index.php/ICAPS/article/view/31480) describes the main ideas of the framework. Please cite it if you found it useful:
248
+
249
+ ```
250
+ @inproceedings{gimelfarb2024jaxplan,
251
+ title={JaxPlan and GurobiPlan: Optimization Baselines for Replanning in Discrete and Mixed Discrete and Continuous Probabilistic Domains},
252
+ author={Michael Gimelfarb and Ayal Taitler and Scott Sanner},
253
+ booktitle={34th International Conference on Automated Planning and Scheduling},
254
+ year={2024},
255
+ url={https://openreview.net/forum?id=7IKtmUpLEH}
256
+ }
257
+ ```
258
+
259
+ The utility optimization is discussed in [this paper](https://ojs.aaai.org/index.php/AAAI/article/view/21226):
260
+
261
+ ```
262
+ @inproceedings{patton2022distributional,
263
+ title={A distributional framework for risk-sensitive end-to-end planning in continuous mdps},
264
+ author={Patton, Noah and Jeong, Jihwan and Gimelfarb, Mike and Sanner, Scott},
265
+ booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
266
+ volume={36},
267
+ number={9},
268
+ pages={9894--9901},
269
+ year={2022}
270
+ }
271
+ ```
272
+
273
+ Some of the implementation details derive from the following literature, which you may wish to also cite in your research papers:
274
+ - [Deep reactive policies for planning in stochastic nonlinear domains, AAAI 2019](https://ojs.aaai.org/index.php/AAAI/article/view/4744)
275
+ - [Scalable planning with tensorflow for hybrid nonlinear domains, NeurIPS 2017](https://proceedings.neurips.cc/paper/2017/file/98b17f068d5d9b7668e19fb8ae470841-Paper.pdf)
276
+
@@ -1,10 +1,10 @@
1
- pyRDDLGym_jax/__init__.py,sha256=Cl7DWkrPP64Ofc2ILXnudFOdnCuKs2p0Pm7ykZOOPh4,19
1
+ pyRDDLGym_jax/__init__.py,sha256=rexmxcBiCOcwctw4wGvk7UxS9MfZn_1CYXp53SoLKlU,19
2
2
  pyRDDLGym_jax/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- pyRDDLGym_jax/core/compiler.py,sha256=m7p0CHOU4Wma0cKMu_WQwfoieIQ2pXD68hZ8BFJ970A,89103
4
- pyRDDLGym_jax/core/logic.py,sha256=zujSHiR5KhTO81E5Zn8Gy_xSzVzfDskFCGvZygFRdMI,21930
5
- pyRDDLGym_jax/core/planner.py,sha256=1BtU1G3rihRZaMfNu0VtbSl1LXEXu6pT75EkF6-WVnM,101827
6
- pyRDDLGym_jax/core/simulator.py,sha256=fp6bep3XwwBWED0w7_4qhiwDjkSka6B2prwdNcPRCMc,8329
7
- pyRDDLGym_jax/core/tuning.py,sha256=Dv0YyOgGnej-zdVymWdkVg0MZjm2lNRfr7gySzFOeow,29589
3
+ pyRDDLGym_jax/core/compiler.py,sha256=SnDN3-J84Wv_YVHoDmfM_U4Ob8uaFLGX4vEaeWC-ERY,90037
4
+ pyRDDLGym_jax/core/logic.py,sha256=o1YAjMnXfi8gwb42kAigBmaf9uIYUWal9__FEkWohrk,26733
5
+ pyRDDLGym_jax/core/planner.py,sha256=Hrwfn88bUu1LNZcnFC5psHPzcIUbPeF4Rn1pFO6_qH0,102655
6
+ pyRDDLGym_jax/core/simulator.py,sha256=hWv6pr-4V-SSCzBYgdIPmKdUDMalft-Zh6dzOo5O9-0,8331
7
+ pyRDDLGym_jax/core/tuning.py,sha256=D_kD8wjqMroCdtjE9eksR2UqrqXJqazsAKrMEHwPxYM,29589
8
8
  pyRDDLGym_jax/examples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  pyRDDLGym_jax/examples/run_gradient.py,sha256=KhXvijRDZ4V7N8NOI2WV8ePGpPna5_vnET61YwS7Tco,2919
10
10
  pyRDDLGym_jax/examples/run_gym.py,sha256=rXvNWkxe4jHllvbvU_EOMji_2-2k5d4tbBKhpMm_Gaw,1526
@@ -37,8 +37,8 @@ pyRDDLGym_jax/examples/configs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
37
37
  pyRDDLGym_jax/examples/configs/default_drp.cfg,sha256=S2-5hPZtgAwUAFpiCAgSi-cnGhYHSDzMGMmatwhbM78,344
38
38
  pyRDDLGym_jax/examples/configs/default_replan.cfg,sha256=VWWPhOYBRq4cWwtrChw5pPqRmlX_nHbMvwciHd9hoLc,357
39
39
  pyRDDLGym_jax/examples/configs/default_slp.cfg,sha256=TG3mtHUnCA7J2Gm9SczENpqAymTnzCE9dj1Z_R-FnVk,340
40
- pyRDDLGym_jax-0.3.dist-info/LICENSE,sha256=Y0Gi6H6mLOKN-oIKGZulQkoTJyPZeAaeuZu7FXH-meg,1095
41
- pyRDDLGym_jax-0.3.dist-info/METADATA,sha256=e_1MlMdQoqQHW-KA2OSIZzIAQyfe-jDtMOxkIyhmLmI,1085
42
- pyRDDLGym_jax-0.3.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
43
- pyRDDLGym_jax-0.3.dist-info/top_level.txt,sha256=n_oWkP_BoZK0VofvPKKmBZ3NPk86WFNvLhi1BktCbVQ,14
44
- pyRDDLGym_jax-0.3.dist-info/RECORD,,
40
+ pyRDDLGym_jax-0.4.dist-info/LICENSE,sha256=Y0Gi6H6mLOKN-oIKGZulQkoTJyPZeAaeuZu7FXH-meg,1095
41
+ pyRDDLGym_jax-0.4.dist-info/METADATA,sha256=-Kf8PLxf_7MiiYXzlZAf31kV1pT-Rurc7QY7dT3Fwk0,12857
42
+ pyRDDLGym_jax-0.4.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
43
+ pyRDDLGym_jax-0.4.dist-info/top_level.txt,sha256=n_oWkP_BoZK0VofvPKKmBZ3NPk86WFNvLhi1BktCbVQ,14
44
+ pyRDDLGym_jax-0.4.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (70.2.0)
2
+ Generator: setuptools (75.3.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,26 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: pyRDDLGym-jax
3
- Version: 0.3
4
- Summary: pyRDDLGym-jax: JAX compilation of RDDL description files, and a differentiable planner in JAX.
5
- Home-page: https://github.com/pyrddlgym-project/pyRDDLGym-jax
6
- Author: Michael Gimelfarb, Ayal Taitler, Scott Sanner
7
- Author-email: mike.gimelfarb@mail.utoronto.ca, ataitler@gmail.com, ssanner@mie.utoronto.ca
8
- License: MIT License
9
- Classifier: Development Status :: 3 - Alpha
10
- Classifier: Intended Audience :: Science/Research
11
- Classifier: License :: OSI Approved :: MIT License
12
- Classifier: Natural Language :: English
13
- Classifier: Operating System :: OS Independent
14
- Classifier: Programming Language :: Python :: 3
15
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
- Requires-Python: >=3.8
17
- License-File: LICENSE
18
- Requires-Dist: pyRDDLGym >=2.0
19
- Requires-Dist: tqdm >=4.66
20
- Requires-Dist: bayesian-optimization >=1.4.3
21
- Requires-Dist: jax >=0.4.12
22
- Requires-Dist: optax >=0.1.9
23
- Requires-Dist: dm-haiku >=0.0.10
24
- Requires-Dist: tensorflow >=2.13.0
25
- Requires-Dist: tensorflow-probability >=0.21.0
26
-