PyPI - pyRDDLGym-jax - Versions diffs - 0.1__py3-none-any.whl → 0.2__py3-none-any.whl - Mend

pyRDDLGym-jax 0.1 (py3-none-any.whl) → 0.2 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35)

pyRDDLGym_jax/core/compiler.py CHANGED

@@ -4,7 +4,7 @@ import jax.numpy as jnp
 import jax.random as random
 import jax.scipy as scipy
 import traceback
-from typing import Callable, Dict, List
+from typing import Any, Callable, Dict, List, Optional
 from pyRDDLGym.core.debug.exception import raise_warning
@@ -13,8 +13,9 @@ try:
     from tensorflow_probability.substrates import jax as tfp
 except Exception:
     raise_warning('Failed to import tensorflow-probability: '
-                  'compilation of some complex distributions will not work.',
-                  'red')
+                  'compilation of some complex distributions '
+                  '(Binomial, Negative-Binomial, Multinomial) will fail. '
+                  'Please ensure this package is installed correctly.', 'red')
     traceback.print_exc()
     tfp = None
@@ -32,6 +33,98 @@ from pyRDDLGym.core.debug.logger import Logger
 from pyRDDLGym.core.simulator import RDDLSimulatorPrecompiled
+# ===========================================================================
+# EXACT RDDL TO JAX COMPILATION RULES
+# ===========================================================================
+def _function_unary_exact_named(op, name):
+    def _jax_wrapped_unary_fn_exact(x, param):
+        return op(x)
+    return _jax_wrapped_unary_fn_exact
+def _function_unary_exact_named_gamma():
+    def _jax_wrapped_unary_gamma_exact(x, param):
+        return jnp.exp(scipy.special.gammaln(x))
+    return _jax_wrapped_unary_gamma_exact
+def _function_binary_exact_named(op, name):
+    def _jax_wrapped_binary_fn_exact(x, y, param):
+        return op(x, y)
+    return _jax_wrapped_binary_fn_exact
+def _function_binary_exact_named_implies():
+    def _jax_wrapped_binary_implies_exact(x, y, param):
+        return jnp.logical_or(jnp.logical_not(x), y)
+    return _jax_wrapped_binary_implies_exact
+def _function_binary_exact_named_log():
+    def _jax_wrapped_binary_log_exact(x, y, param):
+        return jnp.log(x) / jnp.log(y)
+    return _jax_wrapped_binary_log_exact
+def _function_aggregation_exact_named(op, name):
+    def _jax_wrapped_aggregation_fn_exact(x, axis, param):
+        return op(x, axis=axis)
+    return _jax_wrapped_aggregation_fn_exact
+def _function_if_exact_named():
+    def _jax_wrapped_if_exact(c, a, b, param):
+        return jnp.where(c, a, b)
+    return _jax_wrapped_if_exact
+def _function_switch_exact_named():
+    def _jax_wrapped_switch_exact(pred, cases, param):
+        pred = pred[jnp.newaxis, ...]
+        sample = jnp.take_along_axis(cases, pred, axis=0)
+        assert sample.shape[0] == 1
+        return sample[0, ...]
+    return _jax_wrapped_switch_exact
+def _function_bernoulli_exact_named():
+    def _jax_wrapped_bernoulli_exact(key, prob, param):
+        return random.bernoulli(key, prob)
+    return _jax_wrapped_bernoulli_exact
+def _function_discrete_exact_named():
+    def _jax_wrapped_discrete_exact(key, prob, param):
+        logits = jnp.log(prob)
+        sample = random.categorical(key=key, logits=logits, axis=-1)
+        out_of_bounds = jnp.logical_not(jnp.logical_and(
+            jnp.all(prob >= 0),
+            jnp.allclose(jnp.sum(prob, axis=-1), 1.0)))
+        return sample, out_of_bounds
+    return _jax_wrapped_discrete_exact
 class JaxRDDLCompiler:
     '''Compiles a RDDL AST representation into an equivalent JAX representation.
     All operations are identical to their numpy equivalents.
@@ -39,10 +132,97 @@ class JaxRDDLCompiler:
     MODEL_PARAM_TAG_SEPARATOR = '___'
+    # ===========================================================================
+    # EXACT RDDL TO JAX COMPILATION RULES BY DEFAULT
+    # ===========================================================================
+    EXACT_RDDL_TO_JAX_NEGATIVE = _function_unary_exact_named(jnp.negative, 'negative')
+    EXACT_RDDL_TO_JAX_ARITHMETIC = {
+        '+': _function_binary_exact_named(jnp.add, 'add'),
+        '-': _function_binary_exact_named(jnp.subtract, 'subtract'),
+        '*': _function_binary_exact_named(jnp.multiply, 'multiply'),
+        '/': _function_binary_exact_named(jnp.divide, 'divide')
+    }
+    EXACT_RDDL_TO_JAX_RELATIONAL = {
+        '>=': _function_binary_exact_named(jnp.greater_equal, 'greater_equal'),
+        '<=': _function_binary_exact_named(jnp.less_equal, 'less_equal'),
+        '<': _function_binary_exact_named(jnp.less, 'less'),
+        '>': _function_binary_exact_named(jnp.greater, 'greater'),
+        '==': _function_binary_exact_named(jnp.equal, 'equal'),
+        '~=': _function_binary_exact_named(jnp.not_equal, 'not_equal')
+    }
+    EXACT_RDDL_TO_JAX_LOGICAL = {
+        '^': _function_binary_exact_named(jnp.logical_and, 'and'),
+        '&': _function_binary_exact_named(jnp.logical_and, 'and'),
+        '|': _function_binary_exact_named(jnp.logical_or, 'or'),
+        '~': _function_binary_exact_named(jnp.logical_xor, 'xor'),
+        '=>': _function_binary_exact_named_implies(),
+        '<=>': _function_binary_exact_named(jnp.equal, 'iff')
+    }
+    EXACT_RDDL_TO_JAX_LOGICAL_NOT = _function_unary_exact_named(jnp.logical_not, 'not')
+    EXACT_RDDL_TO_JAX_AGGREGATION = {
+        'sum': _function_aggregation_exact_named(jnp.sum, 'sum'),
+        'avg': _function_aggregation_exact_named(jnp.mean, 'avg'),
+        'prod': _function_aggregation_exact_named(jnp.prod, 'prod'),
+        'minimum': _function_aggregation_exact_named(jnp.min, 'minimum'),
+        'maximum': _function_aggregation_exact_named(jnp.max, 'maximum'),
+        'forall': _function_aggregation_exact_named(jnp.all, 'forall'),
+        'exists': _function_aggregation_exact_named(jnp.any, 'exists'),
+        'argmin': _function_aggregation_exact_named(jnp.argmin, 'argmin'),
+        'argmax': _function_aggregation_exact_named(jnp.argmax, 'argmax')
+    }
+    EXACT_RDDL_TO_JAX_UNARY = {
+        'abs': _function_unary_exact_named(jnp.abs, 'abs'),
+        'sgn': _function_unary_exact_named(jnp.sign, 'sgn'),
+        'round': _function_unary_exact_named(jnp.round, 'round'),
+        'floor': _function_unary_exact_named(jnp.floor, 'floor'),
+        'ceil': _function_unary_exact_named(jnp.ceil, 'ceil'),
+        'cos': _function_unary_exact_named(jnp.cos, 'cos'),
+        'sin': _function_unary_exact_named(jnp.sin, 'sin'),
+        'tan': _function_unary_exact_named(jnp.tan, 'tan'),
+        'acos': _function_unary_exact_named(jnp.arccos, 'acos'),
+        'asin': _function_unary_exact_named(jnp.arcsin, 'asin'),
+        'atan': _function_unary_exact_named(jnp.arctan, 'atan'),
+        'cosh': _function_unary_exact_named(jnp.cosh, 'cosh'),
+        'sinh': _function_unary_exact_named(jnp.sinh, 'sinh'),
+        'tanh': _function_unary_exact_named(jnp.tanh, 'tanh'),
+        'exp': _function_unary_exact_named(jnp.exp, 'exp'),
+        'ln': _function_unary_exact_named(jnp.log, 'ln'),
+        'sqrt': _function_unary_exact_named(jnp.sqrt, 'sqrt'),
+        'lngamma': _function_unary_exact_named(scipy.special.gammaln, 'lngamma'),
+        'gamma': _function_unary_exact_named_gamma()
+    }
+    EXACT_RDDL_TO_JAX_BINARY = {
+        'div': _function_binary_exact_named(jnp.floor_divide, 'div'),
+        'mod': _function_binary_exact_named(jnp.mod, 'mod'),
+        'fmod': _function_binary_exact_named(jnp.mod, 'fmod'),
+        'min': _function_binary_exact_named(jnp.minimum, 'min'),
+        'max': _function_binary_exact_named(jnp.maximum, 'max'),
+        'pow': _function_binary_exact_named(jnp.power, 'pow'),
+        'log': _function_binary_exact_named_log(),
+        'hypot': _function_binary_exact_named(jnp.hypot, 'hypot'),
+    }
+    EXACT_RDDL_TO_JAX_IF = _function_if_exact_named()
+    EXACT_RDDL_TO_JAX_SWITCH = _function_switch_exact_named()
+    EXACT_RDDL_TO_JAX_BERNOULLI = _function_bernoulli_exact_named()
+    EXACT_RDDL_TO_JAX_DISCRETE = _function_discrete_exact_named()
     def __init__(self, rddl: RDDLLiftedModel,
                  allow_synchronous_state: bool=True,
-                 logger: Logger=None,
-                 use64bit: bool=False) -> None:
+                 logger: Optional[Logger]=None,
+                 use64bit: bool=False,
+                 compile_non_fluent_exact: bool=True) -> None:
         '''Creates a new RDDL to Jax compiler.
         :param rddl: the RDDL model to compile into Jax
@@ -50,11 +230,14 @@ class JaxRDDLCompiler:
         on each other
         :param logger: to log information about compilation to file
         :param use64bit: whether to use 64 bit arithmetic
+        :param compile_non_fluent_exact: whether non-fluent expressions
+        are always compiled using exact JAX expressions.
         '''
         self.rddl = rddl
         self.logger = logger
         # jax.config.update('jax_log_compiles', True) # for testing ONLY
+        self.use64bit = use64bit
         if use64bit:
             self.INT = jnp.int64
             self.REAL = jnp.float64
@@ -62,6 +245,7 @@ class JaxRDDLCompiler:
         else:
             self.INT = jnp.int32
             self.REAL = jnp.float32
+            jax.config.update('jax_enable_x64', False)
         self.ONE = jnp.asarray(1, dtype=self.INT)
         self.JAX_TYPES = {
             'int': self.INT,
@@ -70,17 +254,16 @@ class JaxRDDLCompiler:
         }
         # compile initial values
-        if self.logger is not None:
-            self.logger.clear()
-        initializer = RDDLValueInitializer(rddl, logger=self.logger)
+        initializer = RDDLValueInitializer(rddl)
         self.init_values = initializer.initialize()
         # compute dependency graph for CPFs and sort them by evaluation order
-        sorter = RDDLLevelAnalysis(rddl, allow_synchronous_state, logger=self.logger)
+        sorter = RDDLLevelAnalysis(
+            rddl, allow_synchronous_state=allow_synchronous_state)
         self.levels = sorter.compute_levels()
         # trace expressions to cache information to be used later
-        tracer = RDDLObjectsTracer(rddl, logger=self.logger, cpf_levels=self.levels)
+        tracer = RDDLObjectsTracer(rddl, cpf_levels=self.levels)
         self.traced = tracer.trace()
         # extract the box constraints on actions
@@ -92,92 +275,42 @@ class JaxRDDLCompiler:
         constraints = RDDLConstraints(simulator, vectorized=True)
         self.constraints = constraints
-        # basic operations
-        self.NEGATIVE = lambda x, param: jnp.negative(x)
-        self.ARITHMETIC_OPS = {
-            '+': lambda x, y, param: jnp.add(x, y),
-            '-': lambda x, y, param: jnp.subtract(x, y),
-            '*': lambda x, y, param: jnp.multiply(x, y),
-            '/': lambda x, y, param: jnp.divide(x, y)
-        }
-        self.RELATIONAL_OPS = {
-            '>=': lambda x, y, param: jnp.greater_equal(x, y),
-            '<=': lambda x, y, param: jnp.less_equal(x, y),
-            '<': lambda x, y, param: jnp.less(x, y),
-            '>': lambda x, y, param: jnp.greater(x, y),
-            '==': lambda x, y, param: jnp.equal(x, y),
-            '~=': lambda x, y, param: jnp.not_equal(x, y)
-        }
-        self.LOGICAL_NOT = lambda x, param: jnp.logical_not(x)
-        self.LOGICAL_OPS = {
-            '^': lambda x, y, param: jnp.logical_and(x, y),
-            '&': lambda x, y, param: jnp.logical_and(x, y),
-            '|': lambda x, y, param: jnp.logical_or(x, y),
-            '~': lambda x, y, param: jnp.logical_xor(x, y),
-            '=>': lambda x, y, param: jnp.logical_or(jnp.logical_not(x), y),
-            '<=>': lambda x, y, param: jnp.equal(x, y)
-        }
-        self.AGGREGATION_OPS = {
-            'sum': lambda x, axis, param: jnp.sum(x, axis=axis),
-            'avg': lambda x, axis, param: jnp.mean(x, axis=axis),
-            'prod': lambda x, axis, param: jnp.prod(x, axis=axis),
-            'minimum': lambda x, axis, param: jnp.min(x, axis=axis),
-            'maximum': lambda x, axis, param: jnp.max(x, axis=axis),
-            'forall': lambda x, axis, param: jnp.all(x, axis=axis),
-            'exists': lambda x, axis, param: jnp.any(x, axis=axis),
-            'argmin': lambda x, axis, param: jnp.argmin(x, axis=axis),
-            'argmax': lambda x, axis, param: jnp.argmax(x, axis=axis)
-        }
+        # basic operations - these can be overridden in subclasses
+        self.compile_non_fluent_exact = compile_non_fluent_exact
+        self.NEGATIVE = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_NEGATIVE
+        self.ARITHMETIC_OPS = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_ARITHMETIC.copy()
+        self.RELATIONAL_OPS = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_RELATIONAL.copy()
+        self.LOGICAL_NOT = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_LOGICAL_NOT
+        self.LOGICAL_OPS = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_LOGICAL.copy()
+        self.AGGREGATION_OPS = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_AGGREGATION.copy()
         self.AGGREGATION_BOOL = {'forall', 'exists'}
-        self.KNOWN_UNARY = {
-            'abs': lambda x, param: jnp.abs(x),
-            'sgn': lambda x, param: jnp.sign(x),
-            'round': lambda x, param: jnp.round(x),
-            'floor': lambda x, param: jnp.floor(x),
-            'ceil': lambda x, param: jnp.ceil(x),
-            'cos': lambda x, param: jnp.cos(x),
-            'sin': lambda x, param: jnp.sin(x),
-            'tan': lambda x, param: jnp.tan(x),
-            'acos': lambda x, param: jnp.arccos(x),
-            'asin': lambda x, param: jnp.arcsin(x),
-            'atan': lambda x, param: jnp.arctan(x),
-            'cosh': lambda x, param: jnp.cosh(x),
-            'sinh': lambda x, param: jnp.sinh(x),
-            'tanh': lambda x, param: jnp.tanh(x),
-            'exp': lambda x, param: jnp.exp(x),
-            'ln': lambda x, param: jnp.log(x),
-            'sqrt': lambda x, param: jnp.sqrt(x),
-            'lngamma': lambda x, param: scipy.special.gammaln(x),
-            'gamma': lambda x, param: jnp.exp(scipy.special.gammaln(x))
-        }
-        self.KNOWN_BINARY = {
-            'div': lambda x, y, param: jnp.floor_divide(x, y),
-            'mod': lambda x, y, param: jnp.mod(x, y),
-            'fmod': lambda x, y, param: jnp.mod(x, y),
-            'min': lambda x, y, param: jnp.minimum(x, y),
-            'max': lambda x, y, param: jnp.maximum(x, y),
-            'pow': lambda x, y, param: jnp.power(x, y),
-            'log': lambda x, y, param: jnp.log(x) / jnp.log(y),
-            'hypot': lambda x, y, param: jnp.hypot(x, y)
-        }
+        self.KNOWN_UNARY = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_UNARY.copy()
+        self.KNOWN_BINARY = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_BINARY.copy()
+        self.IF_HELPER = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_IF
+        self.SWITCH_HELPER = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_SWITCH
+        self.BERNOULLI_HELPER = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_BERNOULLI
+        self.DISCRETE_HELPER = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_DISCRETE
     # ===========================================================================
     # main compilation subroutines
     # ===========================================================================
-    def compile(self, log_jax_expr: bool=False) -> None:
+    def compile(self, log_jax_expr: bool=False, heading: str='') -> None:
         '''Compiles the current RDDL into Jax expressions.
         :param log_jax_expr: whether to pretty-print the compiled Jax functions
         to the log file
+        :param heading: the heading to print before compilation information
         '''
-        info = {}
+        info = ({}, [])
         self.invariants = self._compile_constraints(self.rddl.invariants, info)
         self.preconditions = self._compile_constraints(self.rddl.preconditions, info)
         self.terminations = self._compile_constraints(self.rddl.terminations, info)
         self.cpfs = self._compile_cpfs(info)
         self.reward = self._compile_reward(info)
-        self.model_params = info
+        self.model_params = {key: value
+                             for (key, (value, *_)) in info[0].items()}
+        self.relaxations = info[1]
         if log_jax_expr and self.logger is not None:
             printed = self.print_jax()
@@ -189,6 +322,7 @@ class JaxRDDLCompiler:
             printed_terminals = '\n\n'.join(v for v in printed['terminations'])
             printed_params = '\n'.join(f'{k}: {v}' for (k, v) in info.items())
             message = (
+                f'[info] {heading}\n'
                 f'[info] compiled JAX CPFs:\n\n'
                 f'{printed_cpfs}\n\n'
                 f'[info] compiled JAX reward:\n\n'
@@ -281,17 +415,21 @@ class JaxRDDLCompiler:
         return jax_inequalities, jax_equalities
     def compile_transition(self, check_constraints: bool=False,
-                           constraint_func: bool=False):
+                           constraint_func: bool=False) -> Callable:
         '''Compiles the current RDDL model into a JAX transition function that
         samples the next state.
-        The signature of the returned function is (key, actions, subs,
-        model_params), where:
+        The arguments of the returned function are:
             - key is the PRNG key
             - actions is the dict of action tensors
             - subs is the dict of current pvar value tensors
             - model_params is a dict of parameters for the relaxed model.
+        The returned value of the function is:
+            - subs holds the fluent values for the next epoch
+            - log includes all the auxiliary information about constraints
+              satisfied, errors, etc.
         constraint_func provides the option to compile nonlinear constraints:
             1. f(s, a) ?? g(s, a)
@@ -361,6 +499,10 @@ class JaxRDDLCompiler:
             reward, key, err = reward_fn(subs, model_params, key)
             errors |= err
+            # calculate fluent values
+            fluents = {name: values for (name, values) in subs.items()
+                       if name not in rddl.non_fluents}
             # set the next state to the current state
             for (state, next_state) in rddl.next_state.items():
                 subs[state] = subs[next_state]
@@ -383,8 +525,7 @@ class JaxRDDLCompiler:
             # prepare the return value
             log = {
-                'pvar': subs,
-                'action': actions,
+                'fluents': fluents,
                 'reward': reward,
                 'error': errors,
                 'precondition': precond_check,
@@ -395,7 +536,7 @@ class JaxRDDLCompiler:
                 log['inequalities'] = inequalities
                 log['equalities'] = equalities
-            return log
+            return subs, log
         return _jax_wrapped_single_step
@@ -403,18 +544,28 @@ class JaxRDDLCompiler:
                          n_steps: int,
                          n_batch: int,
                          check_constraints: bool=False,
-                         constraint_func: bool=False):
+                         constraint_func: bool=False) -> Callable:
         '''Compiles the current RDDL model into a JAX transition function that
         samples trajectories with a fixed horizon from a policy.
-        The signature of the policy function is (key, params, hyperparams,
-        step, states), where:
+        The arguments of the returned function are:
+            - key is the PRNG key (used by a stochastic policy)
+            - policy_params is a pytree of trainable policy weights
+            - hyperparams is a pytree of (optional) fixed policy hyper-parameters
+            - subs is the dictionary of current fluent tensor values
+            - model_params is a dict of model hyperparameters.
+        The returned value of the returned function is:
+            - log is the dictionary of all trajectory information, including
+              constraints that were satisfied, errors, etc.
+        The arguments of the policy function are:
             - key is the PRNG key (used by a stochastic policy)
             - params is a pytree of trainable policy weights
             - hyperparams is a pytree of (optional) fixed policy hyper-parameters
             - step is the time index of the decision in the current rollout
             - states is a dict of tensors for the current observation.
         :param policy: a Jax compiled function for the policy as described above
         decision epoch, state dict, and an RNG key and returns an action dict
         :param n_steps: the rollout horizon
@@ -428,27 +579,32 @@ class JaxRDDLCompiler:
         rddl = self.rddl
         jax_step_fn = self.compile_transition(check_constraints, constraint_func)
+        # for POMDP only observ-fluents are assumed visible to the policy
+        if rddl.observ_fluents:
+            observed_vars = rddl.observ_fluents
+        else:
+            observed_vars = rddl.state_fluents
         # evaluate the step from the policy
         def _jax_wrapped_single_step_policy(key, policy_params, hyperparams,
                                             step, subs, model_params):
             states = {var: values
                       for (var, values) in subs.items()
-                      if rddl.variable_types[var] == 'state-fluent'}
+                      if var in observed_vars}
             actions = policy(key, policy_params, hyperparams, step, states)
             key, subkey = random.split(key)
-            log = jax_step_fn(subkey, actions, subs, model_params)
-            return log
+            subs, log = jax_step_fn(subkey, actions, subs, model_params)
+            return subs, log
         # do a batched step update from the policy
         def _jax_wrapped_batched_step_policy(carry, step):
             key, policy_params, hyperparams, subs, model_params = carry
             key, *subkeys = random.split(key, num=1 + n_batch)
             keys = jnp.asarray(subkeys)
-            log = jax.vmap(
+            subs, log = jax.vmap(
                 _jax_wrapped_single_step_policy,
                 in_axes=(0, None, None, None, 0, None)
             )(keys, policy_params, hyperparams, step, subs, model_params)
-            subs = log['pvar']
             carry = (key, policy_params, hyperparams, subs, model_params)
             return carry, log
@@ -467,7 +623,7 @@ class JaxRDDLCompiler:
     # error checks
     # ===========================================================================
-    def print_jax(self) -> Dict[str, object]:
+    def print_jax(self) -> Dict[str, Any]:
         '''Returns a dictionary containing the string representations of all
         Jax compiled expressions from the RDDL file.
         '''
@@ -564,7 +720,7 @@ class JaxRDDLCompiler:
     }
     @staticmethod
-    def get_error_codes(error):
+    def get_error_codes(error: int) -> List[int]:
         '''Given a compacted integer error flag from the execution of Jax, and
         decomposes it into individual error codes.
         '''
@@ -573,7 +729,7 @@ class JaxRDDLCompiler:
         return errors
     @staticmethod
-    def get_error_messages(error):
+    def get_error_messages(error: int) -> List[str]:
         '''Given a compacted integer error flag from the execution of Jax, and
         decomposes it into error strings.
         '''
@@ -586,28 +742,40 @@ class JaxRDDLCompiler:
     # ===========================================================================
     def _unwrap(self, op, expr_id, info):
-        sep = JaxRDDLCompiler.MODEL_PARAM_TAG_SEPARATOR
         jax_op, name = op, None
+        model_params, relaxed_list = info
         if isinstance(op, tuple):
             jax_op, param = op
             if param is not None:
                 tags, values = param
+                sep = JaxRDDLCompiler.MODEL_PARAM_TAG_SEPARATOR
                 if isinstance(tags, tuple):
                     name = sep.join(tags)
                 else:
                     name = str(tags)
                 name = f'{name}{sep}{expr_id}'
-                if name in info:
-                    raise Exception(f'Model parameter {name} is already defined.')
-                info[name] = values
+                if name in model_params:
+                    raise RuntimeError(
+                        f'Internal error: model parameter {name} is already defined.')
+                model_params[name] = (values, tags, expr_id, jax_op.__name__)
+            relaxed_list.append((param, expr_id, jax_op.__name__))
         return jax_op, name
-    def get_ids_of_parameterized_expressions(self) -> List[int]:
-        '''Returns a list of expression IDs that have tuning parameters.'''
-        sep = JaxRDDLCompiler.MODEL_PARAM_TAG_SEPARATOR
-        ids = [int(key.split(sep)[-1]) for key in self.model_params]
-        return ids
+    def summarize_model_relaxations(self) -> str:
+        '''Returns a string of information about model relaxations in the
+        compiled model.'''
+        occurence_by_type = {}
+        for (_, expr_id, jax_op) in self.relaxations:
+            etype = self.traced.lookup(expr_id).etype
+            source = f'{etype[1]} ({etype[0]})'
+            sub = f'{source:<30} --> {jax_op}'
+            occurence_by_type[sub] = occurence_by_type.get(sub, 0) + 1
+        col = "{:<80} {:<10}\n"
+        table = col.format('Substitution', 'Count')
+        for (sub, occurs) in occurence_by_type.items():
+            table += col.format(sub, occurs)
+        return table
     # ===========================================================================
     # expression compilation
     # ===========================================================================
@@ -640,7 +808,8 @@ class JaxRDDLCompiler:
             raise RDDLNotImplementedError(
                 f'Internal error: expression type {expr} is not supported.\n' +
                 print_stack_trace(expr))
+        # force type cast of tensor as required by caller
         if dtype is not None:
             jax_expr = self._jax_cast(jax_expr, dtype)
@@ -660,6 +829,17 @@ class JaxRDDLCompiler:
         return _jax_wrapped_cast
+    def _fix_dtype(self, value):
+        dtype = jnp.atleast_1d(value).dtype
+        if jnp.issubdtype(dtype, jnp.integer):
+            return self.INT
+        elif jnp.issubdtype(dtype, jnp.floating):
+            return self.REAL
+        elif jnp.issubdtype(dtype, jnp.bool_) or jnp.issubdtype(dtype, bool):
+            return bool
+        else:
+            raise TypeError(f'Invalid type {dtype} of {value}.')
     # ===========================================================================
     # leaves
     # ===========================================================================
@@ -669,7 +849,7 @@ class JaxRDDLCompiler:
         cached_value = self.traced.cached_sim_info(expr)
         def _jax_wrapped_constant(x, params, key):
-            sample = jnp.asarray(cached_value)
+            sample = jnp.asarray(cached_value, dtype=self._fix_dtype(cached_value))
             return sample, key, NORMAL
         return _jax_wrapped_constant
@@ -693,7 +873,7 @@ class JaxRDDLCompiler:
             cached_value = cached_info
             def _jax_wrapped_object(x, params, key):
-                sample = jnp.asarray(cached_value)
+                sample = jnp.asarray(cached_value, dtype=self._fix_dtype(cached_value))
                 return sample, key, NORMAL
             return _jax_wrapped_object
@@ -702,7 +882,8 @@ class JaxRDDLCompiler:
         elif cached_info is None:
             def _jax_wrapped_pvar_scalar(x, params, key):
-                sample = jnp.asarray(x[var])
+                value = x[var]
+                sample = jnp.asarray(value, dtype=self._fix_dtype(value))
                 return sample, key, NORMAL
             return _jax_wrapped_pvar_scalar
@@ -721,7 +902,8 @@ class JaxRDDLCompiler:
                 def _jax_wrapped_pvar_tensor_nested(x, params, key):
                     error = NORMAL
-                    sample = jnp.asarray(x[var])
+                    value = x[var]
+                    sample = jnp.asarray(value, dtype=self._fix_dtype(value))
                     new_slices = [None] * len(jax_nested_expr)
                     for (i, jax_expr) in enumerate(jax_nested_expr):
                         new_slices[i], key, err = jax_expr(x, params, key)
@@ -736,7 +918,8 @@ class JaxRDDLCompiler:
             else:
                 def _jax_wrapped_pvar_tensor_non_nested(x, params, key):
-                    sample = jnp.asarray(x[var])
+                    value = x[var]
+                    sample = jnp.asarray(value, dtype=self._fix_dtype(value))
                     if slices:
                         sample = sample[slices]
                     if axis:
@@ -795,16 +978,23 @@ class JaxRDDLCompiler:
     def _jax_arithmetic(self, expr, info):
         _, op = expr.etype
-        valid_ops = self.ARITHMETIC_OPS
+        # if expression is non-fluent, always use the exact operation
+        if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(expr):
+            valid_ops = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_ARITHMETIC
+            negative_op = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_NEGATIVE
+        else:
+            valid_ops = self.ARITHMETIC_OPS
+            negative_op = self.NEGATIVE
         JaxRDDLCompiler._check_valid_op(expr, valid_ops)
+        # recursively compile arguments
         args = expr.args
         n = len(args)
         if n == 1 and op == '-':
             arg, = args
             jax_expr = self._jax(arg, info)
-            jax_op, jax_param = self._unwrap(self.NEGATIVE, expr.id, info)
+            jax_op, jax_param = self._unwrap(negative_op, expr.id, info)
             return self._jax_unary(jax_expr, jax_op, jax_param, at_least_int=True)
         elif n == 2:
@@ -819,29 +1009,42 @@ class JaxRDDLCompiler:
     def _jax_relational(self, expr, info):
         _, op = expr.etype
-        valid_ops = self.RELATIONAL_OPS
+        # if expression is non-fluent, always use the exact operation
+        if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(expr):
+            valid_ops = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_RELATIONAL
+        else:
+            valid_ops = self.RELATIONAL_OPS
         JaxRDDLCompiler._check_valid_op(expr, valid_ops)
-        JaxRDDLCompiler._check_num_args(expr, 2)
+        jax_op, jax_param = self._unwrap(valid_ops[op], expr.id, info)
+        # recursively compile arguments
+        JaxRDDLCompiler._check_num_args(expr, 2)
         lhs, rhs = expr.args
         jax_lhs = self._jax(lhs, info)
         jax_rhs = self._jax(rhs, info)
-        jax_op, jax_param = self._unwrap(valid_ops[op], expr.id, info)
         return self._jax_binary(
             jax_lhs, jax_rhs, jax_op, jax_param, at_least_int=True)
     def _jax_logical(self, expr, info):
         _, op = expr.etype
-        valid_ops = self.LOGICAL_OPS
+        # if expression is non-fluent, always use the exact operation
+        if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(expr):
+            valid_ops = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_LOGICAL
+            logical_not_op = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_LOGICAL_NOT
+        else:
+            valid_ops = self.LOGICAL_OPS
+            logical_not_op = self.LOGICAL_NOT
         JaxRDDLCompiler._check_valid_op(expr, valid_ops)
+        # recursively compile arguments
         args = expr.args
-        n = len(args)
+        n = len(args)
         if n == 1 and op == '~':
             arg, = args
             jax_expr = self._jax(arg, info)
-            jax_op, jax_param = self._unwrap(self.LOGICAL_NOT, expr.id, info)
+            jax_op, jax_param = self._unwrap(logical_not_op, expr.id, info)
             return self._jax_unary(jax_expr, jax_op, jax_param, check_dtype=bool)
         elif n == 2:
@@ -856,17 +1059,21 @@ class JaxRDDLCompiler:
     def _jax_aggregation(self, expr, info):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_CAST']
         _, op = expr.etype
-        valid_ops = self.AGGREGATION_OPS
+        # if expression is non-fluent, always use the exact operation
+        if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(expr):
+            valid_ops = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_AGGREGATION
+        else:
+            valid_ops = self.AGGREGATION_OPS
         JaxRDDLCompiler._check_valid_op(expr, valid_ops)
-        is_floating = op not in self.AGGREGATION_BOOL
+        jax_op, jax_param = self._unwrap(valid_ops[op], expr.id, info)
+        # recursively compile arguments
+        is_floating = op not in self.AGGREGATION_BOOL
         * _, arg = expr.args
-        _, axes = self.traced.cached_sim_info(expr)
+        _, axes = self.traced.cached_sim_info(expr)
         jax_expr = self._jax(arg, info)
-        jax_op, jax_param = self._unwrap(valid_ops[op], expr.id, info)
         def _jax_wrapped_aggregation(x, params, key):
             sample, key, err = jax_expr(x, params, key)
@@ -884,21 +1091,28 @@ class JaxRDDLCompiler:
     def _jax_functional(self, expr, info):
         _, op = expr.etype
-        # unary function
-        if op in self.KNOWN_UNARY:
+        # if expression is non-fluent, always use the exact operation
+        if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(expr):
+            unary_ops = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_UNARY
+            binary_ops = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_BINARY
+        else:
+            unary_ops = self.KNOWN_UNARY
+            binary_ops = self.KNOWN_BINARY
+        # recursively compile arguments
+        if op in unary_ops:
             JaxRDDLCompiler._check_num_args(expr, 1)
             arg, = expr.args
             jax_expr = self._jax(arg, info)
-            jax_op, jax_param = self._unwrap(self.KNOWN_UNARY[op], expr.id, info)
+            jax_op, jax_param = self._unwrap(unary_ops[op], expr.id, info)
             return self._jax_unary(jax_expr, jax_op, jax_param, at_least_int=True)
-        # binary function
-        elif op in self.KNOWN_BINARY:
+        elif op in binary_ops:
             JaxRDDLCompiler._check_num_args(expr, 2)
             lhs, rhs = expr.args
             jax_lhs = self._jax(lhs, info)
             jax_rhs = self._jax(rhs, info)
-            jax_op, jax_param = self._unwrap(self.KNOWN_BINARY[op], expr.id, info)
+            jax_op, jax_param = self._unwrap(binary_ops[op], expr.id, info)
             return self._jax_binary(
                 jax_lhs, jax_rhs, jax_op, jax_param, at_least_int=True)
@@ -921,19 +1135,19 @@ class JaxRDDLCompiler:
             f'Control operator {op} is not supported.\n' +
             print_stack_trace(expr))
-    def _jax_if_helper(self):
-        def _jax_wrapped_if_calc_exact(c, a, b, param):
-            return jnp.where(c, a, b)
-        return _jax_wrapped_if_calc_exact
     def _jax_if(self, expr, info):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_CAST']
         JaxRDDLCompiler._check_num_args(expr, 3)
-        jax_if, jax_param = self._unwrap(self._jax_if_helper(), expr.id, info)
+        pred, if_true, if_false = expr.args
+        # if predicate is non-fluent, always use the exact operation
+        if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(pred):
+            if_op = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_IF
+        else:
+            if_op = self.IF_HELPER
+        jax_if, jax_param = self._unwrap(if_op, expr.id, info)
-        pred, if_true, if_false = expr.args
+        # recursively compile arguments
         jax_pred = self._jax(pred, info)
         jax_true = self._jax(if_true, info)
         jax_false = self._jax(if_false, info)
@@ -951,23 +1165,20 @@ class JaxRDDLCompiler:
         return _jax_wrapped_if_then_else
-    def _jax_switch_helper(self):
-        def _jax_wrapped_switch_calc_exact(pred, cases, param):
-            pred = pred[jnp.newaxis, ...]
-            sample = jnp.take_along_axis(cases, pred, axis=0)
-            assert sample.shape[0] == 1
-            return sample[0, ...]
-        return _jax_wrapped_switch_calc_exact
     def _jax_switch(self, expr, info):
-        pred, *_ = expr.args
+        # if expression is non-fluent, always use the exact operation
+        if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(expr):
+            switch_op = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_SWITCH
+        else:
+            switch_op = self.SWITCH_HELPER
+        jax_switch, jax_param = self._unwrap(switch_op, expr.id, info)
+        # recursively compile predicate
+        pred, *_ = expr.args
         jax_pred = self._jax(pred, info)
-        jax_switch, jax_param = self._unwrap(
-            self._jax_switch_helper(), expr.id, info)
-        # wrap cases as JAX expressions
+        # recursively compile cases
         cases, default = self.traced.cached_sim_info(expr)
         jax_default = None if default is None else self._jax(default, info)
         jax_cases = [(jax_default if _case is None else self._jax(_case, info))
@@ -983,7 +1194,8 @@ class JaxRDDLCompiler:
             for (i, jax_case) in enumerate(jax_cases):
                 sample_cases[i], key, err_case = jax_case(x, params, key)
                 err |= err_case
-            sample_cases = jnp.asarray(sample_cases)
+            sample_cases = jnp.asarray(
+                sample_cases, dtype=self._fix_dtype(sample_cases))
             # predicate (enum) is an integer - use it to extract from case array
             param = params.get(jax_param, None)
@@ -1179,30 +1391,28 @@ class JaxRDDLCompiler:
             scale, key, err2 = jax_scale(x, params, key)
             key, subkey = random.split(key)
             U = random.uniform(key=subkey, shape=jnp.shape(scale), dtype=self.REAL)
-            sample = scale * jnp.power(-jnp.log1p(-U), 1.0 / shape)
+            sample = scale * jnp.power(-jnp.log(U), 1.0 / shape)
             out_of_bounds = jnp.logical_not(jnp.all((shape > 0) & (scale > 0)))
             err = err1 | err2 | (out_of_bounds * ERR)
             return sample, key, err
         return _jax_wrapped_distribution_weibull
-    def _jax_bernoulli_helper(self):
-        def _jax_wrapped_calc_bernoulli_exact(key, prob, param):
-            return random.bernoulli(key, prob)
-        return _jax_wrapped_calc_bernoulli_exact
     def _jax_bernoulli(self, expr, info):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_BERNOULLI']
         JaxRDDLCompiler._check_num_args(expr, 1)
-        jax_bern, jax_param = self._unwrap(
-            self._jax_bernoulli_helper(), expr.id, info)
         arg_prob, = expr.args
+        # if probability is non-fluent, always use the exact operation
+        if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(arg_prob):
+            bern_op = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_BERNOULLI
+        else:
+            bern_op = self.BERNOULLI_HELPER
+        jax_bern, jax_param = self._unwrap(bern_op, expr.id, info)
+        # recursively compile arguments
         jax_prob = self._jax(arg_prob, info)
-        # uses the implicit JAX subroutine
         def _jax_wrapped_distribution_bernoulli(x, params, key):
             prob, key, err = jax_prob(x, params, key)
             key, subkey = random.split(key)
@@ -1266,8 +1476,8 @@ class JaxRDDLCompiler:
         def _jax_wrapped_distribution_binomial(x, params, key):
             trials, key, err2 = jax_trials(x, params, key)
             prob, key, err1 = jax_prob(x, params, key)
-            trials = jnp.asarray(trials, self.REAL)
-            prob = jnp.asarray(prob, self.REAL)
+            trials = jnp.asarray(trials, dtype=self.REAL)
+            prob = jnp.asarray(prob, dtype=self.REAL)
             key, subkey = random.split(key)
             dist = tfp.distributions.Binomial(total_count=trials, probs=prob)
             sample = dist.sample(seed=subkey).astype(self.INT)
@@ -1290,11 +1500,10 @@ class JaxRDDLCompiler:
         def _jax_wrapped_distribution_negative_binomial(x, params, key):
             trials, key, err2 = jax_trials(x, params, key)
             prob, key, err1 = jax_prob(x, params, key)
-            trials = jnp.asarray(trials, self.REAL)
-            prob = jnp.asarray(prob, self.REAL)
+            trials = jnp.asarray(trials, dtype=self.REAL)
+            prob = jnp.asarray(prob, dtype=self.REAL)
             key, subkey = random.split(key)
-            dist = tfp.distributions.NegativeBinomial(
-                total_count=trials, probs=prob)
+            dist = tfp.distributions.NegativeBinomial(total_count=trials, probs=prob)
             sample = dist.sample(seed=subkey).astype(self.INT)
             out_of_bounds = jnp.logical_not(jnp.all(
                 (prob >= 0) & (prob <= 1) & (trials > 0)))
@@ -1316,7 +1525,7 @@ class JaxRDDLCompiler:
             shape, key, err1 = jax_shape(x, params, key)
             rate, key, err2 = jax_rate(x, params, key)
             key, subkey = random.split(key)
-            sample = random.beta(key=subkey, a=shape, b=rate)
+            sample = random.beta(key=subkey, a=shape, b=rate, dtype=self.REAL)
             out_of_bounds = jnp.logical_not(jnp.all((shape > 0) & (rate > 0)))
             err = err1 | err2 | (out_of_bounds * ERR)
             return sample, key, err
@@ -1325,23 +1534,35 @@ class JaxRDDLCompiler:
     def _jax_geometric(self, expr, info):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_GEOMETRIC']
-        JaxRDDLCompiler._check_num_args(expr, 1)
+        JaxRDDLCompiler._check_num_args(expr, 1)
         arg_prob, = expr.args
         jax_prob = self._jax(arg_prob, info)
-        floor_op, jax_param = self._unwrap(
-            self.KNOWN_UNARY['floor'], expr.id, info)
-        # reparameterization trick Geom(p) = floor(ln(U(0, 1)) / ln(p)) + 1
-        def _jax_wrapped_distribution_geometric(x, params, key):
-            prob, key, err = jax_prob(x, params, key)
-            key, subkey = random.split(key)
-            U = random.uniform(key=subkey, shape=jnp.shape(prob), dtype=self.REAL)
-            param = params.get(jax_param, None)
-            sample = floor_op(jnp.log1p(-U) / jnp.log1p(-prob), param) + 1
-            out_of_bounds = jnp.logical_not(jnp.all((prob >= 0) & (prob <= 1)))
-            err |= (out_of_bounds * ERR)
-            return sample, key, err
+        if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(arg_prob):
+            # prob is non-fluent: do not reparameterize
+            def _jax_wrapped_distribution_geometric(x, params, key):
+                prob, key, err = jax_prob(x, params, key)
+                key, subkey = random.split(key)
+                sample = random.geometric(key=subkey, p=prob, dtype=self.INT)
+                out_of_bounds = jnp.logical_not(jnp.all((prob >= 0) & (prob <= 1)))
+                err |= (out_of_bounds * ERR)
+                return sample, key, err
+        else:
+            floor_op, jax_param = self._unwrap(
+                self.KNOWN_UNARY['floor'], expr.id, info)
+            # reparameterization trick Geom(p) = floor(ln(U(0, 1)) / ln(p)) + 1
+            def _jax_wrapped_distribution_geometric(x, params, key):
+                prob, key, err = jax_prob(x, params, key)
+                key, subkey = random.split(key)
+                U = random.uniform(key=subkey, shape=jnp.shape(prob), dtype=self.REAL)
+                param = params.get(jax_param, None)
+                sample = floor_op(jnp.log(U) / jnp.log(1.0 - prob), param) + 1
+                out_of_bounds = jnp.logical_not(jnp.all((prob >= 0) & (prob <= 1)))
+                err |= (out_of_bounds * ERR)
+                return sample, key, err
         return _jax_wrapped_distribution_geometric
@@ -1359,7 +1580,7 @@ class JaxRDDLCompiler:
             shape, key, err1 = jax_shape(x, params, key)
             scale, key, err2 = jax_scale(x, params, key)
             key, subkey = random.split(key)
-            sample = scale * random.pareto(key=subkey, b=shape)
+            sample = scale * random.pareto(key=subkey, b=shape, dtype=self.REAL)
             out_of_bounds = jnp.logical_not(jnp.all((shape > 0) & (scale > 0)))
             err = err1 | err2 | (out_of_bounds * ERR)
             return sample, key, err
@@ -1377,7 +1598,8 @@ class JaxRDDLCompiler:
         def _jax_wrapped_distribution_t(x, params, key):
             df, key, err = jax_df(x, params, key)
             key, subkey = random.split(key)
-            sample = random.t(key=subkey, df=df, shape=jnp.shape(df))
+            sample = random.t(
+                key=subkey, df=df, shape=jnp.shape(df), dtype=self.REAL)
             out_of_bounds = jnp.logical_not(jnp.all(df > 0))
             err |= (out_of_bounds * ERR)
             return sample, key, err
@@ -1464,7 +1686,7 @@ class JaxRDDLCompiler:
             scale, key, err2 = jax_scale(x, params, key)
             key, subkey = random.split(key)
             U = random.uniform(key=subkey, shape=jnp.shape(scale), dtype=self.REAL)
-            sample = jnp.log(1.0 - jnp.log1p(-U) / shape) / scale
+            sample = jnp.log(1.0 - jnp.log(U) / shape) / scale
             out_of_bounds = jnp.logical_not(jnp.all((shape > 0) & (scale > 0)))
             err = err1 | err2 | (out_of_bounds * ERR)
             return sample, key, err
@@ -1516,25 +1738,21 @@ class JaxRDDLCompiler:
     # random variables with enum support
     # ===========================================================================
-    def _jax_discrete_helper(self):
-        def _jax_wrapped_discrete_calc_exact(key, prob, param):
-            logits = jnp.log(prob)
-            sample = random.categorical(key=key, logits=logits, axis=-1)
-            out_of_bounds = jnp.logical_not(jnp.logical_and(
-                jnp.all(prob >= 0),
-                jnp.allclose(jnp.sum(prob, axis=-1), 1.0)))
-            return sample, out_of_bounds
-        return _jax_wrapped_discrete_calc_exact
     def _jax_discrete(self, expr, info, unnorm):
         NORMAL = JaxRDDLCompiler.ERROR_CODES['NORMAL']
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_DISCRETE']
-        jax_discrete, jax_param = self._unwrap(
-            self._jax_discrete_helper(), expr.id, info)
         ordered_args = self.traced.cached_sim_info(expr)
+        # if all probabilities are non-fluent, then always sample exact
+        has_fluent_arg = any(self.traced.cached_is_fluent(arg)
+                             for arg in ordered_args)
+        if self.compile_non_fluent_exact and not has_fluent_arg:
+            discrete_op = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_DISCRETE
+        else:
+            discrete_op = self.DISCRETE_HELPER
+        jax_discrete, jax_param = self._unwrap(discrete_op, expr.id, info)
+        # compile probability expressions
         jax_probs = [self._jax(arg, info) for arg in ordered_args]
         def _jax_wrapped_distribution_discrete(x, params, key):
@@ -1561,12 +1779,18 @@ class JaxRDDLCompiler:
     def _jax_discrete_pvar(self, expr, info, unnorm):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_DISCRETE']
-        JaxRDDLCompiler._check_num_args(expr, 1)
-        jax_discrete, jax_param = self._unwrap(
-            self._jax_discrete_helper(), expr.id, info)
+        JaxRDDLCompiler._check_num_args(expr, 2)
         _, args = expr.args
         arg, = args
+        # if probabilities are non-fluent, then always sample exact
+        if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(arg):
+            discrete_op = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_DISCRETE
+        else:
+            discrete_op = self.DISCRETE_HELPER
+        jax_discrete, jax_param = self._unwrap(discrete_op, expr.id, info)
+        # compile probability function
         jax_probs = self._jax(arg, info)
         def _jax_wrapped_distribution_discrete_pvar(x, params, key):
@@ -1687,7 +1911,7 @@ class JaxRDDLCompiler:
             out_of_bounds = jnp.logical_not(jnp.all(alpha > 0))
             error |= (out_of_bounds * ERR)
             key, subkey = random.split(key)
-            Gamma = random.gamma(key=subkey, a=alpha)
+            Gamma = random.gamma(key=subkey, a=alpha, dtype=self.REAL)
             sample = Gamma / jnp.sum(Gamma, axis=-1, keepdims=True)
             sample = jnp.moveaxis(sample, source=-1, destination=index)
             return sample, key, error
@@ -1706,8 +1930,8 @@ class JaxRDDLCompiler:
         def _jax_wrapped_distribution_multinomial(x, params, key):
             trials, key, err1 = jax_trials(x, params, key)
             prob, key, err2 = jax_prob(x, params, key)
-            trials = jnp.asarray(trials, self.REAL)
-            prob = jnp.asarray(prob, self.REAL)
+            trials = jnp.asarray(trials, dtype=self.REAL)
+            prob = jnp.asarray(prob, dtype=self.REAL)
             key, subkey = random.split(key)
             dist = tfp.distributions.Multinomial(total_count=trials, probs=prob)
             sample = dist.sample(seed=subkey).astype(self.INT)

pyRDDLGym-jax 0.1__py3-none-any.whl → 0.2__py3-none-any.whl

pyRDDLGym-jax 0.1py3-none-any.whl → 0.2py3-none-any.whl