PyPI - pyRDDLGym-jax - Versions diffs - 0.5__py3-none-any.whl → 1.0__py3-none-any.whl - Mend

pyRDDLGym-jax 0.5py3-none-any.whl → 1.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

pyRDDLGym_jax/core/compiler.py CHANGED Viewed

@@ -21,6 +21,8 @@ from pyRDDLGym.core.debug.exception import (
 from pyRDDLGym.core.debug.logger import Logger
 from pyRDDLGym.core.simulator import RDDLSimulatorPrecompiled
+from pyRDDLGym_jax.core.logic import ExactLogic
 # more robust approach - if user does not have this or broken try to continue
 try:
     from tensorflow_probability.substrates import jax as tfp
@@ -32,109 +34,6 @@ except Exception:
     tfp = None
-# ===========================================================================
-# EXACT RDDL TO JAX COMPILATION RULES
-# ===========================================================================
-def _function_unary_exact_named(op, name):
-    def _jax_wrapped_unary_fn_exact(x, param):
-        return op(x)
-    return _jax_wrapped_unary_fn_exact
-def _function_unary_exact_named_gamma():
-    def _jax_wrapped_unary_gamma_exact(x, param):
-        return jnp.exp(scipy.special.gammaln(x))
-    return _jax_wrapped_unary_gamma_exact
-def _function_binary_exact_named(op, name):
-    def _jax_wrapped_binary_fn_exact(x, y, param):
-        return op(x, y)
-    return _jax_wrapped_binary_fn_exact
-def _function_binary_exact_named_implies():
-    def _jax_wrapped_binary_implies_exact(x, y, param):
-        return jnp.logical_or(jnp.logical_not(x), y)
-    return _jax_wrapped_binary_implies_exact
-def _function_binary_exact_named_log():
-    def _jax_wrapped_binary_log_exact(x, y, param):
-        return jnp.log(x) / jnp.log(y)
-    return _jax_wrapped_binary_log_exact
-def _function_aggregation_exact_named(op, name):
-    def _jax_wrapped_aggregation_fn_exact(x, axis, param):
-        return op(x, axis=axis)
-    return _jax_wrapped_aggregation_fn_exact
-def _function_if_exact_named():
-    def _jax_wrapped_if_exact(c, a, b, param):
-        return jnp.where(c > 0.5, a, b)
-    return _jax_wrapped_if_exact
-def _function_switch_exact_named():
-    def _jax_wrapped_switch_exact(pred, cases, param):
-        pred = pred[jnp.newaxis, ...]
-        sample = jnp.take_along_axis(cases, pred, axis=0)
-        assert sample.shape[0] == 1
-        return sample[0, ...]
-    return _jax_wrapped_switch_exact
-def _function_bernoulli_exact_named():
-    def _jax_wrapped_bernoulli_exact(key, prob, param):
-        return random.bernoulli(key, prob)
-    return _jax_wrapped_bernoulli_exact
-def _function_discrete_exact_named():
-    def _jax_wrapped_discrete_exact(key, prob, param):
-        return random.categorical(key=key, logits=jnp.log(prob), axis=-1)
-    return _jax_wrapped_discrete_exact
-def _function_poisson_exact_named():
-    def _jax_wrapped_poisson_exact(key, rate, param):
-        return random.poisson(key=key, lam=rate, dtype=jnp.int64)
-    return _jax_wrapped_poisson_exact
-def _function_geometric_exact_named():
-    def _jax_wrapped_geometric_exact(key, prob, param):
-        return random.geometric(key=key, p=prob, dtype=jnp.int64)
-    return _jax_wrapped_geometric_exact
 class JaxRDDLCompiler:
     '''Compiles a RDDL AST representation into an equivalent JAX representation.
     All operations are identical to their numpy equivalents.
@@ -145,88 +44,96 @@ class JaxRDDLCompiler:
     # ===========================================================================
     # EXACT RDDL TO JAX COMPILATION RULES BY DEFAULT
     # ===========================================================================
-    EXACT_RDDL_TO_JAX_NEGATIVE = _function_unary_exact_named(jnp.negative, 'negative')
-    EXACT_RDDL_TO_JAX_ARITHMETIC = {
-        '+': _function_binary_exact_named(jnp.add, 'add'),
-        '-': _function_binary_exact_named(jnp.subtract, 'subtract'),
-        '*': _function_binary_exact_named(jnp.multiply, 'multiply'),
-        '/': _function_binary_exact_named(jnp.divide, 'divide')
-    }
+    @staticmethod
+    def wrap_logic(func):
+        def exact_func(id, init_params):
+            return func
+        return exact_func
+    EXACT_RDDL_TO_JAX_NEGATIVE = wrap_logic(ExactLogic.exact_unary_function(jnp.negative))
+    EXACT_RDDL_TO_JAX_ARITHMETIC = {
+        '+': wrap_logic(ExactLogic.exact_binary_function(jnp.add)),
+        '-': wrap_logic(ExactLogic.exact_binary_function(jnp.subtract)),
+        '*': wrap_logic(ExactLogic.exact_binary_function(jnp.multiply)),
+        '/': wrap_logic(ExactLogic.exact_binary_function(jnp.divide))
+    }
     EXACT_RDDL_TO_JAX_RELATIONAL = {
-        '>=': _function_binary_exact_named(jnp.greater_equal, 'greater_equal'),
-        '<=': _function_binary_exact_named(jnp.less_equal, 'less_equal'),
-        '<': _function_binary_exact_named(jnp.less, 'less'),
-        '>': _function_binary_exact_named(jnp.greater, 'greater'),
-        '==': _function_binary_exact_named(jnp.equal, 'equal'),
-        '~=': _function_binary_exact_named(jnp.not_equal, 'not_equal')
-    }
+        '>=': wrap_logic(ExactLogic.exact_binary_function(jnp.greater_equal)),
+        '<=': wrap_logic(ExactLogic.exact_binary_function(jnp.less_equal)),
+        '<': wrap_logic(ExactLogic.exact_binary_function(jnp.less)),
+        '>': wrap_logic(ExactLogic.exact_binary_function(jnp.greater)),
+        '==': wrap_logic(ExactLogic.exact_binary_function(jnp.equal)),
+        '~=': wrap_logic(ExactLogic.exact_binary_function(jnp.not_equal))
+    }
+    EXACT_RDDL_TO_JAX_LOGICAL_NOT = wrap_logic(ExactLogic.exact_unary_function(jnp.logical_not))
     EXACT_RDDL_TO_JAX_LOGICAL = {
-        '^': _function_binary_exact_named(jnp.logical_and, 'and'),
-        '&': _function_binary_exact_named(jnp.logical_and, 'and'),
-        '|': _function_binary_exact_named(jnp.logical_or, 'or'),
-        '~': _function_binary_exact_named(jnp.logical_xor, 'xor'),
-        '=>': _function_binary_exact_named_implies(),
-        '<=>': _function_binary_exact_named(jnp.equal, 'iff')
-    }
-    EXACT_RDDL_TO_JAX_LOGICAL_NOT = _function_unary_exact_named(jnp.logical_not, 'not')
+        '^': wrap_logic(ExactLogic.exact_binary_function(jnp.logical_and)),
+        '&': wrap_logic(ExactLogic.exact_binary_function(jnp.logical_and)),
+        '|': wrap_logic(ExactLogic.exact_binary_function(jnp.logical_or)),
+        '~': wrap_logic(ExactLogic.exact_binary_function(jnp.logical_xor)),
+        '=>': wrap_logic(ExactLogic.exact_binary_implies),
+        '<=>': wrap_logic(ExactLogic.exact_binary_function(jnp.equal))
+    }
     EXACT_RDDL_TO_JAX_AGGREGATION = {
-        'sum': _function_aggregation_exact_named(jnp.sum, 'sum'),
-        'avg': _function_aggregation_exact_named(jnp.mean, 'avg'),
-        'prod': _function_aggregation_exact_named(jnp.prod, 'prod'),
-        'minimum': _function_aggregation_exact_named(jnp.min, 'minimum'),
-        'maximum': _function_aggregation_exact_named(jnp.max, 'maximum'),
-        'forall': _function_aggregation_exact_named(jnp.all, 'forall'),
-        'exists': _function_aggregation_exact_named(jnp.any, 'exists'),
-        'argmin': _function_aggregation_exact_named(jnp.argmin, 'argmin'),
-        'argmax': _function_aggregation_exact_named(jnp.argmax, 'argmax')
+        'sum': wrap_logic(ExactLogic.exact_aggregation(jnp.sum)),
+        'avg': wrap_logic(ExactLogic.exact_aggregation(jnp.mean)),
+        'prod': wrap_logic(ExactLogic.exact_aggregation(jnp.prod)),
+        'minimum': wrap_logic(ExactLogic.exact_aggregation(jnp.min)),
+        'maximum': wrap_logic(ExactLogic.exact_aggregation(jnp.max)),
+        'forall': wrap_logic(ExactLogic.exact_aggregation(jnp.all)),
+        'exists': wrap_logic(ExactLogic.exact_aggregation(jnp.any)),
+        'argmin': wrap_logic(ExactLogic.exact_aggregation(jnp.argmin)),
+        'argmax': wrap_logic(ExactLogic.exact_aggregation(jnp.argmax))
     }
     EXACT_RDDL_TO_JAX_UNARY = {
-        'abs': _function_unary_exact_named(jnp.abs, 'abs'),
-        'sgn': _function_unary_exact_named(jnp.sign, 'sgn'),
-        'round': _function_unary_exact_named(jnp.round, 'round'),
-        'floor': _function_unary_exact_named(jnp.floor, 'floor'),
-        'ceil': _function_unary_exact_named(jnp.ceil, 'ceil'),
-        'cos': _function_unary_exact_named(jnp.cos, 'cos'),
-        'sin': _function_unary_exact_named(jnp.sin, 'sin'),
-        'tan': _function_unary_exact_named(jnp.tan, 'tan'),
-        'acos': _function_unary_exact_named(jnp.arccos, 'acos'),
-        'asin': _function_unary_exact_named(jnp.arcsin, 'asin'),
-        'atan': _function_unary_exact_named(jnp.arctan, 'atan'),
-        'cosh': _function_unary_exact_named(jnp.cosh, 'cosh'),
-        'sinh': _function_unary_exact_named(jnp.sinh, 'sinh'),
-        'tanh': _function_unary_exact_named(jnp.tanh, 'tanh'),
-        'exp': _function_unary_exact_named(jnp.exp, 'exp'),
-        'ln': _function_unary_exact_named(jnp.log, 'ln'),
-        'sqrt': _function_unary_exact_named(jnp.sqrt, 'sqrt'),
-        'lngamma': _function_unary_exact_named(scipy.special.gammaln, 'lngamma'),
-        'gamma': _function_unary_exact_named_gamma()
-    }
+        'abs': wrap_logic(ExactLogic.exact_unary_function(jnp.abs)),
+        'sgn': wrap_logic(ExactLogic.exact_unary_function(jnp.sign)),
+        'round': wrap_logic(ExactLogic.exact_unary_function(jnp.round)),
+        'floor': wrap_logic(ExactLogic.exact_unary_function(jnp.floor)),
+        'ceil': wrap_logic(ExactLogic.exact_unary_function(jnp.ceil)),
+        'cos': wrap_logic(ExactLogic.exact_unary_function(jnp.cos)),
+        'sin': wrap_logic(ExactLogic.exact_unary_function(jnp.sin)),
+        'tan': wrap_logic(ExactLogic.exact_unary_function(jnp.tan)),
+        'acos': wrap_logic(ExactLogic.exact_unary_function(jnp.arccos)),
+        'asin': wrap_logic(ExactLogic.exact_unary_function(jnp.arcsin)),
+        'atan': wrap_logic(ExactLogic.exact_unary_function(jnp.arctan)),
+        'cosh': wrap_logic(ExactLogic.exact_unary_function(jnp.cosh)),
+        'sinh': wrap_logic(ExactLogic.exact_unary_function(jnp.sinh)),
+        'tanh': wrap_logic(ExactLogic.exact_unary_function(jnp.tanh)),
+        'exp': wrap_logic(ExactLogic.exact_unary_function(jnp.exp)),
+        'ln': wrap_logic(ExactLogic.exact_unary_function(jnp.log)),
+        'sqrt': wrap_logic(ExactLogic.exact_unary_function(jnp.sqrt)),
+        'lngamma': wrap_logic(ExactLogic.exact_unary_function(scipy.special.gammaln)),
+        'gamma': wrap_logic(ExactLogic.exact_unary_function(scipy.special.gamma))
+    }
+    @staticmethod
+    def _jax_wrapped_calc_log_exact(x, y, params):
+        return jnp.log(x) / jnp.log(y), params
     EXACT_RDDL_TO_JAX_BINARY = {
-        'div': _function_binary_exact_named(jnp.floor_divide, 'div'),
-        'mod': _function_binary_exact_named(jnp.mod, 'mod'),
-        'fmod': _function_binary_exact_named(jnp.mod, 'fmod'),
-        'min': _function_binary_exact_named(jnp.minimum, 'min'),
-        'max': _function_binary_exact_named(jnp.maximum, 'max'),
-        'pow': _function_binary_exact_named(jnp.power, 'pow'),
-        'log': _function_binary_exact_named_log(),
-        'hypot': _function_binary_exact_named(jnp.hypot, 'hypot'),
+        'div': wrap_logic(ExactLogic.exact_binary_function(jnp.floor_divide)),
+        'mod': wrap_logic(ExactLogic.exact_binary_function(jnp.mod)),
+        'fmod': wrap_logic(ExactLogic.exact_binary_function(jnp.mod)),
+        'min': wrap_logic(ExactLogic.exact_binary_function(jnp.minimum)),
+        'max': wrap_logic(ExactLogic.exact_binary_function(jnp.maximum)),
+        'pow': wrap_logic(ExactLogic.exact_binary_function(jnp.power)),
+        'log': wrap_logic(_jax_wrapped_calc_log_exact),
+        'hypot': wrap_logic(ExactLogic.exact_binary_function(jnp.hypot)),
     }
-    EXACT_RDDL_TO_JAX_IF = _function_if_exact_named()
-    EXACT_RDDL_TO_JAX_SWITCH = _function_switch_exact_named()
+    EXACT_RDDL_TO_JAX_IF = wrap_logic(ExactLogic.exact_if_then_else)
+    EXACT_RDDL_TO_JAX_SWITCH = wrap_logic(ExactLogic.exact_switch)
-    EXACT_RDDL_TO_JAX_BERNOULLI = _function_bernoulli_exact_named()
-    EXACT_RDDL_TO_JAX_DISCRETE = _function_discrete_exact_named()
-    EXACT_RDDL_TO_JAX_POISSON = _function_poisson_exact_named()
-    EXACT_RDDL_TO_JAX_GEOMETRIC = _function_geometric_exact_named()
+    EXACT_RDDL_TO_JAX_BERNOULLI = wrap_logic(ExactLogic.exact_bernoulli)
+    EXACT_RDDL_TO_JAX_DISCRETE = wrap_logic(ExactLogic.exact_discrete)
+    EXACT_RDDL_TO_JAX_POISSON = wrap_logic(ExactLogic.exact_poisson)
+    EXACT_RDDL_TO_JAX_GEOMETRIC = wrap_logic(ExactLogic.exact_geometric)
     def __init__(self, rddl: RDDLLiftedModel,
                  allow_synchronous_state: bool=True,
@@ -251,18 +158,17 @@ class JaxRDDLCompiler:
         if use64bit:
             self.INT = jnp.int64
             self.REAL = jnp.float64
-            jax.config.update('jax_enable_x64', True)
         else:
             self.INT = jnp.int32
             self.REAL = jnp.float32
-            jax.config.update('jax_enable_x64', False)
+        jax.config.update('jax_enable_x64', use64bit)
         self.ONE = jnp.asarray(1, dtype=self.INT)
         self.JAX_TYPES = {
             'int': self.INT,
             'real': self.REAL,
             'bool': bool
         }
         # compile initial values
         initializer = RDDLValueInitializer(rddl)
         self.init_values = initializer.initialize()
@@ -314,15 +220,13 @@ class JaxRDDLCompiler:
         to the log file
         :param heading: the heading to print before compilation information
         '''
-        info = ({}, [])
-        self.invariants = self._compile_constraints(self.rddl.invariants, info)
-        self.preconditions = self._compile_constraints(self.rddl.preconditions, info)
-        self.terminations = self._compile_constraints(self.rddl.terminations, info)
-        self.cpfs = self._compile_cpfs(info)
-        self.reward = self._compile_reward(info)
-        self.model_params = {key: value
-                             for (key, (value, *_)) in info[0].items()}
-        self.relaxations = info[1]
+        init_params = {}
+        self.invariants = self._compile_constraints(self.rddl.invariants, init_params)
+        self.preconditions = self._compile_constraints(self.rddl.preconditions, init_params)
+        self.terminations = self._compile_constraints(self.rddl.terminations, init_params)
+        self.cpfs = self._compile_cpfs(init_params)
+        self.reward = self._compile_reward(init_params)
+        self.model_params = init_params
         if log_jax_expr and self.logger is not None:
             printed = self.print_jax()
@@ -332,7 +236,7 @@ class JaxRDDLCompiler:
             printed_invariants = '\n\n'.join(v for v in printed['invariants'])
             printed_preconds = '\n\n'.join(v for v in printed['preconditions'])
             printed_terminals = '\n\n'.join(v for v in printed['terminations'])
-            printed_params = '\n'.join(f'{k}: {v}' for (k, v) in info.items())
+            printed_params = '\n'.join(f'{k}: {v}' for (k, v) in init_params.items())
             message = (
                 f'[info] {heading}\n'
                 f'[info] compiled JAX CPFs:\n\n'
@@ -350,21 +254,21 @@ class JaxRDDLCompiler:
             )
             self.logger.log(message)
-    def _compile_constraints(self, constraints, info):
-        return [self._jax(expr, info, dtype=bool) for expr in constraints]
+    def _compile_constraints(self, constraints, init_params):
+        return [self._jax(expr, init_params, dtype=bool) for expr in constraints]
-    def _compile_cpfs(self, info):
+    def _compile_cpfs(self, init_params):
         jax_cpfs = {}
         for cpfs in self.levels.values():
             for cpf in cpfs:
                 _, expr = self.rddl.cpfs[cpf]
                 prange = self.rddl.variable_ranges[cpf]
                 dtype = self.JAX_TYPES.get(prange, self.INT)
-                jax_cpfs[cpf] = self._jax(expr, info, dtype=dtype)
+                jax_cpfs[cpf] = self._jax(expr, init_params, dtype=dtype)
         return jax_cpfs
-    def _compile_reward(self, info):
-        return self._jax(self.rddl.reward, info, dtype=self.REAL)
+    def _compile_reward(self, init_params):
+        return self._jax(self.rddl.reward, init_params, dtype=self.REAL)
     def _extract_inequality_constraint(self, expr):
         result = []
@@ -392,7 +296,7 @@ class JaxRDDLCompiler:
                 result.extend(self._extract_equality_constraint(arg))
         return result
-    def _jax_nonlinear_constraints(self):
+    def _jax_nonlinear_constraints(self, init_params):
         rddl = self.rddl
         # extract the non-box inequality constraints on actions
@@ -402,12 +306,12 @@ class JaxRDDLCompiler:
                         if not self.constraints.is_box_preconditions[i]]
         # compile them to JAX and write as h(s, a) <= 0
-        op = self.ARITHMETIC_OPS['-']
+        jax_op = ExactLogic.exact_binary_function(jnp.subtract)
         jax_inequalities = []
         for (left, right) in inequalities:
-            jax_lhs = self._jax(left, {})
-            jax_rhs = self._jax(right, {})
-            jax_constr = self._jax_binary(jax_lhs, jax_rhs, op, '', at_least_int=True)
+            jax_lhs = self._jax(left, init_params)
+            jax_rhs = self._jax(right, init_params)
+            jax_constr = self._jax_binary(jax_lhs, jax_rhs, jax_op, at_least_int=True)
             jax_inequalities.append(jax_constr)
         # extract the non-box equality constraints on actions
@@ -419,15 +323,16 @@ class JaxRDDLCompiler:
         # compile them to JAX and write as g(s, a) == 0
         jax_equalities = []
         for (left, right) in equalities:
-            jax_lhs = self._jax(left, {})
-            jax_rhs = self._jax(right, {})
-            jax_constr = self._jax_binary(jax_lhs, jax_rhs, op, '', at_least_int=True)
+            jax_lhs = self._jax(left, init_params)
+            jax_rhs = self._jax(right, init_params)
+            jax_constr = self._jax_binary(jax_lhs, jax_rhs, jax_op, at_least_int=True)
             jax_equalities.append(jax_constr)
         return jax_inequalities, jax_equalities
     def compile_transition(self, check_constraints: bool=False,
-                           constraint_func: bool=False) -> Callable:
+                           constraint_func: bool=False,
+                           init_params_constr: Dict[str, Any]={}) -> Callable:
         '''Compiles the current RDDL model into a JAX transition function that
         samples the next state.
@@ -467,13 +372,12 @@ class JaxRDDLCompiler:
         '''
         NORMAL = JaxRDDLCompiler.ERROR_CODES['NORMAL']
         rddl = self.rddl
-        reward_fn, cpfs = self.reward, self.cpfs
-        preconds, invariants, terminals = \
-            self.preconditions, self.invariants, self.terminations
+        reward_fn, cpfs, preconds, invariants, terminals = \
+            self.reward, self.cpfs, self.preconditions, self.invariants, self.terminations
         # compile constraint information
         if constraint_func:
-            inequality_fns, equality_fns = self._jax_nonlinear_constraints()
+            inequality_fns, equality_fns = self._jax_nonlinear_constraints(init_params_constr)
         else:
             inequality_fns, equality_fns = None, None
@@ -486,7 +390,7 @@ class JaxRDDLCompiler:
             precond_check = True
             if check_constraints:
                 for precond in preconds:
-                    sample, key, err = precond(subs, model_params, key)
+                    sample, key, err, model_params = precond(subs, model_params, key)
                     precond_check = jnp.logical_and(precond_check, sample)
                     errors |= err
@@ -494,21 +398,21 @@ class JaxRDDLCompiler:
             inequalities, equalities = [], []
             if constraint_func:
                 for constraint in inequality_fns:
-                    sample, key, err = constraint(subs, model_params, key)
+                    sample, key, err, model_params = constraint(subs, model_params, key)
                     inequalities.append(sample)
                     errors |= err
                 for constraint in equality_fns:
-                    sample, key, err = constraint(subs, model_params, key)
+                    sample, key, err, model_params = constraint(subs, model_params, key)
                     equalities.append(sample)
                     errors |= err
             # calculate CPFs in topological order
             for (name, cpf) in cpfs.items():
-                subs[name], key, err = cpf(subs, model_params, key)
+                subs[name], key, err, model_params = cpf(subs, model_params, key)
                 errors |= err
             # calculate the immediate reward
-            reward, key, err = reward_fn(subs, model_params, key)
+            reward, key, err, model_params = reward_fn(subs, model_params, key)
             errors |= err
             # calculate fluent values
@@ -523,7 +427,7 @@ class JaxRDDLCompiler:
             invariant_check = True
             if check_constraints:
                 for invariant in invariants:
-                    sample, key, err = invariant(subs, model_params, key)
+                    sample, key, err, model_params = invariant(subs, model_params, key)
                     invariant_check = jnp.logical_and(invariant_check, sample)
                     errors |= err
@@ -531,7 +435,7 @@ class JaxRDDLCompiler:
             terminated_check = False
             if check_constraints:
                 for terminal in terminals:
-                    sample, key, err = terminal(subs, model_params, key)
+                    sample, key, err, model_params = terminal(subs, model_params, key)
                     terminated_check = jnp.logical_or(terminated_check, sample)
                     errors |= err
@@ -548,7 +452,7 @@ class JaxRDDLCompiler:
                 log['inequalities'] = inequalities
                 log['equalities'] = equalities
-            return subs, log
+            return subs, log, model_params
         return _jax_wrapped_single_step
@@ -556,7 +460,8 @@ class JaxRDDLCompiler:
                          n_steps: int,
                          n_batch: int,
                          check_constraints: bool=False,
-                         constraint_func: bool=False) -> Callable:
+                         constraint_func: bool=False,
+                         init_params_constr: Dict[str, Any]={}) -> Callable:
         '''Compiles the current RDDL model into a JAX transition function that
         samples trajectories with a fixed horizon from a policy.
@@ -569,7 +474,8 @@ class JaxRDDLCompiler:
         The returned value of the returned function is:
             - log is the dictionary of all trajectory information, including
-              constraints that were satisfied, errors, etc.
+              constraints that were satisfied, errors, etc
+            - model_params is the final set of model parameters.
         The arguments of the policy function is:
             - key is the PRNG key (used by a stochastic policy)
@@ -589,7 +495,8 @@ class JaxRDDLCompiler:
         in addition to the usual outputs
         '''
         rddl = self.rddl
-        jax_step_fn = self.compile_transition(check_constraints, constraint_func)
+        jax_step_fn = self.compile_transition(
+            check_constraints, constraint_func, init_params_constr)
         # for POMDP only observ-fluents are assumed visible to the policy
         if rddl.observ_fluents:
@@ -605,18 +512,19 @@ class JaxRDDLCompiler:
                       if var in observed_vars}
             actions = policy(key, policy_params, hyperparams, step, states)
             key, subkey = random.split(key)
-            subs, log = jax_step_fn(subkey, actions, subs, model_params)
-            return subs, log
+            return jax_step_fn(subkey, actions, subs, model_params)
         # do a batched step update from the policy
+        # TODO: come up with a better way to reduce the model_param batch dim
         def _jax_wrapped_batched_step_policy(carry, step):
             key, policy_params, hyperparams, subs, model_params = carry
             key, *subkeys = random.split(key, num=1 + n_batch)
             keys = jnp.asarray(subkeys)
-            subs, log = jax.vmap(
+            subs, log, model_params = jax.vmap(
                 _jax_wrapped_single_step_policy,
                 in_axes=(0, None, None, None, 0, None)
             )(keys, policy_params, hyperparams, step, subs, model_params)
+            model_params = jax.tree_map(lambda x: jnp.mean(x, axis=0), model_params)
             carry = (key, policy_params, hyperparams, subs, model_params)
             return carry, log
@@ -625,14 +533,15 @@ class JaxRDDLCompiler:
                                          subs, model_params):
             start = (key, policy_params, hyperparams, subs, model_params)
             steps = jnp.arange(n_steps)
-            _, log = jax.lax.scan(_jax_wrapped_batched_step_policy, start, steps)
+            end, log = jax.lax.scan(_jax_wrapped_batched_step_policy, start, steps)
             log = jax.tree_map(partial(jnp.swapaxes, axis1=0, axis2=1), log)
-            return log
+            model_params = end[-1]
+            return log, model_params
         return _jax_wrapped_batched_rollout
     # ===========================================================================
-    # error checks
+    # error checks and prints
     # ===========================================================================
     def print_jax(self) -> Dict[str, Any]:
@@ -640,19 +549,30 @@ class JaxRDDLCompiler:
         Jax compiled expressions from the RDDL file.
         '''
         subs = self.init_values
-        params = self.model_params
+        init_params = self.model_params
         key = jax.random.PRNGKey(42)
-        printed = {}
-        printed['cpfs'] = {name: str(jax.make_jaxpr(expr)(subs, params, key))
-                           for (name, expr) in self.cpfs.items()}
-        printed['reward'] = str(jax.make_jaxpr(self.reward)(subs, params, key))
-        printed['invariants'] = [str(jax.make_jaxpr(expr)(subs, params, key))
-                                 for expr in self.invariants]
-        printed['preconditions'] = [str(jax.make_jaxpr(expr)(subs, params, key))
-                                    for expr in self.preconditions]
-        printed['terminations'] = [str(jax.make_jaxpr(expr)(subs, params, key))
-                                   for expr in self.terminations]
+        printed = {
+            'cpfs': {name: str(jax.make_jaxpr(expr)(subs, init_params, key))
+                     for (name, expr) in self.cpfs.items()},
+            'reward': str(jax.make_jaxpr(self.reward)(subs, init_params, key)),
+            'invariants': [str(jax.make_jaxpr(expr)(subs, init_params, key))
+                           for expr in self.invariants],
+            'preconditions': [str(jax.make_jaxpr(expr)(subs, init_params, key))
+                              for expr in self.preconditions],
+            'terminations': [str(jax.make_jaxpr(expr)(subs, init_params, key))
+                             for expr in self.terminations]
+        }
         return printed
+    def model_parameter_info(self) -> Dict[str, Dict[str, Any]]:
+        '''Returns a dictionary of additional information about model
+        parameters.'''
+        result = {}
+        for (id, value) in self.model_params.items():
+            expr_id = int(str(id).split('_')[0])
+            expr = self.traced.lookup(expr_id)
+            result[id] = {'id': expr_id, 'rddl_op': ' '.join(expr.etype), 'init_value': value}
+        return result
     @staticmethod
     def _check_valid_op(expr, valid_ops):
@@ -661,8 +581,7 @@ class JaxRDDLCompiler:
             valid_op_str = ','.join(valid_ops.keys())
             raise RDDLNotImplementedError(
                 f'{etype} operator {op} is not supported: '
-                f'must be in {valid_op_str}.\n' +
-                print_stack_trace(expr))
+                f'must be in {valid_op_str}.\n' + print_stack_trace(expr))
     @staticmethod
     def _check_num_args(expr, required_args):
@@ -671,8 +590,7 @@ class JaxRDDLCompiler:
             etype, op = expr.etype
             raise RDDLInvalidNumberOfArgumentsError(
                 f'{etype} operator {op} requires {required_args} arguments, '
-                f'got {actual_args}.\n' +
-                print_stack_trace(expr))
+                f'got {actual_args}.\n' + print_stack_trace(expr))
     ERROR_CODES = {
         'NORMAL': 0,
@@ -749,73 +667,34 @@ class JaxRDDLCompiler:
         messages = [JaxRDDLCompiler.INVERSE_ERROR_CODES[i] for i in codes]
         return messages
-    # ===========================================================================
-    # handling of auxiliary data (e.g. model tuning parameters)
-    # ===========================================================================
-    def _unwrap(self, op, expr_id, info):
-        jax_op, name = op, None
-        model_params, relaxed_list = info
-        if isinstance(op, tuple):
-            jax_op, param = op
-            if param is not None:
-                tags, values = param
-                sep = JaxRDDLCompiler.MODEL_PARAM_TAG_SEPARATOR
-                if isinstance(tags, tuple):
-                    name = sep.join(tags)
-                else:
-                    name = str(tags)
-                name = f'{name}{sep}{expr_id}'
-                if name in model_params:
-                    raise RuntimeError(
-                        f'Internal error: model parameter {name} is already defined.')
-                model_params[name] = (values, tags, expr_id, jax_op.__name__)
-            relaxed_list.append((param, expr_id, jax_op.__name__))
-        return jax_op, name
-    def summarize_model_relaxations(self) -> str:
-        '''Returns a string of information about model relaxations in the
-        compiled model.'''
-        occurence_by_type = {}
-        for (_, expr_id, jax_op) in self.relaxations:
-            etype = self.traced.lookup(expr_id).etype
-            source = f'{etype[1]} ({etype[0]})'
-            sub = f'{source:<30} --> {jax_op}'
-            occurence_by_type[sub] = occurence_by_type.get(sub, 0) + 1
-        col = "{:<80} {:<10}\n"
-        table = col.format('Substitution', 'Count')
-        for (sub, occurs) in occurence_by_type.items():
-            table += col.format(sub, occurs)
-        return table
     # ===========================================================================
     # expression compilation
     # ===========================================================================
-    def _jax(self, expr, info, dtype=None):
+    def _jax(self, expr, init_params, dtype=None):
         etype, _ = expr.etype
         if etype == 'constant':
-            jax_expr = self._jax_constant(expr, info)
+            jax_expr = self._jax_constant(expr, init_params)
         elif etype == 'pvar':
-            jax_expr = self._jax_pvar(expr, info)
+            jax_expr = self._jax_pvar(expr, init_params)
         elif etype == 'arithmetic':
-            jax_expr = self._jax_arithmetic(expr, info)
+            jax_expr = self._jax_arithmetic(expr, init_params)
         elif etype == 'relational':
-            jax_expr = self._jax_relational(expr, info)
+            jax_expr = self._jax_relational(expr, init_params)
         elif etype == 'boolean':
-            jax_expr = self._jax_logical(expr, info)
+            jax_expr = self._jax_logical(expr, init_params)
         elif etype == 'aggregation':
-            jax_expr = self._jax_aggregation(expr, info)
+            jax_expr = self._jax_aggregation(expr, init_params)
         elif etype == 'func':
-            jax_expr = self._jax_functional(expr, info)
+            jax_expr = self._jax_functional(expr, init_params)
         elif etype == 'control':
-            jax_expr = self._jax_control(expr, info)
+            jax_expr = self._jax_control(expr, init_params)
         elif etype == 'randomvar':
-            jax_expr = self._jax_random(expr, info)
+            jax_expr = self._jax_random(expr, init_params)
         elif etype == 'randomvector':
-            jax_expr = self._jax_random_vector(expr, info)
+            jax_expr = self._jax_random_vector(expr, init_params)
         elif etype == 'matrix':
-            jax_expr = self._jax_matrix(expr, info)
+            jax_expr = self._jax_matrix(expr, init_params)
         else:
             raise RDDLNotImplementedError(
                 f'Internal error: expression type {expr} is not supported.\n' +
@@ -831,13 +710,14 @@ class JaxRDDLCompiler:
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_CAST']
         def _jax_wrapped_cast(x, params, key):
-            val, key, err = jax_expr(x, params, key)
+            val, key, err, params = jax_expr(x, params, key)
             sample = jnp.asarray(val, dtype=dtype)
             invalid_cast = jnp.logical_and(
                 jnp.logical_not(jnp.can_cast(val, dtype)),
-                jnp.any(sample != val))
+                jnp.any(sample != val)
+            )
             err |= (invalid_cast * ERR)
-            return sample, key, err
+            return sample, key, err, params
         return _jax_wrapped_cast
@@ -856,13 +736,13 @@ class JaxRDDLCompiler:
     # leaves
     # ===========================================================================
-    def _jax_constant(self, expr, info):
+    def _jax_constant(self, expr, init_params):
         NORMAL = JaxRDDLCompiler.ERROR_CODES['NORMAL']
         cached_value = self.traced.cached_sim_info(expr)
         def _jax_wrapped_constant(x, params, key):
             sample = jnp.asarray(cached_value, dtype=self._fix_dtype(cached_value))
-            return sample, key, NORMAL
+            return sample, key, NORMAL, params
         return _jax_wrapped_constant
@@ -870,11 +750,11 @@ class JaxRDDLCompiler:
         NORMAL = JaxRDDLCompiler.ERROR_CODES['NORMAL']
         def _jax_wrapped_pvar_slice(x, params, key):
-            return _slice, key, NORMAL
+            return _slice, key, NORMAL, params
         return _jax_wrapped_pvar_slice
-    def _jax_pvar(self, expr, info):
+    def _jax_pvar(self, expr, init_params):
         NORMAL = JaxRDDLCompiler.ERROR_CODES['NORMAL']
         var, pvars = expr.args
         is_value, cached_info = self.traced.cached_sim_info(expr)
@@ -886,7 +766,7 @@ class JaxRDDLCompiler:
             def _jax_wrapped_object(x, params, key):
                 sample = jnp.asarray(cached_value, dtype=self._fix_dtype(cached_value))
-                return sample, key, NORMAL
+                return sample, key, NORMAL, params
             return _jax_wrapped_object
@@ -896,7 +776,7 @@ class JaxRDDLCompiler:
             def _jax_wrapped_pvar_scalar(x, params, key):
                 value = x[var]
                 sample = jnp.asarray(value, dtype=self._fix_dtype(value))
-                return sample, key, NORMAL
+                return sample, key, NORMAL, params
             return _jax_wrapped_pvar_scalar
@@ -907,7 +787,7 @@ class JaxRDDLCompiler:
             # compile nested expressions
             if slices and op_code == RDDLObjectsTracer.NUMPY_OP_CODE.NESTED_SLICE:
-                jax_nested_expr = [(self._jax(arg, info)
+                jax_nested_expr = [(self._jax(arg, init_params)
                                     if _slice is None
                                     else self._jax_pvar_slice(_slice))
                                    for (arg, _slice) in zip(pvars, slices)]
@@ -918,11 +798,11 @@ class JaxRDDLCompiler:
                     sample = jnp.asarray(value, dtype=self._fix_dtype(value))
                     new_slices = [None] * len(jax_nested_expr)
                     for (i, jax_expr) in enumerate(jax_nested_expr):
-                        new_slices[i], key, err = jax_expr(x, params, key)
+                        new_slices[i], key, err, params = jax_expr(x, params, key)
                         error |= err
                     new_slices = tuple(new_slices)
                     sample = sample[new_slices]
-                    return sample, key, error
+                    return sample, key, error, params
                 return _jax_wrapped_pvar_tensor_nested
@@ -941,7 +821,7 @@ class JaxRDDLCompiler:
                         sample = jnp.einsum(sample, *op_args)
                     elif op_code == RDDLObjectsTracer.NUMPY_OP_CODE.TRANSPOSE:
                         sample = jnp.transpose(sample, axes=op_args)
-                    return sample, key, NORMAL
+                    return sample, key, NORMAL, params
                 return _jax_wrapped_pvar_tensor_non_nested
@@ -949,46 +829,43 @@ class JaxRDDLCompiler:
     # mathematical
     # ===========================================================================
-    def _jax_unary(self, jax_expr, jax_op, jax_param,
-                   at_least_int=False, check_dtype=None):
+    def _jax_unary(self, jax_expr, jax_op, at_least_int=False, check_dtype=None):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_CAST']
         def _jax_wrapped_unary_op(x, params, key):
-            sample, key, err = jax_expr(x, params, key)
+            sample, key, err, params = jax_expr(x, params, key)
             if at_least_int:
                 sample = self.ONE * sample
-            param = params.get(jax_param, None)
-            sample = jax_op(sample, param)
+            sample, params = jax_op(sample, params)
             if check_dtype is not None:
                 invalid_cast = jnp.logical_not(jnp.can_cast(sample, check_dtype))
                 err |= (invalid_cast * ERR)
-            return sample, key, err
+            return sample, key, err, params
         return _jax_wrapped_unary_op
-    def _jax_binary(self, jax_lhs, jax_rhs, jax_op, jax_param,
-                    at_least_int=False, check_dtype=None):
+    def _jax_binary(self, jax_lhs, jax_rhs, jax_op, at_least_int=False, check_dtype=None):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_CAST']
         def _jax_wrapped_binary_op(x, params, key):
-            sample1, key, err1 = jax_lhs(x, params, key)
-            sample2, key, err2 = jax_rhs(x, params, key)
+            sample1, key, err1, params = jax_lhs(x, params, key)
+            sample2, key, err2, params = jax_rhs(x, params, key)
             if at_least_int:
                 sample1 = self.ONE * sample1
                 sample2 = self.ONE * sample2
-            param = params.get(jax_param, None)
-            sample = jax_op(sample1, sample2, param)
+            sample, params = jax_op(sample1, sample2, params)
             err = err1 | err2
             if check_dtype is not None:
                 invalid_cast = jnp.logical_not(jnp.logical_and(
                     jnp.can_cast(sample1, check_dtype),
-                    jnp.can_cast(sample2, check_dtype)))
+                    jnp.can_cast(sample2, check_dtype))
+                )
                 err |= (invalid_cast * ERR)
-            return sample, key, err
+            return sample, key, err, params
         return _jax_wrapped_binary_op
-    def _jax_arithmetic(self, expr, info):
+    def _jax_arithmetic(self, expr, init_params):
         _, op = expr.etype
         # if expression is non-fluent, always use the exact operation
@@ -1005,22 +882,21 @@ class JaxRDDLCompiler:
         n = len(args)
         if n == 1 and op == '-':
             arg, = args
-            jax_expr = self._jax(arg, info)
-            jax_op, jax_param = self._unwrap(negative_op, expr.id, info)
-            return self._jax_unary(jax_expr, jax_op, jax_param, at_least_int=True)
+            jax_expr = self._jax(arg, init_params)
+            jax_op = negative_op(expr.id, init_params)
+            return self._jax_unary(jax_expr, jax_op, at_least_int=True)
         elif n == 2 or (n >= 2 and op in {'*', '+'}):
-            jax_exprs = [self._jax(arg, info) for arg in args]
-            jax_op, jax_param = self._unwrap(valid_ops[op], expr.id, info)
+            jax_exprs = [self._jax(arg, init_params) for arg in args]
             result = jax_exprs[0]
-            for jax_rhs in jax_exprs[1:]:
-                result = self._jax_binary(
-                    result, jax_rhs, jax_op, jax_param, at_least_int=True)
+            for i, jax_rhs in enumerate(jax_exprs[1:]):
+                jax_op = valid_ops[op](f'{expr.id}_{op}{i}', init_params)
+                result = self._jax_binary(result, jax_rhs, jax_op, at_least_int=True)
             return result
         JaxRDDLCompiler._check_num_args(expr, 2)
-    def _jax_relational(self, expr, info):
+    def _jax_relational(self, expr, init_params):
         _, op = expr.etype
         # if expression is non-fluent, always use the exact operation
@@ -1029,17 +905,16 @@ class JaxRDDLCompiler:
         else:
             valid_ops = self.RELATIONAL_OPS
         JaxRDDLCompiler._check_valid_op(expr, valid_ops)
-        jax_op, jax_param = self._unwrap(valid_ops[op], expr.id, info)
         # recursively compile arguments
         JaxRDDLCompiler._check_num_args(expr, 2)
         lhs, rhs = expr.args
-        jax_lhs = self._jax(lhs, info)
-        jax_rhs = self._jax(rhs, info)
-        return self._jax_binary(
-            jax_lhs, jax_rhs, jax_op, jax_param, at_least_int=True)
+        jax_lhs = self._jax(lhs, init_params)
+        jax_rhs = self._jax(rhs, init_params)
+        jax_op = valid_ops[op](expr.id, init_params)
+        return self._jax_binary(jax_lhs, jax_rhs, jax_op, at_least_int=True)
-    def _jax_logical(self, expr, info):
+    def _jax_logical(self, expr, init_params):
         _, op = expr.etype
         # if expression is non-fluent, always use the exact operation
@@ -1056,22 +931,21 @@ class JaxRDDLCompiler:
         n = len(args)
         if n == 1 and op == '~':
             arg, = args
-            jax_expr = self._jax(arg, info)
-            jax_op, jax_param = self._unwrap(logical_not_op, expr.id, info)
-            return self._jax_unary(jax_expr, jax_op, jax_param, check_dtype=bool)
+            jax_expr = self._jax(arg, init_params)
+            jax_op = logical_not_op(expr.id, init_params)
+            return self._jax_unary(jax_expr, jax_op, check_dtype=bool)
         elif n == 2 or (n >= 2 and op in {'^', '&', '|'}):
-            jax_exprs = [self._jax(arg, info) for arg in args]
-            jax_op, jax_param = self._unwrap(valid_ops[op], expr.id, info)
+            jax_exprs = [self._jax(arg, init_params) for arg in args]
             result = jax_exprs[0]
-            for jax_rhs in jax_exprs[1:]:
-                result = self._jax_binary(
-                    result, jax_rhs, jax_op, jax_param, check_dtype=bool)
+            for i, jax_rhs in enumerate(jax_exprs[1:]):
+                jax_op = valid_ops[op](f'{expr.id}_{op}{i}', init_params)
+                result = self._jax_binary(result, jax_rhs, jax_op, check_dtype=bool)
             return result
         JaxRDDLCompiler._check_num_args(expr, 2)
-    def _jax_aggregation(self, expr, info):
+    def _jax_aggregation(self, expr, init_params):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_CAST']
         _, op = expr.etype
@@ -1081,28 +955,27 @@ class JaxRDDLCompiler:
         else:
             valid_ops = self.AGGREGATION_OPS
         JaxRDDLCompiler._check_valid_op(expr, valid_ops)
-        jax_op, jax_param = self._unwrap(valid_ops[op], expr.id, info)
+        is_floating = op not in self.AGGREGATION_BOOL
         # recursively compile arguments
-        is_floating = op not in self.AGGREGATION_BOOL
         * _, arg = expr.args
         _, axes = self.traced.cached_sim_info(expr)
-        jax_expr = self._jax(arg, info)
+        jax_expr = self._jax(arg, init_params)
+        jax_op = valid_ops[op](expr.id, init_params)
         def _jax_wrapped_aggregation(x, params, key):
-            sample, key, err = jax_expr(x, params, key)
+            sample, key, err, params = jax_expr(x, params, key)
             if is_floating:
                 sample = self.ONE * sample
             else:
                 invalid_cast = jnp.logical_not(jnp.can_cast(sample, bool))
                 err |= (invalid_cast * ERR)
-            param = params.get(jax_param, None)
-            sample = jax_op(sample, axis=axes, param=param)
-            return sample, key, err
+            sample, params = jax_op(sample, axis=axes, params=params)
+            return sample, key, err, params
         return _jax_wrapped_aggregation
-    def _jax_functional(self, expr, info):
+    def _jax_functional(self, expr, init_params):
         _, op = expr.etype
         # if expression is non-fluent, always use the exact operation
@@ -1117,39 +990,36 @@ class JaxRDDLCompiler:
         if op in unary_ops:
             JaxRDDLCompiler._check_num_args(expr, 1)
             arg, = expr.args
-            jax_expr = self._jax(arg, info)
-            jax_op, jax_param = self._unwrap(unary_ops[op], expr.id, info)
-            return self._jax_unary(jax_expr, jax_op, jax_param, at_least_int=True)
+            jax_expr = self._jax(arg, init_params)
+            jax_op = unary_ops[op](expr.id, init_params)
+            return self._jax_unary(jax_expr, jax_op, at_least_int=True)
         elif op in binary_ops:
             JaxRDDLCompiler._check_num_args(expr, 2)
             lhs, rhs = expr.args
-            jax_lhs = self._jax(lhs, info)
-            jax_rhs = self._jax(rhs, info)
-            jax_op, jax_param = self._unwrap(binary_ops[op], expr.id, info)
-            return self._jax_binary(
-                jax_lhs, jax_rhs, jax_op, jax_param, at_least_int=True)
+            jax_lhs = self._jax(lhs, init_params)
+            jax_rhs = self._jax(rhs, init_params)
+            jax_op = binary_ops[op](expr.id, init_params)
+            return self._jax_binary(jax_lhs, jax_rhs, jax_op, at_least_int=True)
         raise RDDLNotImplementedError(
-            f'Function {op} is not supported.\n' +
-            print_stack_trace(expr))
+            f'Function {op} is not supported.\n' + print_stack_trace(expr))
     # ===========================================================================
     # control flow
     # ===========================================================================
-    def _jax_control(self, expr, info):
+    def _jax_control(self, expr, init_params):
         _, op = expr.etype
         if op == 'if':
-            return self._jax_if(expr, info)
+            return self._jax_if(expr, init_params)
         elif op == 'switch':
-            return self._jax_switch(expr, info)
+            return self._jax_switch(expr, init_params)
         raise RDDLNotImplementedError(
-            f'Control operator {op} is not supported.\n' +
-            print_stack_trace(expr))
+            f'Control operator {op} is not supported.\n' + print_stack_trace(expr))
-    def _jax_if(self, expr, info):
+    def _jax_if(self, expr, init_params):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_CAST']
         JaxRDDLCompiler._check_num_args(expr, 3)
         pred, if_true, if_false = expr.args
@@ -1159,27 +1029,26 @@ class JaxRDDLCompiler:
             if_op = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_IF
         else:
             if_op = self.IF_HELPER
-        jax_if, jax_param = self._unwrap(if_op, expr.id, info)
+        jax_op = if_op(expr.id, init_params)
         # recursively compile arguments
-        jax_pred = self._jax(pred, info)
-        jax_true = self._jax(if_true, info)
-        jax_false = self._jax(if_false, info)
+        jax_pred = self._jax(pred, init_params)
+        jax_true = self._jax(if_true, init_params)
+        jax_false = self._jax(if_false, init_params)
         def _jax_wrapped_if_then_else(x, params, key):
-            sample1, key, err1 = jax_pred(x, params, key)
-            sample2, key, err2 = jax_true(x, params, key)
-            sample3, key, err3 = jax_false(x, params, key)
-            param = params.get(jax_param, None)
-            sample = jax_if(sample1, sample2, sample3, param)
+            sample1, key, err1, params = jax_pred(x, params, key)
+            sample2, key, err2, params = jax_true(x, params, key)
+            sample3, key, err3, params = jax_false(x, params, key)
+            sample, params = jax_op(sample1, sample2, sample3, params)
             err = err1 | err2 | err3
             invalid_cast = jnp.logical_not(jnp.can_cast(sample1, bool))
             err |= (invalid_cast * ERR)
-            return sample, key, err
+            return sample, key, err, params
         return _jax_wrapped_if_then_else
-    def _jax_switch(self, expr, info):
+    def _jax_switch(self, expr, init_params):
         pred, *_ = expr.args
         # if predicate is non-fluent, always use the exact operation
@@ -1188,34 +1057,33 @@ class JaxRDDLCompiler:
             switch_op = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_SWITCH
         else:
             switch_op = self.SWITCH_HELPER
-        jax_switch, jax_param = self._unwrap(switch_op, expr.id, info)
+        jax_op = switch_op(expr.id, init_params)
         # recursively compile predicate
-        jax_pred = self._jax(pred, info)
+        jax_pred = self._jax(pred, init_params)
         # recursively compile cases
         cases, default = self.traced.cached_sim_info(expr)
-        jax_default = None if default is None else self._jax(default, info)
-        jax_cases = [(jax_default if _case is None else self._jax(_case, info))
+        jax_default = None if default is None else self._jax(default, init_params)
+        jax_cases = [(jax_default if _case is None else self._jax(_case, init_params))
                      for _case in cases]
         def _jax_wrapped_switch(x, params, key):
             # sample predicate
-            sample_pred, key, err = jax_pred(x, params, key)
+            sample_pred, key, err, params = jax_pred(x, params, key)
             # sample cases
             sample_cases = [None] * len(jax_cases)
             for (i, jax_case) in enumerate(jax_cases):
-                sample_cases[i], key, err_case = jax_case(x, params, key)
+                sample_cases[i], key, err_case, params = jax_case(x, params, key)
                 err |= err_case
             sample_cases = jnp.asarray(
                 sample_cases, dtype=self._fix_dtype(sample_cases))
             # predicate (enum) is an integer - use it to extract from case array
-            param = params.get(jax_param, None)
-            sample = jax_switch(sample_pred, sample_cases, param)
-            return sample, key, err
+            sample, params = jax_op(sample_pred, sample_cases, params)
+            return sample, key, err, params
         return _jax_wrapped_switch
@@ -1251,169 +1119,169 @@ class JaxRDDLCompiler:
     # Geometric: (implement safe floor)
     # Student: (no reparameterization)
-    def _jax_random(self, expr, info):
+    def _jax_random(self, expr, init_params):
         _, name = expr.etype
         if name == 'KronDelta':
-            return self._jax_kron(expr, info)
+            return self._jax_kron(expr, init_params)
         elif name == 'DiracDelta':
-            return self._jax_dirac(expr, info)
+            return self._jax_dirac(expr, init_params)
         elif name == 'Uniform':
-            return self._jax_uniform(expr, info)
+            return self._jax_uniform(expr, init_params)
         elif name == 'Bernoulli':
-            return self._jax_bernoulli(expr, info)
+            return self._jax_bernoulli(expr, init_params)
         elif name == 'Normal':
-            return self._jax_normal(expr, info)
+            return self._jax_normal(expr, init_params)
         elif name == 'Poisson':
-            return self._jax_poisson(expr, info)
+            return self._jax_poisson(expr, init_params)
         elif name == 'Exponential':
-            return self._jax_exponential(expr, info)
+            return self._jax_exponential(expr, init_params)
         elif name == 'Weibull':
-            return self._jax_weibull(expr, info)
+            return self._jax_weibull(expr, init_params)
         elif name == 'Gamma':
-            return self._jax_gamma(expr, info)
+            return self._jax_gamma(expr, init_params)
         elif name == 'Binomial':
-            return self._jax_binomial(expr, info)
+            return self._jax_binomial(expr, init_params)
         elif name == 'NegativeBinomial':
-            return self._jax_negative_binomial(expr, info)
+            return self._jax_negative_binomial(expr, init_params)
         elif name == 'Beta':
-            return self._jax_beta(expr, info)
+            return self._jax_beta(expr, init_params)
         elif name == 'Geometric':
-            return self._jax_geometric(expr, info)
+            return self._jax_geometric(expr, init_params)
         elif name == 'Pareto':
-            return self._jax_pareto(expr, info)
+            return self._jax_pareto(expr, init_params)
         elif name == 'Student':
-            return self._jax_student(expr, info)
+            return self._jax_student(expr, init_params)
         elif name == 'Gumbel':
-            return self._jax_gumbel(expr, info)
+            return self._jax_gumbel(expr, init_params)
         elif name == 'Laplace':
-            return self._jax_laplace(expr, info)
+            return self._jax_laplace(expr, init_params)
         elif name == 'Cauchy':
-            return self._jax_cauchy(expr, info)
+            return self._jax_cauchy(expr, init_params)
         elif name == 'Gompertz':
-            return self._jax_gompertz(expr, info)
+            return self._jax_gompertz(expr, init_params)
         elif name == 'ChiSquare':
-            return self._jax_chisquare(expr, info)
+            return self._jax_chisquare(expr, init_params)
         elif name == 'Kumaraswamy':
-            return self._jax_kumaraswamy(expr, info)
+            return self._jax_kumaraswamy(expr, init_params)
         elif name == 'Discrete':
-            return self._jax_discrete(expr, info, unnorm=False)
+            return self._jax_discrete(expr, init_params, unnorm=False)
         elif name == 'UnnormDiscrete':
-            return self._jax_discrete(expr, info, unnorm=True)
+            return self._jax_discrete(expr, init_params, unnorm=True)
         elif name == 'Discrete(p)':
-            return self._jax_discrete_pvar(expr, info, unnorm=False)
+            return self._jax_discrete_pvar(expr, init_params, unnorm=False)
         elif name == 'UnnormDiscrete(p)':
-            return self._jax_discrete_pvar(expr, info, unnorm=True)
+            return self._jax_discrete_pvar(expr, init_params, unnorm=True)
         else:
             raise RDDLNotImplementedError(
                 f'Distribution {name} is not supported.\n' +
                 print_stack_trace(expr))
-    def _jax_kron(self, expr, info):
+    def _jax_kron(self, expr, init_params):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_KRON_DELTA']
         JaxRDDLCompiler._check_num_args(expr, 1)
         arg, = expr.args
-        arg = self._jax(arg, info)
+        arg = self._jax(arg, init_params)
         # just check that the sample can be cast to int
         def _jax_wrapped_distribution_kron(x, params, key):
-            sample, key, err = arg(x, params, key)
+            sample, key, err, params = arg(x, params, key)
             invalid_cast = jnp.logical_not(jnp.can_cast(sample, self.INT))
             err |= (invalid_cast * ERR)
-            return sample, key, err
+            return sample, key, err, params
         return _jax_wrapped_distribution_kron
-    def _jax_dirac(self, expr, info):
+    def _jax_dirac(self, expr, init_params):
         JaxRDDLCompiler._check_num_args(expr, 1)
         arg, = expr.args
-        arg = self._jax(arg, info, dtype=self.REAL)
+        arg = self._jax(arg, init_params, dtype=self.REAL)
         return arg
-    def _jax_uniform(self, expr, info):
+    def _jax_uniform(self, expr, init_params):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_UNIFORM']
         JaxRDDLCompiler._check_num_args(expr, 2)
         arg_lb, arg_ub = expr.args
-        jax_lb = self._jax(arg_lb, info)
-        jax_ub = self._jax(arg_ub, info)
+        jax_lb = self._jax(arg_lb, init_params)
+        jax_ub = self._jax(arg_ub, init_params)
         # reparameterization trick U(a, b) = a + (b - a) * U(0, 1)
         def _jax_wrapped_distribution_uniform(x, params, key):
-            lb, key, err1 = jax_lb(x, params, key)
-            ub, key, err2 = jax_ub(x, params, key)
+            lb, key, err1, params = jax_lb(x, params, key)
+            ub, key, err2, params = jax_ub(x, params, key)
             key, subkey = random.split(key)
             U = random.uniform(key=subkey, shape=jnp.shape(lb), dtype=self.REAL)
             sample = lb + (ub - lb) * U
             out_of_bounds = jnp.logical_not(jnp.all(lb <= ub))
             err = err1 | err2 | (out_of_bounds * ERR)
-            return sample, key, err
+            return sample, key, err, params
         return _jax_wrapped_distribution_uniform
-    def _jax_normal(self, expr, info):
+    def _jax_normal(self, expr, init_params):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_NORMAL']
         JaxRDDLCompiler._check_num_args(expr, 2)
         arg_mean, arg_var = expr.args
-        jax_mean = self._jax(arg_mean, info)
-        jax_var = self._jax(arg_var, info)
+        jax_mean = self._jax(arg_mean, init_params)
+        jax_var = self._jax(arg_var, init_params)
         # reparameterization trick N(m, s^2) = m + s * N(0, 1)
         def _jax_wrapped_distribution_normal(x, params, key):
-            mean, key, err1 = jax_mean(x, params, key)
-            var, key, err2 = jax_var(x, params, key)
+            mean, key, err1, params = jax_mean(x, params, key)
+            var, key, err2, params = jax_var(x, params, key)
             std = jnp.sqrt(var)
             key, subkey = random.split(key)
             Z = random.normal(key=subkey, shape=jnp.shape(mean), dtype=self.REAL)
             sample = mean + std * Z
             out_of_bounds = jnp.logical_not(jnp.all(var >= 0))
             err = err1 | err2 | (out_of_bounds * ERR)
-            return sample, key, err
+            return sample, key, err, params
         return _jax_wrapped_distribution_normal
-    def _jax_exponential(self, expr, info):
+    def _jax_exponential(self, expr, init_params):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_EXPONENTIAL']
         JaxRDDLCompiler._check_num_args(expr, 1)
         arg_scale, = expr.args
-        jax_scale = self._jax(arg_scale, info)
+        jax_scale = self._jax(arg_scale, init_params)
         # reparameterization trick Exp(s) = s * Exp(1)
         def _jax_wrapped_distribution_exp(x, params, key):
-            scale, key, err = jax_scale(x, params, key)
+            scale, key, err, params = jax_scale(x, params, key)
             key, subkey = random.split(key)
             Exp1 = random.exponential(
                 key=subkey, shape=jnp.shape(scale), dtype=self.REAL)
             sample = scale * Exp1
             out_of_bounds = jnp.logical_not(jnp.all(scale > 0))
             err |= (out_of_bounds * ERR)
-            return sample, key, err
+            return sample, key, err, params
         return _jax_wrapped_distribution_exp
-    def _jax_weibull(self, expr, info):
+    def _jax_weibull(self, expr, init_params):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_WEIBULL']
         JaxRDDLCompiler._check_num_args(expr, 2)
         arg_shape, arg_scale = expr.args
-        jax_shape = self._jax(arg_shape, info)
-        jax_scale = self._jax(arg_scale, info)
+        jax_shape = self._jax(arg_shape, init_params)
+        jax_scale = self._jax(arg_scale, init_params)
         # reparameterization trick W(s, r) = r * (-ln(1 - U(0, 1))) ** (1 / s)
         def _jax_wrapped_distribution_weibull(x, params, key):
-            shape, key, err1 = jax_shape(x, params, key)
-            scale, key, err2 = jax_scale(x, params, key)
+            shape, key, err1, params = jax_shape(x, params, key)
+            scale, key, err2, params = jax_scale(x, params, key)
             key, subkey = random.split(key)
             U = random.uniform(key=subkey, shape=jnp.shape(scale), dtype=self.REAL)
             sample = scale * jnp.power(-jnp.log(U), 1.0 / shape)
             out_of_bounds = jnp.logical_not(jnp.all((shape > 0) & (scale > 0)))
             err = err1 | err2 | (out_of_bounds * ERR)
-            return sample, key, err
+            return sample, key, err, params
         return _jax_wrapped_distribution_weibull
-    def _jax_bernoulli(self, expr, info):
+    def _jax_bernoulli(self, expr, init_params):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_BERNOULLI']
         JaxRDDLCompiler._check_num_args(expr, 1)
         arg_prob, = expr.args
@@ -1423,23 +1291,22 @@ class JaxRDDLCompiler:
             bern_op = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_BERNOULLI
         else:
             bern_op = self.BERNOULLI_HELPER
-        jax_bern, jax_param = self._unwrap(bern_op, expr.id, info)
+        jax_op = bern_op(expr.id, init_params)
         # recursively compile arguments
-        jax_prob = self._jax(arg_prob, info)
+        jax_prob = self._jax(arg_prob, init_params)
         def _jax_wrapped_distribution_bernoulli(x, params, key):
-            prob, key, err = jax_prob(x, params, key)
+            prob, key, err, params = jax_prob(x, params, key)
             key, subkey = random.split(key)
-            param = params.get(jax_param, None)
-            sample = jax_bern(subkey, prob, param)
+            sample, params = jax_op(subkey, prob, params)
             out_of_bounds = jnp.logical_not(jnp.all((prob >= 0) & (prob <= 1)))
             err |= (out_of_bounds * ERR)
-            return sample, key, err
+            return sample, key, err, params
         return _jax_wrapped_distribution_bernoulli
-    def _jax_poisson(self, expr, info):
+    def _jax_poisson(self, expr, init_params):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_POISSON']
         JaxRDDLCompiler._check_num_args(expr, 1)
         arg_rate, = expr.args
@@ -1449,57 +1316,57 @@ class JaxRDDLCompiler:
             poisson_op = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_POISSON
         else:
             poisson_op = self.POISSON_HELPER
-        jax_poisson, jax_param = self._unwrap(poisson_op, expr.id, info)
+        jax_op = poisson_op(expr.id, init_params)
         # recursively compile arguments
-        jax_rate = self._jax(arg_rate, info)
+        jax_rate = self._jax(arg_rate, init_params)
         # uses the implicit JAX subroutine
         def _jax_wrapped_distribution_poisson(x, params, key):
-            rate, key, err = jax_rate(x, params, key)
+            rate, key, err, params = jax_rate(x, params, key)
             key, subkey = random.split(key)
-            param = params.get(jax_param, None)
-            sample = jax_poisson(subkey, rate, param).astype(self.INT)
+            sample, params = jax_op(subkey, rate, params)
+            sample = sample.astype(self.INT)
             out_of_bounds = jnp.logical_not(jnp.all(rate >= 0))
             err |= (out_of_bounds * ERR)
-            return sample, key, err
+            return sample, key, err, params
         return _jax_wrapped_distribution_poisson
-    def _jax_gamma(self, expr, info):
+    def _jax_gamma(self, expr, init_params):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_GAMMA']
         JaxRDDLCompiler._check_num_args(expr, 2)
         arg_shape, arg_scale = expr.args
-        jax_shape = self._jax(arg_shape, info)
-        jax_scale = self._jax(arg_scale, info)
+        jax_shape = self._jax(arg_shape, init_params)
+        jax_scale = self._jax(arg_scale, init_params)
         # partial reparameterization trick Gamma(s, r) = r * Gamma(s, 1)
         # uses the implicit JAX subroutine for Gamma(s, 1)
         def _jax_wrapped_distribution_gamma(x, params, key):
-            shape, key, err1 = jax_shape(x, params, key)
-            scale, key, err2 = jax_scale(x, params, key)
+            shape, key, err1, params = jax_shape(x, params, key)
+            scale, key, err2, params = jax_scale(x, params, key)
             key, subkey = random.split(key)
             Gamma = random.gamma(key=subkey, a=shape, dtype=self.REAL)
             sample = scale * Gamma
             out_of_bounds = jnp.logical_not(jnp.all((shape > 0) & (scale > 0)))
             err = err1 | err2 | (out_of_bounds * ERR)
-            return sample, key, err
+            return sample, key, err, params
         return _jax_wrapped_distribution_gamma
-    def _jax_binomial(self, expr, info):
+    def _jax_binomial(self, expr, init_params):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_BINOMIAL']
         JaxRDDLCompiler._check_num_args(expr, 2)
         arg_trials, arg_prob = expr.args
-        jax_trials = self._jax(arg_trials, info)
-        jax_prob = self._jax(arg_prob, info)
+        jax_trials = self._jax(arg_trials, init_params)
+        jax_prob = self._jax(arg_prob, init_params)
         # uses the JAX substrate of tensorflow-probability
         def _jax_wrapped_distribution_binomial(x, params, key):
-            trials, key, err2 = jax_trials(x, params, key)
-            prob, key, err1 = jax_prob(x, params, key)
+            trials, key, err2, params = jax_trials(x, params, key)
+            prob, key, err1, params = jax_prob(x, params, key)
             trials = jnp.asarray(trials, dtype=self.REAL)
             prob = jnp.asarray(prob, dtype=self.REAL)
             key, subkey = random.split(key)
@@ -1508,55 +1375,56 @@ class JaxRDDLCompiler:
             out_of_bounds = jnp.logical_not(jnp.all(
                 (prob >= 0) & (prob <= 1) & (trials >= 0)))
             err = err1 | err2 | (out_of_bounds * ERR)
-            return sample, key, err
+            return sample, key, err, params
         return _jax_wrapped_distribution_binomial
-    def _jax_negative_binomial(self, expr, info):
+    def _jax_negative_binomial(self, expr, init_params):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_NEGATIVE_BINOMIAL']
         JaxRDDLCompiler._check_num_args(expr, 2)
         arg_trials, arg_prob = expr.args
-        jax_trials = self._jax(arg_trials, info)
-        jax_prob = self._jax(arg_prob, info)
+        jax_trials = self._jax(arg_trials, init_params)
+        jax_prob = self._jax(arg_prob, init_params)
         # uses the JAX substrate of tensorflow-probability
         def _jax_wrapped_distribution_negative_binomial(x, params, key):
-            trials, key, err2 = jax_trials(x, params, key)
-            prob, key, err1 = jax_prob(x, params, key)
+            trials, key, err2, params = jax_trials(x, params, key)
+            prob, key, err1, params = jax_prob(x, params, key)
             trials = jnp.asarray(trials, dtype=self.REAL)
             prob = jnp.asarray(prob, dtype=self.REAL)
             key, subkey = random.split(key)
             dist = tfp.distributions.NegativeBinomial(total_count=trials, probs=prob)
             sample = dist.sample(seed=subkey).astype(self.INT)
             out_of_bounds = jnp.logical_not(jnp.all(
-                (prob >= 0) & (prob <= 1) & (trials > 0)))
+                (prob >= 0) & (prob <= 1) & (trials > 0))
+            )
             err = err1 | err2 | (out_of_bounds * ERR)
-            return sample, key, err
+            return sample, key, err, params
         return _jax_wrapped_distribution_negative_binomial
-    def _jax_beta(self, expr, info):
+    def _jax_beta(self, expr, init_params):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_BETA']
         JaxRDDLCompiler._check_num_args(expr, 2)
         arg_shape, arg_rate = expr.args
-        jax_shape = self._jax(arg_shape, info)
-        jax_rate = self._jax(arg_rate, info)
+        jax_shape = self._jax(arg_shape, init_params)
+        jax_rate = self._jax(arg_rate, init_params)
         # uses the implicit JAX subroutine
         def _jax_wrapped_distribution_beta(x, params, key):
-            shape, key, err1 = jax_shape(x, params, key)
-            rate, key, err2 = jax_rate(x, params, key)
+            shape, key, err1, params = jax_shape(x, params, key)
+            rate, key, err2, params = jax_rate(x, params, key)
             key, subkey = random.split(key)
             sample = random.beta(key=subkey, a=shape, b=rate, dtype=self.REAL)
             out_of_bounds = jnp.logical_not(jnp.all((shape > 0) & (rate > 0)))
             err = err1 | err2 | (out_of_bounds * ERR)
-            return sample, key, err
+            return sample, key, err, params
         return _jax_wrapped_distribution_beta
-    def _jax_geometric(self, expr, info):
+    def _jax_geometric(self, expr, init_params):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_GEOMETRIC']
         JaxRDDLCompiler._check_num_args(expr, 1)
         arg_prob, = expr.args
@@ -1566,187 +1434,187 @@ class JaxRDDLCompiler:
             geom_op = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_GEOMETRIC
         else:
             geom_op = self.GEOMETRIC_HELPER
-        jax_geom, jax_param = self._unwrap(geom_op, expr.id, info)
+        jax_op = geom_op(expr.id, init_params)
         # recursively compile arguments
-        jax_prob = self._jax(arg_prob, info)
+        jax_prob = self._jax(arg_prob, init_params)
         def _jax_wrapped_distribution_geometric(x, params, key):
-            prob, key, err = jax_prob(x, params, key)
+            prob, key, err, params = jax_prob(x, params, key)
             key, subkey = random.split(key)
-            param = params.get(jax_param, None)
-            sample = jax_geom(subkey, prob, param).astype(self.INT)
+            sample, params = jax_op(subkey, prob, params)
+            sample = sample.astype(self.INT)
             out_of_bounds = jnp.logical_not(jnp.all((prob >= 0) & (prob <= 1)))
             err |= (out_of_bounds * ERR)
-            return sample, key, err
+            return sample, key, err, params
         return _jax_wrapped_distribution_geometric
-    def _jax_pareto(self, expr, info):
+    def _jax_pareto(self, expr, init_params):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_PARETO']
         JaxRDDLCompiler._check_num_args(expr, 2)
         arg_shape, arg_scale = expr.args
-        jax_shape = self._jax(arg_shape, info)
-        jax_scale = self._jax(arg_scale, info)
+        jax_shape = self._jax(arg_shape, init_params)
+        jax_scale = self._jax(arg_scale, init_params)
         # partial reparameterization trick Pareto(s, r) = r * Pareto(s, 1)
         # uses the implicit JAX subroutine for Pareto(s, 1)
         def _jax_wrapped_distribution_pareto(x, params, key):
-            shape, key, err1 = jax_shape(x, params, key)
-            scale, key, err2 = jax_scale(x, params, key)
+            shape, key, err1, params = jax_shape(x, params, key)
+            scale, key, err2, params = jax_scale(x, params, key)
             key, subkey = random.split(key)
             sample = scale * random.pareto(key=subkey, b=shape, dtype=self.REAL)
             out_of_bounds = jnp.logical_not(jnp.all((shape > 0) & (scale > 0)))
             err = err1 | err2 | (out_of_bounds * ERR)
-            return sample, key, err
+            return sample, key, err, params
         return _jax_wrapped_distribution_pareto
-    def _jax_student(self, expr, info):
+    def _jax_student(self, expr, init_params):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_STUDENT']
         JaxRDDLCompiler._check_num_args(expr, 1)
         arg_df, = expr.args
-        jax_df = self._jax(arg_df, info)
+        jax_df = self._jax(arg_df, init_params)
         # uses the implicit JAX subroutine for student(df)
         def _jax_wrapped_distribution_t(x, params, key):
-            df, key, err = jax_df(x, params, key)
+            df, key, err, params = jax_df(x, params, key)
             key, subkey = random.split(key)
             sample = random.t(
                 key=subkey, df=df, shape=jnp.shape(df), dtype=self.REAL)
             out_of_bounds = jnp.logical_not(jnp.all(df > 0))
             err |= (out_of_bounds * ERR)
-            return sample, key, err
+            return sample, key, err, params
         return _jax_wrapped_distribution_t
-    def _jax_gumbel(self, expr, info):
+    def _jax_gumbel(self, expr, init_params):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_GUMBEL']
         JaxRDDLCompiler._check_num_args(expr, 2)
         arg_mean, arg_scale = expr.args
-        jax_mean = self._jax(arg_mean, info)
-        jax_scale = self._jax(arg_scale, info)
+        jax_mean = self._jax(arg_mean, init_params)
+        jax_scale = self._jax(arg_scale, init_params)
         # reparameterization trick Gumbel(m, s) = m + s * Gumbel(0, 1)
         def _jax_wrapped_distribution_gumbel(x, params, key):
-            mean, key, err1 = jax_mean(x, params, key)
-            scale, key, err2 = jax_scale(x, params, key)
+            mean, key, err1, params = jax_mean(x, params, key)
+            scale, key, err2, params = jax_scale(x, params, key)
             key, subkey = random.split(key)
             Gumbel01 = random.gumbel(
                 key=subkey, shape=jnp.shape(mean), dtype=self.REAL)
             sample = mean + scale * Gumbel01
             out_of_bounds = jnp.logical_not(jnp.all(scale > 0))
             err = err1 | err2 | (out_of_bounds * ERR)
-            return sample, key, err
+            return sample, key, err, params
         return _jax_wrapped_distribution_gumbel
-    def _jax_laplace(self, expr, info):
+    def _jax_laplace(self, expr, init_params):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_LAPLACE']
         JaxRDDLCompiler._check_num_args(expr, 2)
         arg_mean, arg_scale = expr.args
-        jax_mean = self._jax(arg_mean, info)
-        jax_scale = self._jax(arg_scale, info)
+        jax_mean = self._jax(arg_mean, init_params)
+        jax_scale = self._jax(arg_scale, init_params)
         # reparameterization trick Laplace(m, s) = m + s * Laplace(0, 1)
         def _jax_wrapped_distribution_laplace(x, params, key):
-            mean, key, err1 = jax_mean(x, params, key)
-            scale, key, err2 = jax_scale(x, params, key)
+            mean, key, err1, params = jax_mean(x, params, key)
+            scale, key, err2, params = jax_scale(x, params, key)
             key, subkey = random.split(key)
             Laplace01 = random.laplace(
                 key=subkey, shape=jnp.shape(mean), dtype=self.REAL)
             sample = mean + scale * Laplace01
             out_of_bounds = jnp.logical_not(jnp.all(scale > 0))
             err = err1 | err2 | (out_of_bounds * ERR)
-            return sample, key, err
+            return sample, key, err, params
         return _jax_wrapped_distribution_laplace
-    def _jax_cauchy(self, expr, info):
+    def _jax_cauchy(self, expr, init_params):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_CAUCHY']
         JaxRDDLCompiler._check_num_args(expr, 2)
         arg_mean, arg_scale = expr.args
-        jax_mean = self._jax(arg_mean, info)
-        jax_scale = self._jax(arg_scale, info)
+        jax_mean = self._jax(arg_mean, init_params)
+        jax_scale = self._jax(arg_scale, init_params)
         # reparameterization trick Cauchy(m, s) = m + s * Cauchy(0, 1)
         def _jax_wrapped_distribution_cauchy(x, params, key):
-            mean, key, err1 = jax_mean(x, params, key)
-            scale, key, err2 = jax_scale(x, params, key)
+            mean, key, err1, params = jax_mean(x, params, key)
+            scale, key, err2, params = jax_scale(x, params, key)
             key, subkey = random.split(key)
             Cauchy01 = random.cauchy(
                 key=subkey, shape=jnp.shape(mean), dtype=self.REAL)
             sample = mean + scale * Cauchy01
             out_of_bounds = jnp.logical_not(jnp.all(scale > 0))
             err = err1 | err2 | (out_of_bounds * ERR)
-            return sample, key, err
+            return sample, key, err, params
         return _jax_wrapped_distribution_cauchy
-    def _jax_gompertz(self, expr, info):
+    def _jax_gompertz(self, expr, init_params):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_GOMPERTZ']
         JaxRDDLCompiler._check_num_args(expr, 2)
         arg_shape, arg_scale = expr.args
-        jax_shape = self._jax(arg_shape, info)
-        jax_scale = self._jax(arg_scale, info)
+        jax_shape = self._jax(arg_shape, init_params)
+        jax_scale = self._jax(arg_scale, init_params)
         # reparameterization trick Gompertz(s, r) = ln(1 - log(U(0, 1)) / s) / r
         def _jax_wrapped_distribution_gompertz(x, params, key):
-            shape, key, err1 = jax_shape(x, params, key)
-            scale, key, err2 = jax_scale(x, params, key)
+            shape, key, err1, params = jax_shape(x, params, key)
+            scale, key, err2, params = jax_scale(x, params, key)
             key, subkey = random.split(key)
             U = random.uniform(key=subkey, shape=jnp.shape(scale), dtype=self.REAL)
             sample = jnp.log(1.0 - jnp.log(U) / shape) / scale
             out_of_bounds = jnp.logical_not(jnp.all((shape > 0) & (scale > 0)))
             err = err1 | err2 | (out_of_bounds * ERR)
-            return sample, key, err
+            return sample, key, err, params
         return _jax_wrapped_distribution_gompertz
-    def _jax_chisquare(self, expr, info):
+    def _jax_chisquare(self, expr, init_params):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_CHISQUARE']
         JaxRDDLCompiler._check_num_args(expr, 1)
         arg_df, = expr.args
-        jax_df = self._jax(arg_df, info)
+        jax_df = self._jax(arg_df, init_params)
         # use the fact that ChiSquare(df) = Gamma(df/2, 2)
         def _jax_wrapped_distribution_chisquare(x, params, key):
-            df, key, err1 = jax_df(x, params, key)
+            df, key, err1, params = jax_df(x, params, key)
             key, subkey = random.split(key)
             shape = df / 2.0
             Gamma = random.gamma(key=subkey, a=shape, dtype=self.REAL)
             sample = 2.0 * Gamma
             out_of_bounds = jnp.logical_not(jnp.all(df > 0))
             err = err1 | (out_of_bounds * ERR)
-            return sample, key, err
+            return sample, key, err, params
         return _jax_wrapped_distribution_chisquare
-    def _jax_kumaraswamy(self, expr, info):
+    def _jax_kumaraswamy(self, expr, init_params):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_KUMARASWAMY']
         JaxRDDLCompiler._check_num_args(expr, 2)
         arg_a, arg_b = expr.args
-        jax_a = self._jax(arg_a, info)
-        jax_b = self._jax(arg_b, info)
+        jax_a = self._jax(arg_a, init_params)
+        jax_b = self._jax(arg_b, init_params)
         # uses the reparameterization K(a, b) = (1 - (1 - U(0, 1))^{1/b})^{1/a}
         def _jax_wrapped_distribution_kumaraswamy(x, params, key):
-            a, key, err1 = jax_a(x, params, key)
-            b, key, err2 = jax_b(x, params, key)
+            a, key, err1, params = jax_a(x, params, key)
+            b, key, err2, params = jax_b(x, params, key)
             key, subkey = random.split(key)
             U = random.uniform(key=subkey, shape=jnp.shape(a), dtype=self.REAL)
             sample = jnp.power(1.0 - jnp.power(U, 1.0 / b), 1.0 / a)
             out_of_bounds = jnp.logical_not(jnp.all((a > 0) & (b > 0)))
             err = err1 | err2 | (out_of_bounds * ERR)
-            return sample, key, err
+            return sample, key, err, params
         return _jax_wrapped_distribution_kumaraswamy
@@ -1754,7 +1622,7 @@ class JaxRDDLCompiler:
     # random variables with enum support
     # ===========================================================================
-    def _jax_discrete(self, expr, info, unnorm):
+    def _jax_discrete(self, expr, init_params, unnorm):
         NORMAL = JaxRDDLCompiler.ERROR_CODES['NORMAL']
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_DISCRETE']
         ordered_args = self.traced.cached_sim_info(expr)
@@ -1766,10 +1634,10 @@ class JaxRDDLCompiler:
             discrete_op = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_DISCRETE
         else:
             discrete_op = self.DISCRETE_HELPER
-        jax_discrete, jax_param = self._unwrap(discrete_op, expr.id, info)
+        jax_op = discrete_op(expr.id, init_params)
         # compile probability expressions
-        jax_probs = [self._jax(arg, info) for arg in ordered_args]
+        jax_probs = [self._jax(arg, init_params) for arg in ordered_args]
         def _jax_wrapped_distribution_discrete(x, params, key):
@@ -1777,7 +1645,7 @@ class JaxRDDLCompiler:
             error = NORMAL
             prob = [None] * len(jax_probs)
             for (i, jax_prob) in enumerate(jax_probs):
-                prob[i], key, error_pdf = jax_prob(x, params, key)
+                prob[i], key, error_pdf, params = jax_prob(x, params, key)
                 error |= error_pdf
             prob = jnp.stack(prob, axis=-1)
             if unnorm:
@@ -1786,17 +1654,17 @@ class JaxRDDLCompiler:
             # dispatch to sampling subroutine
             key, subkey = random.split(key)
-            param = params.get(jax_param, None)
-            sample = jax_discrete(subkey, prob, param)
+            sample, params = jax_op(subkey, prob, params)
             out_of_bounds = jnp.logical_not(jnp.logical_and(
                 jnp.all(prob >= 0),
-                jnp.allclose(jnp.sum(prob, axis=-1), 1.0)))
+                jnp.allclose(jnp.sum(prob, axis=-1), 1.0)
+            ))
             error |= (out_of_bounds * ERR)
-            return sample, key, error
+            return sample, key, error, params
         return _jax_wrapped_distribution_discrete
-    def _jax_discrete_pvar(self, expr, info, unnorm):
+    def _jax_discrete_pvar(self, expr, init_params, unnorm):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_DISCRETE']
         JaxRDDLCompiler._check_num_args(expr, 2)
         _, args = expr.args
@@ -1807,28 +1675,28 @@ class JaxRDDLCompiler:
             discrete_op = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_DISCRETE
         else:
             discrete_op = self.DISCRETE_HELPER
-        jax_discrete, jax_param = self._unwrap(discrete_op, expr.id, info)
+        jax_op = discrete_op(expr.id, init_params)
         # compile probability function
-        jax_probs = self._jax(arg, info)
+        jax_probs = self._jax(arg, init_params)
         def _jax_wrapped_distribution_discrete_pvar(x, params, key):
             # sample probabilities
-            prob, key, error = jax_probs(x, params, key)
+            prob, key, error, params = jax_probs(x, params, key)
             if unnorm:
                 normalizer = jnp.sum(prob, axis=-1, keepdims=True)
                 prob = prob / normalizer
             # dispatch to sampling subroutine
             key, subkey = random.split(key)
-            param = params.get(jax_param, None)
-            sample = jax_discrete(subkey, prob, param)
+            sample, params = jax_op(subkey, prob, params)
             out_of_bounds = jnp.logical_not(jnp.logical_and(
                 jnp.all(prob >= 0),
-                jnp.allclose(jnp.sum(prob, axis=-1), 1.0)))
+                jnp.allclose(jnp.sum(prob, axis=-1), 1.0)
+            ))
             error |= (out_of_bounds * ERR)
-            return sample, key, error
+            return sample, key, error, params
         return _jax_wrapped_distribution_discrete_pvar
@@ -1836,68 +1704,69 @@ class JaxRDDLCompiler:
     # random vectors
     # ===========================================================================
-    def _jax_random_vector(self, expr, info):
+    def _jax_random_vector(self, expr, init_params):
         _, name = expr.etype
         if name == 'MultivariateNormal':
-            return self._jax_multivariate_normal(expr, info)
+            return self._jax_multivariate_normal(expr, init_params)
         elif name == 'MultivariateStudent':
-            return self._jax_multivariate_student(expr, info)
+            return self._jax_multivariate_student(expr, init_params)
         elif name == 'Dirichlet':
-            return self._jax_dirichlet(expr, info)
+            return self._jax_dirichlet(expr, init_params)
         elif name == 'Multinomial':
-            return self._jax_multinomial(expr, info)
+            return self._jax_multinomial(expr, init_params)
         else:
             raise RDDLNotImplementedError(
                 f'Distribution {name} is not supported.\n' +
                 print_stack_trace(expr))
-    def _jax_multivariate_normal(self, expr, info):
+    def _jax_multivariate_normal(self, expr, init_params):
         _, args = expr.args
         mean, cov = args
-        jax_mean = self._jax(mean, info)
-        jax_cov = self._jax(cov, info)
+        jax_mean = self._jax(mean, init_params)
+        jax_cov = self._jax(cov, init_params)
         index, = self.traced.cached_sim_info(expr)
         # reparameterization trick MN(m, LL') = LZ + m, where Z ~ Normal(0, 1)
         def _jax_wrapped_distribution_multivariate_normal(x, params, key):
             # sample the mean and covariance
-            sample_mean, key, err1 = jax_mean(x, params, key)
-            sample_cov, key, err2 = jax_cov(x, params, key)
+            sample_mean, key, err1, params = jax_mean(x, params, key)
+            sample_cov, key, err2, params = jax_cov(x, params, key)
             # sample Normal(0, 1)
             key, subkey = random.split(key)
             Z = random.normal(
                 key=subkey,
                 shape=jnp.shape(sample_mean) + (1,),
-                dtype=self.REAL)
+                dtype=self.REAL
+            )
             # compute L s.t. cov = L * L' and reparameterize
             L = jnp.linalg.cholesky(sample_cov)
             sample = jnp.matmul(L, Z)[..., 0] + sample_mean
             sample = jnp.moveaxis(sample, source=-1, destination=index)
             err = err1 | err2
-            return sample, key, err
+            return sample, key, err, params
         return _jax_wrapped_distribution_multivariate_normal
-    def _jax_multivariate_student(self, expr, info):
+    def _jax_multivariate_student(self, expr, init_params):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_MULTIVARIATE_STUDENT']
         _, args = expr.args
         mean, cov, df = args
-        jax_mean = self._jax(mean, info)
-        jax_cov = self._jax(cov, info)
-        jax_df = self._jax(df, info)
+        jax_mean = self._jax(mean, init_params)
+        jax_cov = self._jax(cov, init_params)
+        jax_df = self._jax(df, init_params)
         index, = self.traced.cached_sim_info(expr)
         # reparameterization trick MN(m, LL') = LZ + m, where Z ~ StudentT(0, 1)
         def _jax_wrapped_distribution_multivariate_student(x, params, key):
             # sample the mean and covariance and degrees of freedom
-            sample_mean, key, err1 = jax_mean(x, params, key)
-            sample_cov, key, err2 = jax_cov(x, params, key)
-            sample_df, key, err3 = jax_df(x, params, key)
+            sample_mean, key, err1, params = jax_mean(x, params, key)
+            sample_cov, key, err2, params = jax_cov(x, params, key)
+            sample_df, key, err3, params = jax_df(x, params, key)
             out_of_bounds = jnp.logical_not(jnp.all(sample_df > 0))
             # sample StudentT(0, 1, df) -- broadcast df to same shape as cov
@@ -1908,50 +1777,51 @@ class JaxRDDLCompiler:
                 key=subkey,
                 df=sample_df,
                 shape=jnp.shape(sample_df),
-                dtype=self.REAL)
+                dtype=self.REAL
+            )
             # compute L s.t. cov = L * L' and reparameterize
             L = jnp.linalg.cholesky(sample_cov)
             sample = jnp.matmul(L, Z)[..., 0] + sample_mean
             sample = jnp.moveaxis(sample, source=-1, destination=index)
             error = err1 | err2 | err3 | (out_of_bounds * ERR)
-            return sample, key, error
+            return sample, key, error, params
         return _jax_wrapped_distribution_multivariate_student
-    def _jax_dirichlet(self, expr, info):
+    def _jax_dirichlet(self, expr, init_params):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_DIRICHLET']
         _, args = expr.args
         alpha, = args
-        jax_alpha = self._jax(alpha, info)
+        jax_alpha = self._jax(alpha, init_params)
         index, = self.traced.cached_sim_info(expr)
         # sample Gamma(alpha_i, 1) and normalize across i
         def _jax_wrapped_distribution_dirichlet(x, params, key):
-            alpha, key, error = jax_alpha(x, params, key)
+            alpha, key, error, params = jax_alpha(x, params, key)
             out_of_bounds = jnp.logical_not(jnp.all(alpha > 0))
             error |= (out_of_bounds * ERR)
             key, subkey = random.split(key)
             Gamma = random.gamma(key=subkey, a=alpha, dtype=self.REAL)
             sample = Gamma / jnp.sum(Gamma, axis=-1, keepdims=True)
             sample = jnp.moveaxis(sample, source=-1, destination=index)
-            return sample, key, error
+            return sample, key, error, params
         return _jax_wrapped_distribution_dirichlet
-    def _jax_multinomial(self, expr, info):
+    def _jax_multinomial(self, expr, init_params):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_MULTINOMIAL']
         _, args = expr.args
         trials, prob = args
-        jax_trials = self._jax(trials, info)
-        jax_prob = self._jax(prob, info)
+        jax_trials = self._jax(trials, init_params)
+        jax_prob = self._jax(prob, init_params)
         index, = self.traced.cached_sim_info(expr)
         def _jax_wrapped_distribution_multinomial(x, params, key):
-            trials, key, err1 = jax_trials(x, params, key)
-            prob, key, err2 = jax_prob(x, params, key)
+            trials, key, err1, params = jax_trials(x, params, key)
+            prob, key, err2, params = jax_prob(x, params, key)
             trials = jnp.asarray(trials, dtype=self.REAL)
             prob = jnp.asarray(prob, dtype=self.REAL)
             key, subkey = random.split(key)
@@ -1961,9 +1831,10 @@ class JaxRDDLCompiler:
             out_of_bounds = jnp.logical_not(jnp.all(
                 (prob >= 0)
                 & jnp.allclose(jnp.sum(prob, axis=-1), 1.0)
-                & (trials >= 0)))
+                & (trials >= 0)
+            ))
             error = err1 | err2 | (out_of_bounds * ERR)
-            return sample, key, error
+            return sample, key, error, params
         return _jax_wrapped_distribution_multinomial
@@ -1971,57 +1842,57 @@ class JaxRDDLCompiler:
     # matrix algebra
     # ===========================================================================
-    def _jax_matrix(self, expr, info):
+    def _jax_matrix(self, expr, init_params):
         _, op = expr.etype
         if op == 'det':
-            return self._jax_matrix_det(expr, info)
+            return self._jax_matrix_det(expr, init_params)
         elif op == 'inverse':
-            return self._jax_matrix_inv(expr, info, pseudo=False)
+            return self._jax_matrix_inv(expr, init_params, pseudo=False)
         elif op == 'pinverse':
-            return self._jax_matrix_inv(expr, info, pseudo=True)
+            return self._jax_matrix_inv(expr, init_params, pseudo=True)
         elif op == 'cholesky':
-            return self._jax_matrix_cholesky(expr, info)
+            return self._jax_matrix_cholesky(expr, init_params)
         else:
             raise RDDLNotImplementedError(
                 f'Matrix operation {op} is not supported.\n' +
                 print_stack_trace(expr))
-    def _jax_matrix_det(self, expr, info):
+    def _jax_matrix_det(self, expr, init_params):
         * _, arg = expr.args
-        jax_arg = self._jax(arg, info)
+        jax_arg = self._jax(arg, init_params)
         def _jax_wrapped_matrix_operation_det(x, params, key):
-            sample_arg, key, error = jax_arg(x, params, key)
+            sample_arg, key, error, params = jax_arg(x, params, key)
             sample = jnp.linalg.det(sample_arg)
-            return sample, key, error
+            return sample, key, error, params
         return _jax_wrapped_matrix_operation_det
-    def _jax_matrix_inv(self, expr, info, pseudo):
+    def _jax_matrix_inv(self, expr, init_params, pseudo):
         _, arg = expr.args
-        jax_arg = self._jax(arg, info)
+        jax_arg = self._jax(arg, init_params)
         indices = self.traced.cached_sim_info(expr)
         op = jnp.linalg.pinv if pseudo else jnp.linalg.inv
         def _jax_wrapped_matrix_operation_inv(x, params, key):
-            sample_arg, key, error = jax_arg(x, params, key)
+            sample_arg, key, error, params = jax_arg(x, params, key)
             sample = op(sample_arg)
             sample = jnp.moveaxis(sample, source=(-2, -1), destination=indices)
-            return sample, key, error
+            return sample, key, error, params
         return _jax_wrapped_matrix_operation_inv
-    def _jax_matrix_cholesky(self, expr, info):
+    def _jax_matrix_cholesky(self, expr, init_params):
         _, arg = expr.args
-        jax_arg = self._jax(arg, info)
+        jax_arg = self._jax(arg, init_params)
         indices = self.traced.cached_sim_info(expr)
         op = jnp.linalg.cholesky
         def _jax_wrapped_matrix_operation_cholesky(x, params, key):
-            sample_arg, key, error = jax_arg(x, params, key)
+            sample_arg, key, error, params = jax_arg(x, params, key)
             sample = op(sample_arg)
             sample = jnp.moveaxis(sample, source=(-2, -1), destination=indices)
-            return sample, key, error
+            return sample, key, error, params
         return _jax_wrapped_matrix_operation_cholesky

pyRDDLGym-jax 0.5__py3-none-any.whl → 1.0__py3-none-any.whl

pyRDDLGym-jax 0.5py3-none-any.whl → 1.0py3-none-any.whl