pyRDDLGym-jax 2.7__py3-none-any.whl → 3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. pyRDDLGym_jax/__init__.py +1 -1
  2. pyRDDLGym_jax/core/compiler.py +1080 -906
  3. pyRDDLGym_jax/core/logic.py +1537 -1369
  4. pyRDDLGym_jax/core/model.py +75 -86
  5. pyRDDLGym_jax/core/planner.py +883 -935
  6. pyRDDLGym_jax/core/simulator.py +20 -17
  7. pyRDDLGym_jax/core/tuning.py +11 -7
  8. pyRDDLGym_jax/core/visualization.py +115 -78
  9. pyRDDLGym_jax/entry_point.py +2 -1
  10. pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_drp.cfg +6 -8
  11. pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_replan.cfg +5 -7
  12. pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_slp.cfg +7 -8
  13. pyRDDLGym_jax/examples/configs/HVAC_ippc2023_drp.cfg +7 -8
  14. pyRDDLGym_jax/examples/configs/HVAC_ippc2023_slp.cfg +8 -9
  15. pyRDDLGym_jax/examples/configs/MountainCar_Continuous_gym_slp.cfg +5 -7
  16. pyRDDLGym_jax/examples/configs/MountainCar_ippc2023_slp.cfg +5 -7
  17. pyRDDLGym_jax/examples/configs/PowerGen_Continuous_drp.cfg +7 -8
  18. pyRDDLGym_jax/examples/configs/PowerGen_Continuous_replan.cfg +6 -7
  19. pyRDDLGym_jax/examples/configs/PowerGen_Continuous_slp.cfg +6 -7
  20. pyRDDLGym_jax/examples/configs/Quadcopter_drp.cfg +6 -8
  21. pyRDDLGym_jax/examples/configs/Quadcopter_physics_drp.cfg +17 -0
  22. pyRDDLGym_jax/examples/configs/Quadcopter_physics_slp.cfg +17 -0
  23. pyRDDLGym_jax/examples/configs/Quadcopter_slp.cfg +5 -7
  24. pyRDDLGym_jax/examples/configs/Reservoir_Continuous_drp.cfg +4 -7
  25. pyRDDLGym_jax/examples/configs/Reservoir_Continuous_replan.cfg +5 -7
  26. pyRDDLGym_jax/examples/configs/Reservoir_Continuous_slp.cfg +4 -7
  27. pyRDDLGym_jax/examples/configs/UAV_Continuous_slp.cfg +5 -7
  28. pyRDDLGym_jax/examples/configs/Wildfire_MDP_ippc2014_drp.cfg +6 -7
  29. pyRDDLGym_jax/examples/configs/Wildfire_MDP_ippc2014_replan.cfg +6 -7
  30. pyRDDLGym_jax/examples/configs/Wildfire_MDP_ippc2014_slp.cfg +6 -7
  31. pyRDDLGym_jax/examples/configs/default_drp.cfg +5 -8
  32. pyRDDLGym_jax/examples/configs/default_replan.cfg +5 -8
  33. pyRDDLGym_jax/examples/configs/default_slp.cfg +5 -8
  34. pyRDDLGym_jax/examples/configs/tuning_drp.cfg +6 -8
  35. pyRDDLGym_jax/examples/configs/tuning_replan.cfg +6 -8
  36. pyRDDLGym_jax/examples/configs/tuning_slp.cfg +6 -8
  37. pyRDDLGym_jax/examples/run_plan.py +2 -33
  38. pyRDDLGym_jax/examples/run_tune.py +2 -2
  39. {pyrddlgym_jax-2.7.dist-info → pyrddlgym_jax-3.0.dist-info}/METADATA +22 -23
  40. pyrddlgym_jax-3.0.dist-info/RECORD +51 -0
  41. {pyrddlgym_jax-2.7.dist-info → pyrddlgym_jax-3.0.dist-info}/WHEEL +1 -1
  42. pyRDDLGym_jax/examples/run_gradient.py +0 -102
  43. pyrddlgym_jax-2.7.dist-info/RECORD +0 -50
  44. {pyrddlgym_jax-2.7.dist-info → pyrddlgym_jax-3.0.dist-info}/entry_points.txt +0 -0
  45. {pyrddlgym_jax-2.7.dist-info → pyrddlgym_jax-3.0.dist-info}/licenses/LICENSE +0 -0
  46. {pyrddlgym_jax-2.7.dist-info → pyrddlgym_jax-3.0.dist-info}/top_level.txt +0 -0
@@ -14,12 +14,14 @@
14
14
 
15
15
 
16
16
  from functools import partial
17
+ import termcolor
17
18
  import traceback
18
19
  from typing import Any, Callable, Dict, List, Optional
19
20
 
20
21
  import jax
21
22
  import jax.numpy as jnp
22
23
  import jax.random as random
24
+ import jax.scipy as scipy
23
25
 
24
26
  from pyRDDLGym.core.compiler.initializer import RDDLValueInitializer
25
27
  from pyRDDLGym.core.compiler.levels import RDDLLevelAnalysis
@@ -36,8 +38,6 @@ from pyRDDLGym.core.debug.exception import (
36
38
  from pyRDDLGym.core.debug.logger import Logger
37
39
  from pyRDDLGym.core.simulator import RDDLSimulatorPrecompiled
38
40
 
39
- from pyRDDLGym_jax.core.logic import ExactLogic
40
-
41
41
  # more robust approach - if user does not have this or broken try to continue
42
42
  try:
43
43
  from tensorflow_probability.substrates import jax as tfp
@@ -53,12 +53,11 @@ class JaxRDDLCompiler:
53
53
  All operations are identical to their numpy equivalents.
54
54
  '''
55
55
 
56
- def __init__(self, rddl: RDDLLiftedModel,
56
+ def __init__(self, rddl: RDDLLiftedModel, *args,
57
57
  allow_synchronous_state: bool=True,
58
58
  logger: Optional[Logger]=None,
59
59
  use64bit: bool=False,
60
- compile_non_fluent_exact: bool=True,
61
- python_functions: Optional[Dict[str, Callable]]=None) -> None:
60
+ python_functions: Optional[Dict[str, Callable]]=None, **kwargs) -> None:
62
61
  '''Creates a new RDDL to Jax compiler.
63
62
 
64
63
  :param rddl: the RDDL model to compile into Jax
@@ -66,10 +65,17 @@ class JaxRDDLCompiler:
66
65
  on each other
67
66
  :param logger: to log information about compilation to file
68
67
  :param use64bit: whether to use 64 bit arithmetic
69
- :param compile_non_fluent_exact: whether non-fluent expressions
70
- are always compiled using exact JAX expressions
71
68
  :param python_functions: dictionary of external Python functions to call from RDDL
72
69
  '''
70
+
71
+ # warn about unused parameters
72
+ if args:
73
+ print(termcolor.colored(
74
+ f'[WARN] JaxRDDLCompiler received invalid args {args}.', 'yellow'))
75
+ if kwargs:
76
+ print(termcolor.colored(
77
+ f'[WARN] JaxRDDLCompiler received invalid kwargs {kwargs}.', 'yellow'))
78
+
73
79
  self.rddl = rddl
74
80
  self.logger = logger
75
81
  # jax.config.update('jax_log_compiles', True) # for testing ONLY
@@ -86,7 +92,7 @@ class JaxRDDLCompiler:
86
92
  self.JAX_TYPES = {
87
93
  'int': self.INT,
88
94
  'real': self.REAL,
89
- 'bool': bool
95
+ 'bool': jnp.bool_
90
96
  }
91
97
 
92
98
  # compile initial values
@@ -94,6 +100,7 @@ class JaxRDDLCompiler:
94
100
  self.init_values = initializer.initialize()
95
101
 
96
102
  # compute dependency graph for CPFs and sort them by evaluation order
103
+ self.allow_synchronous_state = allow_synchronous_state
97
104
  sorter = RDDLLevelAnalysis(rddl, allow_synchronous_state=allow_synchronous_state)
98
105
  self.levels = sorter.compute_levels()
99
106
 
@@ -101,10 +108,12 @@ class JaxRDDLCompiler:
101
108
  tracer = RDDLObjectsTracer(rddl, cpf_levels=self.levels)
102
109
  self.traced = tracer.trace()
103
110
 
104
- # extract the box constraints on actions
111
+ # external python functions
105
112
  if python_functions is None:
106
113
  python_functions = {}
107
114
  self.python_functions = python_functions
115
+
116
+ # extract the box constraints on actions
108
117
  simulator = RDDLSimulatorPrecompiled(
109
118
  rddl=self.rddl,
110
119
  init_values=self.init_values,
@@ -112,75 +121,90 @@ class JaxRDDLCompiler:
112
121
  trace_info=self.traced,
113
122
  python_functions=python_functions
114
123
  )
115
- constraints = RDDLConstraints(simulator, vectorized=True)
116
- self.constraints = constraints
117
-
118
- # basic operations - these can be override in subclasses
119
- self.compile_non_fluent_exact = compile_non_fluent_exact
120
- self.AGGREGATION_BOOL = {'forall', 'exists'}
121
- self.EXACT_OPS = ExactLogic(use64bit=self.use64bit).get_operator_dicts()
122
- self.OPS = self.EXACT_OPS
124
+ self.constraints = RDDLConstraints(simulator, vectorized=True)
125
+
126
+ def get_kwargs(self) -> Dict[str, Any]:
127
+ '''Returns a dictionary of configurable parameter name: parameter value pairs.
128
+ '''
129
+ return {
130
+ 'allow_synchronous_state': self.allow_synchronous_state,
131
+ 'use64bit': self.use64bit,
132
+ 'python_functions': self.python_functions
133
+ }
134
+
135
+ def split_fluent_nonfluent(self, values):
136
+ '''Splits given values dictionary into fluent and non-fluent dictionaries.
137
+ '''
138
+ nonfluents = self.rddl.non_fluents
139
+ fls = {name: value for (name, value) in values.items() if name not in nonfluents}
140
+ nfls = {name: value for (name, value) in values.items() if name in nonfluents}
141
+ return fls, nfls
123
142
 
124
143
  # ===========================================================================
125
144
  # main compilation subroutines
126
145
  # ===========================================================================
127
146
 
128
- def compile(self, log_jax_expr: bool=False, heading: str='') -> None:
147
+ def compile(self, log_jax_expr: bool=False,
148
+ heading: str='',
149
+ extra_aux: Dict[str, Any]={}) -> None:
129
150
  '''Compiles the current RDDL into Jax expressions.
130
151
 
131
152
  :param log_jax_expr: whether to pretty-print the compiled Jax functions
132
153
  to the log file
133
154
  :param heading: the heading to print before compilation information
155
+ :param extra_aux: extra info to save during compilations
134
156
  '''
135
- init_params = {}
136
- self.invariants = self._compile_constraints(self.rddl.invariants, init_params)
137
- self.preconditions = self._compile_constraints(self.rddl.preconditions, init_params)
138
- self.terminations = self._compile_constraints(self.rddl.terminations, init_params)
139
- self.cpfs = self._compile_cpfs(init_params)
140
- self.reward = self._compile_reward(init_params)
141
- self.model_params = init_params
142
-
157
+ self.model_aux = {'params': {}, 'overriden': {}}
158
+ self.model_aux.update(extra_aux)
159
+
160
+ self.invariants = self._compile_constraints(self.rddl.invariants, self.model_aux)
161
+ self.preconditions = self._compile_constraints(self.rddl.preconditions, self.model_aux)
162
+ self.terminations = self._compile_constraints(self.rddl.terminations, self.model_aux)
163
+ self.cpfs = self._compile_cpfs(self.model_aux)
164
+ self.reward = self._compile_reward(self.model_aux)
165
+
166
+ # add compiled jax expression to logger
143
167
  if log_jax_expr and self.logger is not None:
144
- printed = self.print_jax()
145
- printed_cpfs = '\n\n'.join(f'{k}: {v}'
146
- for (k, v) in printed['cpfs'].items())
147
- printed_reward = printed['reward']
148
- printed_invariants = '\n\n'.join(v for v in printed['invariants'])
149
- printed_preconds = '\n\n'.join(v for v in printed['preconditions'])
150
- printed_terminals = '\n\n'.join(v for v in printed['terminations'])
151
- printed_params = '\n'.join(f'{k}: {v}' for (k, v) in init_params.items())
152
- message = (
153
- f'[info] {heading}\n'
154
- f'[info] compiled JAX CPFs:\n\n'
155
- f'{printed_cpfs}\n\n'
156
- f'[info] compiled JAX reward:\n\n'
157
- f'{printed_reward}\n\n'
158
- f'[info] compiled JAX invariants:\n\n'
159
- f'{printed_invariants}\n\n'
160
- f'[info] compiled JAX preconditions:\n\n'
161
- f'{printed_preconds}\n\n'
162
- f'[info] compiled JAX terminations:\n\n'
163
- f'{printed_terminals}\n'
164
- f'[info] model parameters:\n'
165
- f'{printed_params}\n'
166
- )
167
- self.logger.log(message)
168
-
169
- def _compile_constraints(self, constraints, init_params):
170
- return [self._jax(expr, init_params, dtype=bool) for expr in constraints]
171
-
172
- def _compile_cpfs(self, init_params):
168
+ self._log_printed_jax(heading)
169
+
170
+ def _log_printed_jax(self, heading=''):
171
+ printed = self.print_jax()
172
+ printed_cpfs = '\n\n'.join(f'{k}: {v}' for (k, v) in printed['cpfs'].items())
173
+ printed_reward = printed['reward']
174
+ printed_invariants = '\n\n'.join(v for v in printed['invariants'])
175
+ printed_preconds = '\n\n'.join(v for v in printed['preconditions'])
176
+ printed_terminals = '\n\n'.join(v for v in printed['terminations'])
177
+ printed_params = '\n'.join(f'{k}: {v}' for (k, v) in self.model_aux['params'].items())
178
+ self.logger.log(
179
+ f'[info] {heading}\n'
180
+ f'[info] compiled JAX CPFs:\n\n'
181
+ f'{printed_cpfs}\n\n'
182
+ f'[info] compiled JAX reward:\n\n'
183
+ f'{printed_reward}\n\n'
184
+ f'[info] compiled JAX invariants:\n\n'
185
+ f'{printed_invariants}\n\n'
186
+ f'[info] compiled JAX preconditions:\n\n'
187
+ f'{printed_preconds}\n\n'
188
+ f'[info] compiled JAX terminations:\n\n'
189
+ f'{printed_terminals}\n'
190
+ f'[info] model parameters:\n'
191
+ f'{printed_params}\n'
192
+ )
193
+
194
+ def _compile_constraints(self, constraints, aux):
195
+ return [self._jax(expr, aux, dtype=jnp.bool_) for expr in constraints]
196
+
197
+ def _compile_cpfs(self, aux):
173
198
  jax_cpfs = {}
174
199
  for cpfs in self.levels.values():
175
200
  for cpf in cpfs:
176
201
  _, expr = self.rddl.cpfs[cpf]
177
- prange = self.rddl.variable_ranges[cpf]
178
- dtype = self.JAX_TYPES.get(prange, self.INT)
179
- jax_cpfs[cpf] = self._jax(expr, init_params, dtype=dtype)
202
+ dtype = self.JAX_TYPES.get(self.rddl.variable_ranges[cpf], self.INT)
203
+ jax_cpfs[cpf] = self._jax(expr, aux, dtype=dtype)
180
204
  return jax_cpfs
181
205
 
182
- def _compile_reward(self, init_params):
183
- return self._jax(self.rddl.reward, init_params, dtype=self.REAL)
206
+ def _compile_reward(self, aux):
207
+ return self._jax(self.rddl.reward, aux, dtype=self.REAL)
184
208
 
185
209
  def _extract_inequality_constraint(self, expr):
186
210
  result = []
@@ -208,55 +232,110 @@ class JaxRDDLCompiler:
208
232
  result.extend(self._extract_equality_constraint(arg))
209
233
  return result
210
234
 
211
- def _jax_nonlinear_constraints(self, init_params):
212
- rddl = self.rddl
213
-
214
- # extract the non-box inequality constraints on actions
215
- inequalities = [constr
216
- for (i, expr) in enumerate(rddl.preconditions)
217
- for constr in self._extract_inequality_constraint(expr)
218
- if not self.constraints.is_box_preconditions[i]]
219
-
220
- # compile them to JAX and write as h(s, a) <= 0
221
- jax_op = ExactLogic.exact_binary_function(jnp.subtract)
222
- jax_inequalities = []
223
- for (left, right) in inequalities:
224
- jax_lhs = self._jax(left, init_params)
225
- jax_rhs = self._jax(right, init_params)
226
- jax_constr = self._jax_binary(jax_lhs, jax_rhs, jax_op, at_least_int=True)
227
- jax_inequalities.append(jax_constr)
228
-
229
- # extract the non-box equality constraints on actions
230
- equalities = [constr
231
- for (i, expr) in enumerate(rddl.preconditions)
232
- for constr in self._extract_equality_constraint(expr)
233
- if not self.constraints.is_box_preconditions[i]]
234
-
235
- # compile them to JAX and write as g(s, a) == 0
236
- jax_equalities = []
237
- for (left, right) in equalities:
238
- jax_lhs = self._jax(left, init_params)
239
- jax_rhs = self._jax(right, init_params)
240
- jax_constr = self._jax_binary(jax_lhs, jax_rhs, jax_op, at_least_int=True)
241
- jax_equalities.append(jax_constr)
242
-
235
+ def _jax_nonlinear_constraints(self, aux):
236
+ jax_equalities, jax_inequalities = [], []
237
+ for (i, expr) in enumerate(self.rddl.preconditions):
238
+ if not self.constraints.is_box_preconditions[i]:
239
+
240
+ # compile inequalities to JAX and write as h(s, a) <= 0
241
+ for (left, right) in self._extract_inequality_constraint(expr):
242
+ jax_lhs = self._jax(left, aux)
243
+ jax_rhs = self._jax(right, aux)
244
+ jax_constr = self._jax_binary(
245
+ jax_lhs, jax_rhs, jnp.subtract, at_least_int=True)
246
+ jax_inequalities.append(jax_constr)
247
+
248
+ # compile equalities to JAX and write as g(s, a) == 0
249
+ for (left, right) in self._extract_equality_constraint(expr):
250
+ jax_lhs = self._jax(left, aux)
251
+ jax_rhs = self._jax(right, aux)
252
+ jax_constr = self._jax_binary(
253
+ jax_lhs, jax_rhs, jnp.subtract, at_least_int=True)
254
+ jax_equalities.append(jax_constr)
243
255
  return jax_inequalities, jax_equalities
244
256
 
257
+ def _jax_preconditions(self):
258
+ preconds = self.preconditions
259
+ def _jax_wrapped_preconditions(key, errors, fls, nfls, params):
260
+ precond_check = jnp.array(True, dtype=jnp.bool_)
261
+ for precond in preconds:
262
+ sample, key, err, params = precond(fls, nfls, params, key)
263
+ precond_check = jnp.logical_and(precond_check, sample)
264
+ errors = errors | err
265
+ return precond_check, key, errors, params
266
+ return _jax_wrapped_preconditions
267
+
268
+ def _jax_inequalities(self, aux_constr):
269
+ inequality_fns, equality_fns = self._jax_nonlinear_constraints(aux_constr)
270
+ def _jax_wrapped_inequalities(key, errors, fls, nfls, params):
271
+ inequalities, equalities = [], []
272
+ for constraint in inequality_fns:
273
+ sample, key, err, params = constraint(fls, nfls, params, key)
274
+ inequalities.append(sample)
275
+ errors = errors | err
276
+ for constraint in equality_fns:
277
+ sample, key, err, params = constraint(fls, nfls, params, key)
278
+ equalities.append(sample)
279
+ errors = errors | err
280
+ return (inequalities, equalities), key, errors, params
281
+ return _jax_wrapped_inequalities
282
+
283
+ def _jax_cpfs(self):
284
+ cpfs = self.cpfs
285
+ def _jax_wrapped_cpfs(key, errors, fls, nfls, params):
286
+ fls = fls.copy()
287
+ for (name, cpf) in cpfs.items():
288
+ fls[name], key, err, params = cpf(fls, nfls, params, key)
289
+ errors = errors | err
290
+ return fls, key, errors, params
291
+ return _jax_wrapped_cpfs
292
+
293
+ def _jax_reward(self):
294
+ reward_fn = self.reward
295
+ def _jax_wrapped_reward(key, errors, fls, nfls, params):
296
+ reward, key, err, params = reward_fn(fls, nfls, params, key)
297
+ errors = errors | err
298
+ return reward, key, errors, params
299
+ return _jax_wrapped_reward
300
+
301
+ def _jax_invariants(self):
302
+ invariants = self.invariants
303
+ def _jax_wrapped_invariants(key, errors, fls, nfls, params):
304
+ invariant_check = jnp.array(True, dtype=jnp.bool_)
305
+ for invariant in invariants:
306
+ sample, key, err, params = invariant(fls, nfls, params, key)
307
+ invariant_check = jnp.logical_and(invariant_check, sample)
308
+ errors = errors | err
309
+ return invariant_check, key, errors, params
310
+ return _jax_wrapped_invariants
311
+
312
+ def _jax_terminations(self):
313
+ terminations = self.terminations
314
+ def _jax_wrapped_terminations(key, errors, fls, nfls, params):
315
+ terminated_check = jnp.array(False, dtype=jnp.bool_)
316
+ for terminal in terminations:
317
+ sample, key, err, params = terminal(fls, nfls, params, key)
318
+ terminated_check = jnp.logical_or(terminated_check, sample)
319
+ errors = errors | err
320
+ return terminated_check, key, errors, params
321
+ return _jax_wrapped_terminations
322
+
245
323
  def compile_transition(self, check_constraints: bool=False,
246
324
  constraint_func: bool=False,
247
- init_params_constr: Dict[str, Any]={},
248
- cache_path_info: bool=False) -> Callable:
325
+ cache_path_info: bool=False,
326
+ aux_constr: Dict[str, Any]={}) -> Callable:
249
327
  '''Compiles the current RDDL model into a JAX transition function that
250
328
  samples the next state.
251
329
 
252
330
  The arguments of the returned function is:
253
331
  - key is the PRNG key
254
332
  - actions is the dict of action tensors
255
- - subs is the dict of current pvar value tensors
256
- - model_params is a dict of parameters for the relaxed model.
333
+ - fls is the dict of current fluent pvar tensors
334
+ - nfls is the dict of nonfluent pvar tensors
335
+ - params is a dict of parameters for the relaxed model.
257
336
 
258
337
  The returned value of the function is:
259
- - subs is the returned next epoch fluent values
338
+ - fls is the returned next epoch fluent values
260
339
  - log includes all the auxiliary information about constraints
261
340
  satisfied, errors, etc.
262
341
 
@@ -284,104 +363,118 @@ class JaxRDDLCompiler:
284
363
  in addition to the usual outputs
285
364
  :param cache_path_info: whether to save full path traces as part of the log
286
365
  '''
287
- NORMAL = JaxRDDLCompiler.ERROR_CODES['NORMAL']
288
- rddl = self.rddl
289
- reward_fn, cpfs, preconds, invariants, terminals = \
290
- self.reward, self.cpfs, self.preconditions, self.invariants, self.terminations
366
+ NORMAL = JaxRDDLCompiler.ERROR_CODES['NORMAL']
291
367
 
292
- # compile constraint information
368
+ # compile all components of the RDDL
369
+ cpf_fn = self._jax_cpfs()
370
+ reward_fn = self._jax_reward()
371
+
372
+ # compile optional constraints
373
+ precond_fn = invariant_fn = terminal_fn = None
374
+ if check_constraints:
375
+ precond_fn = self._jax_preconditions()
376
+ invariant_fn = self._jax_invariants()
377
+ terminal_fn = self._jax_terminations()
378
+
379
+ # compile optional inequalities
380
+ ineq_fn = None
293
381
  if constraint_func:
294
- inequality_fns, equality_fns = self._jax_nonlinear_constraints(
295
- init_params_constr)
296
- else:
297
- inequality_fns, equality_fns = None, None
298
-
382
+ ineq_fn = self._jax_inequalities(aux_constr)
383
+
299
384
  # do a single step update from the RDDL model
300
- def _jax_wrapped_single_step(key, actions, subs, model_params):
385
+ def _jax_wrapped_single_step(key, actions, fls, nfls, params):
301
386
  errors = NORMAL
302
- subs.update(actions)
387
+
388
+ fls = fls.copy()
389
+ fls.update(actions)
303
390
 
304
391
  # check action preconditions
305
- precond_check = True
306
392
  if check_constraints:
307
- for precond in preconds:
308
- sample, key, err, model_params = precond(subs, model_params, key)
309
- precond_check = jnp.logical_and(precond_check, sample)
310
- errors |= err
393
+ precond, key, errors, params = precond_fn(key, errors, fls, nfls, params)
394
+ else:
395
+ precond = jnp.array(True, dtype=jnp.bool_)
311
396
 
312
397
  # compute h(s, a) <= 0 and g(s, a) == 0 constraint functions
313
- inequalities, equalities = [], []
314
398
  if constraint_func:
315
- for constraint in inequality_fns:
316
- sample, key, err, model_params = constraint(subs, model_params, key)
317
- inequalities.append(sample)
318
- errors |= err
319
- for constraint in equality_fns:
320
- sample, key, err, model_params = constraint(subs, model_params, key)
321
- equalities.append(sample)
322
- errors |= err
399
+ (inequalities, equalities), key, errors, params = ineq_fn(
400
+ key, errors, fls, nfls, params)
401
+ else:
402
+ inequalities, equalities = [], []
323
403
 
324
404
  # calculate CPFs in topological order
325
- for (name, cpf) in cpfs.items():
326
- subs[name], key, err, model_params = cpf(subs, model_params, key)
327
- errors |= err
328
-
329
- # calculate the immediate reward
330
- reward, key, err, model_params = reward_fn(subs, model_params, key)
331
- errors |= err
405
+ fls, key, errors, params = cpf_fn(key, errors, fls, nfls, params)
406
+ fluents = fls if cache_path_info else {}
332
407
 
333
- # calculate fluent values
334
- if cache_path_info:
335
- fluents = {name: values for (name, values) in subs.items()
336
- if name not in rddl.non_fluents}
337
- else:
338
- fluents = {}
408
+ # calculate the immediate reward
409
+ reward, key, errors, params = reward_fn(key, errors, fls, nfls, params)
339
410
 
340
411
  # set the next state to the current state
341
- for (state, next_state) in rddl.next_state.items():
342
- subs[state] = subs[next_state]
412
+ for (state, next_state) in self.rddl.next_state.items():
413
+ fls[state] = fls[next_state]
343
414
 
344
- # check the state invariants
345
- invariant_check = True
415
+ # check the state invariants and termination
346
416
  if check_constraints:
347
- for invariant in invariants:
348
- sample, key, err, model_params = invariant(subs, model_params, key)
349
- invariant_check = jnp.logical_and(invariant_check, sample)
350
- errors |= err
351
-
352
- # check the termination (TODO: zero out reward in s if terminated)
353
- terminated_check = False
354
- if check_constraints:
355
- for terminal in terminals:
356
- sample, key, err, model_params = terminal(subs, model_params, key)
357
- terminated_check = jnp.logical_or(terminated_check, sample)
358
- errors |= err
359
-
417
+ invariant, key, errors, params = invariant_fn(key, errors, fls, nfls, params)
418
+ terminated, key, errors, params = terminal_fn(key, errors, fls, nfls, params)
419
+ else:
420
+ invariant = jnp.array(True, dtype=jnp.bool_)
421
+ terminated = jnp.array(False, dtype=jnp.bool_)
422
+
360
423
  # prepare the return value
361
424
  log = {
362
425
  'fluents': fluents,
363
426
  'reward': reward,
364
427
  'error': errors,
365
- 'precondition': precond_check,
366
- 'invariant': invariant_check,
367
- 'termination': terminated_check
368
- }
369
- if constraint_func:
370
- log['inequalities'] = inequalities
371
- log['equalities'] = equalities
372
-
373
- return subs, log, model_params
428
+ 'precondition': precond,
429
+ 'invariant': invariant,
430
+ 'termination': terminated,
431
+ 'inequalities': inequalities,
432
+ 'equalities': equalities
433
+ }
434
+ return fls, log, params
374
435
 
375
436
  return _jax_wrapped_single_step
376
437
 
438
+ def _compile_policy_step(self, policy, transition_fn):
439
+ def _jax_wrapped_policy_step(key, policy_params, hyperparams, step, fls, nfls,
440
+ model_params):
441
+ key, subkey = random.split(key)
442
+ actions = policy(key, policy_params, hyperparams, step, fls)
443
+ return transition_fn(subkey, actions, fls, nfls, model_params)
444
+ return _jax_wrapped_policy_step
445
+
446
+ def _compile_batched_policy_step(self, policy_step_fn, n_batch, model_params_reduction):
447
+ def _jax_wrapped_batched_policy_step(carry, step):
448
+ key, policy_params, hyperparams, fls, nfls, model_params = carry
449
+ keys = random.split(key, num=1 + n_batch)
450
+ key, subkeys = keys[0], keys[1:]
451
+ fls, log, model_params = jax.vmap(
452
+ policy_step_fn, in_axes=(0, None, None, None, 0, None, None)
453
+ )(subkeys, policy_params, hyperparams, step, fls, nfls, model_params)
454
+ model_params = jax.tree_util.tree_map(model_params_reduction, model_params)
455
+ carry = (key, policy_params, hyperparams, fls, nfls, model_params)
456
+ return carry, log
457
+ return _jax_wrapped_batched_policy_step
458
+
459
+ def _compile_unrolled_policy_step(self, batched_policy_step_fn, n_steps):
460
+ def _jax_wrapped_batched_policy_rollout(key, policy_params, hyperparams, fls, nfls,
461
+ model_params):
462
+ start = (key, policy_params, hyperparams, fls, nfls, model_params)
463
+ steps = jnp.arange(n_steps)
464
+ end, log = jax.lax.scan(batched_policy_step_fn, start, steps)
465
+ log = jax.tree_util.tree_map(partial(jnp.swapaxes, axis1=0, axis2=1), log)
466
+ model_params = end[-1]
467
+ return log, model_params
468
+ return _jax_wrapped_batched_policy_rollout
469
+
377
470
  def compile_rollouts(self, policy: Callable,
378
471
  n_steps: int,
379
472
  n_batch: int,
380
473
  check_constraints: bool=False,
381
474
  constraint_func: bool=False,
382
- init_params_constr: Dict[str, Any]={},
475
+ cache_path_info: bool=False,
383
476
  model_params_reduction: Callable=lambda x: x[0],
384
- cache_path_info: bool=False) -> Callable:
477
+ aux_constr: Dict[str, Any]={}) -> Callable:
385
478
  '''Compiles the current RDDL model into a JAX transition function that
386
479
  samples trajectories with a fixed horizon from a policy.
387
480
 
@@ -389,7 +482,8 @@ class JaxRDDLCompiler:
389
482
  - key is the PRNG key (used by a stochastic policy)
390
483
  - policy_params is a pytree of trainable policy weights
391
484
  - hyperparams is a pytree of (optional) fixed policy hyper-parameters
392
- - subs is the dictionary of current fluent tensor values
485
+ - fls is the dictionary of current fluent tensor values
486
+ - nfls is the dictionary of next step fluent tensor value
393
487
  - model_params is a dict of model hyperparameters.
394
488
 
395
489
  The returned value of the returned function is:
@@ -402,7 +496,7 @@ class JaxRDDLCompiler:
402
496
  - params is a pytree of trainable policy weights
403
497
  - hyperparams is a pytree of (optional) fixed policy hyper-parameters
404
498
  - step is the time index of the decision in the current rollout
405
- - states is a dict of tensors for the current observation.
499
+ - fls is a dict of fluent tensors for the current epoch.
406
500
 
407
501
  :param policy: a Jax compiled function for the policy as described above
408
502
  decision epoch, state dict, and an RNG key and returns an action dict
@@ -413,54 +507,16 @@ class JaxRDDLCompiler:
413
507
  returned log and does not raise an exception
414
508
  :param constraint_func: produces the h(s, a) constraint function
415
509
  in addition to the usual outputs
510
+ :param cache_path_info: whether to save full path traces as part of the log
416
511
  :param model_params_reduction: how to aggregate updated model_params across runs
417
512
  in the batch (defaults to selecting the first element's parameters in the batch)
418
- :param cache_path_info: whether to save full path traces as part of the log
419
513
  '''
420
- rddl = self.rddl
421
- jax_step_fn = self.compile_transition(
422
- check_constraints, constraint_func, init_params_constr, cache_path_info)
423
-
424
- # for POMDP only observ-fluents are assumed visible to the policy
425
- if rddl.observ_fluents:
426
- observed_vars = rddl.observ_fluents
427
- else:
428
- observed_vars = rddl.state_fluents
429
-
430
- # evaluate the step from the policy
431
- def _jax_wrapped_single_step_policy(key, policy_params, hyperparams,
432
- step, subs, model_params):
433
- states = {var: values
434
- for (var, values) in subs.items()
435
- if var in observed_vars}
436
- actions = policy(key, policy_params, hyperparams, step, states)
437
- key, subkey = random.split(key)
438
- return jax_step_fn(subkey, actions, subs, model_params)
439
-
440
- # do a batched step update from the policy
441
- def _jax_wrapped_batched_step_policy(carry, step):
442
- key, policy_params, hyperparams, subs, model_params = carry
443
- key, *subkeys = random.split(key, num=1 + n_batch)
444
- keys = jnp.asarray(subkeys)
445
- subs, log, model_params = jax.vmap(
446
- _jax_wrapped_single_step_policy,
447
- in_axes=(0, None, None, None, 0, None)
448
- )(keys, policy_params, hyperparams, step, subs, model_params)
449
- model_params = jax.tree_util.tree_map(model_params_reduction, model_params)
450
- carry = (key, policy_params, hyperparams, subs, model_params)
451
- return carry, log
452
-
453
- # do a batched roll-out from the policy
454
- def _jax_wrapped_batched_rollout(key, policy_params, hyperparams,
455
- subs, model_params):
456
- start = (key, policy_params, hyperparams, subs, model_params)
457
- steps = jnp.arange(n_steps)
458
- end, log = jax.lax.scan(_jax_wrapped_batched_step_policy, start, steps)
459
- log = jax.tree_util.tree_map(partial(jnp.swapaxes, axis1=0, axis2=1), log)
460
- model_params = end[-1]
461
- return log, model_params
462
-
463
- return _jax_wrapped_batched_rollout
514
+ jax_fn = self.compile_transition(
515
+ check_constraints, constraint_func, cache_path_info, aux_constr)
516
+ jax_fn = self._compile_policy_step(policy, jax_fn)
517
+ jax_fn = self._compile_batched_policy_step(jax_fn, n_batch, model_params_reduction)
518
+ jax_fn = self._compile_unrolled_policy_step(jax_fn, n_steps)
519
+ return jax_fn
464
520
 
465
521
  # ===========================================================================
466
522
  # error checks and prints
@@ -470,43 +526,59 @@ class JaxRDDLCompiler:
470
526
  '''Returns a dictionary containing the string representations of all
471
527
  Jax compiled expressions from the RDDL file.
472
528
  '''
473
- subs = self.init_values
474
- init_params = self.model_params
529
+ fls, nfls = self.split_fluent_nonfluent(self.init_values)
530
+ params = self.model_aux['params']
475
531
  key = jax.random.PRNGKey(42)
476
532
  printed = {
477
- 'cpfs': {name: str(jax.make_jaxpr(expr)(subs, init_params, key))
478
- for (name, expr) in self.cpfs.items()},
479
- 'reward': str(jax.make_jaxpr(self.reward)(subs, init_params, key)),
480
- 'invariants': [str(jax.make_jaxpr(expr)(subs, init_params, key))
481
- for expr in self.invariants],
482
- 'preconditions': [str(jax.make_jaxpr(expr)(subs, init_params, key))
483
- for expr in self.preconditions],
484
- 'terminations': [str(jax.make_jaxpr(expr)(subs, init_params, key))
485
- for expr in self.terminations]
533
+ 'cpfs': {
534
+ name: str(jax.make_jaxpr(expr)(fls, nfls, params, key))
535
+ for (name, expr) in self.cpfs.items()
536
+ },
537
+ 'reward': str(jax.make_jaxpr(self.reward)(fls, nfls, params, key)),
538
+ 'invariants': [
539
+ str(jax.make_jaxpr(expr)(fls, nfls, params, key))
540
+ for expr in self.invariants
541
+ ],
542
+ 'preconditions': [
543
+ str(jax.make_jaxpr(expr)(fls, nfls, params, key))
544
+ for expr in self.preconditions
545
+ ],
546
+ 'terminations': [
547
+ str(jax.make_jaxpr(expr)(fls, nfls, params, key))
548
+ for expr in self.terminations
549
+ ]
486
550
  }
487
551
  return printed
488
552
 
489
553
  def model_parameter_info(self) -> Dict[str, Dict[str, Any]]:
490
554
  '''Returns a dictionary of additional information about model parameters.'''
491
555
  result = {}
492
- for (id, value) in self.model_params.items():
493
- expr_id = int(str(id).split('_')[0])
494
- expr = self.traced.lookup(expr_id)
556
+ for (id, value) in self.model_aux['params'].items():
557
+ expr = self.traced.lookup(id)
495
558
  result[id] = {
496
- 'id': expr_id,
559
+ 'id': id,
497
560
  'rddl_op': ' '.join(expr.etype),
498
561
  'init_value': value
499
562
  }
500
563
  return result
501
564
 
565
+ def overriden_ops_info(self) -> Dict[str, Dict[str, List[int]]]:
566
+ '''Returns a dictionary of operations overriden by another class.'''
567
+ result = {}
568
+ for (id, class_) in self.model_aux['overriden'].items():
569
+ expr = self.traced.lookup(id)
570
+ rddl_op = ' '.join(expr.etype)
571
+ result.setdefault(class_, {}).setdefault(rddl_op, []).append(id)
572
+ return result
573
+
502
574
  @staticmethod
503
575
  def _check_valid_op(expr, valid_ops):
504
576
  etype, op = expr.etype
505
577
  if op not in valid_ops:
506
- valid_op_str = ','.join(valid_ops.keys())
578
+ valid_op_str = ','.join(valid_ops)
507
579
  raise RDDLNotImplementedError(
508
580
  f'{etype} operator {op} is not supported: '
509
- f'must be in {valid_op_str}.\n' + print_stack_trace(expr))
581
+ f'must be one of {valid_op_str}.\n' + print_stack_trace(expr))
510
582
 
511
583
  @staticmethod
512
584
  def _check_num_args(expr, required_args):
@@ -516,6 +588,15 @@ class JaxRDDLCompiler:
516
588
  raise RDDLInvalidNumberOfArgumentsError(
517
589
  f'{etype} operator {op} requires {required_args} arguments, '
518
590
  f'got {actual_args}.\n' + print_stack_trace(expr))
591
+
592
+ @staticmethod
593
+ def _check_num_args_min(expr, required_args):
594
+ actual_args = len(expr.args)
595
+ if actual_args < required_args:
596
+ etype, op = expr.etype
597
+ raise RDDLInvalidNumberOfArgumentsError(
598
+ f'{etype} operator {op} requires at least {required_args} arguments, '
599
+ f'got {actual_args}.\n' + print_stack_trace(expr))
519
600
 
520
601
  ERROR_CODES = {
521
602
  'NORMAL': 0,
@@ -580,8 +661,7 @@ class JaxRDDLCompiler:
580
661
  decomposes it into individual error codes.
581
662
  '''
582
663
  binary = reversed(bin(error)[2:])
583
- errors = [i for (i, c) in enumerate(binary) if c == '1']
584
- return errors
664
+ return [i for (i, c) in enumerate(binary) if c == '1']
585
665
 
586
666
  @staticmethod
587
667
  def get_error_messages(error: int) -> List[str]:
@@ -589,63 +669,59 @@ class JaxRDDLCompiler:
589
669
  decomposes it into error strings.
590
670
  '''
591
671
  codes = JaxRDDLCompiler.get_error_codes(error)
592
- messages = [JaxRDDLCompiler.INVERSE_ERROR_CODES[i] for i in codes]
593
- return messages
672
+ return [JaxRDDLCompiler.INVERSE_ERROR_CODES[i] for i in codes]
594
673
 
595
674
  # ===========================================================================
596
675
  # expression compilation
597
676
  # ===========================================================================
598
677
 
599
- def _jax(self, expr, init_params, dtype=None):
678
+ def _jax(self, expr, aux, dtype=None):
600
679
  etype, _ = expr.etype
601
680
  if etype == 'constant':
602
- jax_expr = self._jax_constant(expr, init_params)
681
+ jax_expr = self._jax_constant(expr, aux)
603
682
  elif etype == 'pvar':
604
- jax_expr = self._jax_pvar(expr, init_params)
683
+ jax_expr = self._jax_pvar(expr, aux)
605
684
  elif etype == 'arithmetic':
606
- jax_expr = self._jax_arithmetic(expr, init_params)
685
+ jax_expr = self._jax_arithmetic(expr, aux)
607
686
  elif etype == 'relational':
608
- jax_expr = self._jax_relational(expr, init_params)
687
+ jax_expr = self._jax_relational(expr, aux)
609
688
  elif etype == 'boolean':
610
- jax_expr = self._jax_logical(expr, init_params)
689
+ jax_expr = self._jax_logical(expr, aux)
611
690
  elif etype == 'aggregation':
612
- jax_expr = self._jax_aggregation(expr, init_params)
691
+ jax_expr = self._jax_aggregation(expr, aux)
613
692
  elif etype == 'func':
614
- jax_expr = self._jax_functional(expr, init_params)
693
+ jax_expr = self._jax_function(expr, aux)
615
694
  elif etype == 'pyfunc':
616
- jax_expr = self._jax_pyfunc(expr, init_params)
695
+ jax_expr = self._jax_pyfunc(expr, aux)
617
696
  elif etype == 'control':
618
- jax_expr = self._jax_control(expr, init_params)
697
+ jax_expr = self._jax_control(expr, aux)
619
698
  elif etype == 'randomvar':
620
- jax_expr = self._jax_random(expr, init_params)
699
+ jax_expr = self._jax_random(expr, aux)
621
700
  elif etype == 'randomvector':
622
- jax_expr = self._jax_random_vector(expr, init_params)
701
+ jax_expr = self._jax_random_vector(expr, aux)
623
702
  elif etype == 'matrix':
624
- jax_expr = self._jax_matrix(expr, init_params)
703
+ jax_expr = self._jax_matrix(expr, aux)
625
704
  else:
626
705
  raise RDDLNotImplementedError(
627
- f'Internal error: expression type {expr} is not supported.\n' +
628
- print_stack_trace(expr))
706
+ f'Expression type {expr} is not supported.\n' + print_stack_trace(expr))
629
707
 
630
708
  # force type cast of tensor as required by caller
631
709
  if dtype is not None:
632
710
  jax_expr = self._jax_cast(jax_expr, dtype)
633
-
634
711
  return jax_expr
635
712
 
636
713
  def _jax_cast(self, jax_expr, dtype):
637
714
  ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_CAST']
638
-
639
- def _jax_wrapped_cast(x, params, key):
640
- val, key, err, params = jax_expr(x, params, key)
715
+
716
+ def _jax_wrapped_cast(fls, nfls, params, key):
717
+ val, key, err, params = jax_expr(fls, nfls, params, key)
641
718
  sample = jnp.asarray(val, dtype=dtype)
642
719
  invalid_cast = jnp.logical_and(
643
720
  jnp.logical_not(jnp.can_cast(val, dtype)),
644
721
  jnp.any(sample != val)
645
722
  )
646
- err |= (invalid_cast * ERR)
723
+ err = err | (invalid_cast * ERR)
647
724
  return sample, key, err, params
648
-
649
725
  return _jax_wrapped_cast
650
726
 
651
727
  def _fix_dtype(self, value):
@@ -654,34 +730,33 @@ class JaxRDDLCompiler:
654
730
  return self.INT
655
731
  elif jnp.issubdtype(dtype, jnp.floating):
656
732
  return self.REAL
657
- elif jnp.issubdtype(dtype, jnp.bool_) or jnp.issubdtype(dtype, bool):
658
- return bool
733
+ elif jnp.issubdtype(dtype, jnp.bool_):
734
+ return jnp.bool_
659
735
  else:
660
- raise TypeError(f'Invalid type {dtype} of {value}.')
736
+ raise TypeError(f'dtype {dtype} of {value} is not valid.')
661
737
 
662
738
  # ===========================================================================
663
739
  # leaves
664
740
  # ===========================================================================
665
741
 
666
- def _jax_constant(self, expr, init_params):
742
+ def _jax_constant(self, expr, aux):
667
743
  NORMAL = JaxRDDLCompiler.ERROR_CODES['NORMAL']
668
744
  cached_value = self.traced.cached_sim_info(expr)
745
+ dtype = self._fix_dtype(cached_value)
669
746
 
670
- def _jax_wrapped_constant(x, params, key):
671
- sample = jnp.asarray(cached_value, dtype=self._fix_dtype(cached_value))
747
+ def _jax_wrapped_constant(fls, nfls, params, key):
748
+ sample = jnp.asarray(cached_value, dtype=dtype)
672
749
  return sample, key, NORMAL, params
673
-
674
750
  return _jax_wrapped_constant
675
751
 
676
- def _jax_pvar_slice(self, _slice):
752
+ def _jax_pvar_slice(self, slice_):
677
753
  NORMAL = JaxRDDLCompiler.ERROR_CODES['NORMAL']
678
754
 
679
- def _jax_wrapped_pvar_slice(x, params, key):
680
- return _slice, key, NORMAL, params
681
-
755
+ def _jax_wrapped_pvar_slice(fls, nfls, params, key):
756
+ return slice_, key, NORMAL, params
682
757
  return _jax_wrapped_pvar_slice
683
758
 
684
- def _jax_pvar(self, expr, init_params):
759
+ def _jax_pvar(self, expr, aux):
685
760
  NORMAL = JaxRDDLCompiler.ERROR_CODES['NORMAL']
686
761
  var, pvars = expr.args
687
762
  is_value, cached_info = self.traced.cached_sim_info(expr)
@@ -690,21 +765,19 @@ class JaxRDDLCompiler:
690
765
  # boundary case: domain object is converted to canonical integer index
691
766
  if is_value:
692
767
  cached_value = cached_info
693
-
694
- def _jax_wrapped_object(x, params, key):
695
- sample = jnp.asarray(cached_value, dtype=self._fix_dtype(cached_value))
696
- return sample, key, NORMAL, params
768
+ dtype = self._fix_dtype(cached_value)
697
769
 
770
+ def _jax_wrapped_object(fls, nfls, params, key):
771
+ sample = jnp.asarray(cached_value, dtype=dtype)
772
+ return sample, key, NORMAL, params
698
773
  return _jax_wrapped_object
699
774
 
700
775
  # boundary case: no shape information (e.g. scalar pvar)
701
776
  elif cached_info is None:
702
-
703
- def _jax_wrapped_pvar_scalar(x, params, key):
704
- value = x[var]
777
+ def _jax_wrapped_pvar_scalar(fls, nfls, params, key):
778
+ value = fls[var] if var in fls else nfls[var]
705
779
  sample = jnp.asarray(value, dtype=self._fix_dtype(value))
706
780
  return sample, key, NORMAL, params
707
-
708
781
  return _jax_wrapped_pvar_scalar
709
782
 
710
783
  # must slice and/or reshape value tensor to match free variables
@@ -713,34 +786,29 @@ class JaxRDDLCompiler:
713
786
 
714
787
  # compile nested expressions
715
788
  if slices and op_code == RDDLObjectsTracer.NUMPY_OP_CODE.NESTED_SLICE:
789
+ jax_nested_expr = [
790
+ (self._jax(arg, aux) if slice_ is None else self._jax_pvar_slice(slice_))
791
+ for (arg, slice_) in zip(pvars, slices)
792
+ ]
716
793
 
717
- jax_nested_expr = [(self._jax(arg, init_params)
718
- if _slice is None
719
- else self._jax_pvar_slice(_slice))
720
- for (arg, _slice) in zip(pvars, slices)]
721
-
722
- def _jax_wrapped_pvar_tensor_nested(x, params, key):
794
+ def _jax_wrapped_pvar_tensor_nested(fls, nfls, params, key):
723
795
  error = NORMAL
724
- value = x[var]
796
+ value = fls[var] if var in fls else nfls[var]
725
797
  sample = jnp.asarray(value, dtype=self._fix_dtype(value))
726
- new_slices = [None] * len(jax_nested_expr)
727
- for (i, jax_expr) in enumerate(jax_nested_expr):
728
- new_slice, key, err, params = jax_expr(x, params, key)
729
- if not jnp.issubdtype(jnp.result_type(new_slice), jnp.integer):
730
- new_slice = jnp.asarray(new_slice, dtype=self.INT)
731
- new_slices[i] = new_slice
732
- error |= err
733
- new_slices = tuple(new_slices)
734
- sample = sample[new_slices]
798
+ new_slices = []
799
+ for jax_expr in jax_nested_expr:
800
+ new_slice, key, err, params = jax_expr(fls, nfls, params, key)
801
+ new_slice = jnp.asarray(new_slice, dtype=self.INT)
802
+ new_slices.append(new_slice)
803
+ error = error | err
804
+ sample = sample[tuple(new_slices)]
735
805
  return sample, key, error, params
736
-
737
806
  return _jax_wrapped_pvar_tensor_nested
738
807
 
739
808
  # tensor variable but no nesting
740
809
  else:
741
-
742
- def _jax_wrapped_pvar_tensor_non_nested(x, params, key):
743
- value = x[var]
810
+ def _jax_wrapped_pvar_tensor_non_nested(fls, nfls, params, key):
811
+ value = fls[var] if var in fls else nfls[var]
744
812
  sample = jnp.asarray(value, dtype=self._fix_dtype(value))
745
813
  if slices:
746
814
  sample = sample[slices]
@@ -752,190 +820,408 @@ class JaxRDDLCompiler:
752
820
  elif op_code == RDDLObjectsTracer.NUMPY_OP_CODE.TRANSPOSE:
753
821
  sample = jnp.transpose(sample, axes=op_args)
754
822
  return sample, key, NORMAL, params
755
-
756
823
  return _jax_wrapped_pvar_tensor_non_nested
757
824
 
758
825
  # ===========================================================================
759
- # mathematical
826
+ # boilerplate helper functions
760
827
  # ===========================================================================
761
828
 
762
829
  def _jax_unary(self, jax_expr, jax_op, at_least_int=False, check_dtype=None):
763
830
  ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_CAST']
764
831
 
765
- def _jax_wrapped_unary_op(x, params, key):
766
- sample, key, err, params = jax_expr(x, params, key)
832
+ def _jax_wrapped_unary_op(fls, nfls, params, key):
833
+ sample, key, err, params = jax_expr(fls, nfls, params, key)
767
834
  if at_least_int:
768
835
  sample = self.ONE * sample
769
- sample, params = jax_op(sample, params)
770
836
  if check_dtype is not None:
771
837
  invalid_cast = jnp.logical_not(jnp.can_cast(sample, check_dtype))
772
- err |= (invalid_cast * ERR)
838
+ err = err | (invalid_cast * ERR)
839
+ sample = jax_op(sample)
773
840
  return sample, key, err, params
774
-
775
841
  return _jax_wrapped_unary_op
776
842
 
777
843
  def _jax_binary(self, jax_lhs, jax_rhs, jax_op, at_least_int=False, check_dtype=None):
778
844
  ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_CAST']
779
845
 
780
- def _jax_wrapped_binary_op(x, params, key):
781
- sample1, key, err1, params = jax_lhs(x, params, key)
782
- sample2, key, err2, params = jax_rhs(x, params, key)
846
+ def _jax_wrapped_binary_op(fls, nfls, params, key):
847
+ sample1, key, err1, params = jax_lhs(fls, nfls, params, key)
848
+ sample2, key, err2, params = jax_rhs(fls, nfls, params, key)
783
849
  if at_least_int:
784
850
  sample1 = self.ONE * sample1
785
851
  sample2 = self.ONE * sample2
786
- sample, params = jax_op(sample1, sample2, params)
852
+ sample = jax_op(sample1, sample2)
787
853
  err = err1 | err2
788
854
  if check_dtype is not None:
789
855
  invalid_cast = jnp.logical_not(jnp.logical_and(
790
856
  jnp.can_cast(sample1, check_dtype),
791
857
  jnp.can_cast(sample2, check_dtype))
792
858
  )
793
- err |= (invalid_cast * ERR)
859
+ err = err | (invalid_cast * ERR)
794
860
  return sample, key, err, params
795
-
796
861
  return _jax_wrapped_binary_op
797
862
 
798
- def _jax_arithmetic(self, expr, init_params):
799
- _, op = expr.etype
800
-
801
- # if expression is non-fluent, always use the exact operation
802
- if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(expr):
803
- valid_ops = self.EXACT_OPS['arithmetic']
804
- negative_op = self.EXACT_OPS['negative']
805
- else:
806
- valid_ops = self.OPS['arithmetic']
807
- negative_op = self.OPS['negative']
808
- JaxRDDLCompiler._check_valid_op(expr, valid_ops)
809
-
810
- # recursively compile arguments
811
- args = expr.args
812
- n = len(args)
813
- if n == 1 and op == '-':
814
- arg, = args
815
- jax_expr = self._jax(arg, init_params)
816
- jax_op = negative_op(expr.id, init_params)
817
- return self._jax_unary(jax_expr, jax_op, at_least_int=True)
818
-
819
- elif n == 2 or (n >= 2 and op in {'*', '+'}):
820
- jax_exprs = [self._jax(arg, init_params) for arg in args]
821
- result = jax_exprs[0]
822
- for (i, jax_rhs) in enumerate(jax_exprs[1:]):
823
- jax_op = valid_ops[op](f'{expr.id}_{op}{i}', init_params)
824
- result = self._jax_binary(result, jax_rhs, jax_op, at_least_int=True)
825
- return result
826
-
863
+ def _jax_unary_helper(self, expr, aux, jax_op, at_least_int=False, check_dtype=None):
864
+ JaxRDDLCompiler._check_num_args(expr, 1)
865
+ arg, = expr.args
866
+ jax_expr = self._jax(arg, aux)
867
+ return self._jax_unary(
868
+ jax_expr, jax_op, at_least_int=at_least_int, check_dtype=check_dtype)
869
+
870
+ def _jax_binary_helper(self, expr, aux, jax_op, at_least_int=False, check_dtype=None):
827
871
  JaxRDDLCompiler._check_num_args(expr, 2)
872
+ lhs, rhs = expr.args
873
+ jax_lhs = self._jax(lhs, aux)
874
+ jax_rhs = self._jax(rhs, aux)
875
+ return self._jax_binary(
876
+ jax_lhs, jax_rhs, jax_op, at_least_int=at_least_int, check_dtype=check_dtype)
828
877
 
829
- def _jax_relational(self, expr, init_params):
878
+ def _jax_nary_helper(self, expr, aux, jax_op, at_least_int=False, check_dtype=None):
879
+ JaxRDDLCompiler._check_num_args_min(expr, 2)
880
+ args = expr.args
881
+ jax_exprs = [self._jax(arg, aux) for arg in args]
882
+ result = jax_exprs[0]
883
+ for jax_rhs in jax_exprs[1:]:
884
+ result = self._jax_binary(
885
+ result, jax_rhs, jax_op, at_least_int=at_least_int, check_dtype=check_dtype)
886
+ return result
887
+
888
+ # ===========================================================================
889
+ # arithmetic
890
+ # ===========================================================================
891
+
892
+ def _jax_arithmetic(self, expr, aux):
893
+ JaxRDDLCompiler._check_valid_op(expr, {'-', '+', '*', '/'})
830
894
  _, op = expr.etype
831
-
832
- # if expression is non-fluent, always use the exact operation
833
- if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(expr):
834
- valid_ops = self.EXACT_OPS['relational']
835
- else:
836
- valid_ops = self.OPS['relational']
837
- JaxRDDLCompiler._check_valid_op(expr, valid_ops)
838
-
839
- # recursively compile arguments
840
- JaxRDDLCompiler._check_num_args(expr, 2)
841
- lhs, rhs = expr.args
842
- jax_lhs = self._jax(lhs, init_params)
843
- jax_rhs = self._jax(rhs, init_params)
844
- jax_op = valid_ops[op](expr.id, init_params)
845
- return self._jax_binary(jax_lhs, jax_rhs, jax_op, at_least_int=True)
846
-
847
- def _jax_logical(self, expr, init_params):
895
+ if op == '-':
896
+ if len(expr.args) == 1:
897
+ return self._jax_negate(expr, aux)
898
+ else:
899
+ return self._jax_subtract(expr, aux)
900
+ elif op == '/':
901
+ return self._jax_divide(expr, aux)
902
+ elif op == '+':
903
+ return self._jax_add(expr, aux)
904
+ elif op == '*':
905
+ return self._jax_multiply(expr, aux)
906
+
907
+ def _jax_negate(self, expr, aux):
908
+ return self._jax_unary_helper(expr, aux, jnp.negative, at_least_int=True)
909
+
910
+ def _jax_add(self, expr, aux):
911
+ return self._jax_nary_helper(expr, aux, jnp.add, at_least_int=True)
912
+
913
+ def _jax_subtract(self, expr, aux):
914
+ return self._jax_binary_helper(expr, aux, jnp.subtract, at_least_int=True)
915
+
916
+ def _jax_multiply(self, expr, aux):
917
+ return self._jax_nary_helper(expr, aux, jnp.multiply, at_least_int=True)
918
+
919
+ def _jax_divide(self, expr, aux):
920
+ return self._jax_binary_helper(expr, aux, jnp.divide, at_least_int=True)
921
+
922
+ # ===========================================================================
923
+ # relational
924
+ # ===========================================================================
925
+
926
+ def _jax_relational(self, expr, aux):
927
+ JaxRDDLCompiler._check_valid_op(expr, {'>=', '<=', '>', '<', '==', '~='})
848
928
  _, op = expr.etype
929
+ if op == '>=':
930
+ return self._jax_greater_equal(expr, aux)
931
+ elif op == '<=':
932
+ return self._jax_less_equal(expr, aux)
933
+ elif op == '>':
934
+ return self._jax_greater(expr, aux)
935
+ elif op == '<':
936
+ return self._jax_less(expr, aux)
937
+ elif op == '==':
938
+ return self._jax_equal(expr, aux)
939
+ elif op == '~=':
940
+ return self._jax_not_equal(expr, aux)
941
+
942
+ def _jax_greater_equal(self, expr, aux):
943
+ return self._jax_binary_helper(expr, aux, jnp.greater_equal, at_least_int=True)
849
944
 
850
- # if expression is non-fluent, always use the exact operation
851
- if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(expr):
852
- valid_ops = self.EXACT_OPS['logical']
853
- logical_not_op = self.EXACT_OPS['logical_not']
854
- else:
855
- valid_ops = self.OPS['logical']
856
- logical_not_op = self.OPS['logical_not']
857
- JaxRDDLCompiler._check_valid_op(expr, valid_ops)
858
-
859
- # recursively compile arguments
860
- args = expr.args
861
- n = len(args)
862
- if n == 1 and op == '~':
863
- arg, = args
864
- jax_expr = self._jax(arg, init_params)
865
- jax_op = logical_not_op(expr.id, init_params)
866
- return self._jax_unary(jax_expr, jax_op, check_dtype=bool)
867
-
868
- elif n == 2 or (n >= 2 and op in {'^', '&', '|'}):
869
- jax_exprs = [self._jax(arg, init_params) for arg in args]
870
- result = jax_exprs[0]
871
- for i, jax_rhs in enumerate(jax_exprs[1:]):
872
- jax_op = valid_ops[op](f'{expr.id}_{op}{i}', init_params)
873
- result = self._jax_binary(result, jax_rhs, jax_op, check_dtype=bool)
874
- return result
875
-
876
- JaxRDDLCompiler._check_num_args(expr, 2)
945
+ def _jax_less_equal(self, expr, aux):
946
+ return self._jax_binary_helper(expr, aux, jnp.less_equal, at_least_int=True)
947
+
948
+ def _jax_greater(self, expr, aux):
949
+ return self._jax_binary_helper(expr, aux, jnp.greater, at_least_int=True)
877
950
 
878
- def _jax_aggregation(self, expr, init_params):
879
- ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_CAST']
951
+ def _jax_less(self, expr, aux):
952
+ return self._jax_binary_helper(expr, aux, jnp.less, at_least_int=True)
953
+
954
+ def _jax_equal(self, expr, aux):
955
+ return self._jax_binary_helper(expr, aux, jnp.equal, at_least_int=True)
956
+
957
+ def _jax_not_equal(self, expr, aux):
958
+ return self._jax_binary_helper(expr, aux, jnp.not_equal, at_least_int=True)
959
+
960
+ # ===========================================================================
961
+ # logical
962
+ # ===========================================================================
963
+
964
+ def _jax_logical(self, expr, aux):
965
+ JaxRDDLCompiler._check_valid_op(expr, {'^', '&', '|', '~', '=>', '<=>'})
880
966
  _, op = expr.etype
881
-
882
- # if expression is non-fluent, always use the exact operation
883
- if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(expr):
884
- valid_ops = self.EXACT_OPS['aggregation']
885
- else:
886
- valid_ops = self.OPS['aggregation']
887
- JaxRDDLCompiler._check_valid_op(expr, valid_ops)
888
- is_floating = op not in self.AGGREGATION_BOOL
889
-
890
- # recursively compile arguments
891
- * _, arg = expr.args
892
- _, axes = self.traced.cached_sim_info(expr)
893
- jax_expr = self._jax(arg, init_params)
894
- jax_op = valid_ops[op](expr.id, init_params)
895
-
896
- def _jax_wrapped_aggregation(x, params, key):
897
- sample, key, err, params = jax_expr(x, params, key)
898
- if is_floating:
899
- sample = self.ONE * sample
967
+ if op == '~':
968
+ if len(expr.args) == 1:
969
+ return self._jax_not(expr, aux)
900
970
  else:
901
- invalid_cast = jnp.logical_not(jnp.can_cast(sample, bool))
902
- err |= (invalid_cast * ERR)
903
- sample, params = jax_op(sample, axis=axes, params=params)
904
- return sample, key, err, params
905
-
906
- return _jax_wrapped_aggregation
907
-
908
- def _jax_functional(self, expr, init_params):
971
+ return self._jax_xor(expr, aux)
972
+ elif op == '^' or op == '&':
973
+ return self._jax_and(expr, aux)
974
+ elif op == '|':
975
+ return self._jax_or(expr, aux)
976
+ elif op == '=>':
977
+ return self._jax_implies(expr, aux)
978
+ elif op == '<=>':
979
+ return self._jax_equiv(expr, aux)
980
+
981
+ def _jax_not(self, expr, aux):
982
+ return self._jax_unary_helper(expr, aux, jnp.logical_not, check_dtype=jnp.bool_)
983
+
984
+ def _jax_and(self, expr, aux):
985
+ return self._jax_nary_helper(expr, aux, jnp.logical_and, check_dtype=jnp.bool_)
986
+
987
+ def _jax_or(self, expr, aux):
988
+ return self._jax_nary_helper(expr, aux, jnp.logical_or, check_dtype=jnp.bool_)
989
+
990
+ def _jax_xor(self, expr, aux):
991
+ return self._jax_binary_helper(expr, aux, jnp.logical_xor, check_dtype=jnp.bool_)
992
+
993
+ def _jax_implies(self, expr, aux):
994
+ def implies_op(x, y):
995
+ return jnp.logical_or(jnp.logical_not(x), y)
996
+ return self._jax_binary_helper(expr, aux, implies_op, check_dtype=jnp.bool_)
997
+
998
+ def _jax_equiv(self, expr, aux):
999
+ return self._jax_binary_helper(expr, aux, jnp.equal, check_dtype=jnp.bool_)
1000
+
1001
+ # ===========================================================================
1002
+ # aggregation
1003
+ # ===========================================================================
1004
+
1005
+ def _jax_aggregation(self, expr, aux):
1006
+ JaxRDDLCompiler._check_valid_op(expr, {'sum', 'avg', 'prod', 'minimum', 'maximum',
1007
+ 'forall', 'exists', 'argmin', 'argmax'})
909
1008
  _, op = expr.etype
910
-
911
- # if expression is non-fluent, always use the exact operation
912
- if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(expr):
913
- unary_ops = self.EXACT_OPS['unary']
914
- binary_ops = self.EXACT_OPS['binary']
915
- else:
916
- unary_ops = self.OPS['unary']
917
- binary_ops = self.OPS['binary']
918
-
919
- # recursively compile arguments
920
- if op in unary_ops:
921
- JaxRDDLCompiler._check_num_args(expr, 1)
922
- arg, = expr.args
923
- jax_expr = self._jax(arg, init_params)
924
- jax_op = unary_ops[op](expr.id, init_params)
925
- return self._jax_unary(jax_expr, jax_op, at_least_int=True)
926
-
927
- elif op in binary_ops:
928
- JaxRDDLCompiler._check_num_args(expr, 2)
929
- lhs, rhs = expr.args
930
- jax_lhs = self._jax(lhs, init_params)
931
- jax_rhs = self._jax(rhs, init_params)
932
- jax_op = binary_ops[op](expr.id, init_params)
933
- return self._jax_binary(jax_lhs, jax_rhs, jax_op, at_least_int=True)
934
-
935
- raise RDDLNotImplementedError(
936
- f'Function {op} is not supported.\n' + print_stack_trace(expr))
937
-
938
- def _jax_pyfunc(self, expr, init_params):
1009
+ if op == 'sum':
1010
+ return self._jax_sum(expr, aux)
1011
+ elif op == 'avg':
1012
+ return self._jax_avg(expr, aux)
1013
+ elif op == 'prod':
1014
+ return self._jax_prod(expr, aux)
1015
+ elif op == 'minimum':
1016
+ return self._jax_minimum(expr, aux)
1017
+ elif op == 'maximum':
1018
+ return self._jax_maximum(expr, aux)
1019
+ elif op == 'forall':
1020
+ return self._jax_forall(expr, aux)
1021
+ elif op == 'exists':
1022
+ return self._jax_exists(expr, aux)
1023
+ elif op == 'argmin':
1024
+ return self._jax_argmin(expr, aux)
1025
+ elif op == 'argmax':
1026
+ return self._jax_argmax(expr, aux)
1027
+
1028
+ def _jax_aggregation_helper(self, expr, aux, jax_op, is_bool=False):
1029
+ arg = expr.args[-1]
1030
+ _, axes = self.traced.cached_sim_info(expr)
1031
+ jax_expr = self._jax(arg, aux)
1032
+ return self._jax_unary(
1033
+ jax_expr,
1034
+ jax_op=partial(jax_op, axis=axes),
1035
+ at_least_int=not is_bool,
1036
+ check_dtype=jnp.bool_ if is_bool else None
1037
+ )
1038
+
1039
+ def _jax_sum(self, expr, aux):
1040
+ return self._jax_aggregation_helper(expr, aux, jnp.sum)
1041
+
1042
+ def _jax_avg(self, expr, aux):
1043
+ return self._jax_aggregation_helper(expr, aux, jnp.mean)
1044
+
1045
+ def _jax_prod(self, expr, aux):
1046
+ return self._jax_aggregation_helper(expr, aux, jnp.prod)
1047
+
1048
+ def _jax_minimum(self, expr, aux):
1049
+ return self._jax_aggregation_helper(expr, aux, jnp.min)
1050
+
1051
+ def _jax_maximum(self, expr, aux):
1052
+ return self._jax_aggregation_helper(expr, aux, jnp.max)
1053
+
1054
+ def _jax_forall(self, expr, aux):
1055
+ return self._jax_aggregation_helper(expr, aux, jnp.all, is_bool=True)
1056
+
1057
+ def _jax_exists(self, expr, aux):
1058
+ return self._jax_aggregation_helper(expr, aux, jnp.any, is_bool=True)
1059
+
1060
+ def _jax_argmin(self, expr, aux):
1061
+ return self._jax_aggregation_helper(expr, aux, jnp.argmin)
1062
+
1063
+ def _jax_argmax(self, expr, aux):
1064
+ return self._jax_aggregation_helper(expr, aux, jnp.argmax)
1065
+
1066
+ # ===========================================================================
1067
+ # function
1068
+ # ===========================================================================
1069
+
1070
+ def _jax_function(self, expr, aux):
1071
+ JaxRDDLCompiler._check_valid_op(expr, {'abs', 'sgn', 'round', 'floor', 'ceil',
1072
+ 'cos', 'sin', 'tan', 'acos', 'asin', 'atan',
1073
+ 'cosh', 'sinh', 'tanh', 'exp', 'ln', 'sqrt',
1074
+ 'lngamma', 'gamma',
1075
+ 'div', 'mod', 'fmod', 'min', 'max',
1076
+ 'pow', 'log', 'hypot'})
1077
+ _, op = expr.etype
1078
+
1079
+ # unary functions
1080
+ if op == 'abs':
1081
+ return self._jax_abs(expr, aux)
1082
+ elif op == 'sgn':
1083
+ return self._jax_sgn(expr, aux)
1084
+ elif op == 'round':
1085
+ return self._jax_round(expr, aux)
1086
+ elif op == 'floor':
1087
+ return self._jax_floor(expr, aux)
1088
+ elif op == 'ceil':
1089
+ return self._jax_ceil(expr, aux)
1090
+ elif op == 'cos':
1091
+ return self._jax_cos(expr, aux)
1092
+ elif op == 'sin':
1093
+ return self._jax_sin(expr, aux)
1094
+ elif op == 'tan':
1095
+ return self._jax_tan(expr, aux)
1096
+ elif op == 'acos':
1097
+ return self._jax_acos(expr, aux)
1098
+ elif op == 'asin':
1099
+ return self._jax_asin(expr, aux)
1100
+ elif op == 'atan':
1101
+ return self._jax_atan(expr, aux)
1102
+ elif op == 'cosh':
1103
+ return self._jax_cosh(expr, aux)
1104
+ elif op == 'sinh':
1105
+ return self._jax_sinh(expr, aux)
1106
+ elif op == 'tanh':
1107
+ return self._jax_tanh(expr, aux)
1108
+ elif op == 'exp':
1109
+ return self._jax_exp(expr, aux)
1110
+ elif op == 'ln':
1111
+ return self._jax_ln(expr, aux)
1112
+ elif op == 'sqrt':
1113
+ return self._jax_sqrt(expr, aux)
1114
+ elif op == 'lngamma':
1115
+ return self._jax_lngamma(expr, aux)
1116
+ elif op == 'gamma':
1117
+ return self._jax_gamma(expr, aux)
1118
+
1119
+ # binary functions
1120
+ elif op == 'div':
1121
+ return self._jax_div(expr, aux)
1122
+ elif op == 'mod':
1123
+ return self._jax_mod(expr, aux)
1124
+ elif op == 'fmod':
1125
+ return self._jax_fmod(expr, aux)
1126
+ elif op == 'min':
1127
+ return self._jax_min(expr, aux)
1128
+ elif op == 'max':
1129
+ return self._jax_max(expr, aux)
1130
+ elif op == 'pow':
1131
+ return self._jax_pow(expr, aux)
1132
+ elif op == 'log':
1133
+ return self._jax_log(expr, aux)
1134
+ elif op == 'hypot':
1135
+ return self._jax_hypot(expr, aux)
1136
+
1137
+ def _jax_abs(self, expr, aux):
1138
+ return self._jax_unary_helper(expr, aux, jnp.abs, at_least_int=True)
1139
+
1140
+ def _jax_sgn(self, expr, aux):
1141
+ return self._jax_unary_helper(expr, aux, jnp.sign, at_least_int=True)
1142
+
1143
+ def _jax_round(self, expr, aux):
1144
+ return self._jax_unary_helper(expr, aux, jnp.round, at_least_int=True)
1145
+
1146
+ def _jax_floor(self, expr, aux):
1147
+ return self._jax_unary_helper(expr, aux, jnp.floor, at_least_int=True)
1148
+
1149
+ def _jax_ceil(self, expr, aux):
1150
+ return self._jax_unary_helper(expr, aux, jnp.ceil, at_least_int=True)
1151
+
1152
+ def _jax_cos(self, expr, aux):
1153
+ return self._jax_unary_helper(expr, aux, jnp.cos, at_least_int=True)
1154
+
1155
+ def _jax_sin(self, expr, aux):
1156
+ return self._jax_unary_helper(expr, aux, jnp.sin, at_least_int=True)
1157
+
1158
+ def _jax_tan(self, expr, aux):
1159
+ return self._jax_unary_helper(expr, aux, jnp.tan, at_least_int=True)
1160
+
1161
+ def _jax_acos(self, expr, aux):
1162
+ return self._jax_unary_helper(expr, aux, jnp.arccos, at_least_int=True)
1163
+
1164
+ def _jax_asin(self, expr, aux):
1165
+ return self._jax_unary_helper(expr, aux, jnp.arcsin, at_least_int=True)
1166
+
1167
+ def _jax_atan(self, expr, aux):
1168
+ return self._jax_unary_helper(expr, aux, jnp.arctan, at_least_int=True)
1169
+
1170
+ def _jax_cosh(self, expr, aux):
1171
+ return self._jax_unary_helper(expr, aux, jnp.cosh, at_least_int=True)
1172
+
1173
+ def _jax_sinh(self, expr, aux):
1174
+ return self._jax_unary_helper(expr, aux, jnp.sinh, at_least_int=True)
1175
+
1176
+ def _jax_tanh(self, expr, aux):
1177
+ return self._jax_unary_helper(expr, aux, jnp.tanh, at_least_int=True)
1178
+
1179
+ def _jax_exp(self, expr, aux):
1180
+ return self._jax_unary_helper(expr, aux, jnp.exp, at_least_int=True)
1181
+
1182
+ def _jax_ln(self, expr, aux):
1183
+ return self._jax_unary_helper(expr, aux, jnp.ln, at_least_int=True)
1184
+
1185
+ def _jax_sqrt(self, expr, aux):
1186
+ return self._jax_unary_helper(expr, aux, jnp.sqrt, at_least_int=True)
1187
+
1188
+ def _jax_lngamma(self, expr, aux):
1189
+ return self._jax_unary_helper(expr, aux, scipy.special.gammaln, at_least_int=True)
1190
+
1191
+ def _jax_gamma(self, expr, aux):
1192
+ return self._jax_unary_helper(expr, aux, scipy.special.gamma, at_least_int=True)
1193
+
1194
+ def _jax_div(self, expr, aux):
1195
+ return self._jax_binary_helper(expr, aux, jnp.floor_divide, at_least_int=True)
1196
+
1197
+ def _jax_mod(self, expr, aux):
1198
+ return self._jax_binary_helper(expr, aux, jnp.mod, at_least_int=True)
1199
+
1200
+ def _jax_fmod(self, expr, aux):
1201
+ return self._jax_binary_helper(expr, aux, jnp.mod, at_least_int=True)
1202
+
1203
+ def _jax_min(self, expr, aux):
1204
+ return self._jax_binary_helper(expr, aux, jnp.minimum, at_least_int=True)
1205
+
1206
+ def _jax_max(self, expr, aux):
1207
+ return self._jax_binary_helper(expr, aux, jnp.maximum, at_least_int=True)
1208
+
1209
+ def _jax_pow(self, expr, aux):
1210
+ return self._jax_binary_helper(expr, aux, jnp.power, at_least_int=True)
1211
+
1212
+ def _jax_log(self, expr, aux):
1213
+ def log_op(x, y):
1214
+ return jnp.log(x) / jnp.log(y)
1215
+ return self._jax_binary_helper(expr, aux, log_op, at_least_int=True)
1216
+
1217
+ def _jax_hypot(self, expr, aux):
1218
+ return self._jax_binary_helper(expr, aux, jnp.hypot, at_least_int=True)
1219
+
1220
+ # ===========================================================================
1221
+ # external function
1222
+ # ===========================================================================
1223
+
1224
+ def _jax_pyfunc(self, expr, aux):
939
1225
  NORMAL = JaxRDDLCompiler.ERROR_CODES['NORMAL']
940
1226
 
941
1227
  # get the Python function by name
@@ -957,25 +1243,21 @@ class JaxRDDLCompiler:
957
1243
  require_dims = self.rddl.object_counts(captured_types)
958
1244
 
959
1245
  # compile the inputs to the function
960
- jax_inputs = [self._jax(arg, init_params) for arg in args]
1246
+ jax_inputs = [self._jax(arg, aux) for arg in args]
961
1247
 
962
1248
  # compile the function evaluation function
963
- def _jax_wrapped_external_function(x, params, key):
1249
+ def _jax_wrapped_external_function(fls, nfls, params, key):
964
1250
 
965
1251
  # evaluate inputs to the function
966
1252
  # first dimensions are non-captured vars in outer scope followed by all the _
967
1253
  error = NORMAL
968
1254
  flat_samples = []
969
1255
  for jax_expr in jax_inputs:
970
- sample, key, err, params = jax_expr(x, params, key)
971
- shape = jnp.shape(sample)
972
- first_dim = 1
973
- for dim in shape[:num_free_vars]:
974
- first_dim *= dim
975
- new_shape = (first_dim,) + shape[num_free_vars:]
1256
+ sample, key, err, params = jax_expr(fls, nfls, params, key)
1257
+ new_shape = (-1,) + jnp.shape(sample)[num_free_vars:]
976
1258
  flat_sample = jnp.reshape(sample, new_shape)
977
1259
  flat_samples.append(flat_sample)
978
- error |= err
1260
+ error = error | err
979
1261
 
980
1262
  # now all the inputs have dimensions equal to (k,) + the number of _ occurences
981
1263
  # k is the number of possible non-captured object combinations
@@ -986,7 +1268,8 @@ class JaxRDDLCompiler:
986
1268
  if not isinstance(sample, jnp.ndarray):
987
1269
  raise ValueError(
988
1270
  f'Output of external Python function <{pyfunc_name}> '
989
- f'is not a JAX array.\n' + print_stack_trace(expr))
1271
+ f'is not a JAX array.\n' + print_stack_trace(expr)
1272
+ )
990
1273
 
991
1274
  pyfunc_dims = jnp.shape(sample)[1:]
992
1275
  if len(require_dims) != len(pyfunc_dims):
@@ -994,14 +1277,16 @@ class JaxRDDLCompiler:
994
1277
  f'External Python function <{pyfunc_name}> returned array with '
995
1278
  f'{len(pyfunc_dims)} dimensions, which does not match the '
996
1279
  f'number of captured parameter(s) {len(require_dims)}.\n' +
997
- print_stack_trace(expr))
1280
+ print_stack_trace(expr)
1281
+ )
998
1282
  for (param, require_dim, actual_dim) in zip(captured_vars, require_dims, pyfunc_dims):
999
1283
  if require_dim != actual_dim:
1000
1284
  raise ValueError(
1001
1285
  f'External Python function <{pyfunc_name}> returned array with '
1002
1286
  f'{actual_dim} elements for captured parameter <{param}>, '
1003
1287
  f'which does not match the number of objects {require_dim}.\n' +
1004
- print_stack_trace(expr))
1288
+ print_stack_trace(expr)
1289
+ )
1005
1290
 
1006
1291
  # unravel the combinations k back into their original dimensions
1007
1292
  sample = jnp.reshape(sample, free_dims + pyfunc_dims)
@@ -1017,111 +1302,75 @@ class JaxRDDLCompiler:
1017
1302
  # control flow
1018
1303
  # ===========================================================================
1019
1304
 
1020
def _jax_control(self, expr, aux):
    """Dispatch compilation of a control-flow expression to its handler.

    Only 'if' and 'switch' are accepted; _check_valid_op raises for
    anything else, so the dispatch below sees a valid op.
    """
    JaxRDDLCompiler._check_valid_op(expr, {'if', 'switch'})
    _, op = expr.etype
    handler = self._jax_if if op == 'if' else self._jax_switch
    return handler(expr, aux)
 
1030
def _jax_if(self, expr, aux):
    """Compile if-then-else: both branches are evaluated, then merged per-element.

    The predicate sample is compared against 0.5 inside jnp.where, and an
    INVALID_CAST error flag is raised when it cannot be cast to bool.
    """
    ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_CAST']
    JaxRDDLCompiler._check_num_args(expr, 3)
    pred, if_true, if_false = expr.args

    # compile the three sub-expressions up front
    jax_pred = self._jax(pred, aux)
    jax_true = self._jax(if_true, aux)
    jax_false = self._jax(if_false, aux)

    def _jax_wrapped_if_then_else(fls, nfls, params, key):
        # evaluate predicate and both branches, threading the key through all
        pred_sample, key, pred_err, params = jax_pred(fls, nfls, params, key)
        true_sample, key, true_err, params = jax_true(fls, nfls, params, key)
        false_sample, key, false_err, params = jax_false(fls, nfls, params, key)

        # select per-element; predicate > 0.5 counts as true
        sample = jnp.where(pred_sample > 0.5, true_sample, false_sample)

        # merge error flags and record an invalid predicate cast
        bad_cast = jnp.logical_not(jnp.can_cast(pred_sample, jnp.bool_))
        err = pred_err | true_err | false_err | (bad_cast * ERR)
        return sample, key, err, params

    return _jax_wrapped_if_then_else
1333
 
1059
- def _jax_switch(self, expr, init_params):
1060
- pred, *_ = expr.args
1061
-
1062
- # if predicate is non-fluent, always use the exact operation
1063
- # case conditions are currently only literals so they are non-fluent
1064
- if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(pred):
1065
- switch_op = self.EXACT_OPS['control']['switch']
1066
- else:
1067
- switch_op = self.OPS['control']['switch']
1068
- jax_op = switch_op(expr.id, init_params)
1069
-
1334
+ def _jax_switch(self, expr, aux):
1335
+
1070
1336
  # recursively compile predicate
1071
- jax_pred = self._jax(pred, init_params)
1337
+ pred = expr.args[0]
1338
+ jax_pred = self._jax(pred, aux)
1072
1339
 
1073
1340
  # recursively compile cases
1074
1341
  cases, default = self.traced.cached_sim_info(expr)
1075
- jax_default = None if default is None else self._jax(default, init_params)
1076
- jax_cases = [(jax_default if _case is None else self._jax(_case, init_params))
1077
- for _case in cases]
1342
+ jax_default = None if default is None else self._jax(default, aux)
1343
+ jax_cases = [
1344
+ (jax_default if _case is None else self._jax(_case, aux))
1345
+ for _case in cases
1346
+ ]
1078
1347
 
1079
- def _jax_wrapped_switch(x, params, key):
1348
+ def _jax_wrapped_switch(fls, nfls, params, key):
1080
1349
 
1081
1350
  # sample predicate
1082
- sample_pred, key, err, params = jax_pred(x, params, key)
1351
+ sample_pred, key, err, params = jax_pred(fls, nfls, params, key)
1083
1352
 
1084
1353
  # sample cases
1085
- sample_cases = [None] * len(jax_cases)
1086
- for (i, jax_case) in enumerate(jax_cases):
1087
- sample_cases[i], key, err_case, params = jax_case(x, params, key)
1088
- err |= err_case
1354
+ sample_cases = []
1355
+ for jax_case in jax_cases:
1356
+ sample, key, err_case, params = jax_case(fls, nfls, params, key)
1357
+ sample_cases.append(sample)
1358
+ err = err | err_case
1089
1359
  sample_cases = jnp.asarray(sample_cases)
1090
1360
  sample_cases = jnp.asarray(sample_cases, dtype=self._fix_dtype(sample_cases))
1091
1361
 
1092
1362
  # predicate (enum) is an integer - use it to extract from case array
1093
- sample, params = jax_op(sample_pred, sample_cases, params)
1363
+ sample_pred = jnp.asarray(sample_pred[jnp.newaxis, ...], dtype=self.INT)
1364
+ sample = jnp.take_along_axis(sample_cases, sample_pred, axis=0)
1365
+ assert sample.shape[0] == 1
1366
+ sample = sample[0, ...]
1094
1367
  return sample, key, err, params
1095
-
1096
1368
  return _jax_wrapped_switch
1097
1369
 
1098
1370
  # ===========================================================================
1099
1371
  # random variables
1100
1372
  # ===========================================================================
1101
1373
 
1102
- # distributions with complete reparameterization support:
1103
- # KronDelta: complete
1104
- # DiracDelta: complete
1105
- # Uniform: complete
1106
- # Bernoulli: complete (subclass uses Gumbel-softmax)
1107
- # Normal: complete
1108
- # Exponential: complete
1109
- # Geometric: complete
1110
- # Weibull: complete
1111
- # Pareto: complete
1112
- # Gumbel: complete
1113
- # Laplace: complete
1114
- # Cauchy: complete
1115
- # Gompertz: complete
1116
- # Kumaraswamy: complete
1117
- # Discrete: complete (subclass uses Gumbel-softmax)
1118
- # UnnormDiscrete: complete (subclass uses Gumbel-softmax)
1119
- # Discrete(p): complete (subclass uses Gumbel-softmax)
1120
- # UnnormDiscrete(p): complete (subclass uses Gumbel-softmax)
1121
- # Poisson (subclass uses Gumbel-softmax or Poisson process trick)
1122
- # Binomial (subclass uses Gumbel-softmax or Normal approximation)
1123
- # NegativeBinomial (subclass uses Poisson-Gamma mixture)
1124
-
1125
1374
  # distributions which seem to support backpropagation (need more testing):
1126
1375
  # Beta
1127
1376
  # Student
@@ -1132,656 +1381,587 @@ class JaxRDDLCompiler:
1132
1381
  # distributions with incomplete reparameterization support (TODO):
1133
1382
  # Multinomial
1134
1383
 
1135
def _jax_random(self, expr, aux):
    """Dispatch compilation of a random-sampling expression by distribution name.

    The Discrete/UnnormDiscrete variants share one compiler, distinguished by
    the ``unnorm`` flag (whether the probabilities need normalization).
    Raises RDDLNotImplementedError for unsupported distributions.
    """
    _, name = expr.etype
    if name == 'KronDelta':
        return self._jax_kron(expr, aux)
    elif name == 'DiracDelta':
        return self._jax_dirac(expr, aux)
    elif name == 'Uniform':
        return self._jax_uniform(expr, aux)
    elif name == 'Bernoulli':
        return self._jax_bernoulli(expr, aux)
    elif name == 'Normal':
        return self._jax_normal(expr, aux)
    elif name == 'Poisson':
        return self._jax_poisson(expr, aux)
    elif name == 'Exponential':
        return self._jax_exponential(expr, aux)
    elif name == 'Weibull':
        return self._jax_weibull(expr, aux)
    elif name == 'Gamma':
        return self._jax_gamma(expr, aux)
    elif name == 'Binomial':
        return self._jax_binomial(expr, aux)
    elif name == 'NegativeBinomial':
        return self._jax_negative_binomial(expr, aux)
    elif name == 'Beta':
        return self._jax_beta(expr, aux)
    elif name == 'Geometric':
        return self._jax_geometric(expr, aux)
    elif name == 'Pareto':
        return self._jax_pareto(expr, aux)
    elif name == 'Student':
        return self._jax_student(expr, aux)
    elif name == 'Gumbel':
        return self._jax_gumbel(expr, aux)
    elif name == 'Laplace':
        return self._jax_laplace(expr, aux)
    elif name == 'Cauchy':
        return self._jax_cauchy(expr, aux)
    elif name == 'Gompertz':
        return self._jax_gompertz(expr, aux)
    elif name == 'ChiSquare':
        return self._jax_chisquare(expr, aux)
    elif name == 'Kumaraswamy':
        return self._jax_kumaraswamy(expr, aux)
    elif name == 'Discrete':
        return self._jax_discrete(expr, aux, unnorm=False)
    elif name == 'UnnormDiscrete':
        return self._jax_discrete(expr, aux, unnorm=True)
    elif name == 'Discrete(p)':
        return self._jax_discrete_pvar(expr, aux, unnorm=False)
    elif name == 'UnnormDiscrete(p)':
        return self._jax_discrete_pvar(expr, aux, unnorm=True)
    else:
        raise RDDLNotImplementedError(
            f'Distribution {name} is not supported.\n' + print_stack_trace(expr))
 
1191
def _jax_kron(self, expr, aux):
    """Compile KronDelta(x): deterministic, passes the sample through unchanged.

    Raises the INVALID_PARAM_KRON_DELTA error flag when the argument sample
    cannot be cast to the compiler's integer dtype.
    """
    ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_KRON_DELTA']
    JaxRDDLCompiler._check_num_args(expr, 1)
    inner_expr, = expr.args
    jax_inner = self._jax(inner_expr, aux)

    # just check that the sample can be cast to int
    def _jax_wrapped_distribution_kron(fls, nfls, params, key):
        sample, key, err, params = jax_inner(fls, nfls, params, key)
        not_int_castable = jnp.logical_not(jnp.can_cast(sample, self.INT))
        return sample, key, err | (not_int_castable * ERR), params

    return _jax_wrapped_distribution_kron
 
1206
def _jax_dirac(self, expr, aux):
    """Compile DiracDelta(x): deterministic, simply the argument cast to real."""
    JaxRDDLCompiler._check_num_args(expr, 1)
    inner_expr, = expr.args
    return self._jax(inner_expr, aux, dtype=self.REAL)
 
1212
def _jax_uniform(self, expr, aux):
    """Compile Uniform(lb, ub) with the reparameterization lb + (ub - lb) * U(0, 1).

    Raises INVALID_PARAM_UNIFORM when lb > ub anywhere.
    """
    ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_UNIFORM']
    JaxRDDLCompiler._check_num_args(expr, 2)
    lb_expr, ub_expr = expr.args
    jax_lb = self._jax(lb_expr, aux)
    jax_ub = self._jax(ub_expr, aux)

    def _jax_wrapped_distribution_uniform(fls, nfls, params, key):
        lb, key, err1, params = jax_lb(fls, nfls, params, key)
        ub, key, err2, params = jax_ub(fls, nfls, params, key)
        key, subkey = random.split(key)
        noise = random.uniform(key=subkey, shape=jnp.shape(lb), dtype=self.REAL)
        sample = lb + (ub - lb) * noise
        invalid = jnp.logical_not(jnp.all(lb <= ub))
        return sample, key, err1 | err2 | (invalid * ERR), params

    return _jax_wrapped_distribution_uniform
 
1233
def _jax_normal(self, expr, aux):
    """Compile Normal(mean, var) with the reparameterization mean + sqrt(var) * N(0, 1).

    Raises INVALID_PARAM_NORMAL when var < 0 anywhere.
    """
    ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_NORMAL']
    JaxRDDLCompiler._check_num_args(expr, 2)
    mean_expr, var_expr = expr.args
    jax_mean = self._jax(mean_expr, aux)
    jax_var = self._jax(var_expr, aux)

    def _jax_wrapped_distribution_normal(fls, nfls, params, key):
        mean, key, err1, params = jax_mean(fls, nfls, params, key)
        var, key, err2, params = jax_var(fls, nfls, params, key)
        key, subkey = random.split(key)
        noise = random.normal(key=subkey, shape=jnp.shape(mean), dtype=self.REAL)
        sample = mean + jnp.sqrt(var) * noise
        invalid = jnp.logical_not(jnp.all(var >= 0))
        return sample, key, err1 | err2 | (invalid * ERR), params

    return _jax_wrapped_distribution_normal
 
1255
def _jax_exponential(self, expr, aux):
    """Compile Exponential(scale) with the reparameterization scale * Exp(1).

    Raises INVALID_PARAM_EXPONENTIAL when scale <= 0 anywhere.
    """
    ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_EXPONENTIAL']
    JaxRDDLCompiler._check_num_args(expr, 1)
    scale_expr, = expr.args
    jax_scale = self._jax(scale_expr, aux)

    def _jax_wrapped_distribution_exp(fls, nfls, params, key):
        scale, key, err, params = jax_scale(fls, nfls, params, key)
        key, subkey = random.split(key)
        unit_sample = random.exponential(
            key=subkey, shape=jnp.shape(scale), dtype=self.REAL)
        invalid = jnp.logical_not(jnp.all(scale > 0))
        return scale * unit_sample, key, err | (invalid * ERR), params

    return _jax_wrapped_distribution_exp
 
1274
def _jax_weibull(self, expr, aux):
    """Compile Weibull(shape, scale) sampling via jax.random.weibull_min.

    Raises INVALID_PARAM_WEIBULL unless shape > 0 and scale > 0 everywhere.
    """
    ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_WEIBULL']
    JaxRDDLCompiler._check_num_args(expr, 2)

    arg_shape, arg_scale = expr.args
    jax_shape = self._jax(arg_shape, aux)
    jax_scale = self._jax(arg_scale, aux)

    # reparameterization trick W(s, r) = r * (-ln(1 - U(0, 1))) ** (1 / s)
    def _jax_wrapped_distribution_weibull(fls, nfls, params, key):
        shape, key, err1, params = jax_shape(fls, nfls, params, key)
        scale, key, err2, params = jax_scale(fls, nfls, params, key)
        key, subkey = random.split(key)
        sample = random.weibull_min(
            key=subkey, scale=scale, concentration=shape, dtype=self.REAL)
        out_of_bounds = jnp.logical_not(jnp.all(jnp.logical_and(shape > 0, scale > 0)))
        err = err1 | err2 | (out_of_bounds * ERR)
        return sample, key, err, params
    return _jax_wrapped_distribution_weibull
 
1295
def _jax_bernoulli(self, expr, aux):
    """Compile Bernoulli(p) sampling via jax.random.bernoulli.

    Raises INVALID_PARAM_BERNOULLI unless 0 <= p <= 1 everywhere.
    """
    ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_BERNOULLI']
    JaxRDDLCompiler._check_num_args(expr, 1)
    prob_expr, = expr.args

    # recursively compile arguments
    jax_prob = self._jax(prob_expr, aux)

    def _jax_wrapped_distribution_bernoulli(fls, nfls, params, key):
        prob, key, err, params = jax_prob(fls, nfls, params, key)
        key, subkey = random.split(key)
        sample = random.bernoulli(subkey, prob)
        in_range = jnp.logical_and(prob >= 0, prob <= 1)
        invalid = jnp.logical_not(jnp.all(in_range))
        return sample, key, err | (invalid * ERR), params

    return _jax_wrapped_distribution_bernoulli
 
1320
def _jax_poisson(self, expr, aux):
    """Compile Poisson(rate) sampling via jax.random.poisson.

    Raises INVALID_PARAM_POISSON when rate < 0 anywhere.
    """
    ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_POISSON']
    JaxRDDLCompiler._check_num_args(expr, 1)
    rate_expr, = expr.args

    # recursively compile arguments
    jax_rate = self._jax(rate_expr, aux)

    # uses the implicit JAX subroutine
    def _jax_wrapped_distribution_poisson(fls, nfls, params, key):
        rate, key, err, params = jax_rate(fls, nfls, params, key)
        key, subkey = random.split(key)
        sample = random.poisson(key=subkey, lam=rate, dtype=self.INT)
        invalid = jnp.logical_not(jnp.all(rate >= 0))
        return sample, key, err | (invalid * ERR), params

    return _jax_wrapped_distribution_poisson
 
1346
def _jax_gamma(self, expr, aux):
    """Compile Gamma(shape, scale) sampling.

    Uses the partial reparameterization Gamma(s, r) = r * Gamma(s, 1), with
    the implicit JAX subroutine for Gamma(s, 1).
    Raises INVALID_PARAM_GAMMA unless shape > 0 and scale > 0 everywhere.

    NOTE(review): a unary math-function compiler with the same name
    `_jax_gamma` appears earlier in this class; this later definition
    overrides it, so gamma-the-function would dispatch here and fail the
    two-argument check. One of the two should be renamed.
    """
    ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_GAMMA']
    JaxRDDLCompiler._check_num_args(expr, 2)

    arg_shape, arg_scale = expr.args
    jax_shape = self._jax(arg_shape, aux)
    jax_scale = self._jax(arg_scale, aux)

    # partial reparameterization trick Gamma(s, r) = r * Gamma(s, 1)
    # uses the implicit JAX subroutine for Gamma(s, 1)
    def _jax_wrapped_distribution_gamma(fls, nfls, params, key):
        shape, key, err1, params = jax_shape(fls, nfls, params, key)
        scale, key, err2, params = jax_scale(fls, nfls, params, key)
        key, subkey = random.split(key)
        Gamma = random.gamma(key=subkey, a=shape, dtype=self.REAL)
        sample = scale * Gamma
        out_of_bounds = jnp.logical_not(jnp.all(jnp.logical_and(shape > 0, scale > 0)))
        err = err1 | err2 | (out_of_bounds * ERR)
        return sample, key, err, params
    return _jax_wrapped_distribution_gamma
 
1368
def _jax_binomial(self, expr, aux):
    """Compile Binomial(n, p) sampling via jax.random.binomial.

    Trials and prob are cast to real for the sampler (which returns real),
    and the sample is cast back to int.
    Raises INVALID_PARAM_BINOMIAL unless 0 <= p <= 1 and n >= 0 everywhere.
    """
    ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_BINOMIAL']
    JaxRDDLCompiler._check_num_args(expr, 2)
    arg_trials, arg_prob = expr.args

    jax_trials = self._jax(arg_trials, aux)
    jax_prob = self._jax(arg_prob, aux)

    # uses reduction for constant trials
    def _jax_wrapped_distribution_binomial(fls, nfls, params, key):
        trials, key, err2, params = jax_trials(fls, nfls, params, key)
        prob, key, err1, params = jax_prob(fls, nfls, params, key)
        key, subkey = random.split(key)
        trials = jnp.asarray(trials, dtype=self.REAL)
        prob = jnp.asarray(prob, dtype=self.REAL)
        sample = random.binomial(key=subkey, n=trials, p=prob, dtype=self.REAL)
        sample = jnp.asarray(sample, dtype=self.INT)
        out_of_bounds = jnp.logical_not(jnp.all(
            jnp.logical_and(jnp.logical_and(prob >= 0, prob <= 1), trials >= 0)))
        err = err1 | err2 | (out_of_bounds * ERR)
        return sample, key, err, params
    return _jax_wrapped_distribution_binomial
1617
 
1398
- def _jax_negative_binomial(self, expr, init_params):
1618
+ def _jax_negative_binomial(self, expr, aux):
1399
1619
  ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_NEGATIVE_BINOMIAL']
1400
1620
  JaxRDDLCompiler._check_num_args(expr, 2)
1401
1621
  arg_trials, arg_prob = expr.args
1402
1622
 
1403
- # if prob is non-fluent, always use the exact operation
1404
- if self.compile_non_fluent_exact \
1405
- and not self.traced.cached_is_fluent(arg_trials) \
1406
- and not self.traced.cached_is_fluent(arg_prob):
1407
- negbin_op = self.EXACT_OPS['sampling']['NegativeBinomial']
1408
- else:
1409
- negbin_op = self.OPS['sampling']['NegativeBinomial']
1410
- jax_op = negbin_op(expr.id, init_params)
1411
-
1412
- jax_trials = self._jax(arg_trials, init_params)
1413
- jax_prob = self._jax(arg_prob, init_params)
1623
+ jax_trials = self._jax(arg_trials, aux)
1624
+ jax_prob = self._jax(arg_prob, aux)
1414
1625
 
1415
- # uses the JAX substrate of tensorflow-probability
1416
- def _jax_wrapped_distribution_negative_binomial(x, params, key):
1417
- trials, key, err2, params = jax_trials(x, params, key)
1418
- prob, key, err1, params = jax_prob(x, params, key)
1626
+ # uses tensorflow-probability
1627
+ def _jax_wrapped_distribution_negative_binomial(fls, nfls, params, key):
1628
+ trials, key, err2, params = jax_trials(fls, nfls, params, key)
1629
+ prob, key, err1, params = jax_prob(fls, nfls, params, key)
1419
1630
  key, subkey = random.split(key)
1420
- sample, params = jax_op(subkey, trials, prob, params)
1631
+ trials = jnp.asarray(trials, dtype=self.REAL)
1632
+ prob = jnp.asarray(prob, dtype=self.REAL)
1633
+ dist = tfp.distributions.NegativeBinomial(total_count=trials, probs=1. - prob)
1634
+ sample = jnp.asarray(dist.sample(seed=subkey), dtype=self.INT)
1421
1635
  out_of_bounds = jnp.logical_not(jnp.all(
1422
- (prob >= 0) & (prob <= 1) & (trials > 0)))
1636
+ jnp.logical_and(jnp.logical_and(prob >= 0, prob <= 1), trials > 0)))
1423
1637
  err = err1 | err2 | (out_of_bounds * ERR)
1424
- return sample, key, err, params
1425
-
1638
+ return sample, key, err, params
1426
1639
  return _jax_wrapped_distribution_negative_binomial
1427
1640
 
1428
def _jax_beta(self, expr, aux):
    """Compile Beta(a, b) sampling via jax.random.beta.

    Raises INVALID_PARAM_BETA unless both parameters are > 0 everywhere.
    """
    ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_BETA']
    JaxRDDLCompiler._check_num_args(expr, 2)
    shape_expr, rate_expr = expr.args
    jax_shape = self._jax(shape_expr, aux)
    jax_rate = self._jax(rate_expr, aux)

    # uses the implicit JAX subroutine
    def _jax_wrapped_distribution_beta(fls, nfls, params, key):
        shape, key, err1, params = jax_shape(fls, nfls, params, key)
        rate, key, err2, params = jax_rate(fls, nfls, params, key)
        key, subkey = random.split(key)
        sample = random.beta(key=subkey, a=shape, b=rate, dtype=self.REAL)
        invalid = jnp.logical_not(jnp.all(jnp.logical_and(shape > 0, rate > 0)))
        return sample, key, err1 | err2 | (invalid * ERR), params

    return _jax_wrapped_distribution_beta
 
1448
def _jax_geometric(self, expr, aux):
    """Compile Geometric(p) sampling via jax.random.geometric.

    Raises INVALID_PARAM_GEOMETRIC unless 0 <= p <= 1 everywhere.
    """
    ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_GEOMETRIC']
    JaxRDDLCompiler._check_num_args(expr, 1)
    arg_prob, = expr.args

    # recursively compile arguments
    jax_prob = self._jax(arg_prob, aux)

    def _jax_wrapped_distribution_geometric(fls, nfls, params, key):
        prob, key, err, params = jax_prob(fls, nfls, params, key)
        key, subkey = random.split(key)
        sample = random.geometric(key=subkey, p=prob, dtype=self.INT)
        out_of_bounds = jnp.logical_not(jnp.all(jnp.logical_and(prob >= 0, prob <= 1)))
        err = err | (out_of_bounds * ERR)
        return sample, key, err, params
    return _jax_wrapped_distribution_geometric
 
1473
def _jax_pareto(self, expr, aux):
    """Compile Pareto(shape, scale) sampling.

    Uses the partial reparameterization Pareto(s, r) = r * Pareto(s, 1),
    with the implicit JAX subroutine for Pareto(s, 1).
    Raises INVALID_PARAM_PARETO unless shape > 0 and scale > 0 everywhere.
    """
    ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_PARETO']
    JaxRDDLCompiler._check_num_args(expr, 2)
    shape_expr, scale_expr = expr.args
    jax_shape = self._jax(shape_expr, aux)
    jax_scale = self._jax(scale_expr, aux)

    def _jax_wrapped_distribution_pareto(fls, nfls, params, key):
        shape, key, err1, params = jax_shape(fls, nfls, params, key)
        scale, key, err2, params = jax_scale(fls, nfls, params, key)
        key, subkey = random.split(key)
        sample = scale * random.pareto(key=subkey, b=shape, dtype=self.REAL)
        invalid = jnp.logical_not(jnp.all(jnp.logical_and(shape > 0, scale > 0)))
        return sample, key, err1 | err2 | (invalid * ERR), params

    return _jax_wrapped_distribution_pareto
 
1494
- def _jax_student(self, expr, init_params):
1697
+ def _jax_student(self, expr, aux):
1495
1698
  ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_STUDENT']
1496
1699
  JaxRDDLCompiler._check_num_args(expr, 1)
1497
1700
 
1498
1701
  arg_df, = expr.args
1499
- jax_df = self._jax(arg_df, init_params)
1702
+ jax_df = self._jax(arg_df, aux)
1500
1703
 
1501
1704
  # uses the implicit JAX subroutine for student(df)
1502
- def _jax_wrapped_distribution_t(x, params, key):
1503
- df, key, err, params = jax_df(x, params, key)
1705
+ def _jax_wrapped_distribution_t(fls, nfls, params, key):
1706
+ df, key, err, params = jax_df(fls, nfls, params, key)
1504
1707
  key, subkey = random.split(key)
1505
1708
  sample = random.t(key=subkey, df=df, shape=jnp.shape(df), dtype=self.REAL)
1506
1709
  out_of_bounds = jnp.logical_not(jnp.all(df > 0))
1507
- err |= (out_of_bounds * ERR)
1508
- return sample, key, err, params
1509
-
1710
+ err = err | (out_of_bounds * ERR)
1711
+ return sample, key, err, params
1510
1712
  return _jax_wrapped_distribution_t
1511
1713
 
1512
- def _jax_gumbel(self, expr, init_params):
1714
+ def _jax_gumbel(self, expr, aux):
1513
1715
  ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_GUMBEL']
1514
1716
  JaxRDDLCompiler._check_num_args(expr, 2)
1515
1717
 
1516
1718
  arg_mean, arg_scale = expr.args
1517
- jax_mean = self._jax(arg_mean, init_params)
1518
- jax_scale = self._jax(arg_scale, init_params)
1719
+ jax_mean = self._jax(arg_mean, aux)
1720
+ jax_scale = self._jax(arg_scale, aux)
1519
1721
 
1520
1722
  # reparameterization trick Gumbel(m, s) = m + s * Gumbel(0, 1)
1521
- def _jax_wrapped_distribution_gumbel(x, params, key):
1522
- mean, key, err1, params = jax_mean(x, params, key)
1523
- scale, key, err2, params = jax_scale(x, params, key)
1723
+ def _jax_wrapped_distribution_gumbel(fls, nfls, params, key):
1724
+ mean, key, err1, params = jax_mean(fls, nfls, params, key)
1725
+ scale, key, err2, params = jax_scale(fls, nfls, params, key)
1524
1726
  key, subkey = random.split(key)
1525
- Gumbel01 = random.gumbel(key=subkey, shape=jnp.shape(mean), dtype=self.REAL)
1526
- sample = mean + scale * Gumbel01
1727
+ g = random.gumbel(key=subkey, shape=jnp.shape(mean), dtype=self.REAL)
1728
+ sample = mean + scale * g
1527
1729
  out_of_bounds = jnp.logical_not(jnp.all(scale > 0))
1528
1730
  err = err1 | err2 | (out_of_bounds * ERR)
1529
- return sample, key, err, params
1530
-
1731
+ return sample, key, err, params
1531
1732
  return _jax_wrapped_distribution_gumbel
1532
1733
 
1533
- def _jax_laplace(self, expr, init_params):
1734
+ def _jax_laplace(self, expr, aux):
1534
1735
  ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_LAPLACE']
1535
1736
  JaxRDDLCompiler._check_num_args(expr, 2)
1536
1737
 
1537
1738
  arg_mean, arg_scale = expr.args
1538
- jax_mean = self._jax(arg_mean, init_params)
1539
- jax_scale = self._jax(arg_scale, init_params)
1739
+ jax_mean = self._jax(arg_mean, aux)
1740
+ jax_scale = self._jax(arg_scale, aux)
1540
1741
 
1541
1742
  # reparameterization trick Laplace(m, s) = m + s * Laplace(0, 1)
1542
- def _jax_wrapped_distribution_laplace(x, params, key):
1543
- mean, key, err1, params = jax_mean(x, params, key)
1544
- scale, key, err2, params = jax_scale(x, params, key)
1743
+ def _jax_wrapped_distribution_laplace(fls, nfls, params, key):
1744
+ mean, key, err1, params = jax_mean(fls, nfls, params, key)
1745
+ scale, key, err2, params = jax_scale(fls, nfls, params, key)
1545
1746
  key, subkey = random.split(key)
1546
- Laplace01 = random.laplace(key=subkey, shape=jnp.shape(mean), dtype=self.REAL)
1547
- sample = mean + scale * Laplace01
1747
+ lp = random.laplace(key=subkey, shape=jnp.shape(mean), dtype=self.REAL)
1748
+ sample = mean + scale * lp
1548
1749
  out_of_bounds = jnp.logical_not(jnp.all(scale > 0))
1549
1750
  err = err1 | err2 | (out_of_bounds * ERR)
1550
- return sample, key, err, params
1551
-
1751
+ return sample, key, err, params
1552
1752
  return _jax_wrapped_distribution_laplace
1553
1753
 
1554
- def _jax_cauchy(self, expr, init_params):
1754
+ def _jax_cauchy(self, expr, aux):
1555
1755
  ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_CAUCHY']
1556
1756
  JaxRDDLCompiler._check_num_args(expr, 2)
1557
1757
 
1558
1758
  arg_mean, arg_scale = expr.args
1559
- jax_mean = self._jax(arg_mean, init_params)
1560
- jax_scale = self._jax(arg_scale, init_params)
1759
+ jax_mean = self._jax(arg_mean, aux)
1760
+ jax_scale = self._jax(arg_scale, aux)
1561
1761
 
1562
1762
  # reparameterization trick Cauchy(m, s) = m + s * Cauchy(0, 1)
1563
- def _jax_wrapped_distribution_cauchy(x, params, key):
1564
- mean, key, err1, params = jax_mean(x, params, key)
1565
- scale, key, err2, params = jax_scale(x, params, key)
1763
+ def _jax_wrapped_distribution_cauchy(fls, nfls, params, key):
1764
+ mean, key, err1, params = jax_mean(fls, nfls, params, key)
1765
+ scale, key, err2, params = jax_scale(fls, nfls, params, key)
1566
1766
  key, subkey = random.split(key)
1567
- Cauchy01 = random.cauchy(key=subkey, shape=jnp.shape(mean), dtype=self.REAL)
1568
- sample = mean + scale * Cauchy01
1767
+ cauchy = random.cauchy(key=subkey, shape=jnp.shape(mean), dtype=self.REAL)
1768
+ sample = mean + scale * cauchy
1569
1769
  out_of_bounds = jnp.logical_not(jnp.all(scale > 0))
1570
1770
  err = err1 | err2 | (out_of_bounds * ERR)
1571
- return sample, key, err, params
1572
-
1771
+ return sample, key, err, params
1573
1772
  return _jax_wrapped_distribution_cauchy
1574
1773
 
1575
- def _jax_gompertz(self, expr, init_params):
1774
+ def _jax_gompertz(self, expr, aux):
1576
1775
  ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_GOMPERTZ']
1577
1776
  JaxRDDLCompiler._check_num_args(expr, 2)
1578
1777
 
1579
1778
  arg_shape, arg_scale = expr.args
1580
- jax_shape = self._jax(arg_shape, init_params)
1581
- jax_scale = self._jax(arg_scale, init_params)
1779
+ jax_shape = self._jax(arg_shape, aux)
1780
+ jax_scale = self._jax(arg_scale, aux)
1582
1781
 
1583
1782
  # reparameterization trick Gompertz(s, r) = ln(1 - log(U(0, 1)) / s) / r
1584
- def _jax_wrapped_distribution_gompertz(x, params, key):
1585
- shape, key, err1, params = jax_shape(x, params, key)
1586
- scale, key, err2, params = jax_scale(x, params, key)
1783
+ def _jax_wrapped_distribution_gompertz(fls, nfls, params, key):
1784
+ shape, key, err1, params = jax_shape(fls, nfls, params, key)
1785
+ scale, key, err2, params = jax_scale(fls, nfls, params, key)
1587
1786
  key, subkey = random.split(key)
1588
1787
  U = random.uniform(key=subkey, shape=jnp.shape(scale), dtype=self.REAL)
1589
1788
  sample = jnp.log(1.0 - jnp.log1p(-U) / shape) / scale
1590
- out_of_bounds = jnp.logical_not(jnp.all((shape > 0) & (scale > 0)))
1789
+ out_of_bounds = jnp.logical_not(jnp.all(jnp.logical_and(shape > 0, scale > 0)))
1591
1790
  err = err1 | err2 | (out_of_bounds * ERR)
1592
- return sample, key, err, params
1593
-
1791
+ return sample, key, err, params
1594
1792
  return _jax_wrapped_distribution_gompertz
1595
1793
 
1596
- def _jax_chisquare(self, expr, init_params):
1794
+ def _jax_chisquare(self, expr, aux):
1597
1795
  ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_CHISQUARE']
1598
1796
  JaxRDDLCompiler._check_num_args(expr, 1)
1599
1797
 
1600
1798
  arg_df, = expr.args
1601
- jax_df = self._jax(arg_df, init_params)
1799
+ jax_df = self._jax(arg_df, aux)
1602
1800
 
1603
1801
  # use the fact that ChiSquare(df) = Gamma(df/2, 2)
1604
- def _jax_wrapped_distribution_chisquare(x, params, key):
1605
- df, key, err1, params = jax_df(x, params, key)
1802
+ def _jax_wrapped_distribution_chisquare(fls, nfls, params, key):
1803
+ df, key, err1, params = jax_df(fls, nfls, params, key)
1606
1804
  key, subkey = random.split(key)
1607
- shape = df / 2.0
1608
- Gamma = random.gamma(key=subkey, a=shape, dtype=self.REAL)
1609
- sample = 2.0 * Gamma
1805
+ shape = 0.5 * df
1806
+ sample = 2.0 * random.gamma(key=subkey, a=shape, dtype=self.REAL)
1610
1807
  out_of_bounds = jnp.logical_not(jnp.all(df > 0))
1611
1808
  err = err1 | (out_of_bounds * ERR)
1612
- return sample, key, err, params
1613
-
1809
+ return sample, key, err, params
1614
1810
  return _jax_wrapped_distribution_chisquare
1615
1811
 
1616
- def _jax_kumaraswamy(self, expr, init_params):
1812
+ def _jax_kumaraswamy(self, expr, aux):
1617
1813
  ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_KUMARASWAMY']
1618
1814
  JaxRDDLCompiler._check_num_args(expr, 2)
1619
1815
 
1620
1816
  arg_a, arg_b = expr.args
1621
- jax_a = self._jax(arg_a, init_params)
1622
- jax_b = self._jax(arg_b, init_params)
1817
+ jax_a = self._jax(arg_a, aux)
1818
+ jax_b = self._jax(arg_b, aux)
1623
1819
 
1624
1820
  # uses the reparameterization K(a, b) = (1 - (1 - U(0, 1))^{1/b})^{1/a}
1625
- def _jax_wrapped_distribution_kumaraswamy(x, params, key):
1626
- a, key, err1, params = jax_a(x, params, key)
1627
- b, key, err2, params = jax_b(x, params, key)
1821
+ def _jax_wrapped_distribution_kumaraswamy(fls, nfls, params, key):
1822
+ a, key, err1, params = jax_a(fls, nfls, params, key)
1823
+ b, key, err2, params = jax_b(fls, nfls, params, key)
1628
1824
  key, subkey = random.split(key)
1629
1825
  U = random.uniform(key=subkey, shape=jnp.shape(a), dtype=self.REAL)
1630
1826
  sample = jnp.power(1.0 - jnp.power(U, 1.0 / b), 1.0 / a)
1631
- out_of_bounds = jnp.logical_not(jnp.all((a > 0) & (b > 0)))
1827
+ out_of_bounds = jnp.logical_not(jnp.all(jnp.logical_and(a > 0, b > 0)))
1632
1828
  err = err1 | err2 | (out_of_bounds * ERR)
1633
- return sample, key, err, params
1634
-
1829
+ return sample, key, err, params
1635
1830
  return _jax_wrapped_distribution_kumaraswamy
1636
1831
 
1637
1832
  # ===========================================================================
1638
1833
  # random variables with enum support
1639
1834
  # ===========================================================================
1640
1835
 
1641
- def _jax_discrete(self, expr, init_params, unnorm):
1642
- NORMAL = JaxRDDLCompiler.ERROR_CODES['NORMAL']
1836
+ @staticmethod
1837
+ def _jax_update_discrete_oob_error(err, prob):
1643
1838
  ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_DISCRETE']
1644
- ordered_args = self.traced.cached_sim_info(expr)
1645
-
1646
- # if all probabilities are non-fluent, then always sample exact
1647
- has_fluent_arg = any(self.traced.cached_is_fluent(arg)
1648
- for arg in ordered_args)
1649
- if self.compile_non_fluent_exact and not has_fluent_arg:
1650
- discrete_op = self.EXACT_OPS['sampling']['Discrete']
1651
- else:
1652
- discrete_op = self.OPS['sampling']['Discrete']
1653
- jax_op = discrete_op(expr.id, init_params)
1654
-
1655
- # compile probability expressions
1656
- jax_probs = [self._jax(arg, init_params) for arg in ordered_args]
1657
-
1658
- def _jax_wrapped_distribution_discrete(x, params, key):
1659
-
1660
- # sample case probabilities and normalize as needed
1661
- error = NORMAL
1662
- prob = [None] * len(jax_probs)
1663
- for (i, jax_prob) in enumerate(jax_probs):
1664
- prob[i], key, error_pdf, params = jax_prob(x, params, key)
1665
- error |= error_pdf
1839
+ out_of_bounds = jnp.logical_not(jnp.logical_and(
1840
+ jnp.all(prob >= 0),
1841
+ jnp.allclose(jnp.sum(prob, axis=-1), 1.0)
1842
+ ))
1843
+ error = err | (out_of_bounds * ERR)
1844
+ return error
1845
+
1846
+ def _jax_discrete_prob(self, jax_probs, unnormalized):
1847
+ def _jax_wrapped_calc_discrete_prob(fls, nfls, params, key):
1848
+
1849
+ # calculate probability expressions
1850
+ error = JaxRDDLCompiler.ERROR_CODES['NORMAL']
1851
+ prob = []
1852
+ for jax_prob in jax_probs:
1853
+ sample, key, error_pdf, params = jax_prob(fls, nfls, params, key)
1854
+ prob.append(sample)
1855
+ error = error | error_pdf
1666
1856
  prob = jnp.stack(prob, axis=-1)
1667
- if unnorm:
1857
+
1858
+ # normalize them if required
1859
+ if unnormalized:
1668
1860
  normalizer = jnp.sum(prob, axis=-1, keepdims=True)
1669
1861
  prob = prob / normalizer
1670
-
1671
- # dispatch to sampling subroutine
1672
- key, subkey = random.split(key)
1673
- sample, params = jax_op(subkey, prob, params)
1674
- out_of_bounds = jnp.logical_not(jnp.logical_and(
1675
- jnp.all(prob >= 0),
1676
- jnp.allclose(jnp.sum(prob, axis=-1), 1.0)
1677
- ))
1678
- error |= (out_of_bounds * ERR)
1679
- return sample, key, error, params
1862
+ return prob, key, error, params
1863
+ return _jax_wrapped_calc_discrete_prob
1864
+
1865
+ def _jax_discrete(self, expr, aux, unnorm):
1866
+ ordered_args = self.traced.cached_sim_info(expr)
1867
+ jax_probs = [self._jax(arg, aux) for arg in ordered_args]
1868
+ prob_fn = self._jax_discrete_prob(jax_probs, unnorm)
1680
1869
 
1870
+ def _jax_wrapped_distribution_discrete(fls, nfls, params, key):
1871
+ prob, key, error, params = prob_fn(fls, nfls, params, key)
1872
+ key, subkey = random.split(key)
1873
+ sample = random.categorical(key=subkey, logits=jnp.log(prob), axis=-1)
1874
+ error = JaxRDDLCompiler._jax_update_discrete_oob_error(error, prob)
1875
+ return sample, key, error, params
1681
1876
  return _jax_wrapped_distribution_discrete
1682
-
1683
- def _jax_discrete_pvar(self, expr, init_params, unnorm):
1684
- ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_DISCRETE']
1877
+
1878
+ @staticmethod
1879
+ def _jax_discrete_pvar_prob(jax_probs, unnormalized):
1880
+ def _jax_wrapped_calc_discrete_prob(fls, nfls, params, key):
1881
+ prob, key, error, params = jax_probs(fls, nfls, params, key)
1882
+ if unnormalized:
1883
+ normalizer = jnp.sum(prob, axis=-1, keepdims=True)
1884
+ prob = prob / normalizer
1885
+ return prob, key, error, params
1886
+ return _jax_wrapped_calc_discrete_prob
1887
+
1888
+ def _jax_discrete_pvar(self, expr, aux, unnorm):
1685
1889
  JaxRDDLCompiler._check_num_args(expr, 2)
1686
1890
  _, args = expr.args
1687
1891
  arg, = args
1688
-
1689
- # if probabilities are non-fluent, then always sample exact
1690
- if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(arg):
1691
- discrete_op = self.EXACT_OPS['sampling']['Discrete']
1692
- else:
1693
- discrete_op = self.OPS['sampling']['Discrete']
1694
- jax_op = discrete_op(expr.id, init_params)
1695
-
1696
- # compile probability function
1697
- jax_probs = self._jax(arg, init_params)
1892
+ jax_probs = self._jax(arg, aux)
1893
+ prob_fn = self._jax_discrete_pvar_prob(jax_probs, unnorm)
1698
1894
 
1699
- def _jax_wrapped_distribution_discrete_pvar(x, params, key):
1700
-
1701
- # sample probabilities
1702
- prob, key, error, params = jax_probs(x, params, key)
1703
- if unnorm:
1704
- normalizer = jnp.sum(prob, axis=-1, keepdims=True)
1705
- prob = prob / normalizer
1706
-
1707
- # dispatch to sampling subroutine
1895
+ def _jax_wrapped_distribution_discrete_pvar(fls, nfls, params, key):
1896
+ prob, key, error, params = prob_fn(fls, nfls, params, key)
1708
1897
  key, subkey = random.split(key)
1709
- sample, params = jax_op(subkey, prob, params)
1710
- out_of_bounds = jnp.logical_not(jnp.logical_and(
1711
- jnp.all(prob >= 0),
1712
- jnp.allclose(jnp.sum(prob, axis=-1), 1.0)
1713
- ))
1714
- error |= (out_of_bounds * ERR)
1715
- return sample, key, error, params
1716
-
1898
+ sample = random.categorical(key=subkey, logits=jnp.log(prob), axis=-1)
1899
+ error = JaxRDDLCompiler._jax_update_discrete_oob_error(error, prob)
1900
+ return sample, key, error, params
1717
1901
  return _jax_wrapped_distribution_discrete_pvar
1718
1902
 
1719
1903
  # ===========================================================================
1720
1904
  # random vectors
1721
1905
  # ===========================================================================
1722
1906
 
1723
- def _jax_random_vector(self, expr, init_params):
1907
+ def _jax_random_vector(self, expr, aux):
1724
1908
  _, name = expr.etype
1725
1909
  if name == 'MultivariateNormal':
1726
- return self._jax_multivariate_normal(expr, init_params)
1910
+ return self._jax_multivariate_normal(expr, aux)
1727
1911
  elif name == 'MultivariateStudent':
1728
- return self._jax_multivariate_student(expr, init_params)
1912
+ return self._jax_multivariate_student(expr, aux)
1729
1913
  elif name == 'Dirichlet':
1730
- return self._jax_dirichlet(expr, init_params)
1914
+ return self._jax_dirichlet(expr, aux)
1731
1915
  elif name == 'Multinomial':
1732
- return self._jax_multinomial(expr, init_params)
1916
+ return self._jax_multinomial(expr, aux)
1733
1917
  else:
1734
1918
  raise RDDLNotImplementedError(
1735
1919
  f'Distribution {name} is not supported.\n' + print_stack_trace(expr))
1736
1920
 
1737
- def _jax_multivariate_normal(self, expr, init_params):
1921
+ def _jax_multivariate_normal(self, expr, aux):
1738
1922
  _, args = expr.args
1739
1923
  mean, cov = args
1740
- jax_mean = self._jax(mean, init_params)
1741
- jax_cov = self._jax(cov, init_params)
1924
+ jax_mean = self._jax(mean, aux)
1925
+ jax_cov = self._jax(cov, aux)
1742
1926
  index, = self.traced.cached_sim_info(expr)
1743
1927
 
1744
1928
  # reparameterization trick MN(m, LL') = LZ + m, where Z ~ Normal(0, 1)
1745
- def _jax_wrapped_distribution_multivariate_normal(x, params, key):
1929
+ def _jax_wrapped_distribution_multivariate_normal(fls, nfls, params, key):
1746
1930
 
1747
1931
  # sample the mean and covariance
1748
- sample_mean, key, err1, params = jax_mean(x, params, key)
1749
- sample_cov, key, err2, params = jax_cov(x, params, key)
1932
+ sample_mean, key, err1, params = jax_mean(fls, nfls, params, key)
1933
+ sample_cov, key, err2, params = jax_cov(fls, nfls, params, key)
1750
1934
 
1751
1935
  # sample Normal(0, 1)
1752
1936
  key, subkey = random.split(key)
1753
1937
  Z = random.normal(
1754
- key=subkey,
1755
- shape=jnp.shape(sample_mean) + (1,),
1756
- dtype=self.REAL
1757
- )
1938
+ key=subkey, shape=jnp.shape(sample_mean) + (1,), dtype=self.REAL)
1758
1939
 
1759
1940
  # compute L s.t. cov = L * L' and reparameterize
1760
1941
  L = jnp.linalg.cholesky(sample_cov)
1761
1942
  sample = jnp.matmul(L, Z)[..., 0] + sample_mean
1762
1943
  sample = jnp.moveaxis(sample, source=-1, destination=index)
1763
1944
  err = err1 | err2
1764
- return sample, key, err, params
1765
-
1945
+ return sample, key, err, params
1766
1946
  return _jax_wrapped_distribution_multivariate_normal
1767
1947
 
1768
- def _jax_multivariate_student(self, expr, init_params):
1948
+ def _jax_multivariate_student(self, expr, aux):
1769
1949
  ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_MULTIVARIATE_STUDENT']
1770
1950
 
1771
1951
  _, args = expr.args
1772
1952
  mean, cov, df = args
1773
- jax_mean = self._jax(mean, init_params)
1774
- jax_cov = self._jax(cov, init_params)
1775
- jax_df = self._jax(df, init_params)
1953
+ jax_mean = self._jax(mean, aux)
1954
+ jax_cov = self._jax(cov, aux)
1955
+ jax_df = self._jax(df, aux)
1776
1956
  index, = self.traced.cached_sim_info(expr)
1777
1957
 
1778
1958
  # reparameterization trick MN(m, LL') = LZ + m, where Z ~ StudentT(0, 1)
1779
- def _jax_wrapped_distribution_multivariate_student(x, params, key):
1959
+ def _jax_wrapped_distribution_multivariate_student(fls, nfls, params, key):
1780
1960
 
1781
1961
  # sample the mean and covariance and degrees of freedom
1782
- sample_mean, key, err1, params = jax_mean(x, params, key)
1783
- sample_cov, key, err2, params = jax_cov(x, params, key)
1784
- sample_df, key, err3, params = jax_df(x, params, key)
1962
+ sample_mean, key, err1, params = jax_mean(fls, nfls, params, key)
1963
+ sample_cov, key, err2, params = jax_cov(fls, nfls, params, key)
1964
+ sample_df, key, err3, params = jax_df(fls, nfls, params, key)
1785
1965
  out_of_bounds = jnp.logical_not(jnp.all(sample_df > 0))
1786
1966
 
1787
1967
  # sample StudentT(0, 1, df) -- broadcast df to same shape as cov
@@ -1800,43 +1980,41 @@ class JaxRDDLCompiler:
1800
1980
  sample = jnp.matmul(L, Z)[..., 0] + sample_mean
1801
1981
  sample = jnp.moveaxis(sample, source=-1, destination=index)
1802
1982
  error = err1 | err2 | err3 | (out_of_bounds * ERR)
1803
- return sample, key, error, params
1804
-
1983
+ return sample, key, error, params
1805
1984
  return _jax_wrapped_distribution_multivariate_student
1806
1985
 
1807
- def _jax_dirichlet(self, expr, init_params):
1986
+ def _jax_dirichlet(self, expr, aux):
1808
1987
  ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_DIRICHLET']
1809
1988
 
1810
1989
  _, args = expr.args
1811
1990
  alpha, = args
1812
- jax_alpha = self._jax(alpha, init_params)
1991
+ jax_alpha = self._jax(alpha, aux)
1813
1992
  index, = self.traced.cached_sim_info(expr)
1814
1993
 
1815
1994
  # sample Gamma(alpha_i, 1) and normalize across i
1816
- def _jax_wrapped_distribution_dirichlet(x, params, key):
1817
- alpha, key, error, params = jax_alpha(x, params, key)
1995
+ def _jax_wrapped_distribution_dirichlet(fls, nfls, params, key):
1996
+ alpha, key, error, params = jax_alpha(fls, nfls, params, key)
1818
1997
  out_of_bounds = jnp.logical_not(jnp.all(alpha > 0))
1819
- error |= (out_of_bounds * ERR)
1998
+ error = error | (out_of_bounds * ERR)
1820
1999
  key, subkey = random.split(key)
1821
- Gamma = random.gamma(key=subkey, a=alpha, dtype=self.REAL)
1822
- sample = Gamma / jnp.sum(Gamma, axis=-1, keepdims=True)
2000
+ gamma = random.gamma(key=subkey, a=alpha, dtype=self.REAL)
2001
+ sample = gamma / jnp.sum(gamma, axis=-1, keepdims=True)
1823
2002
  sample = jnp.moveaxis(sample, source=-1, destination=index)
1824
- return sample, key, error, params
1825
-
2003
+ return sample, key, error, params
1826
2004
  return _jax_wrapped_distribution_dirichlet
1827
2005
 
1828
- def _jax_multinomial(self, expr, init_params):
2006
+ def _jax_multinomial(self, expr, aux):
1829
2007
  ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_MULTINOMIAL']
1830
2008
 
1831
2009
  _, args = expr.args
1832
2010
  trials, prob = args
1833
- jax_trials = self._jax(trials, init_params)
1834
- jax_prob = self._jax(prob, init_params)
2011
+ jax_trials = self._jax(trials, aux)
2012
+ jax_prob = self._jax(prob, aux)
1835
2013
  index, = self.traced.cached_sim_info(expr)
1836
2014
 
1837
- def _jax_wrapped_distribution_multinomial(x, params, key):
1838
- trials, key, err1, params = jax_trials(x, params, key)
1839
- prob, key, err2, params = jax_prob(x, params, key)
2015
+ def _jax_wrapped_distribution_multinomial(fls, nfls, params, key):
2016
+ trials, key, err1, params = jax_trials(fls, nfls, params, key)
2017
+ prob, key, err2, params = jax_prob(fls, nfls, params, key)
1840
2018
  trials = jnp.asarray(trials, dtype=self.REAL)
1841
2019
  prob = jnp.asarray(prob, dtype=self.REAL)
1842
2020
  key, subkey = random.split(key)
@@ -1844,70 +2022,66 @@ class JaxRDDLCompiler:
1844
2022
  sample = jnp.asarray(dist.sample(seed=subkey), dtype=self.INT)
1845
2023
  sample = jnp.moveaxis(sample, source=-1, destination=index)
1846
2024
  out_of_bounds = jnp.logical_not(jnp.all(
1847
- (prob >= 0)
1848
- & jnp.allclose(jnp.sum(prob, axis=-1), 1.0)
1849
- & (trials >= 0)
2025
+ jnp.logical_and(
2026
+ jnp.logical_and(prob >= 0, jnp.allclose(jnp.sum(prob, axis=-1), 1.)),
2027
+ trials >= 0
2028
+ )
1850
2029
  ))
1851
2030
  error = err1 | err2 | (out_of_bounds * ERR)
1852
- return sample, key, error, params
1853
-
2031
+ return sample, key, error, params
1854
2032
  return _jax_wrapped_distribution_multinomial
1855
2033
 
1856
2034
  # ===========================================================================
1857
2035
  # matrix algebra
1858
2036
  # ===========================================================================
1859
2037
 
1860
- def _jax_matrix(self, expr, init_params):
2038
+ def _jax_matrix(self, expr, aux):
1861
2039
  _, op = expr.etype
1862
2040
  if op == 'det':
1863
- return self._jax_matrix_det(expr, init_params)
2041
+ return self._jax_matrix_det(expr, aux)
1864
2042
  elif op == 'inverse':
1865
- return self._jax_matrix_inv(expr, init_params, pseudo=False)
2043
+ return self._jax_matrix_inv(expr, aux, pseudo=False)
1866
2044
  elif op == 'pinverse':
1867
- return self._jax_matrix_inv(expr, init_params, pseudo=True)
2045
+ return self._jax_matrix_inv(expr, aux, pseudo=True)
1868
2046
  elif op == 'cholesky':
1869
- return self._jax_matrix_cholesky(expr, init_params)
2047
+ return self._jax_matrix_cholesky(expr, aux)
1870
2048
  else:
1871
2049
  raise RDDLNotImplementedError(
1872
- f'Matrix operation {op} is not supported.\n' +
1873
- print_stack_trace(expr))
2050
+ f'Matrix operation {op} is not supported.\n' + print_stack_trace(expr))
1874
2051
 
1875
- def _jax_matrix_det(self, expr, init_params):
1876
- * _, arg = expr.args
1877
- jax_arg = self._jax(arg, init_params)
2052
+ def _jax_matrix_det(self, expr, aux):
2053
+ arg = expr.args[-1]
2054
+ jax_arg = self._jax(arg, aux)
1878
2055
 
1879
- def _jax_wrapped_matrix_operation_det(x, params, key):
1880
- sample_arg, key, error, params = jax_arg(x, params, key)
2056
+ def _jax_wrapped_matrix_operation_det(fls, nfls, params, key):
2057
+ sample_arg, key, error, params = jax_arg(fls, nfls, params, key)
1881
2058
  sample = jnp.linalg.det(sample_arg)
1882
- return sample, key, error, params
1883
-
2059
+ return sample, key, error, params
1884
2060
  return _jax_wrapped_matrix_operation_det
1885
2061
 
1886
- def _jax_matrix_inv(self, expr, init_params, pseudo):
2062
+ def _jax_matrix_inv(self, expr, aux, pseudo):
1887
2063
  _, arg = expr.args
1888
- jax_arg = self._jax(arg, init_params)
2064
+ jax_arg = self._jax(arg, aux)
1889
2065
  indices = self.traced.cached_sim_info(expr)
1890
2066
  op = jnp.linalg.pinv if pseudo else jnp.linalg.inv
1891
2067
 
1892
- def _jax_wrapped_matrix_operation_inv(x, params, key):
1893
- sample_arg, key, error, params = jax_arg(x, params, key)
2068
+ def _jax_wrapped_matrix_operation_inv(fls, nfls, params, key):
2069
+ sample_arg, key, error, params = jax_arg(fls, nfls, params, key)
1894
2070
  sample = op(sample_arg)
1895
2071
  sample = jnp.moveaxis(sample, source=(-2, -1), destination=indices)
1896
- return sample, key, error, params
1897
-
2072
+ return sample, key, error, params
1898
2073
  return _jax_wrapped_matrix_operation_inv
1899
2074
 
1900
- def _jax_matrix_cholesky(self, expr, init_params):
2075
+ def _jax_matrix_cholesky(self, expr, aux):
1901
2076
  _, arg = expr.args
1902
- jax_arg = self._jax(arg, init_params)
2077
+ jax_arg = self._jax(arg, aux)
1903
2078
  indices = self.traced.cached_sim_info(expr)
1904
2079
  op = jnp.linalg.cholesky
1905
2080
 
1906
- def _jax_wrapped_matrix_operation_cholesky(x, params, key):
1907
- sample_arg, key, error, params = jax_arg(x, params, key)
2081
+ def _jax_wrapped_matrix_operation_cholesky(fls, nfls, params, key):
2082
+ sample_arg, key, error, params = jax_arg(fls, nfls, params, key)
1908
2083
  sample = op(sample_arg)
1909
2084
  sample = jnp.moveaxis(sample, source=(-2, -1), destination=indices)
1910
- return sample, key, error, params
1911
-
2085
+ return sample, key, error, params
1912
2086
  return _jax_wrapped_matrix_operation_cholesky
1913
-
2087
+