PyPI - pyRDDLGym-jax - Versions diffs - 2.5__py3-none-any.whl → 2.7__py3-none-any.whl - Mend

pyRDDLGym-jax 2.5 (py3-none-any.whl) → 2.7 (py3-none-any.whl)

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (13)

pyRDDLGym_jax/__init__.py +1 -1
pyRDDLGym_jax/core/compiler.py +107 -11
pyRDDLGym_jax/core/logic.py +6 -8
pyRDDLGym_jax/core/model.py +595 -0
pyRDDLGym_jax/core/planner.py +183 -24
pyRDDLGym_jax/core/simulator.py +12 -4
pyRDDLGym_jax/examples/run_plan.py +31 -0
{pyrddlgym_jax-2.5.dist-info → pyrddlgym_jax-2.7.dist-info}/METADATA +5 -13
{pyrddlgym_jax-2.5.dist-info → pyrddlgym_jax-2.7.dist-info}/RECORD +13 -12
{pyrddlgym_jax-2.5.dist-info → pyrddlgym_jax-2.7.dist-info}/licenses/LICENSE +1 -1
{pyrddlgym_jax-2.5.dist-info → pyrddlgym_jax-2.7.dist-info}/WHEEL +0 -0
{pyrddlgym_jax-2.5.dist-info → pyrddlgym_jax-2.7.dist-info}/entry_points.txt +0 -0
{pyrddlgym_jax-2.5.dist-info → pyrddlgym_jax-2.7.dist-info}/top_level.txt +0 -0

pyRDDLGym_jax/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = '2.5'
1	+ __version__ = '2.7'

pyRDDLGym_jax/core/compiler.py CHANGED Viewed

@@ -30,7 +30,8 @@ from pyRDDLGym.core.debug.exception import (
     print_stack_trace,
     raise_warning,
     RDDLInvalidNumberOfArgumentsError,
-    RDDLNotImplementedError
+    RDDLNotImplementedError,
+    RDDLUndefinedVariableError
 )
 from pyRDDLGym.core.debug.logger import Logger
 from pyRDDLGym.core.simulator import RDDLSimulatorPrecompiled
@@ -56,7 +57,8 @@ class JaxRDDLCompiler:
                  allow_synchronous_state: bool=True,
                  logger: Optional[Logger]=None,
                  use64bit: bool=False,
-                 compile_non_fluent_exact: bool=True) -> None:
+                 compile_non_fluent_exact: bool=True,
+                 python_functions: Optional[Dict[str, Callable]]=None) -> None:
         '''Creates a new RDDL to Jax compiler.
         :param rddl: the RDDL model to compile into Jax
@@ -65,7 +67,8 @@ class JaxRDDLCompiler:
         :param logger: to log information about compilation to file
         :param use64bit: whether to use 64 bit arithmetic
         :param compile_non_fluent_exact: whether non-fluent expressions
-        are always compiled using exact JAX expressions.
+        are always compiled using exact JAX expressions
+        :param python_functions: dictionary of external Python functions to call from RDDL
         '''
         self.rddl = rddl
         self.logger = logger
@@ -99,11 +102,15 @@ class JaxRDDLCompiler:
         self.traced = tracer.trace()
         # extract the box constraints on actions
+        if python_functions is None:
+            python_functions = {}
+        self.python_functions = python_functions
         simulator = RDDLSimulatorPrecompiled(
             rddl=self.rddl,
             init_values=self.init_values,
             levels=self.levels,
-            trace_info=self.traced
+            trace_info=self.traced,
+            python_functions=python_functions
         )
         constraints = RDDLConstraints(simulator, vectorized=True)
         self.constraints = constraints
@@ -237,7 +244,8 @@ class JaxRDDLCompiler:
     def compile_transition(self, check_constraints: bool=False,
                            constraint_func: bool=False,
-                           init_params_constr: Dict[str, Any]={}) -> Callable:
+                           init_params_constr: Dict[str, Any]={},
+                           cache_path_info: bool=False) -> Callable:
         '''Compiles the current RDDL model into a JAX transition function that
         samples the next state.
@@ -274,6 +282,7 @@ class JaxRDDLCompiler:
         returned log and does not raise an exception
         :param constraint_func: produces the h(s, a) function described above
         in addition to the usual outputs
+        :param cache_path_info: whether to save full path traces as part of the log
         '''
         NORMAL = JaxRDDLCompiler.ERROR_CODES['NORMAL']
         rddl = self.rddl
@@ -322,8 +331,11 @@ class JaxRDDLCompiler:
             errors |= err
             # calculate fluent values
-            fluents = {name: values for (name, values) in subs.items()
-                       if name not in rddl.non_fluents}
+            if cache_path_info:
+                fluents = {name: values for (name, values) in subs.items()
+                           if name not in rddl.non_fluents}
+            else:
+                fluents = {}
             # set the next state to the current state
             for (state, next_state) in rddl.next_state.items():
@@ -367,7 +379,9 @@ class JaxRDDLCompiler:
                          n_batch: int,
                          check_constraints: bool=False,
                          constraint_func: bool=False,
-                         init_params_constr: Dict[str, Any]={}) -> Callable:
+                         init_params_constr: Dict[str, Any]={},
+                         model_params_reduction: Callable=lambda x: x[0],
+                         cache_path_info: bool=False) -> Callable:
         '''Compiles the current RDDL model into a JAX transition function that
         samples trajectories with a fixed horizon from a policy.
@@ -399,10 +413,13 @@ class JaxRDDLCompiler:
         returned log and does not raise an exception
         :param constraint_func: produces the h(s, a) constraint function
         in addition to the usual outputs
+        :param model_params_reduction: how to aggregate updated model_params across runs
+        in the batch (defaults to selecting the first element's parameters in the batch)
+        :param cache_path_info: whether to save full path traces as part of the log
         '''
         rddl = self.rddl
         jax_step_fn = self.compile_transition(
-            check_constraints, constraint_func, init_params_constr)
+            check_constraints, constraint_func, init_params_constr, cache_path_info)
         # for POMDP only observ-fluents are assumed visible to the policy
         if rddl.observ_fluents:
@@ -421,7 +438,6 @@ class JaxRDDLCompiler:
             return jax_step_fn(subkey, actions, subs, model_params)
         # do a batched step update from the policy
-        # TODO: come up with a better way to reduce the model_param batch dim
         def _jax_wrapped_batched_step_policy(carry, step):
             key, policy_params, hyperparams, subs, model_params = carry
             key, *subkeys = random.split(key, num=1 + n_batch)
@@ -430,7 +446,7 @@ class JaxRDDLCompiler:
                 _jax_wrapped_single_step_policy,
                 in_axes=(0, None, None, None, 0, None)
             )(keys, policy_params, hyperparams, step, subs, model_params)
-            model_params = jax.tree_util.tree_map(partial(jnp.mean, axis=0), model_params)
+            model_params = jax.tree_util.tree_map(model_params_reduction, model_params)
             carry = (key, policy_params, hyperparams, subs, model_params)
             return carry, log
@@ -596,6 +612,8 @@ class JaxRDDLCompiler:
             jax_expr = self._jax_aggregation(expr, init_params)
         elif etype == 'func':
             jax_expr = self._jax_functional(expr, init_params)
+        elif etype == 'pyfunc':
+            jax_expr = self._jax_pyfunc(expr, init_params)
         elif etype == 'control':
             jax_expr = self._jax_control(expr, init_params)
         elif etype == 'randomvar':
@@ -917,6 +935,84 @@ class JaxRDDLCompiler:
         raise RDDLNotImplementedError(
             f'Function {op} is not supported.\n' + print_stack_trace(expr))
+    def _jax_pyfunc(self, expr, init_params):
+        NORMAL = JaxRDDLCompiler.ERROR_CODES['NORMAL']
+        # get the Python function by name
+        _, pyfunc_name = expr.etype
+        pyfunc = self.python_functions.get(pyfunc_name)
+        if pyfunc is None:
+            raise RDDLUndefinedVariableError(
+                f'Undefined external Python function <{pyfunc_name}>, '
+                f'must be one of {list(self.python_functions.keys())}.\n' +
+                print_stack_trace(expr))
+        captured_vars, args = expr.args
+        scope_vars = self.traced.cached_objects_in_scope(expr)
+        dest_indices = self.traced.cached_sim_info(expr)
+        free_vars = [p for p in scope_vars if p[0] not in captured_vars]
+        free_dims = self.rddl.object_counts(p for (_, p) in free_vars)
+        num_free_vars = len(free_vars)
+        captured_types = [t for (p, t) in scope_vars if p in captured_vars]
+        require_dims = self.rddl.object_counts(captured_types)
+        # compile the inputs to the function
+        jax_inputs = [self._jax(arg, init_params) for arg in args]
+        # compile the function evaluation function
+        def _jax_wrapped_external_function(x, params, key):
+            # evaluate inputs to the function
+            # first dimensions are non-captured vars in outer scope followed by all the _
+            error = NORMAL
+            flat_samples = []
+            for jax_expr in jax_inputs:
+                sample, key, err, params = jax_expr(x, params, key)
+                shape = jnp.shape(sample)
+                first_dim = 1
+                for dim in shape[:num_free_vars]:
+                    first_dim *= dim
+                new_shape = (first_dim,) + shape[num_free_vars:]
+                flat_sample = jnp.reshape(sample, new_shape)
+                flat_samples.append(flat_sample)
+                error |= err
+            # now all the inputs have dimensions equal to (k,) + the number of _ occurences
+            # k is the number of possible non-captured object combinations
+            # evaluate the function independently for each combination
+            # output dimension for each combination is captured variables (n1, n2, ...)
+            # so the total dimension of the output array is (k, n1, n2, ...)
+            sample = jax.vmap(pyfunc, in_axes=0)(*flat_samples)
+            if not isinstance(sample, jnp.ndarray):
+                raise ValueError(
+                    f'Output of external Python function <{pyfunc_name}> '
+                    f'is not a JAX array.\n' + print_stack_trace(expr))
+            pyfunc_dims = jnp.shape(sample)[1:]
+            if len(require_dims) != len(pyfunc_dims):
+                raise ValueError(
+                    f'External Python function <{pyfunc_name}> returned array with '
+                    f'{len(pyfunc_dims)} dimensions, which does not match the '
+                    f'number of captured parameter(s) {len(require_dims)}.\n' +
+                    print_stack_trace(expr))
+            for (param, require_dim, actual_dim) in zip(captured_vars, require_dims, pyfunc_dims):
+                if require_dim != actual_dim:
+                    raise ValueError(
+                        f'External Python function <{pyfunc_name}> returned array with '
+                        f'{actual_dim} elements for captured parameter <{param}>, '
+                        f'which does not match the number of objects {require_dim}.\n' +
+                        print_stack_trace(expr))
+            # unravel the combinations k back into their original dimensions
+            sample = jnp.reshape(sample, free_dims + pyfunc_dims)
+            # rearrange the output dimensions to match the outer scope
+            source_indices = [num_free_vars + i for i in range(len(pyfunc_dims))]
+            sample = jnp.moveaxis(sample, source=source_indices, destination=dest_indices)
+            return sample, key, error, params
+        return _jax_wrapped_external_function
     # ===========================================================================
     # control flow
     # ===========================================================================

pyRDDLGym_jax/core/logic.py CHANGED Viewed

@@ -1056,15 +1056,13 @@ class ExactLogic(Logic):
     def control_if(self, id, init_params):
         return self._jax_wrapped_calc_if_then_else_exact
-    @staticmethod
-    def _jax_wrapped_calc_switch_exact(pred, cases, params):
-        pred = pred[jnp.newaxis, ...]
-        sample = jnp.take_along_axis(cases, pred, axis=0)
-        assert sample.shape[0] == 1
-        return sample[0, ...], params
     def control_switch(self, id, init_params):
-        return self._jax_wrapped_calc_switch_exact
+        def _jax_wrapped_calc_switch_exact(pred, cases, params):
+            pred = jnp.asarray(pred[jnp.newaxis, ...], dtype=self.INT)
+            sample = jnp.take_along_axis(cases, pred, axis=0)
+            assert sample.shape[0] == 1
+            return sample[0, ...], params
+        return _jax_wrapped_calc_switch_exact
     # ===========================================================================
     # random variables

pyRDDLGym-jax 2.5__py3-none-any.whl → 2.7__py3-none-any.whl

pyRDDLGym-jax 2.5 (py3-none-any.whl) → 2.7 (py3-none-any.whl)