PyPI - pyRDDLGym-jax - Versions diffs - 2.4__py3-none-any.whl → 2.6__py3-none-any.whl - Mend

pyRDDLGym-jax 2.4__py3-none-any.whl → 2.6__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (19) hide show

pyRDDLGym_jax/__init__.py +1 -1
pyRDDLGym_jax/core/compiler.py +23 -10
pyRDDLGym_jax/core/logic.py +6 -8
pyRDDLGym_jax/core/model.py +595 -0
pyRDDLGym_jax/core/planner.py +317 -99
pyRDDLGym_jax/core/simulator.py +37 -13
pyRDDLGym_jax/core/tuning.py +25 -10
pyRDDLGym_jax/entry_point.py +39 -7
pyRDDLGym_jax/examples/configs/tuning_drp.cfg +1 -0
pyRDDLGym_jax/examples/configs/tuning_replan.cfg +1 -0
pyRDDLGym_jax/examples/configs/tuning_slp.cfg +1 -0
pyRDDLGym_jax/examples/run_plan.py +1 -1
pyRDDLGym_jax/examples/run_tune.py +8 -2
{pyrddlgym_jax-2.4.dist-info → pyrddlgym_jax-2.6.dist-info}/METADATA +17 -30
{pyrddlgym_jax-2.4.dist-info → pyrddlgym_jax-2.6.dist-info}/RECORD +19 -18
{pyrddlgym_jax-2.4.dist-info → pyrddlgym_jax-2.6.dist-info}/WHEEL +1 -1
{pyrddlgym_jax-2.4.dist-info → pyrddlgym_jax-2.6.dist-info}/entry_points.txt +0 -0
{pyrddlgym_jax-2.4.dist-info → pyrddlgym_jax-2.6.dist-info/licenses}/LICENSE +0 -0
{pyrddlgym_jax-2.4.dist-info → pyrddlgym_jax-2.6.dist-info}/top_level.txt +0 -0

pyRDDLGym_jax/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = '2.4'
1	+ __version__ = '2.6'

pyRDDLGym_jax/core/compiler.py CHANGED Viewed

@@ -237,7 +237,8 @@ class JaxRDDLCompiler:
     def compile_transition(self, check_constraints: bool=False,
                            constraint_func: bool=False,
-                           init_params_constr: Dict[str, Any]={}) -> Callable:
+                           init_params_constr: Dict[str, Any]={},
+                           cache_path_info: bool=False) -> Callable:
         '''Compiles the current RDDL model into a JAX transition function that
         samples the next state.
@@ -274,6 +275,7 @@ class JaxRDDLCompiler:
         returned log and does not raise an exception
         :param constraint_func: produces the h(s, a) function described above
         in addition to the usual outputs
+        :param cache_path_info: whether to save full path traces as part of the log
         '''
         NORMAL = JaxRDDLCompiler.ERROR_CODES['NORMAL']
         rddl = self.rddl
@@ -322,8 +324,11 @@ class JaxRDDLCompiler:
             errors |= err
             # calculate fluent values
-            fluents = {name: values for (name, values) in subs.items()
-                       if name not in rddl.non_fluents}
+            if cache_path_info:
+                fluents = {name: values for (name, values) in subs.items()
+                           if name not in rddl.non_fluents}
+            else:
+                fluents = {}
             # set the next state to the current state
             for (state, next_state) in rddl.next_state.items():
@@ -367,7 +372,9 @@ class JaxRDDLCompiler:
                          n_batch: int,
                          check_constraints: bool=False,
                          constraint_func: bool=False,
-                         init_params_constr: Dict[str, Any]={}) -> Callable:
+                         init_params_constr: Dict[str, Any]={},
+                         model_params_reduction: Callable=lambda x: x[0],
+                         cache_path_info: bool=False) -> Callable:
         '''Compiles the current RDDL model into a JAX transition function that
         samples trajectories with a fixed horizon from a policy.
@@ -399,10 +406,13 @@ class JaxRDDLCompiler:
         returned log and does not raise an exception
         :param constraint_func: produces the h(s, a) constraint function
         in addition to the usual outputs
+        :param model_params_reduction: how to aggregate updated model_params across runs
+        in the batch (defaults to selecting the first element's parameters in the batch)
+        :param cache_path_info: whether to save full path traces as part of the log
         '''
         rddl = self.rddl
         jax_step_fn = self.compile_transition(
-            check_constraints, constraint_func, init_params_constr)
+            check_constraints, constraint_func, init_params_constr, cache_path_info)
         # for POMDP only observ-fluents are assumed visible to the policy
         if rddl.observ_fluents:
@@ -421,7 +431,6 @@ class JaxRDDLCompiler:
             return jax_step_fn(subkey, actions, subs, model_params)
         # do a batched step update from the policy
-        # TODO: come up with a better way to reduce the model_param batch dim
         def _jax_wrapped_batched_step_policy(carry, step):
             key, policy_params, hyperparams, subs, model_params = carry
             key, *subkeys = random.split(key, num=1 + n_batch)
@@ -430,7 +439,7 @@ class JaxRDDLCompiler:
                 _jax_wrapped_single_step_policy,
                 in_axes=(0, None, None, None, 0, None)
             )(keys, policy_params, hyperparams, step, subs, model_params)
-            model_params = jax.tree_map(partial(jnp.mean, axis=0), model_params)
+            model_params = jax.tree_util.tree_map(model_params_reduction, model_params)
             carry = (key, policy_params, hyperparams, subs, model_params)
             return carry, log
@@ -440,7 +449,7 @@ class JaxRDDLCompiler:
             start = (key, policy_params, hyperparams, subs, model_params)
             steps = jnp.arange(n_steps)
             end, log = jax.lax.scan(_jax_wrapped_batched_step_policy, start, steps)
-            log = jax.tree_map(partial(jnp.swapaxes, axis1=0, axis2=1), log)
+            log = jax.tree_util.tree_map(partial(jnp.swapaxes, axis1=0, axis2=1), log)
             model_params = end[-1]
             return log, model_params
@@ -707,7 +716,10 @@ class JaxRDDLCompiler:
                     sample = jnp.asarray(value, dtype=self._fix_dtype(value))
                     new_slices = [None] * len(jax_nested_expr)
                     for (i, jax_expr) in enumerate(jax_nested_expr):
-                        new_slices[i], key, err, params = jax_expr(x, params, key)
+                        new_slice, key, err, params = jax_expr(x, params, key)
+                        if not jnp.issubdtype(jnp.result_type(new_slice), jnp.integer):
+                            new_slice = jnp.asarray(new_slice, dtype=self.INT)
+                        new_slices[i] = new_slice
                         error |= err
                     new_slices = tuple(new_slices)
                     sample = sample[new_slices]
@@ -986,7 +998,8 @@ class JaxRDDLCompiler:
             sample_cases = [None] * len(jax_cases)
             for (i, jax_case) in enumerate(jax_cases):
                 sample_cases[i], key, err_case, params = jax_case(x, params, key)
-                err |= err_case
+                err |= err_case
+            sample_cases = jnp.asarray(sample_cases)
             sample_cases = jnp.asarray(sample_cases, dtype=self._fix_dtype(sample_cases))
             # predicate (enum) is an integer - use it to extract from case array

pyRDDLGym_jax/core/logic.py CHANGED Viewed

@@ -1056,15 +1056,13 @@ class ExactLogic(Logic):
     def control_if(self, id, init_params):
         return self._jax_wrapped_calc_if_then_else_exact
-    @staticmethod
-    def _jax_wrapped_calc_switch_exact(pred, cases, params):
-        pred = pred[jnp.newaxis, ...]
-        sample = jnp.take_along_axis(cases, pred, axis=0)
-        assert sample.shape[0] == 1
-        return sample[0, ...], params
     def control_switch(self, id, init_params):
-        return self._jax_wrapped_calc_switch_exact
+        def _jax_wrapped_calc_switch_exact(pred, cases, params):
+            pred = jnp.asarray(pred[jnp.newaxis, ...], dtype=self.INT)
+            sample = jnp.take_along_axis(cases, pred, axis=0)
+            assert sample.shape[0] == 1
+            return sample[0, ...], params
+        return _jax_wrapped_calc_switch_exact
     # ===========================================================================
     # random variables

pyRDDLGym-jax 2.4__py3-none-any.whl → 2.6__py3-none-any.whl

pyRDDLGym-jax 2.4__py3-none-any.whl → 2.6__py3-none-any.whl