pyRDDLGym-jax 1.3-py3-none-any.whl → 2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyRDDLGym_jax/__init__.py +1 -1
- pyRDDLGym_jax/core/compiler.py +16 -1
- pyRDDLGym_jax/core/logic.py +36 -9
- pyRDDLGym_jax/core/planner.py +445 -90
- pyRDDLGym_jax/core/simulator.py +20 -0
- pyRDDLGym_jax/core/tuning.py +15 -0
- pyRDDLGym_jax/core/visualization.py +48 -0
- pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_replan.cfg +3 -3
- pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_slp.cfg +4 -4
- pyRDDLGym_jax/examples/configs/Quadcopter_drp.cfg +1 -0
- pyRDDLGym_jax/examples/configs/Quadcopter_slp.cfg +4 -3
- pyRDDLGym_jax/examples/configs/Reservoir_Continuous_drp.cfg +1 -0
- pyRDDLGym_jax/examples/configs/Reservoir_Continuous_slp.cfg +1 -0
- pyRDDLGym_jax/examples/configs/UAV_Continuous_slp.cfg +1 -0
- pyRDDLGym_jax/examples/configs/Wildfire_MDP_ippc2014_drp.cfg +1 -0
- pyRDDLGym_jax/examples/configs/Wildfire_MDP_ippc2014_replan.cfg +1 -0
- pyRDDLGym_jax/examples/configs/Wildfire_MDP_ippc2014_slp.cfg +1 -0
- {pyRDDLGym_jax-1.3.dist-info → pyRDDLGym_jax-2.0.dist-info}/METADATA +1 -1
- {pyRDDLGym_jax-1.3.dist-info → pyRDDLGym_jax-2.0.dist-info}/RECORD +23 -23
- {pyRDDLGym_jax-1.3.dist-info → pyRDDLGym_jax-2.0.dist-info}/LICENSE +0 -0
- {pyRDDLGym_jax-1.3.dist-info → pyRDDLGym_jax-2.0.dist-info}/WHEEL +0 -0
- {pyRDDLGym_jax-1.3.dist-info → pyRDDLGym_jax-2.0.dist-info}/entry_points.txt +0 -0
- {pyRDDLGym_jax-1.3.dist-info → pyRDDLGym_jax-2.0.dist-info}/top_level.txt +0 -0
pyRDDLGym_jax/__init__.py
CHANGED
@@ -1 +1 @@
-__version__ = '
+__version__ = '2.0'
pyRDDLGym_jax/core/compiler.py
CHANGED
@@ -1,3 +1,18 @@
+# ***********************************************************************
+# JAXPLAN
+#
+# Author: Michael Gimelfarb
+#
+# REFERENCES:
+#
+# [1] Gimelfarb, Michael, Ayal Taitler, and Scott Sanner. "JaxPlan and GurobiPlan:
+# Optimization Baselines for Replanning in Discrete and Mixed Discrete-Continuous
+# Probabilistic Domains." Proceedings of the International Conference on Automated
+# Planning and Scheduling. Vol. 34. 2024.
+#
+# ***********************************************************************
+
+
 from functools import partial
 import traceback
 from typing import Any, Callable, Dict, List, Optional

@@ -524,7 +539,7 @@ class JaxRDDLCompiler:
 _jax_wrapped_single_step_policy,
 in_axes=(0, None, None, None, 0, None)
 )(keys, policy_params, hyperparams, step, subs, model_params)
-model_params = jax.tree_map(
+model_params = jax.tree_map(partial(jnp.mean, axis=0), model_params)
 carry = (key, policy_params, hyperparams, subs, model_params)
 return carry, log
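As a brief aside (not part of the package diff), the replacement line above uses the common JAX pattern of mapping functools.partial(jnp.mean, axis=0) over a pytree, which averages the leading batch axis of every leaf; a small illustration:

    # illustration only: average the batch axis of each leaf in a pytree
    from functools import partial
    import jax
    import jax.numpy as jnp

    params = {'w': jnp.ones((4, 3)), 'b': jnp.zeros((4,))}
    averaged = jax.tree_map(partial(jnp.mean, axis=0), params)
    # averaged['w'].shape == (3,) and averaged['b'].shape == ()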
pyRDDLGym_jax/core/logic.py
CHANGED
@@ -1,4 +1,31 @@
-
+# ***********************************************************************
+# JAXPLAN
+#
+# Author: Michael Gimelfarb
+#
+# REFERENCES:
+#
+# [1] Gimelfarb, Michael, Ayal Taitler, and Scott Sanner. "JaxPlan and GurobiPlan:
+# Optimization Baselines for Replanning in Discrete and Mixed Discrete-Continuous
+# Probabilistic Domains." Proceedings of the International Conference on Automated
+# Planning and Scheduling. Vol. 34. 2024.
+#
+# [2] Petersen, Felix, Christian Borgelt, Hilde Kuehne, and Oliver Deussen. "Learning with
+# algorithmic supervision via continuous relaxations." Advances in Neural Information
+# Processing Systems 34 (2021): 16520-16531.
+#
+# [3] Agustsson, Eirikur, and Lucas Theis. "Universally quantized neural compression."
+# Advances in neural information processing systems 33 (2020): 12367-12376.
+#
+# [4] Gupta, Madan M., and J. Qi. "Theory of T-norms and fuzzy inference
+# methods." Fuzzy sets and systems 40, no. 3 (1991): 431-450.
+#
+# [5] Jang, Eric, Shixiang Gu, and Ben Poole. "Categorical Reparametrization with
+# Gumble-Softmax." In International Conference on Learning Representations (ICLR 2017).
+# OpenReview. net, 2017.
+#
+# ***********************************************************************
+
 
 import jax
 import jax.numpy as jnp

@@ -759,14 +786,14 @@ class FuzzyLogic(Logic):
 
 def __str__(self) -> str:
 return (f'model relaxation:\n'
-f' tnorm
-f' complement
-f' comparison
-f' sampling
-f' rounding
-f' control
-f' underflow_tol
-f' use_64_bit
+f' tnorm ={str(self.tnorm)}\n'
+f' complement ={str(self.complement)}\n'
+f' comparison ={str(self.comparison)}\n'
+f' sampling ={str(self.sampling)}\n'
+f' rounding ={str(self.rounding)}\n'
+f' control ={str(self.control)}\n'
+f' underflow_tol={self.eps}\n'
+f' use_64_bit ={self.use64bit}\n')
 
 def summarize_hyperparameters(self) -> None:
 print(self.__str__())
pyRDDLGym_jax/core/planner.py
CHANGED
@@ -1,12 +1,43 @@
+# ***********************************************************************
+# JAXPLAN
+#
+# Author: Michael Gimelfarb
+#
+# RELEVANT SOURCES:
+#
+# [1] Gimelfarb, Michael, Ayal Taitler, and Scott Sanner. "JaxPlan and GurobiPlan:
+# Optimization Baselines for Replanning in Discrete and Mixed Discrete-Continuous
+# Probabilistic Domains." Proceedings of the International Conference on Automated
+# Planning and Scheduling. Vol. 34. 2024.
+#
+# [2] Patton, Noah, Jihwan Jeong, Mike Gimelfarb, and Scott Sanner. "A Distributional
+# Framework for Risk-Sensitive End-to-End Planning in Continuous MDPs." In Proceedings of
+# the AAAI Conference on Artificial Intelligence, vol. 36, no. 9, pp. 9894-9901. 2022.
+#
+# [3] Bueno, Thiago P., Leliane N. de Barros, Denis D. Mauá, and Scott Sanner. "Deep
+# reactive policies for planning in stochastic nonlinear domains." In Proceedings of the
+# AAAI Conference on Artificial Intelligence, vol. 33, no. 01, pp. 7530-7537. 2019.
+#
+# [4] Wu, Ga, Buser Say, and Scott Sanner. "Scalable planning with tensorflow for hybrid
+# nonlinear domains." Advances in Neural Information Processing Systems 30 (2017).
+#
+# [5] Sehnke, Frank, and Tingting Zhao. "Baseline-free sampling in parameter exploring
+# policy gradients: Super symmetric pgpe." Artificial Neural Networks: Methods and
+# Applications in Bio-/Neuroinformatics. Springer International Publishing, 2015.
+#
+# ***********************************************************************
+
+
 from ast import literal_eval
 from collections import deque
 import configparser
 from enum import Enum
+from functools import partial
 import os
 import sys
 import time
 import traceback
-from typing import Any, Callable, Dict, Generator, Optional, Set, Sequence, Tuple, Union
+from typing import Any, Callable, Dict, Generator, Optional, Set, Sequence, Type, Tuple, Union
 
 import haiku as hk
 import jax
@@ -163,7 +194,20 @@ def _load_config(config, args):
 del planner_args['optimizer']
 else:
 planner_args['optimizer'] = optimizer
-
+
+# pgpe optimizer
+pgpe_method = planner_args.get('pgpe', 'GaussianPGPE')
+pgpe_kwargs = planner_args.pop('pgpe_kwargs', {})
+if pgpe_method is not None:
+if 'optimizer' in pgpe_kwargs:
+pgpe_optimizer = _getattr_any(packages=[optax], item=pgpe_kwargs['optimizer'])
+if pgpe_optimizer is None:
+raise_warning(f'Ignoring invalid optimizer <{pgpe_optimizer}>.', 'red')
+del pgpe_kwargs['optimizer']
+else:
+pgpe_kwargs['optimizer'] = pgpe_optimizer
+planner_args['pgpe'] = getattr(sys.modules[__name__], pgpe_method)(**pgpe_kwargs)
+
 # optimize call RNG key
 planner_key = train_args.get('key', None)
 if planner_key is not None:
@@ -469,16 +513,16 @@ class JaxStraightLinePlan(JaxPlan):
 bounds = '\n '.join(
 map(lambda kv: f'{kv[0]}: {kv[1]}', self.bounds.items()))
 return (f'policy hyper-parameters:\n'
-f' initializer
-f'constraint-sat strategy (simple):\n'
-f'
-f'
-f'
-f'
-f'constraint-sat strategy (complex):\n'
-f'
-f'
-f'
+f' initializer={self._initializer_base}\n'
+f' constraint-sat strategy (simple):\n'
+f' parsed_action_bounds =\n {bounds}\n'
+f' wrap_sigmoid ={self._wrap_sigmoid}\n'
+f' wrap_sigmoid_min_prob={self._min_action_prob}\n'
+f' wrap_non_bool ={self._wrap_non_bool}\n'
+f' constraint-sat strategy (complex):\n'
+f' wrap_softmax ={self._wrap_softmax}\n'
+f' use_new_projection ={self._use_new_projection}\n'
+f' max_projection_iters={self._max_constraint_iter}\n')
 
 def compile(self, compiled: JaxRDDLCompilerWithGrad,
 _bounds: Bounds,
@@ -856,15 +900,16 @@ class JaxDeepReactivePolicy(JaxPlan):
 bounds = '\n '.join(
 map(lambda kv: f'{kv[0]}: {kv[1]}', self.bounds.items()))
 return (f'policy hyper-parameters:\n'
-f' topology
-f' activation_fn
-f' initializer
-f'
-f'
-f'
-f'
-f'
-f'
+f' topology ={self._topology}\n'
+f' activation_fn={self._activations[0].__name__}\n'
+f' initializer ={type(self._initializer_base).__name__}\n'
+f' input norm:\n'
+f' apply_input_norm ={self._normalize}\n'
+f' input_norm_layerwise={self._normalize_per_layer}\n'
+f' input_norm_args ={self._normalizer_kwargs}\n'
+f' constraint-sat strategy:\n'
+f' parsed_action_bounds=\n {bounds}\n'
+f' wrap_non_bool ={self._wrap_non_bool}\n')
 
 def compile(self, compiled: JaxRDDLCompilerWithGrad,
 _bounds: Bounds,
@@ -1090,10 +1135,11 @@ class JaxDeepReactivePolicy(JaxPlan):
 
 
 # ***********************************************************************
-#
+# SUPPORTING FUNCTIONS
 #
-# -
-# -
+# - smoothed mean calculation
+# - planner status
+# - stopping criteria
 #
 # ***********************************************************************
 
@@ -1167,6 +1213,264 @@ class NoImprovementStoppingRule(JaxPlannerStoppingRule):
 return f'No improvement for {self.patience} iterations'
 
 
+# ***********************************************************************
+# PARAMETER EXPLORING POLICY GRADIENTS (PGPE)
+#
+# - simple Gaussian PGPE
+#
+# ***********************************************************************
+
+
+class PGPE:
+"""Base class for all PGPE strategies."""
+
+def __init__(self) -> None:
+self._initializer = None
+self._update = None
+
+@property
+def initialize(self):
+return self._initializer
+
+@property
+def update(self):
+return self._update
+
+def compile(self, loss_fn: Callable, projection: Callable, real_dtype: Type) -> None:
+raise NotImplementedError
+
+
+class GaussianPGPE(PGPE):
+'''PGPE with a Gaussian parameter distribution.'''
+
+def __init__(self, batch_size: int=1,
+init_sigma: float=1.0,
+sigma_range: Tuple[float, float]=(1e-5, 1e5),
+scale_reward: bool=True,
+super_symmetric: bool=True,
+super_symmetric_accurate: bool=True,
+optimizer: Callable[..., optax.GradientTransformation]=optax.adam,
+optimizer_kwargs_mu: Optional[Kwargs]=None,
+optimizer_kwargs_sigma: Optional[Kwargs]=None) -> None:
+'''Creates a new Gaussian PGPE planner.
+
+:param batch_size: how many policy parameters to sample per optimization step
+:param init_sigma: initial standard deviation of Gaussian
+:param sigma_range: bounds to constrain standard deviation
+:param scale_reward: whether to apply reward scaling as in the paper
+:param super_symmetric: whether to use super-symmetric sampling as in the paper
+:param super_symmetric_accurate: whether to use the accurate formula for super-
+symmetric sampling or the simplified but biased formula
+:param optimizer: a factory for an optax SGD algorithm
+:param optimizer_kwargs_mu: a dictionary of parameters to pass to the SGD
+factory for the mean optimizer
+:param optimizer_kwargs_sigma: a dictionary of parameters to pass to the SGD
+factory for the standard deviation optimizer
+'''
+super().__init__()
+
+self.batch_size = batch_size
+self.init_sigma = init_sigma
+self.sigma_range = sigma_range
+self.scale_reward = scale_reward
+self.super_symmetric = super_symmetric
+self.super_symmetric_accurate = super_symmetric_accurate
+
+# set optimizers
+if optimizer_kwargs_mu is None:
+optimizer_kwargs_mu = {'learning_rate': 0.1}
+self.optimizer_kwargs_mu = optimizer_kwargs_mu
+if optimizer_kwargs_sigma is None:
+optimizer_kwargs_sigma = {'learning_rate': 0.1}
+self.optimizer_kwargs_sigma = optimizer_kwargs_sigma
+self.optimizer_name = optimizer
+mu_optimizer = optimizer(**optimizer_kwargs_mu)
+sigma_optimizer = optimizer(**optimizer_kwargs_sigma)
+self.optimizers = (mu_optimizer, sigma_optimizer)
+
+def __str__(self) -> str:
+return (f'PGPE hyper-parameters:\n'
+f' method ={self.__class__.__name__}\n'
+f' batch_size ={self.batch_size}\n'
+f' init_sigma ={self.init_sigma}\n'
+f' sigma_range ={self.sigma_range}\n'
+f' scale_reward ={self.scale_reward}\n'
+f' super_symmetric={self.super_symmetric}\n'
+f' accurate ={self.super_symmetric_accurate}\n'
+f' optimizer ={self.optimizer_name}\n'
+f' optimizer_kwargs:\n'
+f' mu ={self.optimizer_kwargs_mu}\n'
+f' sigma={self.optimizer_kwargs_sigma}\n'
+)
+
+def compile(self, loss_fn: Callable, projection: Callable, real_dtype: Type) -> None:
+MIN_NORM = 1e-5
+sigma0 = self.init_sigma
+sigma_range = self.sigma_range
+scale_reward = self.scale_reward
+super_symmetric = self.super_symmetric
+super_symmetric_accurate = self.super_symmetric_accurate
+batch_size = self.batch_size
+optimizers = (mu_optimizer, sigma_optimizer) = self.optimizers
+
+# initializer
+def _jax_wrapped_pgpe_init(key, policy_params):
+mu = policy_params
+sigma = jax.tree_map(lambda x: sigma0 * jnp.ones_like(x), mu)
+pgpe_params = (mu, sigma)
+pgpe_opt_state = tuple(opt.init(param)
+for (opt, param) in zip(optimizers, pgpe_params))
+return pgpe_params, pgpe_opt_state
+
+self._initializer = jax.jit(_jax_wrapped_pgpe_init)
+
+# parameter sampling functions
+def _jax_wrapped_mu_noise(key, sigma):
+return sigma * random.normal(key, shape=jnp.shape(sigma), dtype=real_dtype)
+
+def _jax_wrapped_epsilon_star(sigma, epsilon):
+c1, c2, c3 = -0.06655, -0.9706, 0.124
+phi = 0.67449 * sigma
+a = (sigma - jnp.abs(epsilon)) / sigma
+if super_symmetric_accurate:
+aa = jnp.abs(a)
+epsilon_star = jnp.sign(epsilon) * phi * jnp.where(
+a <= 0,
+jnp.exp(c1 * aa * (aa * aa - 1) / jnp.log(aa + 1e-10) + c2 * aa),
+jnp.exp(aa - c3 * aa * jnp.log(1.0 - jnp.power(aa, 3) + 1e-10))
+)
+else:
+epsilon_star = jnp.sign(epsilon) * phi * jnp.exp(a)
+return epsilon_star
+
+def _jax_wrapped_sample_params(key, mu, sigma):
+keys = random.split(key, num=len(jax.tree_util.tree_leaves(mu)))
+keys_pytree = jax.tree_util.tree_unflatten(
+treedef=jax.tree_util.tree_structure(mu), leaves=keys)
+epsilon = jax.tree_map(_jax_wrapped_mu_noise, keys_pytree, sigma)
+p1 = jax.tree_map(jnp.add, mu, epsilon)
+p2 = jax.tree_map(jnp.subtract, mu, epsilon)
+if super_symmetric:
+epsilon_star = jax.tree_map(_jax_wrapped_epsilon_star, sigma, epsilon)
+p3 = jax.tree_map(jnp.add, mu, epsilon_star)
+p4 = jax.tree_map(jnp.subtract, mu, epsilon_star)
+else:
+epsilon_star, p3, p4 = epsilon, p1, p2
+return (p1, p2, p3, p4), (epsilon, epsilon_star)
+
+# policy gradient update functions
+def _jax_wrapped_mu_grad(epsilon, epsilon_star, r1, r2, r3, r4, m):
+if super_symmetric:
+if scale_reward:
+scale1 = jnp.maximum(MIN_NORM, m - (r1 + r2) / 2)
+scale2 = jnp.maximum(MIN_NORM, m - (r3 + r4) / 2)
+else:
+scale1 = scale2 = 1.0
+r_mu1 = (r1 - r2) / (2 * scale1)
+r_mu2 = (r3 - r4) / (2 * scale2)
+grad = -(r_mu1 * epsilon + r_mu2 * epsilon_star)
+else:
+if scale_reward:
+scale = jnp.maximum(MIN_NORM, m - (r1 + r2) / 2)
+else:
+scale = 1.0
+r_mu = (r1 - r2) / (2 * scale)
+grad = -r_mu * epsilon
+return grad
+
+def _jax_wrapped_sigma_grad(epsilon, epsilon_star, sigma, r1, r2, r3, r4, m):
+if super_symmetric:
+mask = r1 + r2 >= r3 + r4
+epsilon_tau = mask * epsilon + (1 - mask) * epsilon_star
+s = epsilon_tau * epsilon_tau / sigma - sigma
+if scale_reward:
+scale = jnp.maximum(MIN_NORM, m - (r1 + r2 + r3 + r4) / 4)
+else:
+scale = 1.0
+r_sigma = ((r1 + r2) - (r3 + r4)) / (4 * scale)
+else:
+s = epsilon * epsilon / sigma - sigma
+if scale_reward:
+scale = jnp.maximum(MIN_NORM, jnp.abs(m))
+else:
+scale = 1.0
+r_sigma = (r1 + r2) / (2 * scale)
+grad = -r_sigma * s
+return grad
+
+def _jax_wrapped_pgpe_grad(key, mu, sigma, r_max,
+policy_hyperparams, subs, model_params):
+key, subkey = random.split(key)
+(p1, p2, p3, p4), (epsilon, epsilon_star) = _jax_wrapped_sample_params(
+key, mu, sigma)
+r1 = -loss_fn(subkey, p1, policy_hyperparams, subs, model_params)[0]
+r2 = -loss_fn(subkey, p2, policy_hyperparams, subs, model_params)[0]
+r_max = jnp.maximum(r_max, r1)
+r_max = jnp.maximum(r_max, r2)
+if super_symmetric:
+r3 = -loss_fn(subkey, p3, policy_hyperparams, subs, model_params)[0]
+r4 = -loss_fn(subkey, p4, policy_hyperparams, subs, model_params)[0]
+r_max = jnp.maximum(r_max, r3)
+r_max = jnp.maximum(r_max, r4)
+else:
+r3, r4 = r1, r2
+grad_mu = jax.tree_map(
+partial(_jax_wrapped_mu_grad, r1=r1, r2=r2, r3=r3, r4=r4, m=r_max),
+epsilon, epsilon_star
+)
+grad_sigma = jax.tree_map(
+partial(_jax_wrapped_sigma_grad, r1=r1, r2=r2, r3=r3, r4=r4, m=r_max),
+epsilon, epsilon_star, sigma
+)
+return grad_mu, grad_sigma, r_max
+
+def _jax_wrapped_pgpe_grad_batched(key, pgpe_params, r_max,
+policy_hyperparams, subs, model_params):
+mu, sigma = pgpe_params
+if batch_size == 1:
+mu_grad, sigma_grad, new_r_max = _jax_wrapped_pgpe_grad(
+key, mu, sigma, r_max, policy_hyperparams, subs, model_params)
+else:
+keys = random.split(key, num=batch_size)
+mu_grads, sigma_grads, r_maxs = jax.vmap(
+_jax_wrapped_pgpe_grad,
+in_axes=(0, None, None, None, None, None, None)
+)(keys, mu, sigma, r_max, policy_hyperparams, subs, model_params)
+mu_grad = jax.tree_map(partial(jnp.mean, axis=0), mu_grads)
+sigma_grad = jax.tree_map(partial(jnp.mean, axis=0), sigma_grads)
+new_r_max = jnp.max(r_maxs)
+return mu_grad, sigma_grad, new_r_max
+
+def _jax_wrapped_pgpe_update(key, pgpe_params, r_max,
+policy_hyperparams, subs, model_params,
+pgpe_opt_state):
+mu, sigma = pgpe_params
+mu_state, sigma_state = pgpe_opt_state
+mu_grad, sigma_grad, new_r_max = _jax_wrapped_pgpe_grad_batched(
+key, pgpe_params, r_max, policy_hyperparams, subs, model_params)
+mu_updates, new_mu_state = mu_optimizer.update(mu_grad, mu_state, params=mu)
+sigma_updates, new_sigma_state = sigma_optimizer.update(
+sigma_grad, sigma_state, params=sigma)
+new_mu = optax.apply_updates(mu, mu_updates)
+new_mu, converged = projection(new_mu, policy_hyperparams)
+new_sigma = optax.apply_updates(sigma, sigma_updates)
+new_sigma = jax.tree_map(lambda x: jnp.clip(x, *sigma_range), new_sigma)
+new_pgpe_params = (new_mu, new_sigma)
+new_pgpe_opt_state = (new_mu_state, new_sigma_state)
+policy_params = new_mu
+return new_pgpe_params, new_r_max, new_pgpe_opt_state, policy_params, converged
+
+self._update = jax.jit(_jax_wrapped_pgpe_update)
+
+
+# ***********************************************************************
+# ALL VERSIONS OF JAX PLANNER
+#
+# - simple gradient descent based planner
+#
+# ***********************************************************************
+
+
 class JaxBackpropPlanner:
 '''A class for optimizing an action sequence in the given RDDL MDP using
 gradient descent.'''
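For orientation, the following is a hypothetical usage sketch (not taken from the package) of pairing the new GaussianPGPE strategy with the planner through the pgpe argument introduced below; the constructor arguments mirror the ones added in this release, and the remaining JaxBackpropPlanner arguments are omitted:

    # hypothetical sketch, assuming the pyRDDLGym-jax 2.0 planner API shown in this diff
    import optax
    from pyRDDLGym_jax.core.planner import JaxBackpropPlanner, GaussianPGPE

    pgpe = GaussianPGPE(batch_size=1,
                        init_sigma=1.0,
                        super_symmetric=True,
                        optimizer=optax.adam,
                        optimizer_kwargs_mu={'learning_rate': 0.1},
                        optimizer_kwargs_sigma={'learning_rate': 0.1})
    # planner = JaxBackpropPlanner(..., pgpe=pgpe)  # pass pgpe=None to disable the PGPE update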
@@ -1183,6 +1487,7 @@ class JaxBackpropPlanner:
 clip_grad: Optional[float]=None,
 line_search_kwargs: Optional[Kwargs]=None,
 noise_kwargs: Optional[Kwargs]=None,
+pgpe: Optional[PGPE]=GaussianPGPE(),
 logic: Logic=FuzzyLogic(),
 use_symlog_reward: bool=False,
 utility: Union[Callable[[jnp.ndarray], float], str]='mean',
@@ -1213,6 +1518,7 @@ class JaxBackpropPlanner:
 :param line_search_kwargs: parameters to pass to optional line search
 method to scale learning rate
 :param noise_kwargs: parameters of optional gradient noise
+:param pgpe: optional policy gradient to run alongside the planner
 :param logic: a subclass of Logic for mapping exact mathematical
 operations to their differentiable counterparts
 :param use_symlog_reward: whether to use the symlog transform on the
@@ -1251,6 +1557,8 @@ class JaxBackpropPlanner:
 self.clip_grad = clip_grad
 self.line_search_kwargs = line_search_kwargs
 self.noise_kwargs = noise_kwargs
+self.pgpe = pgpe
+self.use_pgpe = pgpe is not None
 
 # set optimizer
 try:
@@ -1355,24 +1663,25 @@ r"""
 f' line_search_kwargs={self.line_search_kwargs}\n'
 f' noise_kwargs ={self.noise_kwargs}\n'
 f' batch_size_train ={self.batch_size_train}\n'
-f' batch_size_test ={self.batch_size_test}')
-result +=
-
+f' batch_size_test ={self.batch_size_test}\n')
+result += str(self.plan)
+if self.use_pgpe:
+result += str(self.pgpe)
+result += str(self.logic)
 
 # print model relaxation information
-if
-
-
-
-
-
-
-
-
-
-
-f' init_values={values_by_rddl_op[rddl_op]}\n')
+if self.compiled.model_params:
+result += ('Some RDDL operations are non-differentiable '
+'and will be approximated as follows:' + '\n')
+exprs_by_rddl_op, values_by_rddl_op = {}, {}
+for info in self.compiled.model_parameter_info().values():
+rddl_op = info['rddl_op']
+exprs_by_rddl_op.setdefault(rddl_op, []).append(info['id'])
+values_by_rddl_op.setdefault(rddl_op, []).append(info['init_value'])
+for rddl_op in sorted(exprs_by_rddl_op.keys()):
+result += (f' {rddl_op}:\n'
+f' addresses ={exprs_by_rddl_op[rddl_op]}\n'
+f' init_values={values_by_rddl_op[rddl_op]}\n')
 return result
 
 def summarize_hyperparameters(self) -> None:
@@ -1438,6 +1747,15 @@ r"""
 # optimization
 self.update = self._jax_update(train_loss)
 self.check_zero_grad = self._jax_check_zero_gradients()
+
+# pgpe option
+if self.use_pgpe:
+loss_fn = self._jax_loss(rollouts=test_rollouts)
+self.pgpe.compile(
+loss_fn=loss_fn,
+projection=self.plan.projection,
+real_dtype=self.test_compiled.REAL
+)
 
 def _jax_return(self, use_symlog):
 gamma = self.rddl.discount
@@ -1646,7 +1964,7 @@ r"""
 return grad
 
 return _loss_function, _grad_function, guess_1d, jax.jit(unravel_fn)
-
+
 # ===========================================================================
 # OPTIMIZE API
 # ===========================================================================
@@ -1819,7 +2137,17 @@ r"""
 policy_params = guess
 opt_state = self.optimizer.init(policy_params)
 opt_aux = {}
-
+
+# initialize pgpe parameters
+if self.use_pgpe:
+pgpe_params, pgpe_opt_state = self.pgpe.initialize(key, policy_params)
+rolling_pgpe_loss = RollingMean(test_rolling_window)
+else:
+pgpe_params, pgpe_opt_state = None, None
+rolling_pgpe_loss = None
+total_pgpe_it = 0
+r_max = -jnp.inf
+
 # ======================================================================
 # INITIALIZATION OF RUNNING STATISTICS
 # ======================================================================
@@ -1860,17 +2188,47 @@ r"""
 
 # update the parameters of the plan
 key, subkey = random.split(key)
-(policy_params, converged, opt_state, opt_aux,
-
-
-
-
+(policy_params, converged, opt_state, opt_aux, train_loss, train_log,
+model_params) = self.update(subkey, policy_params, policy_hyperparams,
+train_subs, model_params, opt_state, opt_aux)
+test_loss, (test_log, model_params_test) = self.test_loss(
+subkey, policy_params, policy_hyperparams, test_subs, model_params_test)
+test_loss_smooth = rolling_test_loss.update(test_loss)
+
+# pgpe update of the plan
+pgpe_improve = False
+if self.use_pgpe:
+key, subkey = random.split(key)
+pgpe_params, r_max, pgpe_opt_state, pgpe_param, pgpe_converged = \
+self.pgpe.update(subkey, pgpe_params, r_max, policy_hyperparams,
+test_subs, model_params, pgpe_opt_state)
+pgpe_loss, _ = self.test_loss(
+subkey, pgpe_param, policy_hyperparams, test_subs, model_params_test)
+pgpe_loss_smooth = rolling_pgpe_loss.update(pgpe_loss)
+pgpe_return = -pgpe_loss_smooth
+
+# replace with PGPE if it reaches a new minimum or train loss invalid
+if pgpe_loss_smooth < best_loss or not np.isfinite(train_loss):
+policy_params = pgpe_param
+test_loss, test_loss_smooth = pgpe_loss, pgpe_loss_smooth
+converged = pgpe_converged
+pgpe_improve = True
+total_pgpe_it += 1
+else:
+pgpe_loss, pgpe_loss_smooth, pgpe_return = None, None, None
+
+# evaluate test losses and record best plan so far
+if test_loss_smooth < best_loss:
+best_params, best_loss, best_grad = \
+policy_params, test_loss_smooth, train_log['grad']
+last_iter_improve = it
+
 # ==================================================================
 # STATUS CHECKS AND LOGGING
 # ==================================================================
 
 # no progress
-if self.check_zero_grad(train_log['grad']):
+if (not pgpe_improve) and self.check_zero_grad(train_log['grad']):
 status = JaxPlannerStatus.NO_PROGRESS
 
 # constraint satisfaction problem
@@ -1882,21 +2240,14 @@ r"""
 status = JaxPlannerStatus.PRECONDITION_POSSIBLY_UNSATISFIED
 
 # numerical error
-if
-
-
+if self.use_pgpe:
+invalid_loss = not (np.isfinite(train_loss) or np.isfinite(pgpe_loss))
+else:
+invalid_loss = not np.isfinite(train_loss)
+if invalid_loss:
+raise_warning(f'Planner aborted due to invalid loss {train_loss}.', 'red')
 status = JaxPlannerStatus.INVALID_GRADIENT
 
-# evaluate test losses and record best plan so far
-test_loss, (log, model_params_test) = self.test_loss(
-subkey, policy_params, policy_hyperparams,
-test_subs, model_params_test)
-test_loss = rolling_test_loss.update(test_loss)
-if test_loss < best_loss:
-best_params, best_loss, best_grad = \
-policy_params, test_loss, train_log['grad']
-last_iter_improve = it
-
 # reached computation budget
 elapsed = time.time() - start_time - elapsed_outside_loop
 if elapsed >= train_seconds:
@@ -1910,11 +2261,14 @@ r"""
 'status': status,
 'iteration': it,
 'train_return':-train_loss,
-'test_return':-
+'test_return':-test_loss_smooth,
 'best_return':-best_loss,
+'pgpe_return': pgpe_return,
 'params': policy_params,
 'best_params': best_params,
+'pgpe_params': pgpe_params,
 'last_iteration_improved': last_iter_improve,
+'pgpe_improved': pgpe_improve,
 'grad': train_log['grad'],
 'best_grad': best_grad,
 'updates': train_log['updates'],
@@ -1923,7 +2277,7 @@ r"""
 'model_params': model_params,
 'progress': progress_percent,
 'train_log': train_log,
-**
+**test_log
 }
 
 # stopping condition reached
@@ -1934,9 +2288,9 @@ r"""
 if print_progress:
 iters.n = progress_percent
 iters.set_description(
-f'{position_str} {it:6} it / {-train_loss:14.
-f'{-
-f'{status.value} status'
+f'{position_str} {it:6} it / {-train_loss:14.5f} train / '
+f'{-test_loss_smooth:14.5f} test / {-best_loss:14.5f} best / '
+f'{status.value} status / {total_pgpe_it:6} pgpe'
 )
 
 # dash-board
@@ -1955,7 +2309,7 @@ r"""
 # ======================================================================
 # POST-PROCESSING AND CLEANUP
 # ======================================================================
-
+
 # release resources
 if print_progress:
 iters.close()
@@ -1967,7 +2321,7 @@ r"""
 messages.update(JaxRDDLCompiler.get_error_messages(error_code))
 if messages:
 messages = '\n'.join(messages)
-raise_warning('
+raise_warning('JAX compiler encountered the following '
 'error(s) in the original RDDL formulation '
 f'during test evaluation:\n{messages}', 'red')
 
@@ -1975,14 +2329,14 @@ r"""
 if print_summary:
 grad_norm = jax.tree_map(lambda x: np.linalg.norm(x).item(), best_grad)
 diagnosis = self._perform_diagnosis(
-last_iter_improve, -train_loss, -
+last_iter_improve, -train_loss, -test_loss_smooth, -best_loss, grad_norm)
 print(f'summary of optimization:\n'
-f'
-f'
+f' status ={status}\n'
+f' time ={elapsed:.6f} sec.\n'
 f' iterations ={it}\n'
-f'
-f'
-f'
+f' best objective={-best_loss:.6f}\n'
+f' best grad norm={grad_norm}\n'
+f'diagnosis: {diagnosis}\n')
 
 def _perform_diagnosis(self, last_iter_improve,
 train_return, test_return, best_return, grad_norm):
@@ -2002,23 +2356,24 @@ r"""
 if last_iter_improve <= 1:
 if grad_is_zero:
 return termcolor.colored(
-'[FAILURE] no progress was made
-f'and max grad norm {max_grad_norm:.6f}
-'solver likely stuck in a plateau.', 'red')
+'[FAILURE] no progress was made '
+f'and max grad norm {max_grad_norm:.6f} was zero: '
+'the solver was likely stuck in a plateau.', 'red')
 else:
 return termcolor.colored(
-'[FAILURE] no progress was made
-f'but max grad norm {max_grad_norm:.6f}
-'
+'[FAILURE] no progress was made '
+f'but max grad norm {max_grad_norm:.6f} was non-zero: '
+'the learning rate or other hyper-parameters were likely suboptimal.',
+'red')
 
 # model is likely poor IF:
 # 1. the train and test return disagree
 if not (validation_error < 20):
 return termcolor.colored(
-'[WARNING] progress was made
-f'but relative train-test error {validation_error:.6f}
-'
-'or the batch size
+'[WARNING] progress was made '
+f'but relative train-test error {validation_error:.6f} was high: '
+'model relaxation around the solution was poor '
+'or the batch size was too small.', 'yellow')
 
 # model likely did not converge IF:
 # 1. the max grad relative to the return is high
@@ -2026,15 +2381,15 @@ r"""
 return_to_grad_norm = abs(best_return) / max_grad_norm
 if not (return_to_grad_norm > 1):
 return termcolor.colored(
-'[WARNING] progress was made
-f'but max grad norm {max_grad_norm:.6f}
-'
-'or the relaxed model
-'or the batch size
+'[WARNING] progress was made '
+f'but max grad norm {max_grad_norm:.6f} was high: '
+'the solution was likely locally suboptimal, '
+'or the relaxed model was not smooth around the solution, '
+'or the batch size was too small.', 'yellow')
 
 # likely successful
 return termcolor.colored(
-'[SUCCESS]
+'[SUCCESS] solver converged successfully '
 '(note: not all potential problems can be ruled out).', 'green')
 
 def get_action(self, key: random.PRNGKey,
pyRDDLGym_jax/core/simulator.py
CHANGED
@@ -1,3 +1,23 @@
+# ***********************************************************************
+# JAXPLAN
+#
+# Author: Michael Gimelfarb
+#
+# REFERENCES:
+#
+# [1] Gimelfarb, Michael, Ayal Taitler, and Scott Sanner. "JaxPlan and GurobiPlan:
+# Optimization Baselines for Replanning in Discrete and Mixed Discrete-Continuous
+# Probabilistic Domains." Proceedings of the International Conference on Automated
+# Planning and Scheduling. Vol. 34. 2024.
+#
+# [2] Taitler, Ayal, Michael Gimelfarb, Jihwan Jeong, Sriram Gopalakrishnan, Martin
+# Mladenov, Xiaotian Liu, and Scott Sanner. "pyRDDLGym: From RDDL to Gym Environments."
+# In PRL Workshop Series {\textendash} Bridging the Gap Between AI Planning and
+# Reinforcement Learning.
+#
+# ***********************************************************************
+
+
 import time
 from typing import Dict, Optional
 
pyRDDLGym_jax/core/tuning.py
CHANGED
@@ -1,3 +1,18 @@
+# ***********************************************************************
+# JAXPLAN
+#
+# Author: Michael Gimelfarb
+#
+# REFERENCES:
+#
+# [1] Gimelfarb, Michael, Ayal Taitler, and Scott Sanner. "JaxPlan and GurobiPlan:
+# Optimization Baselines for Replanning in Discrete and Mixed Discrete-Continuous
+# Probabilistic Domains." Proceedings of the International Conference on Automated
+# Planning and Scheduling. Vol. 34. 2024.
+#
+# ***********************************************************************
+
+
 import csv
 import datetime
 import threading
pyRDDLGym_jax/core/visualization.py
CHANGED
@@ -1,3 +1,18 @@
+# ***********************************************************************
+# JAXPLAN
+#
+# Author: Michael Gimelfarb
+#
+# REFERENCES:
+#
+# [1] Gimelfarb, Michael, Ayal Taitler, and Scott Sanner. "JaxPlan and GurobiPlan:
+# Optimization Baselines for Replanning in Discrete and Mixed Discrete-Continuous
+# Probabilistic Domains." Proceedings of the International Conference on Automated
+# Planning and Scheduling. Vol. 34. 2024.
+#
+# ***********************************************************************
+
+
 import ast
 import os
 from datetime import datetime

@@ -61,6 +76,7 @@ class JaxPlannerDashboard:
 self.xticks = {}
 self.test_return = {}
 self.train_return = {}
+self.pgpe_return = {}
 self.return_dist = {}
 self.return_dist_ticks = {}
 self.return_dist_last_progress = {}

@@ -299,6 +315,9 @@ class JaxPlannerDashboard:
 dbc.Col(Graph(id='train-return-graph'), width=6),
 dbc.Col(Graph(id='test-return-graph'), width=6),
 ]),
+dbc.Row([
+dbc.Col(Graph(id='pgpe-return-graph'), width=6)
+]),
 dbc.Row([
 Graph(id='dist-return-graph')
 ])

@@ -661,6 +680,33 @@ class JaxPlannerDashboard:
 )
 return fig
 
+@app.callback(
+Output('pgpe-return-graph', 'figure'),
+[Input('interval', 'n_intervals'),
+Input('trigger-experiment-check', 'children'),
+Input('tabs-main', 'active_tab')]
+)
+def update_pgpe_return_graph(n, trigger, active_tab):
+if active_tab != 'tab-performance': return dash.no_update
+fig = go.Figure()
+for (row, checked) in self.checked.copy().items():
+if checked:
+fig.add_trace(go.Scatter(
+x=self.xticks[row], y=self.pgpe_return[row],
+name=f'id={row}',
+mode='lines+markers',
+marker=dict(size=3), line=dict(width=2)
+))
+fig.update_layout(
+title=dict(text="PGPE Return"),
+xaxis=dict(title=dict(text="Training Iteration")),
+yaxis=dict(title=dict(text="Cumulative Reward")),
+font=dict(size=PLOT_AXES_FONT_SIZE),
+legend=dict(bgcolor='rgba(0,0,0,0)'),
+template="plotly_white"
+)
+return fig
+
 @app.callback(
 Output('dist-return-graph', 'figure'),
 [Input('interval', 'n_intervals'),

@@ -1316,6 +1362,7 @@ class JaxPlannerDashboard:
 self.xticks[experiment_id] = []
 self.train_return[experiment_id] = []
 self.test_return[experiment_id] = []
+self.pgpe_return[experiment_id] = []
 self.return_dist_ticks[experiment_id] = []
 self.return_dist_last_progress[experiment_id] = 0
 self.return_dist[experiment_id] = []

@@ -1367,6 +1414,7 @@ class JaxPlannerDashboard:
 self.xticks[experiment_id].append(iteration)
 self.train_return[experiment_id].append(callback['train_return'])
 self.test_return[experiment_id].append(callback['best_return'])
+self.pgpe_return[experiment_id].append(callback['pgpe_return'])
 
 # data for return distributions
 progress = callback['progress']
pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_replan.cfg
CHANGED
@@ -1,8 +1,8 @@
 [Model]
 logic='FuzzyLogic'
-comparison_kwargs={'weight':
-rounding_kwargs={'weight':
-control_kwargs={'weight':
+comparison_kwargs={'weight': 20}
+rounding_kwargs={'weight': 20}
+control_kwargs={'weight': 20}
 
 [Optimizer]
 method='JaxStraightLinePlan'
pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_slp.cfg
CHANGED
@@ -1,14 +1,14 @@
 [Model]
 logic='FuzzyLogic'
-comparison_kwargs={'weight':
-rounding_kwargs={'weight':
-control_kwargs={'weight':
+comparison_kwargs={'weight': 20}
+rounding_kwargs={'weight': 20}
+control_kwargs={'weight': 20}
 
 [Optimizer]
 method='JaxStraightLinePlan'
 method_kwargs={}
 optimizer='rmsprop'
-optimizer_kwargs={'learning_rate': 0.
+optimizer_kwargs={'learning_rate': 0.001}
 batch_size_train=1
 batch_size_test=1
 clip_grad=1.0
pyRDDLGym_jax/examples/configs/Quadcopter_slp.cfg
CHANGED
@@ -1,8 +1,8 @@
 [Model]
 logic='FuzzyLogic'
-comparison_kwargs={'weight':
-rounding_kwargs={'weight':
-control_kwargs={'weight':
+comparison_kwargs={'weight': 10}
+rounding_kwargs={'weight': 10}
+control_kwargs={'weight': 10}
 
 [Optimizer]
 method='JaxStraightLinePlan'

@@ -11,6 +11,7 @@ optimizer='rmsprop'
 optimizer_kwargs={'learning_rate': 0.03}
 batch_size_train=1
 batch_size_test=1
+pgpe=None
 
 [Training]
 key=42
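Judging from the configuration parsing added to planner.py above, the PGPE strategy can also be selected from the [Optimizer] section of a config file; the snippet below is a hypothetical example (the key names come from the diff, the values are illustrative), while pgpe=None, as in the config above, disables it:

    [Optimizer]
    method='JaxStraightLinePlan'
    method_kwargs={}
    optimizer='rmsprop'
    optimizer_kwargs={'learning_rate': 0.03}
    pgpe='GaussianPGPE'
    pgpe_kwargs={'init_sigma': 1.0, 'optimizer': 'adam'}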
{pyRDDLGym_jax-1.3.dist-info → pyRDDLGym_jax-2.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: pyRDDLGym-jax
-Version:
+Version: 2.0
 Summary: pyRDDLGym-jax: automatic differentiation for solving sequential planning problems in JAX.
 Home-page: https://github.com/pyrddlgym-project/pyRDDLGym-jax
 Author: Michael Gimelfarb, Ayal Taitler, Scott Sanner
{pyRDDLGym_jax-1.3.dist-info → pyRDDLGym_jax-2.0.dist-info}/RECORD
CHANGED
@@ -1,12 +1,12 @@
-pyRDDLGym_jax/__init__.py,sha256=
+pyRDDLGym_jax/__init__.py,sha256=TiPG4w8nN4AzPkhugwVvZkHmAgP955NltD4QRmBLhRU,19
 pyRDDLGym_jax/entry_point.py,sha256=dxDlO_5gneEEViwkLCg30Z-KVzUgdRXaKuFjoZklkA0,974
 pyRDDLGym_jax/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-pyRDDLGym_jax/core/compiler.py,sha256=
-pyRDDLGym_jax/core/logic.py,sha256=
-pyRDDLGym_jax/core/planner.py,sha256=
-pyRDDLGym_jax/core/simulator.py,sha256=
-pyRDDLGym_jax/core/tuning.py,sha256=
-pyRDDLGym_jax/core/visualization.py,sha256=
+pyRDDLGym_jax/core/compiler.py,sha256=Rn-aIqfgfWqu45bvCfPb9tB8RIOBVdbj-pI-V3WS2Z8,89212
+pyRDDLGym_jax/core/logic.py,sha256=_A6eGYtLVU3pbLAezxJVB9bnClJoaFIa2mBIDdFrqoU,39655
+pyRDDLGym_jax/core/planner.py,sha256=4j56l7SL7F89g2QA4nOpyhODmY0DamvxYLfCMKxJNbQ,118593
+pyRDDLGym_jax/core/simulator.py,sha256=DnPL93WVCMZqtqMUoiJdfWcH9pEvNgGfDfO4NV0wIS0,9271
+pyRDDLGym_jax/core/tuning.py,sha256=RKKtDZp7unvfbhZEoaunZtcAn5xtzGYqXBB_Ij_Aapc,24205
+pyRDDLGym_jax/core/visualization.py,sha256=XtQL1A5dQIlfeUpte-r3lNVw-GNLxj2EYUNMz7AFOtc,70359
 pyRDDLGym_jax/core/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pyRDDLGym_jax/core/assets/favicon.ico,sha256=RMMrI9YvmF81TgYG7FO7UAre6WmYFkV3B2GmbA1l0kM,175085
 pyRDDLGym_jax/examples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

@@ -16,8 +16,8 @@ pyRDDLGym_jax/examples/run_plan.py,sha256=v2AvwgIa4Ejr626vBOgWFJIQvay3IPKWno02zt
 pyRDDLGym_jax/examples/run_scipy.py,sha256=wvcpWCvdjvYHntO95a7JYfY2fuCMUTKnqjJikW0PnL4,2291
 pyRDDLGym_jax/examples/run_tune.py,sha256=zqrhvLR5PeWJv0NsRxDCzAPmvgPgz_1NrtM1xBy6ndU,3606
 pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_drp.cfg,sha256=mE8MqhOlkHeXIGEVrnR3QY6I-_iy4uxFYRA71P1bmtk,347
-pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_replan.cfg,sha256=
-pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_slp.cfg,sha256=
+pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_replan.cfg,sha256=nFFYHCKQUMn8x-OpJwu2pwe1tycNSJ8iAIwSkCBn33E,370
+pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_slp.cfg,sha256=eJ3HvHjODoKdtX7u-AM51xQaHJnYgzEy2t3omNG2oCs,340
 pyRDDLGym_jax/examples/configs/HVAC_ippc2023_drp.cfg,sha256=9-QMZPZuecAEaerD79ZAbGX-tgfL8Y2W-tfkAyD15Cw,362
 pyRDDLGym_jax/examples/configs/HVAC_ippc2023_slp.cfg,sha256=BiY6wwSYkR9-T46AA4n3okJ1Qvj8Iu-y1V5BrfCbqrM,340
 pyRDDLGym_jax/examples/configs/MountainCar_Continuous_gym_slp.cfg,sha256=VBlTiHFQG72D1wpebMsuzSokwqlPVD99WjPp4YoWs84,356

@@ -25,15 +25,15 @@ pyRDDLGym_jax/examples/configs/MountainCar_ippc2023_slp.cfg,sha256=bH_5O13-Y6ztv
 pyRDDLGym_jax/examples/configs/PowerGen_Continuous_drp.cfg,sha256=Pq6E9RYksue7X2cWjdWyUsV0LqQTjTvq6p0aLBVKWfY,370
 pyRDDLGym_jax/examples/configs/PowerGen_Continuous_replan.cfg,sha256=SGVQAOqrOjEsZEtxL_Z6aGbLR19h5gKCcy0oz2vtQp8,382
 pyRDDLGym_jax/examples/configs/PowerGen_Continuous_slp.cfg,sha256=6obQik2FBldoJ3VwoVfGhQqKpKdnYox770cF-SGRi3Q,345
-pyRDDLGym_jax/examples/configs/Quadcopter_drp.cfg,sha256=
-pyRDDLGym_jax/examples/configs/Quadcopter_slp.cfg,sha256=
-pyRDDLGym_jax/examples/configs/Reservoir_Continuous_drp.cfg,sha256=
+pyRDDLGym_jax/examples/configs/Quadcopter_drp.cfg,sha256=rs-CzOAyZV_NvwSh2f6Fm9XNw5Z8WIYgpAOzgTm_Gv8,403
+pyRDDLGym_jax/examples/configs/Quadcopter_slp.cfg,sha256=EtSCTjd8gWm7akQdfHFxdpGnQvHzjo2IHbAuVxTAX4U,356
+pyRDDLGym_jax/examples/configs/Reservoir_Continuous_drp.cfg,sha256=7nPOJCo3eaZuq1pCyIJJJkDM0jjJThDuDECJDZzX-uc,379
 pyRDDLGym_jax/examples/configs/Reservoir_Continuous_replan.cfg,sha256=V3jzPGuNq2IAxYy_EeZWin4Y_uf0HvGhzg06ODNSY-I,381
-pyRDDLGym_jax/examples/configs/Reservoir_Continuous_slp.cfg,sha256=
-pyRDDLGym_jax/examples/configs/UAV_Continuous_slp.cfg,sha256=
-pyRDDLGym_jax/examples/configs/Wildfire_MDP_ippc2014_drp.cfg,sha256=
-pyRDDLGym_jax/examples/configs/Wildfire_MDP_ippc2014_replan.cfg,sha256=
-pyRDDLGym_jax/examples/configs/Wildfire_MDP_ippc2014_slp.cfg,sha256=
+pyRDDLGym_jax/examples/configs/Reservoir_Continuous_slp.cfg,sha256=SYAJmoUIUhhvAej3XOzC5boGxKVHnSiVi5-ZGj2S29M,354
+pyRDDLGym_jax/examples/configs/UAV_Continuous_slp.cfg,sha256=osoIPfrldPw7oJF2AaAw0-ke6YHQNdrslFBCTytsqmo,354
+pyRDDLGym_jax/examples/configs/Wildfire_MDP_ippc2014_drp.cfg,sha256=oNX8uW8Bw2uG9zHX1zeLF3mHWDHRIlJXYvbFcY0pfCI,382
+pyRDDLGym_jax/examples/configs/Wildfire_MDP_ippc2014_replan.cfg,sha256=exCfGI3WU7IFO7n5rRe5cO1ZHAdFwttRYzjIdD4Pz2Y,451
+pyRDDLGym_jax/examples/configs/Wildfire_MDP_ippc2014_slp.cfg,sha256=e6Ikgv2uBbKuXHfVKt4KQ01LDUBGbc31D28bCcztJ58,413
 pyRDDLGym_jax/examples/configs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pyRDDLGym_jax/examples/configs/default_drp.cfg,sha256=XeMWAAG_OFZo7JAMxS5-XXroZaeVMzfM0NswmEobIns,373
 pyRDDLGym_jax/examples/configs/default_replan.cfg,sha256=CK4cEz8ReXyAZPLaLG9clIIRXAqM3IplUCxbLt_V2lY,407

@@ -41,9 +41,9 @@ pyRDDLGym_jax/examples/configs/default_slp.cfg,sha256=mJo0woDevhQCSQfJg30ULVy9qG
 pyRDDLGym_jax/examples/configs/tuning_drp.cfg,sha256=CQMpSCKTkGioO7U82mHMsYWFRsutULx0V6Wrl3YzV2U,504
 pyRDDLGym_jax/examples/configs/tuning_replan.cfg,sha256=m_0nozFg_GVld0tGv92Xao_KONFJDq_vtiJKt5isqI8,501
 pyRDDLGym_jax/examples/configs/tuning_slp.cfg,sha256=KHu8II6CA-h_HblwvWHylNRjSvvGS3VHxN7JQNR4p_Q,464
-pyRDDLGym_jax-
-pyRDDLGym_jax-
-pyRDDLGym_jax-
-pyRDDLGym_jax-
-pyRDDLGym_jax-
-pyRDDLGym_jax-
+pyRDDLGym_jax-2.0.dist-info/LICENSE,sha256=Y0Gi6H6mLOKN-oIKGZulQkoTJyPZeAaeuZu7FXH-meg,1095
+pyRDDLGym_jax-2.0.dist-info/METADATA,sha256=ZYIe9c_Tar4WO8qQOvcUIJVMmZznPUBRaegS0DH2un8,15090
+pyRDDLGym_jax-2.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+pyRDDLGym_jax-2.0.dist-info/entry_points.txt,sha256=Q--z9QzqDBz1xjswPZ87PU-pib-WPXx44hUWAFoBGBA,59
+pyRDDLGym_jax-2.0.dist-info/top_level.txt,sha256=n_oWkP_BoZK0VofvPKKmBZ3NPk86WFNvLhi1BktCbVQ,14
+pyRDDLGym_jax-2.0.dist-info/RECORD,,
{pyRDDLGym_jax-1.3.dist-info → pyRDDLGym_jax-2.0.dist-info}/LICENSE
File without changes
{pyRDDLGym_jax-1.3.dist-info → pyRDDLGym_jax-2.0.dist-info}/WHEEL
File without changes
{pyRDDLGym_jax-1.3.dist-info → pyRDDLGym_jax-2.0.dist-info}/entry_points.txt
File without changes
{pyRDDLGym_jax-1.3.dist-info → pyRDDLGym_jax-2.0.dist-info}/top_level.txt
File without changes