pyRDDLGym-jax 2.5__py3-none-any.whl → 2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyRDDLGym_jax/__init__.py +1 -1
- pyRDDLGym_jax/core/compiler.py +107 -11
- pyRDDLGym_jax/core/logic.py +6 -8
- pyRDDLGym_jax/core/model.py +595 -0
- pyRDDLGym_jax/core/planner.py +183 -24
- pyRDDLGym_jax/core/simulator.py +12 -4
- pyRDDLGym_jax/examples/run_plan.py +31 -0
- {pyrddlgym_jax-2.5.dist-info → pyrddlgym_jax-2.7.dist-info}/METADATA +5 -13
- {pyrddlgym_jax-2.5.dist-info → pyrddlgym_jax-2.7.dist-info}/RECORD +13 -12
- {pyrddlgym_jax-2.5.dist-info → pyrddlgym_jax-2.7.dist-info}/licenses/LICENSE +1 -1
- {pyrddlgym_jax-2.5.dist-info → pyrddlgym_jax-2.7.dist-info}/WHEEL +0 -0
- {pyrddlgym_jax-2.5.dist-info → pyrddlgym_jax-2.7.dist-info}/entry_points.txt +0 -0
- {pyrddlgym_jax-2.5.dist-info → pyrddlgym_jax-2.7.dist-info}/top_level.txt +0 -0
pyRDDLGym_jax/core/planner.py
CHANGED
@@ -207,6 +207,13 @@ def _load_config(config, args):
         pgpe_kwargs['optimizer'] = pgpe_optimizer
         planner_args['pgpe'] = getattr(sys.modules[__name__], pgpe_method)(**pgpe_kwargs)
 
+    # preprocessor settings
+    preproc_method = planner_args.get('preprocessor', None)
+    preproc_kwargs = planner_args.pop('preprocessor_kwargs', {})
+    if preproc_method is not None:
+        planner_args['preprocessor'] = getattr(
+            sys.modules[__name__], preproc_method)(**preproc_kwargs)
+
     # optimize call RNG key
     planner_key = train_args.get('key', None)
    if planner_key is not None:
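The `_load_config` change above means a preprocessor can be selected by class name directly from the planner config file, with constructor arguments passed through `preprocessor_kwargs`. A minimal sketch of such an entry, assuming the standard JaxPlan config layout (the `[Optimizer]` section name and the other keys shown are assumptions based on the existing config format, not part of this diff):

```ini
[Optimizer]
method='JaxDeepReactivePolicy'
method_kwargs={'topology': [64, 64]}
# assumed usage: the value must name a Preprocessor subclass defined in
# pyRDDLGym_jax.core.planner, e.g. the new StaticNormalizer
preprocessor='StaticNormalizer'
preprocessor_kwargs={}
```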
@@ -343,6 +350,100 @@ class JaxRDDLCompilerWithGrad(JaxRDDLCompiler):
         return arg
 
 
+# ***********************************************************************
+# ALL VERSIONS OF STATE PREPROCESSING FOR DRP
+#
+# - static normalization
+#
+# ***********************************************************************
+
+
+class Preprocessor(metaclass=ABCMeta):
+    '''Base class for all state preprocessors.'''
+
+    HYPERPARAMS_KEY = 'preprocessor__'
+
+    def __init__(self) -> None:
+        self._initializer = None
+        self._update = None
+        self._transform = None
+
+    @property
+    def initialize(self):
+        return self._initializer
+
+    @property
+    def update(self):
+        return self._update
+
+    @property
+    def transform(self):
+        return self._transform
+
+    @abstractmethod
+    def compile(self, compiled: JaxRDDLCompilerWithGrad) -> None:
+        pass
+
+
+class StaticNormalizer(Preprocessor):
+    '''Normalize values by box constraints on fluents computed from the RDDL domain.'''
+
+    def __init__(self, fluent_bounds: Dict[str, Tuple[np.ndarray, np.ndarray]]={}) -> None:
+        '''Create a new instance of the static normalizer.
+
+        :param fluent_bounds: optional bounds on fluents to overwrite default values.
+        '''
+        self.fluent_bounds = fluent_bounds
+
+    def compile(self, compiled: JaxRDDLCompilerWithGrad) -> None:
+
+        # adjust for partial observability
+        rddl = compiled.rddl
+        if rddl.observ_fluents:
+            observed_vars = rddl.observ_fluents
+        else:
+            observed_vars = rddl.state_fluents
+
+        # ignore boolean fluents and infinite bounds
+        bounded_vars = {}
+        for var in observed_vars:
+            if rddl.variable_ranges[var] != 'bool':
+                lower, upper = compiled.constraints.bounds[var]
+                if np.all(np.isfinite(lower) & np.isfinite(upper) & (lower < upper)):
+                    bounded_vars[var] = (lower, upper)
+                user_bounds = self.fluent_bounds.get(var, None)
+                if user_bounds is not None:
+                    bounded_vars[var] = tuple(user_bounds)
+
+        # initialize to ranges computed by the constraint parser
+        def _jax_wrapped_normalizer_init():
+            return bounded_vars
+        self._initializer = jax.jit(_jax_wrapped_normalizer_init)
+
+        # static bounds
+        def _jax_wrapped_normalizer_update(subs, stats):
+            stats = {var: (jnp.asarray(lower, dtype=compiled.REAL),
+                           jnp.asarray(upper, dtype=compiled.REAL))
+                     for (var, (lower, upper)) in bounded_vars.items()}
+            return stats
+        self._update = jax.jit(_jax_wrapped_normalizer_update)
+
+        # apply min max scaling
+        def _jax_wrapped_normalizer_transform(subs, stats):
+            new_subs = {}
+            for (var, values) in subs.items():
+                if var in stats:
+                    lower, upper = stats[var]
+                    new_dims = jnp.ndim(values) - jnp.ndim(lower)
+                    lower = lower[(jnp.newaxis,) * new_dims + (...,)]
+                    upper = upper[(jnp.newaxis,) * new_dims + (...,)]
+                    new_subs[var] = (values - lower) / (upper - lower)
+                else:
+                    new_subs[var] = values
+            return new_subs
+        self._transform = jax.jit(_jax_wrapped_normalizer_transform)
+
+
 # ***********************************************************************
 # ALL VERSIONS OF JAX PLANS
 #
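The new `StaticNormalizer` min-max scales every bounded, non-boolean observed fluent using `(value - lower) / (upper - lower)`, with bounds taken from the RDDL box constraints or overridden per fluent via `fluent_bounds`. A rough usage sketch in Python, assuming a vectorized pyRDDLGym environment; the domain, instance, and training keyword arguments below are placeholders rather than values taken from this diff:

```python
import pyRDDLGym
from pyRDDLGym_jax.core.planner import (
    JaxBackpropPlanner, JaxDeepReactivePolicy, JaxOfflineController, StaticNormalizer
)

# placeholder domain/instance; any vectorized pyRDDLGym environment should work
env = pyRDDLGym.make('Wildfire_MDP_ippc2014', '1', vectorized=True)

# scale observed fluents by their box constraints before they reach the policy network
planner = JaxBackpropPlanner(
    rddl=env.model,
    plan=JaxDeepReactivePolicy(topology=[64, 64]),
    preprocessor=StaticNormalizer())
controller = JaxOfflineController(planner, epochs=1000, train_seconds=30)
controller.evaluate(env, episodes=1)
```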
@@ -368,7 +469,8 @@ class JaxPlan(metaclass=ABCMeta):
     @abstractmethod
     def compile(self, compiled: JaxRDDLCompilerWithGrad,
                 _bounds: Bounds,
-                horizon: int
+                horizon: int,
+                preprocessor: Optional[Preprocessor]=None) -> None:
         pass
 
     @abstractmethod
@@ -519,7 +621,8 @@ class JaxStraightLinePlan(JaxPlan):
 
     def compile(self, compiled: JaxRDDLCompilerWithGrad,
                 _bounds: Bounds,
-                horizon: int
+                horizon: int,
+                preprocessor: Optional[Preprocessor]=None) -> None:
         rddl = compiled.rddl
 
         # calculate the correct action box bounds
@@ -607,7 +710,7 @@ class JaxStraightLinePlan(JaxPlan):
             return new_params, True
 
         # convert softmax action back to action dict
-        action_sizes = {var: np.prod(shape[1:], dtype=
+        action_sizes = {var: np.prod(shape[1:], dtype=np.int64)
                         for (var, shape) in shapes.items()
                         if ranges[var] == 'bool'}
 
@@ -691,7 +794,7 @@ class JaxStraightLinePlan(JaxPlan):
             scores = []
             for (var, param) in params.items():
                 if ranges[var] == 'bool':
-                    param_flat = jnp.ravel(param)
+                    param_flat = jnp.ravel(param, order='C')
                     if noop[var]:
                         if wrap_sigmoid:
                             param_flat = -param_flat
@@ -908,7 +1011,8 @@ class JaxDeepReactivePolicy(JaxPlan):
 
     def compile(self, compiled: JaxRDDLCompilerWithGrad,
                 _bounds: Bounds,
-                horizon: int
+                horizon: int,
+                preprocessor: Optional[Preprocessor]=None) -> None:
         rddl = compiled.rddl
 
         # calculate the correct action box bounds
@@ -939,7 +1043,7 @@ class JaxDeepReactivePolicy(JaxPlan):
         wrap_non_bool = self._wrap_non_bool
         init = self._initializer
         layers = list(enumerate(zip(self._topology, self._activations)))
-        layer_sizes = {var: np.prod(shape, dtype=
+        layer_sizes = {var: np.prod(shape, dtype=np.int64)
                        for (var, shape) in shapes.items()}
         layer_names = {var: f'output_{var}'.replace('-', '_') for var in shapes}
 
@@ -973,7 +1077,12 @@ class JaxDeepReactivePolicy(JaxPlan):
            normalize = False
 
         # convert subs dictionary into a state vector to feed to the MLP
-        def _jax_wrapped_policy_input(subs):
+        def _jax_wrapped_policy_input(subs, hyperparams):
+
+            # optional state preprocessing
+            if preprocessor is not None:
+                stats = hyperparams[preprocessor.HYPERPARAMS_KEY]
+                subs = preprocessor.transform(subs, stats)
 
             # concatenate all state variables into a single vector
             # optionally apply layer norm to each input tensor
@@ -981,7 +1090,7 @@ class JaxDeepReactivePolicy(JaxPlan):
             non_bool_dims = 0
             for (var, value) in subs.items():
                 if var in observed_vars:
-                    state = jnp.ravel(value)
+                    state = jnp.ravel(value, order='C')
                     if ranges[var] == 'bool':
                         states_bool.append(state)
                     else:
@@ -1010,8 +1119,8 @@ class JaxDeepReactivePolicy(JaxPlan):
             return state
 
         # predict actions from the policy network for current state
-        def _jax_wrapped_policy_network_predict(subs):
-            state = _jax_wrapped_policy_input(subs)
+        def _jax_wrapped_policy_network_predict(subs, hyperparams):
+            state = _jax_wrapped_policy_input(subs, hyperparams)
 
             # feed state vector through hidden layers
             hidden = state
@@ -1076,7 +1185,7 @@ class JaxDeepReactivePolicy(JaxPlan):
 
         # train action prediction
         def _jax_wrapped_drp_predict_train(key, params, hyperparams, step, subs):
-            actions = predict_fn.apply(params, subs)
+            actions = predict_fn.apply(params, subs, hyperparams)
             if not wrap_non_bool:
                 for (var, action) in actions.items():
                     if var != bool_key and ranges[var] != 'bool':
@@ -1126,7 +1235,7 @@ class JaxDeepReactivePolicy(JaxPlan):
             subs = {var: value[0, ...]
                     for (var, value) in subs.items()
                     if var in observed_vars}
-            params = predict_fn.init(key, subs)
+            params = predict_fn.init(key, subs, hyperparams)
             return params
 
         self.initializer = _jax_wrapped_drp_init
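The hunks above thread an extra `hyperparams` argument through the haiku-transformed policy so that preprocessing statistics can be applied while building the network input. A self-contained sketch of that pattern (not the package's actual code; the function name `policy` and the fluent keys are illustrative only):

```python
import haiku as hk
import jax
import jax.numpy as jnp

PREPROC_KEY = 'preprocessor__'   # mirrors Preprocessor.HYPERPARAMS_KEY

def policy(subs, hyperparams):
    # apply min-max scaling to fluents that have preprocessing statistics
    stats = hyperparams.get(PREPROC_KEY, {})
    scaled = {}
    for (var, value) in subs.items():
        if var in stats:
            lower, upper = stats[var]
            scaled[var] = (value - lower) / (upper - lower)
        else:
            scaled[var] = value
    # flatten and concatenate into a single state vector, then feed a small MLP
    state = jnp.concatenate([jnp.ravel(v) for v in scaled.values()])
    return hk.nets.MLP([32, 2])(state)

policy_fn = hk.transform(policy)
subs = {'pos': jnp.array([0.5, 1.5]), 'vel': jnp.array([0.1, -0.2])}
hyperparams = {PREPROC_KEY: {'pos': (jnp.zeros(2), jnp.full(2, 2.0))}}
params = policy_fn.init(jax.random.PRNGKey(0), subs, hyperparams)
actions = policy_fn.apply(params, None, subs, hyperparams)   # rng not needed here
print(actions.shape)
```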
@@ -1634,12 +1743,21 @@ def mean_semivariance_utility(returns: jnp.ndarray, beta: float) -> float:
     return mu - 0.5 * beta * msv
 
 
+@jax.jit
+def sharpe_utility(returns: jnp.ndarray, risk_free: float) -> float:
+    return (jnp.mean(returns) - risk_free) / (jnp.std(returns) + 1e-10)
+
+
+@jax.jit
+def var_utility(returns: jnp.ndarray, alpha: float) -> float:
+    return jnp.percentile(returns, q=100 * alpha)
+
+
 @jax.jit
 def cvar_utility(returns: jnp.ndarray, alpha: float) -> float:
     var = jnp.percentile(returns, q=100 * alpha)
     mask = returns <= var
-
-    return jnp.sum(returns * weights)
+    return jnp.sum(returns * mask) / jnp.maximum(1, jnp.sum(mask))
 
 
 # set of all currently valid built-in utility functions
@@ -1649,8 +1767,10 @@ UTILITY_LOOKUP = {
     'mean_std': mean_deviation_utility,
     'mean_semivar': mean_semivariance_utility,
     'mean_semidev': mean_semideviation_utility,
+    'sharpe': sharpe_utility,
     'entropic': entropic_utility,
     'exponential': entropic_utility,
+    'var': var_utility,
     'cvar': cvar_utility
 }
 
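The new `sharpe` and `var` entries extend the set of risk-aware utilities that can be selected by name. A hedged sketch of selecting one, assuming the planner's existing `utility` and `utility_kwargs` arguments (those argument names are not shown in this diff):

```python
# select the Sharpe-ratio utility by its UTILITY_LOOKUP key; 'risk_free' is the
# keyword argument of sharpe_utility as defined above
planner = JaxBackpropPlanner(
    rddl=env.model,
    plan=JaxStraightLinePlan(),
    utility='sharpe',
    utility_kwargs={'risk_free': 0.0})
```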
@@ -1689,7 +1809,9 @@ class JaxBackpropPlanner:
                  logger: Optional[Logger]=None,
                  dashboard_viz: Optional[Any]=None,
                  print_warnings: bool=True,
-                 parallel_updates: Optional[int]=None
+                 parallel_updates: Optional[int]=None,
+                 preprocessor: Optional[Preprocessor]=None,
+                 python_functions: Optional[Dict[str, Callable]]=None) -> None:
         '''Creates a new gradient-based algorithm for optimizing action sequences
         (plan) in the given RDDL. Some operations will be converted to their
         differentiable counterparts; the specific operations can be customized
@@ -1731,6 +1853,8 @@ class JaxBackpropPlanner:
             to pass to the dashboard to visualize the policy
         :param print_warnings: whether to print warnings
         :param parallel_updates: how many optimizers to run independently in parallel
+        :param preprocessor: optional preprocessor for state inputs to plan
+        :param python_functions: dictionary of external Python functions to call from RDDL
         '''
         self.rddl = rddl
         self.plan = plan
@@ -1756,7 +1880,11 @@ class JaxBackpropPlanner:
         self.pgpe = pgpe
         self.use_pgpe = pgpe is not None
         self.print_warnings = print_warnings
-
+        self.preprocessor = preprocessor
+        if python_functions is None:
+            python_functions = {}
+        self.python_functions = python_functions
+
         # set optimizer
         try:
             optimizer = optax.inject_hyperparams(optimizer)(**optimizer_kwargs)
@@ -1881,7 +2009,8 @@ r"""
             f' noise_kwargs ={self.noise_kwargs}\n'
             f' batch_size_train ={self.batch_size_train}\n'
             f' batch_size_test ={self.batch_size_test}\n'
-            f' parallel_updates ={self.parallel_updates}\n'
+            f' parallel_updates ={self.parallel_updates}\n'
+            f' preprocessor ={self.preprocessor}\n')
         result += str(self.plan)
         if self.use_pgpe:
             result += str(self.pgpe)
@@ -1903,7 +2032,8 @@ r"""
             use64bit=self.use64bit,
             cpfs_without_grad=self.cpfs_without_grad,
             compile_non_fluent_exact=self.compile_non_fluent_exact,
-            print_warnings=self.print_warnings
+            print_warnings=self.print_warnings,
+            python_functions=self.python_functions
         )
         self.compiled.compile(log_jax_expr=True, heading='RELAXED MODEL')
 
@@ -1911,16 +2041,22 @@ r"""
         self.test_compiled = JaxRDDLCompiler(
             rddl=rddl,
             logger=self.logger,
-            use64bit=self.use64bit
+            use64bit=self.use64bit,
+            python_functions=self.python_functions
         )
         self.test_compiled.compile(log_jax_expr=True, heading='EXACT MODEL')
 
     def _jax_compile_optimizer(self):
 
+        # preprocessor
+        if self.preprocessor is not None:
+            self.preprocessor.compile(self.compiled)
+
         # policy
         self.plan.compile(self.compiled,
                           _bounds=self._action_bounds,
-                          horizon=self.horizon
+                          horizon=self.horizon,
+                          preprocessor=self.preprocessor)
         self.train_policy = jax.jit(self.plan.train_policy)
         self.test_policy = jax.jit(self.plan.test_policy)
 
@@ -1928,14 +2064,16 @@ r"""
         train_rollouts = self.compiled.compile_rollouts(
             policy=self.plan.train_policy,
             n_steps=self.horizon,
-            n_batch=self.batch_size_train
+            n_batch=self.batch_size_train,
+            cache_path_info=self.preprocessor is not None
         )
         self.train_rollouts = train_rollouts
 
         test_rollouts = self.test_compiled.compile_rollouts(
             policy=self.plan.test_policy,
             n_steps=self.horizon,
-            n_batch=self.batch_size_test
+            n_batch=self.batch_size_test,
+            cache_path_info=False
         )
         self.test_rollouts = jax.jit(test_rollouts)
 
@@ -2397,7 +2535,13 @@ r"""
                               f'which could be suboptimal.', 'yellow')
                 print(message)
                 policy_hyperparams[action] = 1.0
-
+
+        # initialize preprocessor
+        preproc_key = None
+        if self.preprocessor is not None:
+            preproc_key = self.preprocessor.HYPERPARAMS_KEY
+            policy_hyperparams[preproc_key] = self.preprocessor.initialize()
+
         # print summary of parameters:
         if print_summary:
             print(self.summarize_system())
@@ -2524,6 +2668,11 @@ r"""
                 subkey, policy_params, policy_hyperparams, train_subs, model_params,
                 opt_state, opt_aux)
 
+            # update the preprocessor
+            if self.preprocessor is not None:
+                policy_hyperparams[preproc_key] = self.preprocessor.update(
+                    train_log['fluents'], policy_hyperparams[preproc_key])
+
             # evaluate
             test_loss, (test_log, model_params_test) = self.test_loss(
                 subkey, policy_params, policy_hyperparams, test_subs, model_params_test)
@@ -2676,6 +2825,7 @@ r"""
                 'model_params': model_params,
                 'progress': progress_percent,
                 'train_log': train_log,
+                'policy_hyperparams': policy_hyperparams,
                 **test_log
             }
 
@@ -2753,7 +2903,8 @@ r"""
 
     def _perform_diagnosis(self, last_iter_improve,
                            train_return, test_return, best_return, grad_norm):
-
+        grad_norms = jax.tree_util.tree_leaves(grad_norm)
+        max_grad_norm = max(grad_norms) if grad_norms else np.nan
         grad_is_zero = np.allclose(max_grad_norm, 0)
 
         # divergence if the solution is not finite
@@ -2895,6 +3046,7 @@ class JaxOfflineController(BaseAgent):
         self.train_on_reset = train_on_reset
         self.train_kwargs = train_kwargs
         self.params_given = params is not None
+        self.hyperparams_given = eval_hyperparams is not None
 
         # load the policy from file
         if not self.train_on_reset and params is not None and isinstance(params, str):
@@ -2908,6 +3060,8 @@ class JaxOfflineController(BaseAgent):
             callback = self.planner.optimize(key=self.key, **self.train_kwargs)
             self.callback = callback
             params = callback['best_params']
+            if not self.hyperparams_given:
+                self.eval_hyperparams = callback['policy_hyperparams']
 
             # save the policy
             if save_path is not None:
@@ -2931,6 +3085,8 @@ class JaxOfflineController(BaseAgent):
             callback = self.planner.optimize(key=self.key, **self.train_kwargs)
             self.callback = callback
             self.params = callback['best_params']
+            if not self.hyperparams_given:
+                self.eval_hyperparams = callback['policy_hyperparams']
 
 
 class JaxOnlineController(BaseAgent):
@@ -2963,6 +3119,7 @@ class JaxOnlineController(BaseAgent):
             key = random.PRNGKey(round(time.time() * 1000))
         self.key = key
         self.eval_hyperparams = eval_hyperparams
+        self.hyperparams_given = eval_hyperparams is not None
         self.warm_start = warm_start
         self.train_kwargs = train_kwargs
         self.max_attempts = max_attempts
@@ -2987,6 +3144,8 @@ class JaxOnlineController(BaseAgent):
                 key=self.key, guess=self.guess, subs=state, **self.train_kwargs)
             self.callback = callback
             params = callback['best_params']
+            if not self.hyperparams_given:
+                self.eval_hyperparams = callback['policy_hyperparams']
 
             # get the action from the parameters for the current state
             self.key, subkey = random.split(self.key)
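With `policy_hyperparams` now included in the optimizer callback, the controllers can reuse the preprocessor statistics produced during training whenever no `eval_hyperparams` are supplied. A rough sketch of the intended flow; the callback keys come from the diff, while the training keyword arguments are assumptions:

```python
from jax import random

callback = planner.optimize(key=random.PRNGKey(42), epochs=500)
params = callback['best_params']
hyperparams = callback['policy_hyperparams']   # new key added in this release

# passing eval_hyperparams explicitly is optional: if omitted, the controller now
# falls back to the callback's policy_hyperparams automatically
controller = JaxOfflineController(planner, params=params, eval_hyperparams=hyperparams)
controller.evaluate(env, episodes=1)
```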
pyRDDLGym_jax/core/simulator.py
CHANGED
@@ -20,7 +20,7 @@
 
 import time
 import numpy as np
-from typing import Dict, Optional, Union
+from typing import Callable, Dict, Optional, Union
 
 import jax
 
@@ -48,6 +48,7 @@ class JaxRDDLSimulator(RDDLSimulator):
                  logger: Optional[Logger]=None,
                  keep_tensors: bool=False,
                  objects_as_strings: bool=True,
+                 python_functions: Optional[Dict[str, Callable]]=None,
                  **compiler_args) -> None:
         '''Creates a new simulator for the given RDDL model with Jax as a backend.
 
@@ -60,8 +61,9 @@ class JaxRDDLSimulator(RDDLSimulator):
         :param logger: to log information about compilation to file
         :param keep_tensors: whether the sampler takes actions and
             returns state in numpy array form
-        param objects_as_strings: whether to return object values as strings (defaults
+        :param objects_as_strings: whether to return object values as strings (defaults
            to integer indices if False)
+        :param python_functions: dictionary of external Python functions to call from RDDL
         :param **compiler_args: keyword arguments to pass to the Jax compiler
         '''
         if key is None:
@@ -73,7 +75,8 @@ class JaxRDDLSimulator(RDDLSimulator):
         # generate direct sampling with default numpy RNG and operations
         super(JaxRDDLSimulator, self).__init__(
             rddl, logger=logger,
-            keep_tensors=keep_tensors, objects_as_strings=objects_as_strings
+            keep_tensors=keep_tensors, objects_as_strings=objects_as_strings,
+            python_functions=python_functions)
 
     def seed(self, seed: int) -> None:
         super(JaxRDDLSimulator, self).seed(seed)
@@ -83,7 +86,12 @@ class JaxRDDLSimulator(RDDLSimulator):
         rddl = self.rddl
 
         # compilation
-        compiled = JaxRDDLCompiler(
+        compiled = JaxRDDLCompiler(
+            rddl,
+            logger=self.logger,
+            python_functions=self.python_functions,
+            **self.compiler_args
+        )
         compiled.compile(log_jax_expr=True, heading='SIMULATION MODEL')
 
         self.init_values = compiled.init_values
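Both the planner and the simulator now forward a `python_functions` dictionary to `JaxRDDLCompiler`. How such functions are referenced from the RDDL description is not shown in this diff; the sketch below only illustrates the Python-side plumbing with a hypothetical function name:

```python
import jax.numpy as jnp
import pyRDDLGym
from pyRDDLGym_jax.core.simulator import JaxRDDLSimulator

# hypothetical external function; the RDDL domain must refer to it by whatever
# convention the compiler expects (not documented in this diff)
def my_external_fn(x):
    return jnp.tanh(x)

env = pyRDDLGym.make('CartPole_Continuous_gym', '0')   # placeholder domain/instance
sim = JaxRDDLSimulator(env.model, python_functions={'my_external_fn': my_external_fn})
```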
pyRDDLGym_jax/examples/run_plan.py
CHANGED
@@ -25,6 +25,36 @@ from pyRDDLGym_jax.core.planner import (
     load_config, JaxBackpropPlanner, JaxOfflineController, JaxOnlineController
 )
 
+
+def run_cnn1d():
+    import haiku as hk
+    import jax
+    import jax.numpy as jnp
+
+    class CNN(hk.Module):
+        def __init__(self, name=None):
+            super().__init__(name=name)
+            self.conv1d_layer = hk.Conv1D(
+                output_channels=4,
+                kernel_shape=6,  # Kernel size for 1D convolution
+                padding="SAME",
+                name="conv"
+            )
+
+        def __call__(self, x):
+            return self.conv1d_layer(x)
+
+    # Example usage:
+    key = jax.random.PRNGKey(42)
+    input_data = jnp.ones([1, 4])  # Batch size 1, sequence length 10, 1 input channel
+
+    # Transform the Haiku module into a pure function
+    f = hk.transform(lambda x: CNN()(x))
+    params = f.init(key, input_data)
+    print(params['cnn/~/conv']['w'].shape)
+    print(params['cnn/~/conv']['b'].shape)
+    print(f.apply(params, key, input_data).shape)
+
 
 def main(domain: str, instance: str, method: str, episodes: int=1) -> None:
 
@@ -63,6 +93,7 @@ def main(domain: str, instance: str, method: str, episodes: int=1) -> None:
 
 
 def run_from_args(args):
+    run_cnn1d()
     if len(args) < 3:
         print('python run_plan.py <domain> <instance> <method> [<episodes>]')
         exit(1)
{pyrddlgym_jax-2.5.dist-info → pyrddlgym_jax-2.7.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pyRDDLGym-jax
-Version: 2.5
+Version: 2.7
 Summary: pyRDDLGym-jax: automatic differentiation for solving sequential planning problems in JAX.
 Home-page: https://github.com/pyrddlgym-project/pyRDDLGym-jax
 Author: Michael Gimelfarb, Ayal Taitler, Scott Sanner
@@ -20,7 +20,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: pyRDDLGym>=2.
+Requires-Dist: pyRDDLGym>=2.5
 Requires-Dist: tqdm>=4.66
 Requires-Dist: jax>=0.4.12
 Requires-Dist: optax>=0.1.9
@@ -55,7 +55,7 @@ Dynamic: summary
 
 [Installation](#installation) | [Run cmd](#running-from-the-command-line) | [Run python](#running-from-another-python-application) | [Configuration](#configuring-the-planner) | [Dashboard](#jaxplan-dashboard) | [Tuning](#tuning-the-planner) | [Simulation](#simulation) | [Citing](#citing-jaxplan)
 
-**pyRDDLGym-jax (
+**pyRDDLGym-jax (or JaxPlan) is an efficient gradient-based planning algorithm based on JAX.**
 
 Purpose:
 
@@ -84,7 +84,7 @@ and was moved to the individual logic components which have their own unique wei
 
 > [!NOTE]
 > While JaxPlan can support some discrete state/action problems through model relaxations, on some discrete problems it can perform poorly (though there is an ongoing effort to remedy this!).
-> If you find it is not making
+> If you find it is not making progress, check out the [PROST planner](https://github.com/pyrddlgym-project/pyRDDLGym-prost) (for discrete spaces) or the [deep reinforcement learning wrappers](https://github.com/pyrddlgym-project/pyRDDLGym-rl).
 
 ## Installation
 
@@ -220,13 +220,7 @@ controller = JaxOfflineController(planner, **train_args)
 ## JaxPlan Dashboard
 
 Since version 1.0, JaxPlan has an optional dashboard that allows keeping track of the planner performance across multiple runs,
-and visualization of the policy or model, and other useful debugging features.
-
-<p align="middle">
-<img src="https://github.com/pyrddlgym-project/pyRDDLGym-jax/blob/main/Images/dashboard.png" width="480" height="248" margin=0/>
-</p>
-
-To run the dashboard, add the following entry to your config file:
+and visualization of the policy or model, and other useful debugging features. To run the dashboard, add the following to your config file:
 
 ```ini
 ...
@@ -235,8 +229,6 @@ dashboard=True
 ...
 ```
 
-More documentation about this and other new features will be coming soon.
-
 ## Tuning the Planner
 
 A basic run script is provided to run automatic Bayesian hyper-parameter tuning for the most sensitive parameters of JaxPlan:
{pyrddlgym_jax-2.5.dist-info → pyrddlgym_jax-2.7.dist-info}/RECORD
CHANGED
@@ -1,10 +1,11 @@
-pyRDDLGym_jax/__init__.py,sha256=
+pyRDDLGym_jax/__init__.py,sha256=nHQztRWlKCpxZgvKkxsGQax5-clS2XguHhAvmBZt0sA,19
 pyRDDLGym_jax/entry_point.py,sha256=K0zy1oe66jfBHkHHCM6aGHbbiVqnQvDhDb8se4uaKHE,3319
 pyRDDLGym_jax/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-pyRDDLGym_jax/core/compiler.py,sha256=
-pyRDDLGym_jax/core/logic.py,sha256=
-pyRDDLGym_jax/core/
-pyRDDLGym_jax/core/
+pyRDDLGym_jax/core/compiler.py,sha256=DS4G5f5U83cOUQsUe6RsyyJnLPDuHaqjxM7bHSWMCtM,88040
+pyRDDLGym_jax/core/logic.py,sha256=9rRpKJCx4Us_2c6BiSWRN9k2sM_iYsAK1B7zcgwu3ZA,56290
+pyRDDLGym_jax/core/model.py,sha256=4WfmtUVN1EKCD-7eWeQByWk8_zKyDcMABAMdlxN1LOU,27215
+pyRDDLGym_jax/core/planner.py,sha256=cvl3JS1tLQqj8KJ5ATkHUfIzCzcYJWOCoWJYwLxMDSg,146835
+pyRDDLGym_jax/core/simulator.py,sha256=D-yLxDFw67DvFHdb_kJjZHujSBSmiFA1J3osel-KOvY,10799
 pyRDDLGym_jax/core/tuning.py,sha256=BWcQZk02TMLexTz1Sw4lX2EQKvmPbp7biC51M-IiNUw,25153
 pyRDDLGym_jax/core/visualization.py,sha256=4BghMp8N7qtF0tdyDSqtxAxNfP9HPrQWTiXzAMJmx7o,70365
 pyRDDLGym_jax/core/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -12,7 +13,7 @@ pyRDDLGym_jax/core/assets/favicon.ico,sha256=RMMrI9YvmF81TgYG7FO7UAre6WmYFkV3B2G
 pyRDDLGym_jax/examples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pyRDDLGym_jax/examples/run_gradient.py,sha256=KhXvijRDZ4V7N8NOI2WV8ePGpPna5_vnET61YwS7Tco,2919
 pyRDDLGym_jax/examples/run_gym.py,sha256=rXvNWkxe4jHllvbvU_EOMji_2-2k5d4tbBKhpMm_Gaw,1526
-pyRDDLGym_jax/examples/run_plan.py,sha256=
+pyRDDLGym_jax/examples/run_plan.py,sha256=uScTTUSdwohhaqvmSf9zvOjQn4xZ97qU1xYezZTIIHg,3745
 pyRDDLGym_jax/examples/run_scipy.py,sha256=7uVnDXb7D3NTJqA2L8nrcYDJP-k0ba9dl9YqA2CD9ac,2301
 pyRDDLGym_jax/examples/run_tune.py,sha256=F5KWgtoCPbf7XHB6HW9LjxarD57U2LvuGdTz67OL1DY,4114
 pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_drp.cfg,sha256=mE8MqhOlkHeXIGEVrnR3QY6I-_iy4uxFYRA71P1bmtk,347
@@ -41,9 +42,9 @@ pyRDDLGym_jax/examples/configs/default_slp.cfg,sha256=mJo0woDevhQCSQfJg30ULVy9qG
 pyRDDLGym_jax/examples/configs/tuning_drp.cfg,sha256=zocZn_cVarH5i0hOlt2Zu0NwmXYBmTTghLaXLtQOGto,526
 pyRDDLGym_jax/examples/configs/tuning_replan.cfg,sha256=9oIhtw9cuikmlbDgCgbrTc5G7hUio-HeAv_3CEGVclY,523
 pyRDDLGym_jax/examples/configs/tuning_slp.cfg,sha256=QqnyR__5-HhKeCDfGDel8VIlqsjxRHk4SSH089zJP8s,486
-pyrddlgym_jax-2.
-pyrddlgym_jax-2.
-pyrddlgym_jax-2.
-pyrddlgym_jax-2.
-pyrddlgym_jax-2.
-pyrddlgym_jax-2.
+pyrddlgym_jax-2.7.dist-info/licenses/LICENSE,sha256=2a-BZEY7aEZW-DkmmOQsuUDU0pc6ovQy3QnYFZ4baq4,1095
+pyrddlgym_jax-2.7.dist-info/METADATA,sha256=xN_SB6x-qiC9cj8O0VvF9HIEDpK79i7FQgn8D3og2xQ,16770
+pyrddlgym_jax-2.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+pyrddlgym_jax-2.7.dist-info/entry_points.txt,sha256=Q--z9QzqDBz1xjswPZ87PU-pib-WPXx44hUWAFoBGBA,59
+pyrddlgym_jax-2.7.dist-info/top_level.txt,sha256=n_oWkP_BoZK0VofvPKKmBZ3NPk86WFNvLhi1BktCbVQ,14
+pyrddlgym_jax-2.7.dist-info/RECORD,,
{pyrddlgym_jax-2.5.dist-info → pyrddlgym_jax-2.7.dist-info}/WHEEL
File without changes
{pyrddlgym_jax-2.5.dist-info → pyrddlgym_jax-2.7.dist-info}/entry_points.txt
File without changes
{pyrddlgym_jax-2.5.dist-info → pyrddlgym_jax-2.7.dist-info}/top_level.txt
File without changes