pyRDDLGym-jax 2.4 → 2.5 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyRDDLGym_jax/__init__.py CHANGED
@@ -1 +1 @@
- __version__ = '2.4'
+ __version__ = '2.5'
@@ -430,7 +430,7 @@ class JaxRDDLCompiler:
  _jax_wrapped_single_step_policy,
  in_axes=(0, None, None, None, 0, None)
  )(keys, policy_params, hyperparams, step, subs, model_params)
- model_params = jax.tree_map(partial(jnp.mean, axis=0), model_params)
+ model_params = jax.tree_util.tree_map(partial(jnp.mean, axis=0), model_params)
  carry = (key, policy_params, hyperparams, subs, model_params)
  return carry, log
@@ -440,7 +440,7 @@ class JaxRDDLCompiler:
  start = (key, policy_params, hyperparams, subs, model_params)
  steps = jnp.arange(n_steps)
  end, log = jax.lax.scan(_jax_wrapped_batched_step_policy, start, steps)
- log = jax.tree_map(partial(jnp.swapaxes, axis1=0, axis2=1), log)
+ log = jax.tree_util.tree_map(partial(jnp.swapaxes, axis1=0, axis2=1), log)
  model_params = end[-1]
  return log, model_params
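Note: a large share of the hunks in this release replace the deprecated alias `jax.tree_map` with the fully qualified `jax.tree_util.tree_map`, which is the stable spelling in newer JAX releases. A minimal sketch of the equivalent call on an illustrative pytree (the `params` dictionary below is not taken from the package):

```python
from functools import partial

import jax
import jax.numpy as jnp

# illustrative pytree of parameters
params = {'w': jnp.ones((4, 3)), 'b': jnp.zeros((4,))}

# old spelling, deprecated/removed in newer JAX: jax.tree_map(partial(jnp.mean, axis=0), params)
means = jax.tree_util.tree_map(partial(jnp.mean, axis=0), params)  # stable spelling used in 2.5
```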
@@ -707,7 +707,10 @@ class JaxRDDLCompiler:
  sample = jnp.asarray(value, dtype=self._fix_dtype(value))
  new_slices = [None] * len(jax_nested_expr)
  for (i, jax_expr) in enumerate(jax_nested_expr):
- new_slices[i], key, err, params = jax_expr(x, params, key)
+ new_slice, key, err, params = jax_expr(x, params, key)
+ if not jnp.issubdtype(jnp.result_type(new_slice), jnp.integer):
+ new_slice = jnp.asarray(new_slice, dtype=self.INT)
+ new_slices[i] = new_slice
  error |= err
  new_slices = tuple(new_slices)
  sample = sample[new_slices]
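The change above also coerces each computed slice to the compiler's integer dtype before it is used to index `sample`, since JAX only accepts integer (or boolean) array indices and a relaxed index expression may come back as floating point. A hedged standalone sketch of the same guard, using `jnp.int32` in place of the compiler's `self.INT`:

```python
import jax.numpy as jnp

sample = jnp.arange(10.0)
idx = jnp.asarray(2.0)  # a relaxed index expression may come back as a float
if not jnp.issubdtype(jnp.result_type(idx), jnp.integer):
    idx = jnp.asarray(idx, dtype=jnp.int32)  # cast before indexing, mirroring the hunk
value = sample[idx]  # indexing with a float array would raise a TypeError
```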
@@ -986,7 +989,8 @@ class JaxRDDLCompiler:
  sample_cases = [None] * len(jax_cases)
  for (i, jax_case) in enumerate(jax_cases):
  sample_cases[i], key, err_case, params = jax_case(x, params, key)
- err |= err_case
+ err |= err_case
+ sample_cases = jnp.asarray(sample_cases)
  sample_cases = jnp.asarray(sample_cases, dtype=self._fix_dtype(sample_cases))

  # predicate (enum) is an integer - use it to extract from case array
@@ -39,6 +39,7 @@ import configparser
  from enum import Enum
  from functools import partial
  import os
+ import pickle
  import sys
  import time
  import traceback
@@ -229,13 +230,19 @@ def _load_config(config, args):


  def load_config(path: str) -> Tuple[Kwargs, ...]:
- '''Loads a config file at the specified file path.'''
+ '''Loads a config file at the specified file path.
+
+ :param path: the path of the config file to load and parse
+ '''
  config, args = _parse_config_file(path)
  return _load_config(config, args)


  def load_config_from_string(value: str) -> Tuple[Kwargs, ...]:
- '''Loads config file contents specified explicitly as a string value.'''
+ '''Loads config file contents specified explicitly as a string value.
+
+ :param value: the string in json format containing the config contents to parse
+ '''
  config, args = _parse_config_string(value)
  return _load_config(config, args)
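The expanded docstrings describe the two config entry points. A brief usage sketch; the unpacked names and the config path are illustrative, following the three-tuple pattern used later in run_tune.py:

```python
from pyRDDLGym_jax.core.planner import load_config, load_config_from_string

# from a config file on disk (illustrative path)
planner_args, plan_args, train_args = load_config('path/to/config.cfg')

# from config text already in memory, e.g. a filled-in template
with open('path/to/config.cfg') as file:
    planner_args, plan_args, train_args = load_config_from_string(file.read())
```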
@@ -258,6 +265,7 @@ class JaxRDDLCompilerWithGrad(JaxRDDLCompiler):
  def __init__(self, *args,
  logic: Logic=FuzzyLogic(),
  cpfs_without_grad: Optional[Set[str]]=None,
+ print_warnings: bool=True,
  **kwargs) -> None:
  '''Creates a new RDDL to Jax compiler, where operations that are not
  differentiable are converted to approximate forms that have defined gradients.
@@ -268,6 +276,7 @@ class JaxRDDLCompilerWithGrad(JaxRDDLCompiler):
  to customize these operations
  :param cpfs_without_grad: which CPFs do not have gradients (use straight
  through gradient trick)
+ :param print_warnings: whether to print warnings
  :param *kwargs: keyword arguments to pass to base compiler
  '''
  super(JaxRDDLCompilerWithGrad, self).__init__(*args, **kwargs)
@@ -277,6 +286,7 @@ class JaxRDDLCompilerWithGrad(JaxRDDLCompiler):
  if cpfs_without_grad is None:
  cpfs_without_grad = set()
  self.cpfs_without_grad = cpfs_without_grad
+ self.print_warnings = print_warnings

  # actions and CPFs must be continuous
  pvars_cast = set()
@@ -284,7 +294,7 @@ class JaxRDDLCompilerWithGrad(JaxRDDLCompiler):
  self.init_values[var] = np.asarray(values, dtype=self.REAL)
  if not np.issubdtype(np.result_type(values), np.floating):
  pvars_cast.add(var)
- if pvars_cast:
+ if self.print_warnings and pvars_cast:
  message = termcolor.colored(
  f'[INFO] JAX gradient compiler will cast p-vars {pvars_cast} to float.',
  'green')
@@ -314,12 +324,12 @@ class JaxRDDLCompilerWithGrad(JaxRDDLCompiler):
  if cpf in self.cpfs_without_grad:
  jax_cpfs[cpf] = self._jax_stop_grad(jax_cpfs[cpf])

- if cpfs_cast:
+ if self.print_warnings and cpfs_cast:
  message = termcolor.colored(
  f'[INFO] JAX gradient compiler will cast CPFs {cpfs_cast} to float.',
  'green')
  print(message)
- if self.cpfs_without_grad:
+ if self.print_warnings and self.cpfs_without_grad:
  message = termcolor.colored(
  f'[INFO] Gradients will not flow through CPFs {self.cpfs_without_grad}.',
  'green')
@@ -436,10 +446,11 @@ class JaxPlan(metaclass=ABCMeta):
  ~lower_finite & upper_finite,
  ~lower_finite & ~upper_finite]
  bounds[name] = (lower, upper)
- message = termcolor.colored(
- f'[INFO] Bounds of action-fluent <{name}> set to {bounds[name]}.',
- 'green')
- print(message)
+ if compiled.print_warnings:
+ message = termcolor.colored(
+ f'[INFO] Bounds of action-fluent <{name}> set to {bounds[name]}.',
+ 'green')
+ print(message)
  return shapes, bounds, bounds_safe, cond_lists

  def _count_bool_actions(self, rddl: RDDLLiftedModel):
@@ -519,7 +530,7 @@ class JaxStraightLinePlan(JaxPlan):
  # action concurrency check
  bool_action_count, allowed_actions = self._count_bool_actions(rddl)
  use_constraint_satisfaction = allowed_actions < bool_action_count
- if use_constraint_satisfaction:
+ if compiled.print_warnings and use_constraint_satisfaction:
  message = termcolor.colored(
  f'[INFO] SLP will use projected gradient to satisfy '
  f'max_nondef_actions since total boolean actions '
@@ -605,7 +616,7 @@ class JaxStraightLinePlan(JaxPlan):
  start = 0
  for (name, size) in action_sizes.items():
  action = output[..., start:start + size]
- action = jnp.reshape(action, newshape=shapes[name][1:])
+ action = jnp.reshape(action, shapes[name][1:])
  if noop[name]:
  action = 1.0 - action
  actions[name] = action
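This hunk, along with the matching ones in the deep reactive policy and the planner below, drops the `newshape=` keyword from `reshape` calls; that keyword has been deprecated in favour of the positional `shape` argument in recent NumPy/JAX releases. A minimal sketch of the equivalent call:

```python
import jax.numpy as jnp

action = jnp.arange(6.0)
# previously: jnp.reshape(action, newshape=(2, 3))  -- keyword deprecated/removed in newer JAX
action = jnp.reshape(action, (2, 3))
```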
@@ -838,7 +849,7 @@ class JaxStraightLinePlan(JaxPlan):

  def guess_next_epoch(self, params: Pytree) -> Pytree:
  next_fn = JaxStraightLinePlan._guess_next_epoch
- return jax.tree_map(next_fn, params)
+ return jax.tree_util.tree_map(next_fn, params)


  class JaxDeepReactivePolicy(JaxPlan):
@@ -946,17 +957,19 @@ class JaxDeepReactivePolicy(JaxPlan):
  if ranges[var] != 'bool':
  value_size = np.size(values)
  if normalize_per_layer and value_size == 1:
- message = termcolor.colored(
- f'[WARN] Cannot apply layer norm to state-fluent <{var}> '
- f'of size 1: setting normalize_per_layer = False.', 'yellow')
- print(message)
+ if compiled.print_warnings:
+ message = termcolor.colored(
+ f'[WARN] Cannot apply layer norm to state-fluent <{var}> '
+ f'of size 1: setting normalize_per_layer = False.', 'yellow')
+ print(message)
  normalize_per_layer = False
  non_bool_dims += value_size
  if not normalize_per_layer and non_bool_dims == 1:
- message = termcolor.colored(
- '[WARN] Cannot apply layer norm to state-fluents of total size 1: '
- 'setting normalize = False.', 'yellow')
- print(message)
+ if compiled.print_warnings:
+ message = termcolor.colored(
+ '[WARN] Cannot apply layer norm to state-fluents of total size 1: '
+ 'setting normalize = False.', 'yellow')
+ print(message)
  normalize = False

  # convert subs dictionary into a state vector to feed to the MLP
@@ -1054,7 +1067,7 @@ class JaxDeepReactivePolicy(JaxPlan):
  for (name, size) in layer_sizes.items():
  if ranges[name] == 'bool':
  action = output[..., start:start + size]
- action = jnp.reshape(action, newshape=shapes[name])
+ action = jnp.reshape(action, shapes[name])
  if noop[name]:
  action = 1.0 - action
  actions[name] = action
@@ -1226,6 +1239,7 @@ class PGPE(metaclass=ABCMeta):

  @abstractmethod
  def compile(self, loss_fn: Callable, projection: Callable, real_dtype: Type,
+ print_warnings: bool,
  parallel_updates: Optional[int]=None) -> None:
  pass

@@ -1322,6 +1336,7 @@ class GaussianPGPE(PGPE):
  )

  def compile(self, loss_fn: Callable, projection: Callable, real_dtype: Type,
+ print_warnings: bool,
  parallel_updates: Optional[int]=None) -> None:
  sigma0 = self.init_sigma
  sigma_lo, sigma_hi = self.sigma_range
@@ -1347,7 +1362,7 @@

  def _jax_wrapped_pgpe_init(key, policy_params):
  mu = policy_params
- sigma = jax.tree_map(partial(jnp.full_like, fill_value=sigma0), mu)
+ sigma = jax.tree_util.tree_map(partial(jnp.full_like, fill_value=sigma0), mu)
  pgpe_params = (mu, sigma)
  pgpe_opt_state = (mu_optimizer.init(mu), sigma_optimizer.init(sigma))
  r_max = -jnp.inf
@@ -1395,13 +1410,14 @@
  treedef = jax.tree_util.tree_structure(sigma)
  keys = random.split(key, num=treedef.num_leaves)
  keys_pytree = jax.tree_util.tree_unflatten(treedef=treedef, leaves=keys)
- epsilon = jax.tree_map(_jax_wrapped_mu_noise, keys_pytree, sigma)
- p1 = jax.tree_map(jnp.add, mu, epsilon)
- p2 = jax.tree_map(jnp.subtract, mu, epsilon)
+ epsilon = jax.tree_util.tree_map(_jax_wrapped_mu_noise, keys_pytree, sigma)
+ p1 = jax.tree_util.tree_map(jnp.add, mu, epsilon)
+ p2 = jax.tree_util.tree_map(jnp.subtract, mu, epsilon)
  if super_symmetric:
- epsilon_star = jax.tree_map(_jax_wrapped_epsilon_star, sigma, epsilon)
- p3 = jax.tree_map(jnp.add, mu, epsilon_star)
- p4 = jax.tree_map(jnp.subtract, mu, epsilon_star)
+ epsilon_star = jax.tree_util.tree_map(
+ _jax_wrapped_epsilon_star, sigma, epsilon)
+ p3 = jax.tree_util.tree_map(jnp.add, mu, epsilon_star)
+ p4 = jax.tree_util.tree_map(jnp.subtract, mu, epsilon_star)
  else:
  epsilon_star, p3, p4 = epsilon, p1, p2
  return p1, p2, p3, p4, epsilon, epsilon_star
@@ -1469,11 +1485,11 @@
  r_max = jnp.maximum(r_max, r4)
  else:
  r3, r4 = r1, r2
- grad_mu = jax.tree_map(
+ grad_mu = jax.tree_util.tree_map(
  partial(_jax_wrapped_mu_grad, r1=r1, r2=r2, r3=r3, r4=r4, m=r_max),
  epsilon, epsilon_star
  )
- grad_sigma = jax.tree_map(
+ grad_sigma = jax.tree_util.tree_map(
  partial(_jax_wrapped_sigma_grad,
  r1=r1, r2=r2, r3=r3, r4=r4, m=r_max, ent=ent),
  epsilon, epsilon_star, sigma
@@ -1492,7 +1508,7 @@
  _jax_wrapped_pgpe_grad,
  in_axes=(0, None, None, None, None, None, None, None)
  )(keys, mu, sigma, r_max, ent, policy_hyperparams, subs, model_params)
- mu_grad, sigma_grad = jax.tree_map(
+ mu_grad, sigma_grad = jax.tree_util.tree_map(
  partial(jnp.mean, axis=0), (mu_grads, sigma_grads))
  new_r_max = jnp.max(r_maxs)
  return mu_grad, sigma_grad, new_r_max
@@ -1516,7 +1532,7 @@
  sigma_grad, sigma_state, params=sigma)
  new_mu = optax.apply_updates(mu, mu_updates)
  new_sigma = optax.apply_updates(sigma, sigma_updates)
- new_sigma = jax.tree_map(
+ new_sigma = jax.tree_util.tree_map(
  partial(jnp.clip, min=sigma_lo, max=sigma_hi), new_sigma)
  return new_mu, new_sigma, new_mu_state, new_sigma_state

@@ -1537,7 +1553,7 @@
  if max_kl is not None:
  old_mu_lr = new_mu_state.hyperparams['learning_rate']
  old_sigma_lr = new_sigma_state.hyperparams['learning_rate']
- kl_terms = jax.tree_map(
+ kl_terms = jax.tree_util.tree_map(
  _jax_wrapped_pgpe_kl_term, new_mu, new_sigma, mu, sigma)
  total_kl = jax.tree_util.tree_reduce(jnp.add, kl_terms)
  kl_reduction = jnp.minimum(1.0, jnp.sqrt(max_kl / total_kl))
@@ -1672,6 +1688,7 @@ class JaxBackpropPlanner:
  compile_non_fluent_exact: bool=True,
  logger: Optional[Logger]=None,
  dashboard_viz: Optional[Any]=None,
+ print_warnings: bool=True,
  parallel_updates: Optional[int]=None) -> None:
  '''Creates a new gradient-based algorithm for optimizing action sequences
  (plan) in the given RDDL. Some operations will be converted to their
@@ -1712,6 +1729,7 @@
  :param logger: to log information about compilation to file
  :param dashboard_viz: optional visualizer object from the environment
  to pass to the dashboard to visualize the policy
+ :param print_warnings: whether to print warnings
  :param parallel_updates: how many optimizers to run independently in parallel
  '''
  self.rddl = rddl
@@ -1737,6 +1755,7 @@
  self.noise_kwargs = noise_kwargs
  self.pgpe = pgpe
  self.use_pgpe = pgpe is not None
+ self.print_warnings = print_warnings

  # set optimizer
  try:
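A recurring theme in this release is the new `print_warnings` flag, threaded from `JaxBackpropPlanner` down to the relaxed compiler, the plans and the PGPE optimizer so that the coloured [INFO]/[WARN] messages can be silenced. A hedged construction sketch; `model` stands for a lifted RDDL model from pyRDDLGym, and the other keyword arguments are placeholders for the planner's usual settings:

```python
from pyRDDLGym_jax.core.planner import JaxBackpropPlanner, JaxStraightLinePlan

planner = JaxBackpropPlanner(rddl=model,                 # placeholder RDDLLiftedModel
                             plan=JaxStraightLinePlan(),
                             batch_size_train=32,
                             print_warnings=False)       # new in 2.5: silence [INFO]/[WARN] output
```

The bundled tuning config files set the same option as `print_warnings=False`, as shown in the config hunks further down.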
@@ -1789,7 +1808,11 @@
  self._jax_compile_rddl()
  self._jax_compile_optimizer()

- def summarize_system(self) -> str:
+ @staticmethod
+ def summarize_system() -> str:
+ '''Returns a string containing information about the system, Python version
+ and jax-related packages that are relevant to the current planner.
+ '''
  try:
  jaxlib_version = jax._src.lib.version_str
  except Exception as _:
@@ -1818,6 +1841,9 @@ r"""
  f'devices: {devices_short}\n')

  def summarize_relaxations(self) -> str:
+ '''Returns a summary table containing all non-differentiable operators
+ and their relaxations.
+ '''
  result = ''
  if self.compiled.model_params:
  result += ('Some RDDL operations are non-differentiable '
@@ -1834,6 +1860,9 @@ r"""
  return result

  def summarize_hyperparameters(self) -> str:
+ '''Returns a string summarizing the hyper-parameters of the current planner
+ instance.
+ '''
  result = (f'objective hyper-parameters:\n'
  f' utility_fn ={self.utility.__name__}\n'
  f' utility args ={self.utility_kwargs}\n'
@@ -1873,7 +1902,8 @@ r"""
  logger=self.logger,
  use64bit=self.use64bit,
  cpfs_without_grad=self.cpfs_without_grad,
- compile_non_fluent_exact=self.compile_non_fluent_exact
+ compile_non_fluent_exact=self.compile_non_fluent_exact,
+ print_warnings=self.print_warnings
  )
  self.compiled.compile(log_jax_expr=True, heading='RELAXED MODEL')

@@ -1922,7 +1952,8 @@ r"""

  # optimization
  self.update = self._jax_update(train_loss)
- self.pytree_at = jax.jit(lambda tree, i: jax.tree_map(lambda x: x[i], tree))
+ self.pytree_at = jax.jit(
+ lambda tree, i: jax.tree_util.tree_map(lambda x: x[i], tree))

  # pgpe option
  if self.use_pgpe:
@@ -1930,6 +1961,7 @@ r"""
  loss_fn=test_loss,
  projection=self.plan.projection,
  real_dtype=self.test_compiled.REAL,
+ print_warnings=self.print_warnings,
  parallel_updates=self.parallel_updates
  )
  self.merge_pgpe = self._jax_merge_pgpe_jaxplan()
@@ -2010,7 +2042,7 @@ r"""
  # check if the gradients are all zeros
  def _jax_wrapped_zero_gradients(grad):
  leaves, _ = jax.tree_util.tree_flatten(
- jax.tree_map(partial(jnp.allclose, b=0), grad))
+ jax.tree_util.tree_map(partial(jnp.allclose, b=0), grad))
  return jnp.all(jnp.asarray(leaves))

  # calculate the plan gradient w.r.t. return loss and update optimizer
@@ -2069,7 +2101,7 @@ r"""
  def select_fn(leaf1, leaf2):
  expanded_mask = pgpe_mask[(...,) + (jnp.newaxis,) * (jnp.ndim(leaf1) - 1)]
  return jnp.where(expanded_mask, leaf1, leaf2)
- policy_params = jax.tree_map(select_fn, pgpe_param, policy_params)
+ policy_params = jax.tree_util.tree_map(select_fn, pgpe_param, policy_params)
  test_loss = jnp.where(pgpe_mask, pgpe_loss, test_loss)
  test_loss_smooth = jnp.where(pgpe_mask, pgpe_loss_smooth, test_loss_smooth)
  expanded_mask = pgpe_mask[(...,) + (jnp.newaxis,) * (jnp.ndim(converged) - 1)]
@@ -2091,7 +2123,9 @@ r"""
  f'Variable <{name}> in subs argument is not a '
  f'valid p-variable, must be one of '
  f'{set(self.test_compiled.init_values.keys())}.')
- value = np.reshape(value, newshape=np.shape(init_value))[np.newaxis, ...]
+ value = np.reshape(value, np.shape(init_value))[np.newaxis, ...]
+ if value.dtype.type is np.str_:
+ value = rddl.object_string_to_index_array(rddl.variable_ranges[name], value)
  train_value = np.repeat(value, repeats=n_train, axis=0)
  train_value = np.asarray(train_value, dtype=self.compiled.REAL)
  init_train[name] = train_value
@@ -2121,7 +2155,7 @@ r"""
  x[np.newaxis, ...], shape=(self.parallel_updates,) + np.shape(x))
  return x

- return jax.tree_map(make_batched, pytree)
+ return jax.tree_util.tree_map(make_batched, pytree)

  def as_optimization_problem(
  self, key: Optional[random.PRNGKey]=None,
@@ -2165,10 +2199,11 @@ r"""
  train_subs, _ = self._batched_init_subs(subs)
  model_params = self.compiled.model_params
  if policy_hyperparams is None:
- message = termcolor.colored(
- '[WARN] policy_hyperparams is not set, setting 1.0 for '
- 'all action-fluents which could be suboptimal.', 'yellow')
- print(message)
+ if self.print_warnings:
+ message = termcolor.colored(
+ '[WARN] policy_hyperparams is not set, setting 1.0 for '
+ 'all action-fluents which could be suboptimal.', 'yellow')
+ print(message)
  policy_hyperparams = {action: 1.0
  for action in self.rddl.action_fluents}
@@ -2318,10 +2353,11 @@ r"""

  # cannot run dashboard with parallel updates
  if dashboard is not None and self.parallel_updates is not None:
- message = termcolor.colored(
- '[WARN] Dashboard is unavailable if parallel_updates is not None: '
- 'setting dashboard to None.', 'yellow')
- print(message)
+ if self.print_warnings:
+ message = termcolor.colored(
+ '[WARN] Dashboard is unavailable if parallel_updates is not None: '
+ 'setting dashboard to None.', 'yellow')
+ print(message)
  dashboard = None

  # if PRNG key is not provided
@@ -2331,19 +2367,21 @@ r"""

  # if policy_hyperparams is not provided
  if policy_hyperparams is None:
- message = termcolor.colored(
- '[WARN] policy_hyperparams is not set, setting 1.0 for '
- 'all action-fluents which could be suboptimal.', 'yellow')
- print(message)
+ if self.print_warnings:
+ message = termcolor.colored(
+ '[WARN] policy_hyperparams is not set, setting 1.0 for '
+ 'all action-fluents which could be suboptimal.', 'yellow')
+ print(message)
  policy_hyperparams = {action: 1.0
  for action in self.rddl.action_fluents}

  # if policy_hyperparams is a scalar
  elif isinstance(policy_hyperparams, (int, float, np.number)):
- message = termcolor.colored(
- f'[INFO] policy_hyperparams is {policy_hyperparams}, '
- f'setting this value for all action-fluents.', 'green')
- print(message)
+ if self.print_warnings:
+ message = termcolor.colored(
+ f'[INFO] policy_hyperparams is {policy_hyperparams}, '
+ f'setting this value for all action-fluents.', 'green')
+ print(message)
  hyperparam_value = float(policy_hyperparams)
  policy_hyperparams = {action: hyperparam_value
  for action in self.rddl.action_fluents}
@@ -2352,11 +2390,12 @@ r"""
  elif isinstance(policy_hyperparams, dict):
  for action in self.rddl.action_fluents:
  if action not in policy_hyperparams:
- message = termcolor.colored(
- f'[WARN] policy_hyperparams[{action}] is not set, '
- f'setting 1.0 for missing action-fluents '
- f'which could be suboptimal.', 'yellow')
- print(message)
+ if self.print_warnings:
+ message = termcolor.colored(
+ f'[WARN] policy_hyperparams[{action}] is not set, '
+ f'setting 1.0 for missing action-fluents '
+ f'which could be suboptimal.', 'yellow')
+ print(message)
  policy_hyperparams[action] = 1.0

  # print summary of parameters:
@@ -2396,7 +2435,7 @@ r"""
  if var not in subs:
  subs[var] = value
  added_pvars_to_subs.append(var)
- if added_pvars_to_subs:
+ if self.print_warnings and added_pvars_to_subs:
  message = termcolor.colored(
  f'[INFO] p-variables {added_pvars_to_subs} is not in '
  f'provided subs, using their initial values.', 'green')
@@ -2648,7 +2687,7 @@ r"""
  policy_params, opt_state, opt_aux = self.initialize(
  subkey, policy_hyperparams, train_subs)
  no_progress_count = 0
- if progress_bar is not None:
+ if self.print_warnings and progress_bar is not None:
  message = termcolor.colored(
  f'[INFO] Optimizer restarted at iteration {it} '
  f'due to lack of progress.', 'green')
@@ -2658,7 +2697,7 @@ r"""

  # stopping condition reached
  if stopping_rule is not None and stopping_rule.monitor(callback):
- if progress_bar is not None:
+ if self.print_warnings and progress_bar is not None:
  message = termcolor.colored(
  '[SUCC] Stopping rule has been reached.', 'green')
  progress_bar.write(message)
@@ -2699,7 +2738,8 @@ r"""

  # summarize and test for convergence
  if print_summary:
- grad_norm = jax.tree_map(lambda x: np.linalg.norm(x).item(), best_grad)
+ grad_norm = jax.tree_util.tree_map(
+ lambda x: np.linalg.norm(x).item(), best_grad)
  diagnosis = self._perform_diagnosis(
  last_iter_improve, -np.min(train_loss), -np.min(test_loss_smooth),
  -best_loss, grad_norm)
@@ -2777,6 +2817,7 @@ r"""
  :param policy_hyperparams: hyper-parameters for the policy/plan, such as
  weights for sigmoid wrapping boolean actions (optional)
  '''
+ subs = subs.copy()

  # check compatibility of the subs dictionary
  for (var, values) in subs.items():
@@ -2795,13 +2836,17 @@ r"""
  if step == 0 and var in self.rddl.observ_fluents:
  subs[var] = self.test_compiled.init_values[var]
  else:
- raise ValueError(
- f'Values {values} assigned to p-variable <{var}> are '
- f'non-numeric of type {dtype}.')
+ if dtype.type is np.str_:
+ prange = self.rddl.variable_ranges[var]
+ subs[var] = self.rddl.object_string_to_index_array(prange, subs[var])
+ else:
+ raise ValueError(
+ f'Values {values} assigned to p-variable <{var}> are '
+ f'non-numeric of type {dtype}.')

  # cast device arrays to numpy
  actions = self.test_policy(key, params, policy_hyperparams, step, subs)
- actions = jax.tree_map(np.asarray, actions)
+ actions = jax.tree_util.tree_map(np.asarray, actions)
  return actions

@@ -2822,8 +2867,9 @@ class JaxOfflineController(BaseAgent):
  def __init__(self, planner: JaxBackpropPlanner,
  key: Optional[random.PRNGKey]=None,
  eval_hyperparams: Optional[Dict[str, Any]]=None,
- params: Optional[Pytree]=None,
+ params: Optional[Union[str, Pytree]]=None,
  train_on_reset: bool=False,
+ save_path: Optional[str]=None,
  **train_kwargs) -> None:
  '''Creates a new JAX offline control policy that is trained once, then
  deployed later.
@@ -2834,8 +2880,10 @@ class JaxOfflineController(BaseAgent):
  :param eval_hyperparams: policy hyperparameters to apply for evaluation
  or whenever sample_action is called
  :param params: use the specified policy parameters instead of calling
- planner.optimize()
+ planner.optimize(); can be a string pointing to a valid file path where params
+ have been saved, or a pytree of parameters
  :param train_on_reset: retrain policy parameters on every episode reset
+ :param save_path: optional path to save parameters to
  :param **train_kwargs: any keyword arguments to be passed to the planner
  for optimization
  '''
@@ -2848,12 +2896,24 @@ class JaxOfflineController(BaseAgent):
  self.train_kwargs = train_kwargs
  self.params_given = params is not None

+ # load the policy from file
+ if not self.train_on_reset and params is not None and isinstance(params, str):
+ with open(params, 'rb') as file:
+ params = pickle.load(file)
+
+ # train the policy
  self.step = 0
  self.callback = None
  if not self.train_on_reset and not self.params_given:
  callback = self.planner.optimize(key=self.key, **self.train_kwargs)
  self.callback = callback
  params = callback['best_params']
+
+ # save the policy
+ if save_path is not None:
+ with open(save_path, 'wb') as file:
+ pickle.dump(params, file)
+
  self.params = params

  def sample_action(self, state: Dict[str, Any]) -> Dict[str, Any]:
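With `params` now accepting a file path and the new `save_path` argument, a trained policy can be pickled once and reloaded on later runs without re-optimizing. A hedged sketch; `planner` and `train_args` are placeholders for an already-constructed planner and its training keyword arguments:

```python
from pyRDDLGym_jax.core.planner import JaxOfflineController

# first run: optimize, then pickle the best parameters found
controller = JaxOfflineController(planner, save_path='policy_params.pkl', **train_args)

# later run: skip optimization entirely and load the pickled parameters
controller = JaxOfflineController(planner, params='policy_params.pkl')
```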
@@ -2865,6 +2925,8 @@

  def reset(self) -> None:
  self.step = 0
+
+ # train the policy if required to reset at the start of every episode
  if self.train_on_reset and not self.params_given:
  callback = self.planner.optimize(key=self.key, **self.train_kwargs)
  self.callback = callback
@@ -2915,18 +2977,22 @@ class JaxOnlineController(BaseAgent):
  attempts = 0
  while attempts < self.max_attempts and callback['iteration'] <= 1:
  attempts += 1
- message = termcolor.colored(
- f'[WARN] JIT compilation dominated the execution time: '
- f'executing the optimizer again on the traced model [attempt {attempts}].',
- 'yellow')
- print(message)
+ if self.planner.print_warnings:
+ message = termcolor.colored(
+ f'[WARN] JIT compilation dominated the execution time: '
+ f'executing the optimizer again on the traced model '
+ f'[attempt {attempts}].', 'yellow')
+ print(message)
  callback = planner.optimize(
- key=self.key, guess=self.guess, subs=state, **self.train_kwargs)
-
+ key=self.key, guess=self.guess, subs=state, **self.train_kwargs)
  self.callback = callback
  params = callback['best_params']
+
+ # get the action from the parameters for the current state
  self.key, subkey = random.split(self.key)
  actions = planner.get_action(subkey, params, 0, state, self.eval_hyperparams)
+
+ # apply warm start for the next epoch
  if self.warm_start:
  self.guess = planner.plan.guess_next_epoch(params)
  return actions
@@ -19,10 +19,12 @@


  import time
- from typing import Dict, Optional
+ import numpy as np
+ from typing import Dict, Optional, Union

  import jax

+ from pyRDDLGym.core.compiler.initializer import RDDLValueInitializer
  from pyRDDLGym.core.compiler.model import RDDLLiftedModel
  from pyRDDLGym.core.debug.exception import (
  RDDLActionPreconditionNotSatisfiedError,
@@ -35,7 +37,7 @@ from pyRDDLGym.core.simulator import RDDLSimulator

  from pyRDDLGym_jax.core.compiler import JaxRDDLCompiler

- Args = Dict[str, Value]
+ Args = Dict[str, Union[np.ndarray, Value]]


  class JaxRDDLSimulator(RDDLSimulator):
@@ -45,6 +47,7 @@ class JaxRDDLSimulator(RDDLSimulator):
  raise_error: bool=True,
  logger: Optional[Logger]=None,
  keep_tensors: bool=False,
+ objects_as_strings: bool=True,
  **compiler_args) -> None:
  '''Creates a new simulator for the given RDDL model with Jax as a backend.

@@ -57,6 +60,8 @@
  :param logger: to log information about compilation to file
  :param keep_tensors: whether the sampler takes actions and
  returns state in numpy array form
+ param objects_as_strings: whether to return object values as strings (defaults
+ to integer indices if False)
  :param **compiler_args: keyword arguments to pass to the Jax compiler
  '''
  if key is None:
@@ -67,7 +72,8 @@

  # generate direct sampling with default numpy RNG and operations
  super(JaxRDDLSimulator, self).__init__(
- rddl, logger=logger, keep_tensors=keep_tensors)
+ rddl, logger=logger,
+ keep_tensors=keep_tensors, objects_as_strings=objects_as_strings)

  def seed(self, seed: int) -> None:
  super(JaxRDDLSimulator, self).seed(seed)
@@ -84,11 +90,11 @@
  self.levels = compiled.levels
  self.traced = compiled.traced

- self.invariants = jax.tree_map(jax.jit, compiled.invariants)
- self.preconds = jax.tree_map(jax.jit, compiled.preconditions)
- self.terminals = jax.tree_map(jax.jit, compiled.terminations)
+ self.invariants = jax.tree_util.tree_map(jax.jit, compiled.invariants)
+ self.preconds = jax.tree_util.tree_map(jax.jit, compiled.preconditions)
+ self.terminals = jax.tree_util.tree_map(jax.jit, compiled.terminations)
  self.reward = jax.jit(compiled.reward)
- jax_cpfs = jax.tree_map(jax.jit, compiled.cpfs)
+ jax_cpfs = jax.tree_util.tree_map(jax.jit, compiled.cpfs)
  self.model_params = compiled.model_params

  # level analysis
@@ -139,7 +145,6 @@

  def check_action_preconditions(self, actions: Args, silent: bool=False) -> bool:
  '''Throws an exception if the action preconditions are not satisfied.'''
- actions = self._process_actions(actions)
  subs = self.subs
  subs.update(actions)

@@ -180,7 +185,6 @@
  '''
  rddl = self.rddl
  keep_tensors = self.keep_tensors
- actions = self._process_actions(actions)
  subs = self.subs
  subs.update(actions)

@@ -196,20 +200,40 @@
  # update state
  self.state = {}
  for (state, next_state) in rddl.next_state.items():
+
+ # set state = state' for the next epoch
  subs[state] = subs[next_state]
+
+ # convert object integer to string representation
+ state_values = subs[state]
+ if self.objects_as_strings:
+ ptype = rddl.variable_ranges[state]
+ if ptype not in RDDLValueInitializer.NUMPY_TYPES:
+ state_values = rddl.index_to_object_string_array(ptype, state_values)
+
+ # optional grounding of state dictionary
  if keep_tensors:
- self.state[state] = subs[state]
+ self.state[state] = state_values
  else:
- self.state.update(rddl.ground_var_with_values(state, subs[state]))
+ self.state.update(rddl.ground_var_with_values(state, state_values))

  # update observation
  if self._pomdp:
  obs = {}
  for var in rddl.observ_fluents:
+
+ # convert object integer to string representation
+ obs_values = subs[var]
+ if self.objects_as_strings:
+ ptype = rddl.variable_ranges[var]
+ if ptype not in RDDLValueInitializer.NUMPY_TYPES:
+ obs_values = rddl.index_to_object_string_array(ptype, obs_values)
+
+ # optional grounding of observ-fluent dictionary
  if keep_tensors:
- obs[var] = subs[var]
+ obs[var] = obs_values
  else:
- obs.update(rddl.ground_var_with_values(var, subs[var]))
+ obs.update(rddl.ground_var_with_values(var, obs_values))
  else:
  obs = self.state
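With the new `objects_as_strings` flag (on by default), the simulator converts object-valued state and observation fluents from their internal integer indices back to object names before returning them, using the pyRDDLGym model helpers referenced in this hunk. A hedged round-trip sketch; `rddl` stands for a compiled lifted model, and the object type and names are hypothetical:

```python
import numpy as np

# for a hypothetical object type 'robot' with objects ['r1', 'r2'],
# the two helpers referenced above invert each other
indices = rddl.object_string_to_index_array('robot', np.array(['r1', 'r2']))
names = rddl.index_to_object_string_array('robot', indices)  # back to ['r1', 'r2']
```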
@@ -371,16 +371,30 @@ class JaxParameterTuning:
  '''Tunes the Bayesian optimization algorithm hyper-parameters.'''
  print(f'Kernel: {repr(optimizer._gp.kernel_)}.')

- def tune(self, key: int, log_file: str, show_dashboard: bool=False) -> ParameterValues:
- '''Tunes the hyper-parameters for Jax planner, returns the best found.'''
+ def tune(self, key: int,
+ log_file: Optional[str]=None,
+ show_dashboard: bool=False,
+ print_hyperparams: bool=False) -> ParameterValues:
+ '''Tunes the hyper-parameters for Jax planner, returns the best found.

- print(self.summarize_hyperparameters())
+ :param key: RNG key to seed the hyper-parameter optimizer
+ :param log_file: optional path to file where tuning progress will be saved
+ :param show_dashboard: whether to display tuning results in a dashboard
+ :param print_hyperparams: whether to print a hyper-parameter summary of the
+ optimizer
+ '''

- # clear and prepare output file
- with open(log_file, 'w', newline='') as file:
- writer = csv.writer(file)
- writer.writerow(COLUMNS + list(self.hyperparams_dict.keys()))
+ if self.verbose:
+ print(JaxBackpropPlanner.summarize_system())
+ if print_hyperparams:
+ print(self.summarize_hyperparameters())

+ # clear and prepare output file
+ if log_file is not None:
+ with open(log_file, 'w', newline='') as file:
+ writer = csv.writer(file)
+ writer.writerow(COLUMNS + list(self.hyperparams_dict.keys()))
+
  # create a dash-board for visualizing experiment runs
  if show_dashboard and JaxPlannerDashboard is not None:
  dashboard = JaxPlannerDashboard()
@@ -519,9 +533,10 @@ class JaxParameterTuning:
  self.tune_optimizer(optimizer)

  # write results of all processes in current iteration to file
- with open(log_file, 'a', newline='') as file:
- writer = csv.writer(file)
- writer.writerows(rows)
+ if log_file is not None:
+ with open(log_file, 'a', newline='') as file:
+ writer = csv.writer(file)
+ writer.writerows(rows)

  # update the dashboard tuning
  if show_dashboard:
@@ -2,24 +2,56 @@ import argparse

  from pyRDDLGym_jax.examples import run_plan, run_tune

+ EPILOG = 'For complete documentation, see https://pyrddlgym.readthedocs.io/en/latest/jax.html.'
+
  def main():
- parser = argparse.ArgumentParser(description="Command line parser for the JaxPlan planner.")
+ parser = argparse.ArgumentParser(prog='jaxplan',
+ description="command line parser for the jaxplan planner",
+ epilog=EPILOG)
  subparsers = parser.add_subparsers(dest="jaxplan", required=True)

  # planning
- parser_plan = subparsers.add_parser("plan", help="Executes JaxPlan on a specified RDDL problem and method (slp, drp, or replan).")
- parser_plan.add_argument('args', nargs=argparse.REMAINDER)
+ parser_plan = subparsers.add_parser("plan",
+ help="execute jaxplan on a specified RDDL problem",
+ epilog=EPILOG)
+ parser_plan.add_argument('domain', type=str,
+ help='name of domain in rddlrepository or a valid file path')
+ parser_plan.add_argument('instance', type=str,
+ help='name of instance in rddlrepository or a valid file path')
+ parser_plan.add_argument('method', type=str,
+ help='training method to apply: [slp, drp] are offline methods, and [replan] are online')
+ parser_plan.add_argument('-e', '--episodes', type=int, required=False, default=1,
+ help='number of training or evaluation episodes')

  # tuning
- parser_tune = subparsers.add_parser("tune", help="Tunes JaxPlan on a specified RDDL problem and method (slp, drp, or replan).")
- parser_tune.add_argument('args', nargs=argparse.REMAINDER)
+ parser_tune = subparsers.add_parser("tune",
+ help="tune jaxplan on a specified RDDL problem",
+ epilog=EPILOG)
+ parser_tune.add_argument('domain', type=str,
+ help='name of domain in rddlrepository or a valid file path')
+ parser_tune.add_argument('instance', type=str,
+ help='name of instance in rddlrepository or a valid file path')
+ parser_tune.add_argument('method', type=str,
+ help='training method to apply: [slp, drp] are offline methods, and [replan] are online')
+ parser_tune.add_argument('-t', '--trials', type=int, required=False, default=5,
+ help='number of evaluation rollouts per hyper-parameter choice')
+ parser_tune.add_argument('-i', '--iters', type=int, required=False, default=20,
+ help='number of iterations of bayesian optimization')
+ parser_tune.add_argument('-w', '--workers', type=int, required=False, default=4,
+ help='number of parallel hyper-parameters to evaluate per iteration')
+ parser_tune.add_argument('-d', '--dashboard', type=bool, required=False, default=False,
+ help='show the dashboard')
+ parser_tune.add_argument('-f', '--filepath', type=str, required=False, default='',
+ help='where to save the config file of the best hyper-parameters')

  # dispatch
  args = parser.parse_args()
  if args.jaxplan == "plan":
- run_plan.run_from_args(args.args)
+ run_plan.main(args.domain, args.instance, args.method, args.episodes)
  elif args.jaxplan == "tune":
- run_tune.run_from_args(args.args)
+ run_tune.main(args.domain, args.instance, args.method,
+ args.trials, args.iters, args.workers, args.dashboard,
+ args.filepath)
  else:
  parser.print_help()
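The entry point now declares positional `domain`, `instance` and `method` arguments plus optional flags, and dispatches directly to the example modules' `main` functions. The equivalent programmatic calls look like the following sketch; the domain and instance names are illustrative:

```python
from pyRDDLGym_jax.examples import run_plan, run_tune

# same as: jaxplan plan Wildfire_MDP_ippc2014 1 slp --episodes 2
run_plan.main('Wildfire_MDP_ippc2014', '1', 'slp', episodes=2)

# same as: jaxplan tune Wildfire_MDP_ippc2014 1 drp --trials 5 --iters 20 --workers 4
run_tune.main('Wildfire_MDP_ippc2014', '1', 'drp', trials=5, iters=20, workers=4)
```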
@@ -11,6 +11,7 @@ optimizer='rmsprop'
  optimizer_kwargs={'learning_rate': LEARNING_RATE_TUNE}
  batch_size_train=32
  batch_size_test=32
+ print_warnings=False

  [Training]
  train_seconds=30
@@ -12,6 +12,7 @@ optimizer_kwargs={'learning_rate': LEARNING_RATE_TUNE}
  batch_size_train=32
  batch_size_test=32
  rollout_horizon=ROLLOUT_HORIZON_TUNE
+ print_warnings=False

  [Training]
  train_seconds=1
@@ -11,6 +11,7 @@ optimizer='rmsprop'
  optimizer_kwargs={'learning_rate': LEARNING_RATE_TUNE}
  batch_size_train=32
  batch_size_test=32
+ print_warnings=False

  [Training]
  train_seconds=30
@@ -26,7 +26,7 @@ from pyRDDLGym_jax.core.planner import (
  )


- def main(domain, instance, method, episodes=1):
+ def main(domain: str, instance: str, method: str, episodes: int=1) -> None:

  # set up the environment
  env = pyRDDLGym.make(domain, instance, vectorized=True)
@@ -36,7 +36,9 @@ def power_10(x):
  return 10.0 ** x


- def main(domain, instance, method, trials=5, iters=20, workers=4, dashboard=False):
+ def main(domain: str, instance: str, method: str,
+ trials: int=5, iters: int=20, workers: int=4, dashboard: bool=False,
+ filepath: str='') -> None:

  # set up the environment
  env = pyRDDLGym.make(domain, instance, vectorized=True)
@@ -68,6 +70,9 @@ def main(domain, instance, method, trials=5, iters=20, workers=4, dashboard=Fals
  tuning.tune(key=42,
  log_file=f'gp_{method}_{domain}_{instance}.csv',
  show_dashboard=dashboard)
+ if filepath is not None and filepath:
+ with open(filepath, "w") as file:
+ file.write(tuning.best_config)

  # evaluate the agent on the best parameters
  planner_args, _, train_args = load_config_from_string(tuning.best_config)
@@ -80,7 +85,7 @@ def main(domain, instance, method, trials=5, iters=20, workers=4, dashboard=Fals

  def run_from_args(args):
  if len(args) < 3:
- print('python run_tune.py <domain> <instance> <method> [<trials>] [<iters>] [<workers>] [<dashboard>]')
+ print('python run_tune.py <domain> <instance> <method> [<trials>] [<iters>] [<workers>] [<dashboard>] [<filepath>]')
  exit(1)
  if args[2] not in ['drp', 'slp', 'replan']:
  print('<method> in [drp, slp, replan]')
@@ -90,6 +95,7 @@ def run_from_args(args):
  if len(args) >= 5: kwargs['iters'] = int(args[4])
  if len(args) >= 6: kwargs['workers'] = int(args[5])
  if len(args) >= 7: kwargs['dashboard'] = bool(args[6])
+ if len(args) >= 8: kwargs['filepath'] = bool(args[7])
  main(**kwargs)

@@ -1,6 +1,6 @@
- Metadata-Version: 2.2
+ Metadata-Version: 2.4
  Name: pyRDDLGym-jax
- Version: 2.4
+ Version: 2.5
  Summary: pyRDDLGym-jax: automatic differentiation for solving sequential planning problems in JAX.
  Home-page: https://github.com/pyrddlgym-project/pyRDDLGym-jax
  Author: Michael Gimelfarb, Ayal Taitler, Scott Sanner
@@ -39,6 +39,7 @@ Dynamic: description
  Dynamic: description-content-type
  Dynamic: home-page
  Dynamic: license
+ Dynamic: license-file
  Dynamic: provides-extra
  Dynamic: requires-dist
  Dynamic: requires-python
@@ -116,7 +117,7 @@ pip install pyRDDLGym-jax[extra,dashboard]
  A basic run script is provided to train JaxPlan on any RDDL problem:

  ```shell
- jaxplan plan <domain> <instance> <method> <episodes>
+ jaxplan plan <domain> <instance> <method> --episodes <episodes>
  ```

  where:
@@ -241,7 +242,7 @@ More documentation about this and other new features will be coming soon.
  A basic run script is provided to run automatic Bayesian hyper-parameter tuning for the most sensitive parameters of JaxPlan:

  ```shell
- jaxplan tune <domain> <instance> <method> <trials> <iters> <workers> <dashboard>
+ jaxplan tune <domain> <instance> <method> --trials <trials> --iters <iters> --workers <workers> --dashboard <dashboard> --filepath <filepath>
  ```

  where:
@@ -251,7 +252,8 @@ where:
  - ``trials`` is the (optional) number of trials/episodes to average in evaluating each hyper-parameter setting
  - ``iters`` is the (optional) maximum number of iterations/evaluations of Bayesian optimization to perform
  - ``workers`` is the (optional) number of parallel evaluations to be done at each iteration, e.g. the total evaluations = ``iters * workers``
- - ``dashboard`` is whether the optimizations are tracked in the dashboard application.
+ - ``dashboard`` is whether the optimizations are tracked in the dashboard application
+ - ``filepath`` is the optional file path where a config file with the best hyper-parameter setting will be saved.

  It is easy to tune a custom range of the planner's hyper-parameters efficiently.
  First create a config file template with patterns replacing concrete parameter values that you want to tune, e.g.:
@@ -291,23 +293,16 @@ env = pyRDDLGym.make(domain, instance, vectorized=True)
  with open('path/to/config.cfg', 'r') as file:
  config_template = file.read()

- # map parameters in the config that will be tuned
+ # tune weight from 10^-1 ... 10^5 and lr from 10^-5 ... 10^1
  def power_10(x):
- return 10.0 ** x
-
- hyperparams = [
- Hyperparameter('TUNABLE_WEIGHT', -1., 5., power_10), # tune weight from 10^-1 ... 10^5
- Hyperparameter('TUNABLE_LEARNING_RATE', -5., 1., power_10), # tune lr from 10^-5 ... 10^1
- ]
+ return 10.0 ** x
+ hyperparams = [Hyperparameter('TUNABLE_WEIGHT', -1., 5., power_10),
+ Hyperparameter('TUNABLE_LEARNING_RATE', -5., 1., power_10)]

  # build the tuner and tune
  tuning = JaxParameterTuning(env=env,
- config_template=config_template,
- hyperparams=hyperparams,
- online=False,
- eval_trials=trials,
- num_workers=workers,
- gp_iters=iters)
+ config_template=config_template, hyperparams=hyperparams,
+ online=False, eval_trials=trials, num_workers=workers, gp_iters=iters)
  tuning.tune(key=42, log_file='path/to/log.csv')
  ```

@@ -1,20 +1,20 @@
- pyRDDLGym_jax/__init__.py,sha256=6Bd43-94X_2dH_ErGLQ0_DvlhX5cLWkVPvn31JBzFkY,19
- pyRDDLGym_jax/entry_point.py,sha256=dxDlO_5gneEEViwkLCg30Z-KVzUgdRXaKuFjoZklkA0,974
+ pyRDDLGym_jax/__init__.py,sha256=VoxLo_sy8RlJIIyu7szqL-cdMGBJdQPg-aSeyOVVIkY,19
+ pyRDDLGym_jax/entry_point.py,sha256=K0zy1oe66jfBHkHHCM6aGHbbiVqnQvDhDb8se4uaKHE,3319
  pyRDDLGym_jax/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- pyRDDLGym_jax/core/compiler.py,sha256=NFWfTHtGf7F-t7Qhn6X-VpSAJkTVHm-oRjujFw4O1HA,82605
+ pyRDDLGym_jax/core/compiler.py,sha256=uFCtoipsIa3MM9nGgT3X8iCViPl2XSPNXh0jMdzN0ko,82895
  pyRDDLGym_jax/core/logic.py,sha256=lfc2ak_ap_ajMEFlB5EHCRNgJym31dNyA-5d-7N4CZA,56271
- pyRDDLGym_jax/core/planner.py,sha256=wZJiZHV0Qxi9DS3AQ9Rx1doBvsKQXc1HYziY6GXTu_A,136965
- pyRDDLGym_jax/core/simulator.py,sha256=DnPL93WVCMZqtqMUoiJdfWcH9pEvNgGfDfO4NV0wIS0,9271
- pyRDDLGym_jax/core/tuning.py,sha256=Gm3YJF84_2vDIIJpOj0tK0-4rlJoEjYwxRt_JpUKAOA,24482
+ pyRDDLGym_jax/core/planner.py,sha256=M6GKzN7Ml57B4ZrFZhhkpsQCvReKaCQNzer7zeHCM9E,140275
+ pyRDDLGym_jax/core/simulator.py,sha256=ayCATTUL3clLaZPQ5OUg2bI_c26KKCTq6TbrxbMsVdc,10470
+ pyRDDLGym_jax/core/tuning.py,sha256=BWcQZk02TMLexTz1Sw4lX2EQKvmPbp7biC51M-IiNUw,25153
  pyRDDLGym_jax/core/visualization.py,sha256=4BghMp8N7qtF0tdyDSqtxAxNfP9HPrQWTiXzAMJmx7o,70365
  pyRDDLGym_jax/core/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  pyRDDLGym_jax/core/assets/favicon.ico,sha256=RMMrI9YvmF81TgYG7FO7UAre6WmYFkV3B2GmbA1l0kM,175085
  pyRDDLGym_jax/examples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  pyRDDLGym_jax/examples/run_gradient.py,sha256=KhXvijRDZ4V7N8NOI2WV8ePGpPna5_vnET61YwS7Tco,2919
  pyRDDLGym_jax/examples/run_gym.py,sha256=rXvNWkxe4jHllvbvU_EOMji_2-2k5d4tbBKhpMm_Gaw,1526
- pyRDDLGym_jax/examples/run_plan.py,sha256=TVfziHHaEC56wxwRw9llZ5iqSHe3m6yy8HxiR2TyvXE,2778
+ pyRDDLGym_jax/examples/run_plan.py,sha256=4y7JHqTxY5O1ltP6N7rar0jMiw7u9w1nuAIOcmDaAuE,2806
  pyRDDLGym_jax/examples/run_scipy.py,sha256=7uVnDXb7D3NTJqA2L8nrcYDJP-k0ba9dl9YqA2CD9ac,2301
- pyRDDLGym_jax/examples/run_tune.py,sha256=WbGO8RudIK-cPMAMKvI8NbFQAqkG-Blbnta3Efsep6c,3828
+ pyRDDLGym_jax/examples/run_tune.py,sha256=F5KWgtoCPbf7XHB6HW9LjxarD57U2LvuGdTz67OL1DY,4114
  pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_drp.cfg,sha256=mE8MqhOlkHeXIGEVrnR3QY6I-_iy4uxFYRA71P1bmtk,347
  pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_replan.cfg,sha256=nFFYHCKQUMn8x-OpJwu2pwe1tycNSJ8iAIwSkCBn33E,370
  pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_slp.cfg,sha256=eJ3HvHjODoKdtX7u-AM51xQaHJnYgzEy2t3omNG2oCs,340
@@ -38,12 +38,12 @@ pyRDDLGym_jax/examples/configs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
  pyRDDLGym_jax/examples/configs/default_drp.cfg,sha256=XeMWAAG_OFZo7JAMxS5-XXroZaeVMzfM0NswmEobIns,373
  pyRDDLGym_jax/examples/configs/default_replan.cfg,sha256=CK4cEz8ReXyAZPLaLG9clIIRXAqM3IplUCxbLt_V2lY,407
  pyRDDLGym_jax/examples/configs/default_slp.cfg,sha256=mJo0woDevhQCSQfJg30ULVy9qGIJDIw73XCe6pyIPtg,369
- pyRDDLGym_jax/examples/configs/tuning_drp.cfg,sha256=CQMpSCKTkGioO7U82mHMsYWFRsutULx0V6Wrl3YzV2U,504
- pyRDDLGym_jax/examples/configs/tuning_replan.cfg,sha256=m_0nozFg_GVld0tGv92Xao_KONFJDq_vtiJKt5isqI8,501
- pyRDDLGym_jax/examples/configs/tuning_slp.cfg,sha256=KHu8II6CA-h_HblwvWHylNRjSvvGS3VHxN7JQNR4p_Q,464
- pyrddlgym_jax-2.4.dist-info/LICENSE,sha256=Y0Gi6H6mLOKN-oIKGZulQkoTJyPZeAaeuZu7FXH-meg,1095
- pyrddlgym_jax-2.4.dist-info/METADATA,sha256=98Nl3EnEk-fRLeoy9orDScaikCT9M8X4zOfYtiS-WXI,17021
- pyrddlgym_jax-2.4.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
- pyrddlgym_jax-2.4.dist-info/entry_points.txt,sha256=Q--z9QzqDBz1xjswPZ87PU-pib-WPXx44hUWAFoBGBA,59
- pyrddlgym_jax-2.4.dist-info/top_level.txt,sha256=n_oWkP_BoZK0VofvPKKmBZ3NPk86WFNvLhi1BktCbVQ,14
- pyrddlgym_jax-2.4.dist-info/RECORD,,
+ pyRDDLGym_jax/examples/configs/tuning_drp.cfg,sha256=zocZn_cVarH5i0hOlt2Zu0NwmXYBmTTghLaXLtQOGto,526
+ pyRDDLGym_jax/examples/configs/tuning_replan.cfg,sha256=9oIhtw9cuikmlbDgCgbrTc5G7hUio-HeAv_3CEGVclY,523
+ pyRDDLGym_jax/examples/configs/tuning_slp.cfg,sha256=QqnyR__5-HhKeCDfGDel8VIlqsjxRHk4SSH089zJP8s,486
+ pyrddlgym_jax-2.5.dist-info/licenses/LICENSE,sha256=Y0Gi6H6mLOKN-oIKGZulQkoTJyPZeAaeuZu7FXH-meg,1095
+ pyrddlgym_jax-2.5.dist-info/METADATA,sha256=XAaEJfbsYW-txxZhFZ6o_HmvqxkIMTqBF9LbV-KdTzI,17058
+ pyrddlgym_jax-2.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ pyrddlgym_jax-2.5.dist-info/entry_points.txt,sha256=Q--z9QzqDBz1xjswPZ87PU-pib-WPXx44hUWAFoBGBA,59
+ pyrddlgym_jax-2.5.dist-info/top_level.txt,sha256=n_oWkP_BoZK0VofvPKKmBZ3NPk86WFNvLhi1BktCbVQ,14
+ pyrddlgym_jax-2.5.dist-info/RECORD,,
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (76.0.0)
+ Generator: setuptools (80.9.0)
  Root-Is-Purelib: true
  Tag: py3-none-any