pyRDDLGym-jax 2.0__py3-none-any.whl → 2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,8 +20,7 @@ import math
20
20
  import numpy as np
21
21
  import time
22
22
  import threading
23
- from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING
24
- import warnings
23
+ from typing import Any, Dict, Optional, Tuple, TYPE_CHECKING
25
24
  import webbrowser
26
25
 
27
26
  # prevent endless console prints
@@ -32,7 +31,7 @@ log.setLevel(logging.ERROR)
32
31
  import dash
33
32
  from dash.dcc import Interval, Graph, Store
34
33
  from dash.dependencies import Input, Output, State, ALL
35
- from dash.html import Div, B, H4, P, Img, Hr
34
+ from dash.html import Div, B, H4, P, Hr
36
35
  import dash_bootstrap_components as dbc
37
36
 
38
37
  import plotly.colors as pc
@@ -53,6 +52,7 @@ REWARD_ERROR_DIST_SUBPLOTS = 20
53
52
  MODEL_STATE_ERROR_HEIGHT = 300
54
53
  POLICY_STATE_VIZ_MAX_HEIGHT = 800
55
54
  GP_POSTERIOR_MAX_HEIGHT = 800
55
+ GP_POSTERIOR_PIXELS = 100
56
56
 
57
57
  PLOT_AXES_FONT_SIZE = 11
58
58
  EXPERIMENT_ENTRY_FONT_SIZE = 14
@@ -1417,7 +1417,7 @@ class JaxPlannerDashboard:
1417
1417
  self.pgpe_return[experiment_id].append(callback['pgpe_return'])
1418
1418
 
1419
1419
  # data for return distributions
1420
- progress = callback['progress']
1420
+ progress = int(callback['progress'])
1421
1421
  if progress - self.return_dist_last_progress[experiment_id] \
1422
1422
  >= PROGRESS_FOR_NEXT_RETURN_DIST:
1423
1423
  self.return_dist_ticks[experiment_id].append(iteration)
@@ -1486,8 +1486,8 @@ class JaxPlannerDashboard:
1486
1486
  if i2 > i1:
1487
1487
 
1488
1488
  # Generate a grid for visualization
1489
- p1_values = np.linspace(*bounds[param1], 100)
1490
- p2_values = np.linspace(*bounds[param2], 100)
1489
+ p1_values = np.linspace(*bounds[param1], GP_POSTERIOR_PIXELS)
1490
+ p2_values = np.linspace(*bounds[param2], GP_POSTERIOR_PIXELS)
1491
1491
  P1, P2 = np.meshgrid(p1_values, p2_values)
1492
1492
 
1493
1493
  # Predict the mean and deviation of the surrogate model
@@ -1500,8 +1500,7 @@ class JaxPlannerDashboard:
1500
1500
  for p1, p2 in zip(np.ravel(P1), np.ravel(P2)):
1501
1501
  params = {param1: p1, param2: p2}
1502
1502
  params.update(fixed_params)
1503
- param_grid.append(
1504
- [params[key] for key in optimizer.space.keys])
1503
+ param_grid.append([params[key] for key in optimizer.space.keys])
1505
1504
  param_grid = np.asarray(param_grid)
1506
1505
  mean, std = optimizer._gp.predict(param_grid, return_std=True)
1507
1506
  mean = mean.reshape(P1.shape)
@@ -3,7 +3,7 @@ is performed using a batched parallelized Bayesian optimization.
3
3
 
4
4
  The syntax is:
5
5
 
6
- python run_tune.py <domain> <instance> <method> [<trials>] [<iters>] [<workers>]
6
+ python run_tune.py <domain> <instance> <method> [<trials>] [<iters>] [<workers>] [<dashboard>]
7
7
 
8
8
  where:
9
9
  <domain> is the name of a domain located in the /Examples directory
@@ -15,6 +15,7 @@ where:
15
15
  (defaults to 20)
16
16
  <workers> is the number of parallel workers (i.e. batch size), which must
17
17
  not exceed the number of cores available on the machine (defaults to 4)
18
+ <dashboard> is whether the dashboard is displayed
18
19
  '''
19
20
  import os
20
21
  import sys
@@ -35,7 +36,7 @@ def power_10(x):
35
36
  return 10.0 ** x
36
37
 
37
38
 
38
- def main(domain, instance, method, trials=5, iters=20, workers=4):
39
+ def main(domain, instance, method, trials=5, iters=20, workers=4, dashboard=False):
39
40
 
40
41
  # set up the environment
41
42
  env = pyRDDLGym.make(domain, instance, vectorized=True)
@@ -48,9 +49,9 @@ def main(domain, instance, method, trials=5, iters=20, workers=4):
48
49
 
49
50
  # map parameters in the config that will be tuned
50
51
  hyperparams = [
51
- Hyperparameter('MODEL_WEIGHT_TUNE', -1., 5., power_10),
52
+ Hyperparameter('MODEL_WEIGHT_TUNE', -1., 4., power_10),
52
53
  Hyperparameter('POLICY_WEIGHT_TUNE', -2., 2., power_10),
53
- Hyperparameter('LEARNING_RATE_TUNE', -5., 1., power_10),
54
+ Hyperparameter('LEARNING_RATE_TUNE', -5., 0., power_10),
54
55
  Hyperparameter('LAYER1_TUNE', 1, 8, power_2),
55
56
  Hyperparameter('LAYER2_TUNE', 1, 8, power_2),
56
57
  Hyperparameter('ROLLOUT_HORIZON_TUNE', 1, min(env.horizon, 100), int)
@@ -64,7 +65,9 @@ def main(domain, instance, method, trials=5, iters=20, workers=4):
64
65
  eval_trials=trials,
65
66
  num_workers=workers,
66
67
  gp_iters=iters)
67
- tuning.tune(key=42, log_file=f'gp_{method}_{domain}_{instance}.csv')
68
+ tuning.tune(key=42,
69
+ log_file=f'gp_{method}_{domain}_{instance}.csv',
70
+ show_dashboard=dashboard)
68
71
 
69
72
  # evaluate the agent on the best parameters
70
73
  planner_args, _, train_args = load_config_from_string(tuning.best_config)
@@ -77,7 +80,7 @@ def main(domain, instance, method, trials=5, iters=20, workers=4):
77
80
 
78
81
  def run_from_args(args):
79
82
  if len(args) < 3:
80
- print('python run_tune.py <domain> <instance> <method> [<trials>] [<iters>] [<workers>]')
83
+ print('python run_tune.py <domain> <instance> <method> [<trials>] [<iters>] [<workers>] [<dashboard>]')
81
84
  exit(1)
82
85
  if args[2] not in ['drp', 'slp', 'replan']:
83
86
  print('<method> in [drp, slp, replan]')
@@ -86,6 +89,7 @@ def run_from_args(args):
86
89
  if len(args) >= 4: kwargs['trials'] = int(args[3])
87
90
  if len(args) >= 5: kwargs['iters'] = int(args[4])
88
91
  if len(args) >= 6: kwargs['workers'] = int(args[5])
92
+ if len(args) >= 7: kwargs['dashboard'] = bool(args[6])
89
93
  main(**kwargs)
90
94
 
91
95
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: pyRDDLGym-jax
3
- Version: 2.0
3
+ Version: 2.2
4
4
  Summary: pyRDDLGym-jax: automatic differentiation for solving sequential planning problems in JAX.
5
5
  Home-page: https://github.com/pyrddlgym-project/pyRDDLGym-jax
6
6
  Author: Michael Gimelfarb, Ayal Taitler, Scott Sanner
@@ -58,18 +58,21 @@ Dynamic: summary
58
58
 
59
59
  Purpose:
60
60
 
61
- 1. automatic translation of any RDDL description file into a differentiable simulator in JAX
62
- 2. flexible policy class representations, automatic model relaxations for working in discrete and hybrid domains, and Bayesian hyper-parameter tuning.
61
+ 1. automatic translation of RDDL description files into differentiable JAX simulators
62
+ 2. implementation of (highly configurable) operator relaxations for working in discrete and hybrid domains
63
+ 3. flexible policy representations and automated Bayesian hyper-parameter tuning
64
  + 4. interactive dashboard for dynamic visualization and debugging
65
+ 5. hybridization with parameter-exploring policy gradients.
63
66
 
64
67
  Some demos of solved problems by JaxPlan:
65
68
 
66
69
  <p align="middle">
67
- <img src="Images/intruders.gif" width="120" height="120" margin=0/>
68
- <img src="Images/marsrover.gif" width="120" height="120" margin=0/>
69
- <img src="Images/pong.gif" width="120" height="120" margin=0/>
70
- <img src="Images/quadcopter.gif" width="120" height="120" margin=0/>
71
- <img src="Images/reacher.gif" width="120" height="120" margin=0/>
72
- <img src="Images/reservoir.gif" width="120" height="120" margin=0/>
70
+ <img src="https://github.com/pyrddlgym-project/pyRDDLGym-jax/blob/main/Images/intruders.gif" width="120" height="120" margin=0/>
71
+ <img src="https://github.com/pyrddlgym-project/pyRDDLGym-jax/blob/main/Images/marsrover.gif" width="120" height="120" margin=0/>
72
+ <img src="https://github.com/pyrddlgym-project/pyRDDLGym-jax/blob/main/Images/pong.gif" width="120" height="120" margin=0/>
73
+ <img src="https://github.com/pyrddlgym-project/pyRDDLGym-jax/blob/main/Images/quadcopter.gif" width="120" height="120" margin=0/>
74
+ <img src="https://github.com/pyrddlgym-project/pyRDDLGym-jax/blob/main/Images/reacher.gif" width="120" height="120" margin=0/>
75
+ <img src="https://github.com/pyrddlgym-project/pyRDDLGym-jax/blob/main/Images/reservoir.gif" width="120" height="120" margin=0/>
73
76
  </p>
74
77
 
75
78
  > [!WARNING]
@@ -219,7 +222,7 @@ Since version 1.0, JaxPlan has an optional dashboard that allows keeping track o
219
222
  and visualization of the policy or model, and other useful debugging features.
220
223
 
221
224
  <p align="middle">
222
- <img src="Images/dashboard.png" width="480" height="248" margin=0/>
225
+ <img src="https://github.com/pyrddlgym-project/pyRDDLGym-jax/blob/main/Images/dashboard.png" width="480" height="248" margin=0/>
223
226
  </p>
224
227
 
225
228
  To run the dashboard, add the following entry to your config file:
@@ -235,8 +238,23 @@ More documentation about this and other new features will be coming soon.
235
238
 
236
239
  ## Tuning the Planner
237
240
 
238
- It is easy to tune the planner's hyper-parameters efficiently and automatically using Bayesian optimization.
239
- To do this, first create a config file template with patterns replacing concrete parameter values that you want to tune, e.g.:
241
+ A basic run script is provided to run automatic Bayesian hyper-parameter tuning for the most sensitive parameters of JaxPlan:
242
+
243
+ ```shell
244
+ jaxplan tune <domain> <instance> <method> <trials> <iters> <workers> <dashboard>
245
+ ```
246
+
247
+ where:
248
+ - ``domain`` is the domain identifier as specified in rddlrepository
249
+ - ``instance`` is the instance identifier
250
+ - ``method`` is the planning method to use (i.e. drp, slp, replan)
251
+ - ``trials`` is the (optional) number of trials/episodes to average in evaluating each hyper-parameter setting
252
+ - ``iters`` is the (optional) maximum number of iterations/evaluations of Bayesian optimization to perform
253
+ - ``workers`` is the (optional) number of parallel evaluations to be done at each iteration, e.g. the total evaluations = ``iters * workers``
254
+ - ``dashboard`` is whether the optimizations are tracked in the dashboard application.
255
+
256
+ It is easy to tune a custom range of the planner's hyper-parameters efficiently.
257
+ First create a config file template with patterns replacing concrete parameter values that you want to tune, e.g.:
240
258
 
241
259
  ```ini
242
260
  [Model]
@@ -260,7 +278,7 @@ train_on_reset=True
260
278
 
261
279
  would allow to tune the sharpness of model relaxations, and the learning rate of the optimizer.
262
280
 
263
- Next, you must link the patterns in the config with concrete hyper-parameter ranges the tuner will understand:
281
+ Next, you must link the patterns in the config with concrete hyper-parameter ranges the tuner will understand, and run the optimizer:
264
282
 
265
283
  ```python
266
284
  import pyRDDLGym
@@ -292,21 +310,7 @@ tuning = JaxParameterTuning(env=env,
292
310
  gp_iters=iters)
293
311
  tuning.tune(key=42, log_file='path/to/log.csv')
294
312
  ```
295
-
296
- A basic run script is provided to run the automatic hyper-parameter tuning for the most sensitive parameters of JaxPlan:
297
-
298
- ```shell
299
- jaxplan tune <domain> <instance> <method> <trials> <iters> <workers>
300
- ```
301
-
302
- where:
303
- - ``domain`` is the domain identifier as specified in rddlrepository
304
- - ``instance`` is the instance identifier
305
- - ``method`` is the planning method to use (i.e. drp, slp, replan)
306
- - ``trials`` is the (optional) number of trials/episodes to average in evaluating each hyper-parameter setting
307
- - ``iters`` is the (optional) maximum number of iterations/evaluations of Bayesian optimization to perform
308
- - ``workers`` is the (optional) number of parallel evaluations to be done at each iteration, e.g. the total evaluations = ``iters * workers``.
309
-
313
+
310
314
 
311
315
  ## Simulation
312
316
 
@@ -344,7 +348,16 @@ The [following citation](https://ojs.aaai.org/index.php/ICAPS/article/view/31480
344
348
  ```
345
349
 
346
350
  Some of the implementation details derive from the following literature, which you may wish to also cite in your research papers:
347
- - [A Distributional Framework for Risk-Sensitive End-to-End Planning in Continuous MDPs](https://ojs.aaai.org/index.php/AAAI/article/view/21226)
351
+ - [A Distributional Framework for Risk-Sensitive End-to-End Planning in Continuous MDPs, AAAI 2022](https://ojs.aaai.org/index.php/AAAI/article/view/21226)
348
352
  - [Deep reactive policies for planning in stochastic nonlinear domains, AAAI 2019](https://ojs.aaai.org/index.php/AAAI/article/view/4744)
353
  + - [Stochastic Planning with Lifted Symbolic Trajectory Optimization, ICAPS 2019](https://ojs.aaai.org/index.php/ICAPS/article/view/3467/3335)
349
354
  - [Scalable planning with tensorflow for hybrid nonlinear domains, NeurIPS 2017](https://proceedings.neurips.cc/paper/2017/file/98b17f068d5d9b7668e19fb8ae470841-Paper.pdf)
350
-
355
+ - [Baseline-Free Sampling in Parameter Exploring Policy Gradients: Super Symmetric PGPE, ANN 2015](https://link.springer.com/chapter/10.1007/978-3-319-09903-3_13)
356
+
357
+ The model relaxations in JaxPlan are based on the following works:
358
  + - [Poisson Variational Autoencoder, NeurIPS 2024](https://proceedings.neurips.cc/paper_files/paper/2024/file/4f3cb9576dc99d62b80726690453716f-Paper-Conference.pdf)
359
+ - [Analyzing Differentiable Fuzzy Logic Operators, AI 2022](https://www.sciencedirect.com/science/article/pii/S0004370221001533)
360
+ - [Learning with algorithmic supervision via continuous relaxations, NeurIPS 2021](https://proceedings.neurips.cc/paper_files/paper/2021/file/89ae0fe22c47d374bc9350ef99e01685-Paper.pdf)
361
+ - [Universally quantized neural compression, NeurIPS 2020](https://papers.nips.cc/paper_files/paper/2020/file/92049debbe566ca5782a3045cf300a3c-Paper.pdf)
362
+ - [Generalized Gumbel-Softmax Gradient Estimator for Generic Discrete Random Variables, 2020](https://arxiv.org/pdf/2003.01847)
363
+ - [Categorical Reparametrization with Gumbel-Softmax, ICLR 2017](https://openreview.net/pdf?id=rkE3y85ee)
@@ -1,12 +1,12 @@
1
- pyRDDLGym_jax/__init__.py,sha256=TiPG4w8nN4AzPkhugwVvZkHmAgP955NltD4QRmBLhRU,19
1
+ pyRDDLGym_jax/__init__.py,sha256=lqo7WXKfZGHPIOxgE6EWI5fGZHP2h6XrwVNNVQAUN3Q,19
2
2
  pyRDDLGym_jax/entry_point.py,sha256=dxDlO_5gneEEViwkLCg30Z-KVzUgdRXaKuFjoZklkA0,974
3
3
  pyRDDLGym_jax/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- pyRDDLGym_jax/core/compiler.py,sha256=Rn-aIqfgfWqu45bvCfPb9tB8RIOBVdbj-pI-V3WS2Z8,89212
5
- pyRDDLGym_jax/core/logic.py,sha256=_A6eGYtLVU3pbLAezxJVB9bnClJoaFIa2mBIDdFrqoU,39655
6
- pyRDDLGym_jax/core/planner.py,sha256=4j56l7SL7F89g2QA4nOpyhODmY0DamvxYLfCMKxJNbQ,118593
4
+ pyRDDLGym_jax/core/compiler.py,sha256=_ERueJW7GQ7S8-IezreeuLs3fNCZbQZ8j7VMUVlEt1k,82306
5
+ pyRDDLGym_jax/core/logic.py,sha256=ZeCwCLqC6BvXpRT06TvE2bfPNO6ALuMzPmUvXNzW6Uw,52278
6
+ pyRDDLGym_jax/core/planner.py,sha256=0rluBXKGNHRPEPfegOWcx9__cJHr8KjZdDJtG7i1JjI,122793
7
7
  pyRDDLGym_jax/core/simulator.py,sha256=DnPL93WVCMZqtqMUoiJdfWcH9pEvNgGfDfO4NV0wIS0,9271
8
8
  pyRDDLGym_jax/core/tuning.py,sha256=RKKtDZp7unvfbhZEoaunZtcAn5xtzGYqXBB_Ij_Aapc,24205
9
- pyRDDLGym_jax/core/visualization.py,sha256=XtQL1A5dQIlfeUpte-r3lNVw-GNLxj2EYUNMz7AFOtc,70359
9
+ pyRDDLGym_jax/core/visualization.py,sha256=4BghMp8N7qtF0tdyDSqtxAxNfP9HPrQWTiXzAMJmx7o,70365
10
10
  pyRDDLGym_jax/core/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  pyRDDLGym_jax/core/assets/favicon.ico,sha256=RMMrI9YvmF81TgYG7FO7UAre6WmYFkV3B2GmbA1l0kM,175085
12
12
  pyRDDLGym_jax/examples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -14,7 +14,7 @@ pyRDDLGym_jax/examples/run_gradient.py,sha256=KhXvijRDZ4V7N8NOI2WV8ePGpPna5_vnET
14
14
  pyRDDLGym_jax/examples/run_gym.py,sha256=rXvNWkxe4jHllvbvU_EOMji_2-2k5d4tbBKhpMm_Gaw,1526
15
15
  pyRDDLGym_jax/examples/run_plan.py,sha256=v2AvwgIa4Ejr626vBOgWFJIQvay3IPKWno02ztIFCYc,2768
16
16
  pyRDDLGym_jax/examples/run_scipy.py,sha256=wvcpWCvdjvYHntO95a7JYfY2fuCMUTKnqjJikW0PnL4,2291
17
- pyRDDLGym_jax/examples/run_tune.py,sha256=zqrhvLR5PeWJv0NsRxDCzAPmvgPgz_1NrtM1xBy6ndU,3606
17
+ pyRDDLGym_jax/examples/run_tune.py,sha256=WbGO8RudIK-cPMAMKvI8NbFQAqkG-Blbnta3Efsep6c,3828
18
18
  pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_drp.cfg,sha256=mE8MqhOlkHeXIGEVrnR3QY6I-_iy4uxFYRA71P1bmtk,347
19
19
  pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_replan.cfg,sha256=nFFYHCKQUMn8x-OpJwu2pwe1tycNSJ8iAIwSkCBn33E,370
20
20
  pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_slp.cfg,sha256=eJ3HvHjODoKdtX7u-AM51xQaHJnYgzEy2t3omNG2oCs,340
@@ -41,9 +41,9 @@ pyRDDLGym_jax/examples/configs/default_slp.cfg,sha256=mJo0woDevhQCSQfJg30ULVy9qG
41
41
  pyRDDLGym_jax/examples/configs/tuning_drp.cfg,sha256=CQMpSCKTkGioO7U82mHMsYWFRsutULx0V6Wrl3YzV2U,504
42
42
  pyRDDLGym_jax/examples/configs/tuning_replan.cfg,sha256=m_0nozFg_GVld0tGv92Xao_KONFJDq_vtiJKt5isqI8,501
43
43
  pyRDDLGym_jax/examples/configs/tuning_slp.cfg,sha256=KHu8II6CA-h_HblwvWHylNRjSvvGS3VHxN7JQNR4p_Q,464
44
- pyRDDLGym_jax-2.0.dist-info/LICENSE,sha256=Y0Gi6H6mLOKN-oIKGZulQkoTJyPZeAaeuZu7FXH-meg,1095
45
- pyRDDLGym_jax-2.0.dist-info/METADATA,sha256=ZYIe9c_Tar4WO8qQOvcUIJVMmZznPUBRaegS0DH2un8,15090
46
- pyRDDLGym_jax-2.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
47
- pyRDDLGym_jax-2.0.dist-info/entry_points.txt,sha256=Q--z9QzqDBz1xjswPZ87PU-pib-WPXx44hUWAFoBGBA,59
48
- pyRDDLGym_jax-2.0.dist-info/top_level.txt,sha256=n_oWkP_BoZK0VofvPKKmBZ3NPk86WFNvLhi1BktCbVQ,14
49
- pyRDDLGym_jax-2.0.dist-info/RECORD,,
44
+ pyrddlgym_jax-2.2.dist-info/LICENSE,sha256=Y0Gi6H6mLOKN-oIKGZulQkoTJyPZeAaeuZu7FXH-meg,1095
45
+ pyrddlgym_jax-2.2.dist-info/METADATA,sha256=aFNUX6uUZZHS7lPbYBTmMSH6TBiWmXbEgQNxPZNWiRI,17021
46
+ pyrddlgym_jax-2.2.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
47
+ pyrddlgym_jax-2.2.dist-info/entry_points.txt,sha256=Q--z9QzqDBz1xjswPZ87PU-pib-WPXx44hUWAFoBGBA,59
48
+ pyrddlgym_jax-2.2.dist-info/top_level.txt,sha256=n_oWkP_BoZK0VofvPKKmBZ3NPk86WFNvLhi1BktCbVQ,14
49
+ pyrddlgym_jax-2.2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (75.8.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5