pyRDDLGym-jax 1.3__py3-none-any.whl → 2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyRDDLGym_jax/__init__.py +1 -1
- pyRDDLGym_jax/core/compiler.py +101 -191
- pyRDDLGym_jax/core/logic.py +349 -65
- pyRDDLGym_jax/core/planner.py +554 -208
- pyRDDLGym_jax/core/simulator.py +20 -0
- pyRDDLGym_jax/core/tuning.py +15 -0
- pyRDDLGym_jax/core/visualization.py +55 -8
- pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_replan.cfg +3 -3
- pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_slp.cfg +4 -4
- pyRDDLGym_jax/examples/configs/Quadcopter_drp.cfg +1 -0
- pyRDDLGym_jax/examples/configs/Quadcopter_slp.cfg +4 -3
- pyRDDLGym_jax/examples/configs/Reservoir_Continuous_drp.cfg +1 -0
- pyRDDLGym_jax/examples/configs/Reservoir_Continuous_slp.cfg +1 -0
- pyRDDLGym_jax/examples/configs/UAV_Continuous_slp.cfg +1 -0
- pyRDDLGym_jax/examples/configs/Wildfire_MDP_ippc2014_drp.cfg +1 -0
- pyRDDLGym_jax/examples/configs/Wildfire_MDP_ippc2014_replan.cfg +1 -0
- pyRDDLGym_jax/examples/configs/Wildfire_MDP_ippc2014_slp.cfg +1 -0
- pyRDDLGym_jax/examples/run_tune.py +10 -6
- {pyRDDLGym_jax-1.3.dist-info → pyrddlgym_jax-2.1.dist-info}/METADATA +22 -12
- {pyRDDLGym_jax-1.3.dist-info → pyrddlgym_jax-2.1.dist-info}/RECORD +24 -24
- {pyRDDLGym_jax-1.3.dist-info → pyrddlgym_jax-2.1.dist-info}/WHEEL +1 -1
- {pyRDDLGym_jax-1.3.dist-info → pyrddlgym_jax-2.1.dist-info}/LICENSE +0 -0
- {pyRDDLGym_jax-1.3.dist-info → pyrddlgym_jax-2.1.dist-info}/entry_points.txt +0 -0
- {pyRDDLGym_jax-1.3.dist-info → pyrddlgym_jax-2.1.dist-info}/top_level.txt +0 -0
pyRDDLGym_jax/core/simulator.py
CHANGED
|
@@ -1,3 +1,23 @@
|
|
|
1
|
+
# ***********************************************************************
|
|
2
|
+
# JAXPLAN
|
|
3
|
+
#
|
|
4
|
+
# Author: Michael Gimelfarb
|
|
5
|
+
#
|
|
6
|
+
# REFERENCES:
|
|
7
|
+
#
|
|
8
|
+
# [1] Gimelfarb, Michael, Ayal Taitler, and Scott Sanner. "JaxPlan and GurobiPlan:
|
|
9
|
+
# Optimization Baselines for Replanning in Discrete and Mixed Discrete-Continuous
|
|
10
|
+
# Probabilistic Domains." Proceedings of the International Conference on Automated
|
|
11
|
+
# Planning and Scheduling. Vol. 34. 2024.
|
|
12
|
+
#
|
|
13
|
+
# [2] Taitler, Ayal, Michael Gimelfarb, Jihwan Jeong, Sriram Gopalakrishnan, Martin
|
|
14
|
+
# Mladenov, Xiaotian Liu, and Scott Sanner. "pyRDDLGym: From RDDL to Gym Environments."
|
|
15
|
+
# In PRL Workshop Series – Bridging the Gap Between AI Planning and
|
|
16
|
+
# Reinforcement Learning.
|
|
17
|
+
#
|
|
18
|
+
# ***********************************************************************
|
|
19
|
+
|
|
20
|
+
|
|
1
21
|
import time
|
|
2
22
|
from typing import Dict, Optional
|
|
3
23
|
|
pyRDDLGym_jax/core/tuning.py
CHANGED
|
@@ -1,3 +1,18 @@
|
|
|
1
|
+
# ***********************************************************************
|
|
2
|
+
# JAXPLAN
|
|
3
|
+
#
|
|
4
|
+
# Author: Michael Gimelfarb
|
|
5
|
+
#
|
|
6
|
+
# REFERENCES:
|
|
7
|
+
#
|
|
8
|
+
# [1] Gimelfarb, Michael, Ayal Taitler, and Scott Sanner. "JaxPlan and GurobiPlan:
|
|
9
|
+
# Optimization Baselines for Replanning in Discrete and Mixed Discrete-Continuous
|
|
10
|
+
# Probabilistic Domains." Proceedings of the International Conference on Automated
|
|
11
|
+
# Planning and Scheduling. Vol. 34. 2024.
|
|
12
|
+
#
|
|
13
|
+
# ***********************************************************************
|
|
14
|
+
|
|
15
|
+
|
|
1
16
|
import csv
|
|
2
17
|
import datetime
|
|
3
18
|
import threading
|
|
@@ -1,3 +1,18 @@
|
|
|
1
|
+
# ***********************************************************************
|
|
2
|
+
# JAXPLAN
|
|
3
|
+
#
|
|
4
|
+
# Author: Michael Gimelfarb
|
|
5
|
+
#
|
|
6
|
+
# REFERENCES:
|
|
7
|
+
#
|
|
8
|
+
# [1] Gimelfarb, Michael, Ayal Taitler, and Scott Sanner. "JaxPlan and GurobiPlan:
|
|
9
|
+
# Optimization Baselines for Replanning in Discrete and Mixed Discrete-Continuous
|
|
10
|
+
# Probabilistic Domains." Proceedings of the International Conference on Automated
|
|
11
|
+
# Planning and Scheduling. Vol. 34. 2024.
|
|
12
|
+
#
|
|
13
|
+
# ***********************************************************************
|
|
14
|
+
|
|
15
|
+
|
|
1
16
|
import ast
|
|
2
17
|
import os
|
|
3
18
|
from datetime import datetime
|
|
@@ -5,8 +20,7 @@ import math
|
|
|
5
20
|
import numpy as np
|
|
6
21
|
import time
|
|
7
22
|
import threading
|
|
8
|
-
from typing import Any, Dict,
|
|
9
|
-
import warnings
|
|
23
|
+
from typing import Any, Dict, Optional, Tuple, TYPE_CHECKING
|
|
10
24
|
import webbrowser
|
|
11
25
|
|
|
12
26
|
# prevent endless console prints
|
|
@@ -17,7 +31,7 @@ log.setLevel(logging.ERROR)
|
|
|
17
31
|
import dash
|
|
18
32
|
from dash.dcc import Interval, Graph, Store
|
|
19
33
|
from dash.dependencies import Input, Output, State, ALL
|
|
20
|
-
from dash.html import Div, B, H4, P,
|
|
34
|
+
from dash.html import Div, B, H4, P, Hr
|
|
21
35
|
import dash_bootstrap_components as dbc
|
|
22
36
|
|
|
23
37
|
import plotly.colors as pc
|
|
@@ -38,6 +52,7 @@ REWARD_ERROR_DIST_SUBPLOTS = 20
|
|
|
38
52
|
MODEL_STATE_ERROR_HEIGHT = 300
|
|
39
53
|
POLICY_STATE_VIZ_MAX_HEIGHT = 800
|
|
40
54
|
GP_POSTERIOR_MAX_HEIGHT = 800
|
|
55
|
+
GP_POSTERIOR_PIXELS = 100
|
|
41
56
|
|
|
42
57
|
PLOT_AXES_FONT_SIZE = 11
|
|
43
58
|
EXPERIMENT_ENTRY_FONT_SIZE = 14
|
|
@@ -61,6 +76,7 @@ class JaxPlannerDashboard:
|
|
|
61
76
|
self.xticks = {}
|
|
62
77
|
self.test_return = {}
|
|
63
78
|
self.train_return = {}
|
|
79
|
+
self.pgpe_return = {}
|
|
64
80
|
self.return_dist = {}
|
|
65
81
|
self.return_dist_ticks = {}
|
|
66
82
|
self.return_dist_last_progress = {}
|
|
@@ -299,6 +315,9 @@ class JaxPlannerDashboard:
|
|
|
299
315
|
dbc.Col(Graph(id='train-return-graph'), width=6),
|
|
300
316
|
dbc.Col(Graph(id='test-return-graph'), width=6),
|
|
301
317
|
]),
|
|
318
|
+
dbc.Row([
|
|
319
|
+
dbc.Col(Graph(id='pgpe-return-graph'), width=6)
|
|
320
|
+
]),
|
|
302
321
|
dbc.Row([
|
|
303
322
|
Graph(id='dist-return-graph')
|
|
304
323
|
])
|
|
@@ -661,6 +680,33 @@ class JaxPlannerDashboard:
|
|
|
661
680
|
)
|
|
662
681
|
return fig
|
|
663
682
|
|
|
683
|
+
@app.callback(
|
|
684
|
+
Output('pgpe-return-graph', 'figure'),
|
|
685
|
+
[Input('interval', 'n_intervals'),
|
|
686
|
+
Input('trigger-experiment-check', 'children'),
|
|
687
|
+
Input('tabs-main', 'active_tab')]
|
|
688
|
+
)
|
|
689
|
+
def update_pgpe_return_graph(n, trigger, active_tab):
|
|
690
|
+
if active_tab != 'tab-performance': return dash.no_update
|
|
691
|
+
fig = go.Figure()
|
|
692
|
+
for (row, checked) in self.checked.copy().items():
|
|
693
|
+
if checked:
|
|
694
|
+
fig.add_trace(go.Scatter(
|
|
695
|
+
x=self.xticks[row], y=self.pgpe_return[row],
|
|
696
|
+
name=f'id={row}',
|
|
697
|
+
mode='lines+markers',
|
|
698
|
+
marker=dict(size=3), line=dict(width=2)
|
|
699
|
+
))
|
|
700
|
+
fig.update_layout(
|
|
701
|
+
title=dict(text="PGPE Return"),
|
|
702
|
+
xaxis=dict(title=dict(text="Training Iteration")),
|
|
703
|
+
yaxis=dict(title=dict(text="Cumulative Reward")),
|
|
704
|
+
font=dict(size=PLOT_AXES_FONT_SIZE),
|
|
705
|
+
legend=dict(bgcolor='rgba(0,0,0,0)'),
|
|
706
|
+
template="plotly_white"
|
|
707
|
+
)
|
|
708
|
+
return fig
|
|
709
|
+
|
|
664
710
|
@app.callback(
|
|
665
711
|
Output('dist-return-graph', 'figure'),
|
|
666
712
|
[Input('interval', 'n_intervals'),
|
|
@@ -1316,6 +1362,7 @@ class JaxPlannerDashboard:
|
|
|
1316
1362
|
self.xticks[experiment_id] = []
|
|
1317
1363
|
self.train_return[experiment_id] = []
|
|
1318
1364
|
self.test_return[experiment_id] = []
|
|
1365
|
+
self.pgpe_return[experiment_id] = []
|
|
1319
1366
|
self.return_dist_ticks[experiment_id] = []
|
|
1320
1367
|
self.return_dist_last_progress[experiment_id] = 0
|
|
1321
1368
|
self.return_dist[experiment_id] = []
|
|
@@ -1367,9 +1414,10 @@ class JaxPlannerDashboard:
|
|
|
1367
1414
|
self.xticks[experiment_id].append(iteration)
|
|
1368
1415
|
self.train_return[experiment_id].append(callback['train_return'])
|
|
1369
1416
|
self.test_return[experiment_id].append(callback['best_return'])
|
|
1417
|
+
self.pgpe_return[experiment_id].append(callback['pgpe_return'])
|
|
1370
1418
|
|
|
1371
1419
|
# data for return distributions
|
|
1372
|
-
progress = callback['progress']
|
|
1420
|
+
progress = int(callback['progress'])
|
|
1373
1421
|
if progress - self.return_dist_last_progress[experiment_id] \
|
|
1374
1422
|
>= PROGRESS_FOR_NEXT_RETURN_DIST:
|
|
1375
1423
|
self.return_dist_ticks[experiment_id].append(iteration)
|
|
@@ -1438,8 +1486,8 @@ class JaxPlannerDashboard:
|
|
|
1438
1486
|
if i2 > i1:
|
|
1439
1487
|
|
|
1440
1488
|
# Generate a grid for visualization
|
|
1441
|
-
p1_values = np.linspace(*bounds[param1],
|
|
1442
|
-
p2_values = np.linspace(*bounds[param2],
|
|
1489
|
+
p1_values = np.linspace(*bounds[param1], GP_POSTERIOR_PIXELS)
|
|
1490
|
+
p2_values = np.linspace(*bounds[param2], GP_POSTERIOR_PIXELS)
|
|
1443
1491
|
P1, P2 = np.meshgrid(p1_values, p2_values)
|
|
1444
1492
|
|
|
1445
1493
|
# Predict the mean and deviation of the surrogate model
|
|
@@ -1452,8 +1500,7 @@ class JaxPlannerDashboard:
|
|
|
1452
1500
|
for p1, p2 in zip(np.ravel(P1), np.ravel(P2)):
|
|
1453
1501
|
params = {param1: p1, param2: p2}
|
|
1454
1502
|
params.update(fixed_params)
|
|
1455
|
-
param_grid.append(
|
|
1456
|
-
[params[key] for key in optimizer.space.keys])
|
|
1503
|
+
param_grid.append([params[key] for key in optimizer.space.keys])
|
|
1457
1504
|
param_grid = np.asarray(param_grid)
|
|
1458
1505
|
mean, std = optimizer._gp.predict(param_grid, return_std=True)
|
|
1459
1506
|
mean = mean.reshape(P1.shape)
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
[Model]
|
|
2
2
|
logic='FuzzyLogic'
|
|
3
|
-
comparison_kwargs={'weight':
|
|
4
|
-
rounding_kwargs={'weight':
|
|
5
|
-
control_kwargs={'weight':
|
|
3
|
+
comparison_kwargs={'weight': 20}
|
|
4
|
+
rounding_kwargs={'weight': 20}
|
|
5
|
+
control_kwargs={'weight': 20}
|
|
6
6
|
|
|
7
7
|
[Optimizer]
|
|
8
8
|
method='JaxStraightLinePlan'
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
[Model]
|
|
2
2
|
logic='FuzzyLogic'
|
|
3
|
-
comparison_kwargs={'weight':
|
|
4
|
-
rounding_kwargs={'weight':
|
|
5
|
-
control_kwargs={'weight':
|
|
3
|
+
comparison_kwargs={'weight': 20}
|
|
4
|
+
rounding_kwargs={'weight': 20}
|
|
5
|
+
control_kwargs={'weight': 20}
|
|
6
6
|
|
|
7
7
|
[Optimizer]
|
|
8
8
|
method='JaxStraightLinePlan'
|
|
9
9
|
method_kwargs={}
|
|
10
10
|
optimizer='rmsprop'
|
|
11
|
-
optimizer_kwargs={'learning_rate': 0.
|
|
11
|
+
optimizer_kwargs={'learning_rate': 0.001}
|
|
12
12
|
batch_size_train=1
|
|
13
13
|
batch_size_test=1
|
|
14
14
|
clip_grad=1.0
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
[Model]
|
|
2
2
|
logic='FuzzyLogic'
|
|
3
|
-
comparison_kwargs={'weight':
|
|
4
|
-
rounding_kwargs={'weight':
|
|
5
|
-
control_kwargs={'weight':
|
|
3
|
+
comparison_kwargs={'weight': 10}
|
|
4
|
+
rounding_kwargs={'weight': 10}
|
|
5
|
+
control_kwargs={'weight': 10}
|
|
6
6
|
|
|
7
7
|
[Optimizer]
|
|
8
8
|
method='JaxStraightLinePlan'
|
|
@@ -11,6 +11,7 @@ optimizer='rmsprop'
|
|
|
11
11
|
optimizer_kwargs={'learning_rate': 0.03}
|
|
12
12
|
batch_size_train=1
|
|
13
13
|
batch_size_test=1
|
|
14
|
+
pgpe=None
|
|
14
15
|
|
|
15
16
|
[Training]
|
|
16
17
|
key=42
|
|
@@ -3,7 +3,7 @@ is performed using a batched parallelized Bayesian optimization.
|
|
|
3
3
|
|
|
4
4
|
The syntax is:
|
|
5
5
|
|
|
6
|
-
python run_tune.py <domain> <instance> <method> [<trials>] [<iters>] [<workers>]
|
|
6
|
+
python run_tune.py <domain> <instance> <method> [<trials>] [<iters>] [<workers>] [<dashboard>]
|
|
7
7
|
|
|
8
8
|
where:
|
|
9
9
|
<domain> is the name of a domain located in the /Examples directory
|
|
@@ -15,6 +15,7 @@ where:
|
|
|
15
15
|
(defaults to 20)
|
|
16
16
|
<workers> is the number of parallel workers (i.e. batch size), which must
|
|
17
17
|
not exceed the number of cores available on the machine (defaults to 4)
|
|
18
|
+
<dashboard> is whether to display the tuning dashboard (defaults to False)
|
|
18
19
|
'''
|
|
19
20
|
import os
|
|
20
21
|
import sys
|
|
@@ -35,7 +36,7 @@ def power_10(x):
|
|
|
35
36
|
return 10.0 ** x
|
|
36
37
|
|
|
37
38
|
|
|
38
|
-
def main(domain, instance, method, trials=5, iters=20, workers=4):
|
|
39
|
+
def main(domain, instance, method, trials=5, iters=20, workers=4, dashboard=False):
|
|
39
40
|
|
|
40
41
|
# set up the environment
|
|
41
42
|
env = pyRDDLGym.make(domain, instance, vectorized=True)
|
|
@@ -48,9 +49,9 @@ def main(domain, instance, method, trials=5, iters=20, workers=4):
|
|
|
48
49
|
|
|
49
50
|
# map parameters in the config that will be tuned
|
|
50
51
|
hyperparams = [
|
|
51
|
-
Hyperparameter('MODEL_WEIGHT_TUNE', -1.,
|
|
52
|
+
Hyperparameter('MODEL_WEIGHT_TUNE', -1., 4., power_10),
|
|
52
53
|
Hyperparameter('POLICY_WEIGHT_TUNE', -2., 2., power_10),
|
|
53
|
-
Hyperparameter('LEARNING_RATE_TUNE', -5.,
|
|
54
|
+
Hyperparameter('LEARNING_RATE_TUNE', -5., 0., power_10),
|
|
54
55
|
Hyperparameter('LAYER1_TUNE', 1, 8, power_2),
|
|
55
56
|
Hyperparameter('LAYER2_TUNE', 1, 8, power_2),
|
|
56
57
|
Hyperparameter('ROLLOUT_HORIZON_TUNE', 1, min(env.horizon, 100), int)
|
|
@@ -64,7 +65,9 @@ def main(domain, instance, method, trials=5, iters=20, workers=4):
|
|
|
64
65
|
eval_trials=trials,
|
|
65
66
|
num_workers=workers,
|
|
66
67
|
gp_iters=iters)
|
|
67
|
-
tuning.tune(key=42,
|
|
68
|
+
tuning.tune(key=42,
|
|
69
|
+
log_file=f'gp_{method}_{domain}_{instance}.csv',
|
|
70
|
+
show_dashboard=dashboard)
|
|
68
71
|
|
|
69
72
|
# evaluate the agent on the best parameters
|
|
70
73
|
planner_args, _, train_args = load_config_from_string(tuning.best_config)
|
|
@@ -77,7 +80,7 @@ def main(domain, instance, method, trials=5, iters=20, workers=4):
|
|
|
77
80
|
|
|
78
81
|
def run_from_args(args):
|
|
79
82
|
if len(args) < 3:
|
|
80
|
-
print('python run_tune.py <domain> <instance> <method> [<trials>] [<iters>] [<workers>]')
|
|
83
|
+
print('python run_tune.py <domain> <instance> <method> [<trials>] [<iters>] [<workers>] [<dashboard>]')
|
|
81
84
|
exit(1)
|
|
82
85
|
if args[2] not in ['drp', 'slp', 'replan']:
|
|
83
86
|
print('<method> in [drp, slp, replan]')
|
|
@@ -86,6 +89,7 @@ def run_from_args(args):
|
|
|
86
89
|
if len(args) >= 4: kwargs['trials'] = int(args[3])
|
|
87
90
|
if len(args) >= 5: kwargs['iters'] = int(args[4])
|
|
88
91
|
if len(args) >= 6: kwargs['workers'] = int(args[5])
|
|
92
|
+
if len(args) >= 7: kwargs['dashboard'] = bool(args[6])
|
|
89
93
|
main(**kwargs)
|
|
90
94
|
|
|
91
95
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: pyRDDLGym-jax
|
|
3
|
-
Version: 1
|
|
3
|
+
Version: 2.1
|
|
4
4
|
Summary: pyRDDLGym-jax: automatic differentiation for solving sequential planning problems in JAX.
|
|
5
5
|
Home-page: https://github.com/pyrddlgym-project/pyRDDLGym-jax
|
|
6
6
|
Author: Michael Gimelfarb, Ayal Taitler, Scott Sanner
|
|
@@ -64,12 +64,12 @@ Purpose:
|
|
|
64
64
|
Some demos of solved problems by JaxPlan:
|
|
65
65
|
|
|
66
66
|
<p align="middle">
|
|
67
|
-
<img src="Images/intruders.gif" width="120" height="120" margin=0/>
|
|
68
|
-
<img src="Images/marsrover.gif" width="120" height="120" margin=0/>
|
|
69
|
-
<img src="Images/pong.gif" width="120" height="120" margin=0/>
|
|
70
|
-
<img src="Images/quadcopter.gif" width="120" height="120" margin=0/>
|
|
71
|
-
<img src="Images/reacher.gif" width="120" height="120" margin=0/>
|
|
72
|
-
<img src="Images/reservoir.gif" width="120" height="120" margin=0/>
|
|
67
|
+
<img src="https://github.com/pyrddlgym-project/pyRDDLGym-jax/blob/main/Images/intruders.gif" width="120" height="120" margin=0/>
|
|
68
|
+
<img src="https://github.com/pyrddlgym-project/pyRDDLGym-jax/blob/main/Images/marsrover.gif" width="120" height="120" margin=0/>
|
|
69
|
+
<img src="https://github.com/pyrddlgym-project/pyRDDLGym-jax/blob/main/Images/pong.gif" width="120" height="120" margin=0/>
|
|
70
|
+
<img src="https://github.com/pyrddlgym-project/pyRDDLGym-jax/blob/main/Images/quadcopter.gif" width="120" height="120" margin=0/>
|
|
71
|
+
<img src="https://github.com/pyrddlgym-project/pyRDDLGym-jax/blob/main/Images/reacher.gif" width="120" height="120" margin=0/>
|
|
72
|
+
<img src="https://github.com/pyrddlgym-project/pyRDDLGym-jax/blob/main/Images/reservoir.gif" width="120" height="120" margin=0/>
|
|
73
73
|
</p>
|
|
74
74
|
|
|
75
75
|
> [!WARNING]
|
|
@@ -219,7 +219,7 @@ Since version 1.0, JaxPlan has an optional dashboard that allows keeping track o
|
|
|
219
219
|
and visualization of the policy or model, and other useful debugging features.
|
|
220
220
|
|
|
221
221
|
<p align="middle">
|
|
222
|
-
<img src="Images/dashboard.png" width="480" height="248" margin=0/>
|
|
222
|
+
<img src="https://github.com/pyrddlgym-project/pyRDDLGym-jax/blob/main/Images/dashboard.png" width="480" height="248" margin=0/>
|
|
223
223
|
</p>
|
|
224
224
|
|
|
225
225
|
To run the dashboard, add the following entry to your config file:
|
|
@@ -296,7 +296,7 @@ tuning.tune(key=42, log_file='path/to/log.csv')
|
|
|
296
296
|
A basic run script is provided to run the automatic hyper-parameter tuning for the most sensitive parameters of JaxPlan:
|
|
297
297
|
|
|
298
298
|
```shell
|
|
299
|
-
jaxplan tune <domain> <instance> <method> <trials> <iters> <workers>
|
|
299
|
+
jaxplan tune <domain> <instance> <method> <trials> <iters> <workers> <dashboard>
|
|
300
300
|
```
|
|
301
301
|
|
|
302
302
|
where:
|
|
@@ -305,7 +305,8 @@ where:
|
|
|
305
305
|
- ``method`` is the planning method to use (i.e. drp, slp, replan)
|
|
306
306
|
- ``trials`` is the (optional) number of trials/episodes to average in evaluating each hyper-parameter setting
|
|
307
307
|
- ``iters`` is the (optional) maximum number of iterations/evaluations of Bayesian optimization to perform
|
|
308
|
-
- ``workers`` is the (optional) number of parallel evaluations to be done at each iteration, e.g. the total evaluations = ``iters * workers
|
|
308
|
+
- ``workers`` is the (optional) number of parallel evaluations to be done at each iteration, e.g. the total evaluations = ``iters * workers``
|
|
309
|
+
- ``dashboard`` is an (optional) flag indicating whether the optimization is tracked in the dashboard application
|
|
309
310
|
|
|
310
311
|
|
|
311
312
|
## Simulation
|
|
@@ -344,7 +345,16 @@ The [following citation](https://ojs.aaai.org/index.php/ICAPS/article/view/31480
|
|
|
344
345
|
```
|
|
345
346
|
|
|
346
347
|
Some of the implementation details derive from the following literature, which you may wish to also cite in your research papers:
|
|
347
|
-
- [A Distributional Framework for Risk-Sensitive End-to-End Planning in Continuous MDPs](https://ojs.aaai.org/index.php/AAAI/article/view/21226)
|
|
348
|
+
- [A Distributional Framework for Risk-Sensitive End-to-End Planning in Continuous MDPs, AAAI 2022](https://ojs.aaai.org/index.php/AAAI/article/view/21226)
|
|
348
349
|
- [Deep reactive policies for planning in stochastic nonlinear domains, AAAI 2019](https://ojs.aaai.org/index.php/AAAI/article/view/4744)
|
|
350
|
+
- [Stochastic Planning with Lifted Symbolic Trajectory Optimization, ICAPS 2019](https://ojs.aaai.org/index.php/ICAPS/article/view/3467/3335)
|
|
349
351
|
- [Scalable planning with tensorflow for hybrid nonlinear domains, NeurIPS 2017](https://proceedings.neurips.cc/paper/2017/file/98b17f068d5d9b7668e19fb8ae470841-Paper.pdf)
|
|
350
|
-
|
|
352
|
+
- [Baseline-Free Sampling in Parameter Exploring Policy Gradients: Super Symmetric PGPE, ANN 2015](https://link.springer.com/chapter/10.1007/978-3-319-09903-3_13)
|
|
353
|
+
|
|
354
|
+
The model relaxations in JaxPlan are based on the following works:
|
|
355
|
+
- [Poisson Variational Autoencoder, NeurIPS 2024](https://proceedings.neurips.cc/paper_files/paper/2024/file/4f3cb9576dc99d62b80726690453716f-Paper-Conference.pdf)
|
|
356
|
+
- [Analyzing Differentiable Fuzzy Logic Operators, AI 2022](https://www.sciencedirect.com/science/article/pii/S0004370221001533)
|
|
357
|
+
- [Learning with algorithmic supervision via continuous relaxations, NeurIPS 2021](https://proceedings.neurips.cc/paper_files/paper/2021/file/89ae0fe22c47d374bc9350ef99e01685-Paper.pdf)
|
|
358
|
+
- [Universally quantized neural compression, NeurIPS 2020](https://papers.nips.cc/paper_files/paper/2020/file/92049debbe566ca5782a3045cf300a3c-Paper.pdf)
|
|
359
|
+
- [Generalized Gumbel-Softmax Gradient Estimator for Generic Discrete Random Variables, 2020](https://arxiv.org/pdf/2003.01847)
|
|
360
|
+
- [Categorical Reparametrization with Gumbel-Softmax, ICLR 2017](https://openreview.net/pdf?id=rkE3y85ee)
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
pyRDDLGym_jax/__init__.py,sha256=
|
|
1
|
+
pyRDDLGym_jax/__init__.py,sha256=iUkzbGTNPaXJsBuXc5YQfq1EDxnaYb9ZwmINbS0U8A8,19
|
|
2
2
|
pyRDDLGym_jax/entry_point.py,sha256=dxDlO_5gneEEViwkLCg30Z-KVzUgdRXaKuFjoZklkA0,974
|
|
3
3
|
pyRDDLGym_jax/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
pyRDDLGym_jax/core/compiler.py,sha256=
|
|
5
|
-
pyRDDLGym_jax/core/logic.py,sha256=
|
|
6
|
-
pyRDDLGym_jax/core/planner.py,sha256=
|
|
7
|
-
pyRDDLGym_jax/core/simulator.py,sha256=
|
|
8
|
-
pyRDDLGym_jax/core/tuning.py,sha256=
|
|
9
|
-
pyRDDLGym_jax/core/visualization.py,sha256=
|
|
4
|
+
pyRDDLGym_jax/core/compiler.py,sha256=_ERueJW7GQ7S8-IezreeuLs3fNCZbQZ8j7VMUVlEt1k,82306
|
|
5
|
+
pyRDDLGym_jax/core/logic.py,sha256=ZeCwCLqC6BvXpRT06TvE2bfPNO6ALuMzPmUvXNzW6Uw,52278
|
|
6
|
+
pyRDDLGym_jax/core/planner.py,sha256=N-vI52aN_vbOImwb8-hHCl3UxasPGX3gRi8w5bc6JiI,117922
|
|
7
|
+
pyRDDLGym_jax/core/simulator.py,sha256=DnPL93WVCMZqtqMUoiJdfWcH9pEvNgGfDfO4NV0wIS0,9271
|
|
8
|
+
pyRDDLGym_jax/core/tuning.py,sha256=RKKtDZp7unvfbhZEoaunZtcAn5xtzGYqXBB_Ij_Aapc,24205
|
|
9
|
+
pyRDDLGym_jax/core/visualization.py,sha256=4BghMp8N7qtF0tdyDSqtxAxNfP9HPrQWTiXzAMJmx7o,70365
|
|
10
10
|
pyRDDLGym_jax/core/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
11
|
pyRDDLGym_jax/core/assets/favicon.ico,sha256=RMMrI9YvmF81TgYG7FO7UAre6WmYFkV3B2GmbA1l0kM,175085
|
|
12
12
|
pyRDDLGym_jax/examples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -14,10 +14,10 @@ pyRDDLGym_jax/examples/run_gradient.py,sha256=KhXvijRDZ4V7N8NOI2WV8ePGpPna5_vnET
|
|
|
14
14
|
pyRDDLGym_jax/examples/run_gym.py,sha256=rXvNWkxe4jHllvbvU_EOMji_2-2k5d4tbBKhpMm_Gaw,1526
|
|
15
15
|
pyRDDLGym_jax/examples/run_plan.py,sha256=v2AvwgIa4Ejr626vBOgWFJIQvay3IPKWno02ztIFCYc,2768
|
|
16
16
|
pyRDDLGym_jax/examples/run_scipy.py,sha256=wvcpWCvdjvYHntO95a7JYfY2fuCMUTKnqjJikW0PnL4,2291
|
|
17
|
-
pyRDDLGym_jax/examples/run_tune.py,sha256=
|
|
17
|
+
pyRDDLGym_jax/examples/run_tune.py,sha256=WbGO8RudIK-cPMAMKvI8NbFQAqkG-Blbnta3Efsep6c,3828
|
|
18
18
|
pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_drp.cfg,sha256=mE8MqhOlkHeXIGEVrnR3QY6I-_iy4uxFYRA71P1bmtk,347
|
|
19
|
-
pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_replan.cfg,sha256=
|
|
20
|
-
pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_slp.cfg,sha256=
|
|
19
|
+
pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_replan.cfg,sha256=nFFYHCKQUMn8x-OpJwu2pwe1tycNSJ8iAIwSkCBn33E,370
|
|
20
|
+
pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_slp.cfg,sha256=eJ3HvHjODoKdtX7u-AM51xQaHJnYgzEy2t3omNG2oCs,340
|
|
21
21
|
pyRDDLGym_jax/examples/configs/HVAC_ippc2023_drp.cfg,sha256=9-QMZPZuecAEaerD79ZAbGX-tgfL8Y2W-tfkAyD15Cw,362
|
|
22
22
|
pyRDDLGym_jax/examples/configs/HVAC_ippc2023_slp.cfg,sha256=BiY6wwSYkR9-T46AA4n3okJ1Qvj8Iu-y1V5BrfCbqrM,340
|
|
23
23
|
pyRDDLGym_jax/examples/configs/MountainCar_Continuous_gym_slp.cfg,sha256=VBlTiHFQG72D1wpebMsuzSokwqlPVD99WjPp4YoWs84,356
|
|
@@ -25,15 +25,15 @@ pyRDDLGym_jax/examples/configs/MountainCar_ippc2023_slp.cfg,sha256=bH_5O13-Y6ztv
|
|
|
25
25
|
pyRDDLGym_jax/examples/configs/PowerGen_Continuous_drp.cfg,sha256=Pq6E9RYksue7X2cWjdWyUsV0LqQTjTvq6p0aLBVKWfY,370
|
|
26
26
|
pyRDDLGym_jax/examples/configs/PowerGen_Continuous_replan.cfg,sha256=SGVQAOqrOjEsZEtxL_Z6aGbLR19h5gKCcy0oz2vtQp8,382
|
|
27
27
|
pyRDDLGym_jax/examples/configs/PowerGen_Continuous_slp.cfg,sha256=6obQik2FBldoJ3VwoVfGhQqKpKdnYox770cF-SGRi3Q,345
|
|
28
|
-
pyRDDLGym_jax/examples/configs/Quadcopter_drp.cfg,sha256=
|
|
29
|
-
pyRDDLGym_jax/examples/configs/Quadcopter_slp.cfg,sha256=
|
|
30
|
-
pyRDDLGym_jax/examples/configs/Reservoir_Continuous_drp.cfg,sha256=
|
|
28
|
+
pyRDDLGym_jax/examples/configs/Quadcopter_drp.cfg,sha256=rs-CzOAyZV_NvwSh2f6Fm9XNw5Z8WIYgpAOzgTm_Gv8,403
|
|
29
|
+
pyRDDLGym_jax/examples/configs/Quadcopter_slp.cfg,sha256=EtSCTjd8gWm7akQdfHFxdpGnQvHzjo2IHbAuVxTAX4U,356
|
|
30
|
+
pyRDDLGym_jax/examples/configs/Reservoir_Continuous_drp.cfg,sha256=7nPOJCo3eaZuq1pCyIJJJkDM0jjJThDuDECJDZzX-uc,379
|
|
31
31
|
pyRDDLGym_jax/examples/configs/Reservoir_Continuous_replan.cfg,sha256=V3jzPGuNq2IAxYy_EeZWin4Y_uf0HvGhzg06ODNSY-I,381
|
|
32
|
-
pyRDDLGym_jax/examples/configs/Reservoir_Continuous_slp.cfg,sha256=
|
|
33
|
-
pyRDDLGym_jax/examples/configs/UAV_Continuous_slp.cfg,sha256=
|
|
34
|
-
pyRDDLGym_jax/examples/configs/Wildfire_MDP_ippc2014_drp.cfg,sha256=
|
|
35
|
-
pyRDDLGym_jax/examples/configs/Wildfire_MDP_ippc2014_replan.cfg,sha256=
|
|
36
|
-
pyRDDLGym_jax/examples/configs/Wildfire_MDP_ippc2014_slp.cfg,sha256=
|
|
32
|
+
pyRDDLGym_jax/examples/configs/Reservoir_Continuous_slp.cfg,sha256=SYAJmoUIUhhvAej3XOzC5boGxKVHnSiVi5-ZGj2S29M,354
|
|
33
|
+
pyRDDLGym_jax/examples/configs/UAV_Continuous_slp.cfg,sha256=osoIPfrldPw7oJF2AaAw0-ke6YHQNdrslFBCTytsqmo,354
|
|
34
|
+
pyRDDLGym_jax/examples/configs/Wildfire_MDP_ippc2014_drp.cfg,sha256=oNX8uW8Bw2uG9zHX1zeLF3mHWDHRIlJXYvbFcY0pfCI,382
|
|
35
|
+
pyRDDLGym_jax/examples/configs/Wildfire_MDP_ippc2014_replan.cfg,sha256=exCfGI3WU7IFO7n5rRe5cO1ZHAdFwttRYzjIdD4Pz2Y,451
|
|
36
|
+
pyRDDLGym_jax/examples/configs/Wildfire_MDP_ippc2014_slp.cfg,sha256=e6Ikgv2uBbKuXHfVKt4KQ01LDUBGbc31D28bCcztJ58,413
|
|
37
37
|
pyRDDLGym_jax/examples/configs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
38
38
|
pyRDDLGym_jax/examples/configs/default_drp.cfg,sha256=XeMWAAG_OFZo7JAMxS5-XXroZaeVMzfM0NswmEobIns,373
|
|
39
39
|
pyRDDLGym_jax/examples/configs/default_replan.cfg,sha256=CK4cEz8ReXyAZPLaLG9clIIRXAqM3IplUCxbLt_V2lY,407
|
|
@@ -41,9 +41,9 @@ pyRDDLGym_jax/examples/configs/default_slp.cfg,sha256=mJo0woDevhQCSQfJg30ULVy9qG
|
|
|
41
41
|
pyRDDLGym_jax/examples/configs/tuning_drp.cfg,sha256=CQMpSCKTkGioO7U82mHMsYWFRsutULx0V6Wrl3YzV2U,504
|
|
42
42
|
pyRDDLGym_jax/examples/configs/tuning_replan.cfg,sha256=m_0nozFg_GVld0tGv92Xao_KONFJDq_vtiJKt5isqI8,501
|
|
43
43
|
pyRDDLGym_jax/examples/configs/tuning_slp.cfg,sha256=KHu8II6CA-h_HblwvWHylNRjSvvGS3VHxN7JQNR4p_Q,464
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
44
|
+
pyrddlgym_jax-2.1.dist-info/LICENSE,sha256=Y0Gi6H6mLOKN-oIKGZulQkoTJyPZeAaeuZu7FXH-meg,1095
|
|
45
|
+
pyrddlgym_jax-2.1.dist-info/METADATA,sha256=mVpp8qP4AnXEsjVCyCGjooSVZXn0y9zsbqLTm9qeMM4,16874
|
|
46
|
+
pyrddlgym_jax-2.1.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
|
47
|
+
pyrddlgym_jax-2.1.dist-info/entry_points.txt,sha256=Q--z9QzqDBz1xjswPZ87PU-pib-WPXx44hUWAFoBGBA,59
|
|
48
|
+
pyrddlgym_jax-2.1.dist-info/top_level.txt,sha256=n_oWkP_BoZK0VofvPKKmBZ3NPk86WFNvLhi1BktCbVQ,14
|
|
49
|
+
pyrddlgym_jax-2.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|