pyRDDLGym-jax 2.8-py3-none-any.whl → 3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. pyRDDLGym_jax/__init__.py +1 -1
  2. pyRDDLGym_jax/core/compiler.py +1080 -906
  3. pyRDDLGym_jax/core/logic.py +1537 -1369
  4. pyRDDLGym_jax/core/model.py +75 -86
  5. pyRDDLGym_jax/core/planner.py +883 -935
  6. pyRDDLGym_jax/core/simulator.py +20 -17
  7. pyRDDLGym_jax/core/tuning.py +11 -7
  8. pyRDDLGym_jax/core/visualization.py +115 -78
  9. pyRDDLGym_jax/entry_point.py +2 -1
  10. pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_drp.cfg +6 -8
  11. pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_replan.cfg +5 -7
  12. pyRDDLGym_jax/examples/configs/Cartpole_Continuous_gym_slp.cfg +7 -8
  13. pyRDDLGym_jax/examples/configs/HVAC_ippc2023_drp.cfg +7 -8
  14. pyRDDLGym_jax/examples/configs/HVAC_ippc2023_slp.cfg +8 -9
  15. pyRDDLGym_jax/examples/configs/MountainCar_Continuous_gym_slp.cfg +5 -7
  16. pyRDDLGym_jax/examples/configs/MountainCar_ippc2023_slp.cfg +5 -7
  17. pyRDDLGym_jax/examples/configs/PowerGen_Continuous_drp.cfg +7 -8
  18. pyRDDLGym_jax/examples/configs/PowerGen_Continuous_replan.cfg +6 -7
  19. pyRDDLGym_jax/examples/configs/PowerGen_Continuous_slp.cfg +6 -7
  20. pyRDDLGym_jax/examples/configs/Quadcopter_drp.cfg +6 -8
  21. pyRDDLGym_jax/examples/configs/Quadcopter_physics_drp.cfg +17 -0
  22. pyRDDLGym_jax/examples/configs/Quadcopter_physics_slp.cfg +17 -0
  23. pyRDDLGym_jax/examples/configs/Quadcopter_slp.cfg +5 -7
  24. pyRDDLGym_jax/examples/configs/Reservoir_Continuous_drp.cfg +4 -7
  25. pyRDDLGym_jax/examples/configs/Reservoir_Continuous_replan.cfg +5 -7
  26. pyRDDLGym_jax/examples/configs/Reservoir_Continuous_slp.cfg +4 -7
  27. pyRDDLGym_jax/examples/configs/UAV_Continuous_slp.cfg +5 -7
  28. pyRDDLGym_jax/examples/configs/Wildfire_MDP_ippc2014_drp.cfg +6 -7
  29. pyRDDLGym_jax/examples/configs/Wildfire_MDP_ippc2014_replan.cfg +6 -7
  30. pyRDDLGym_jax/examples/configs/Wildfire_MDP_ippc2014_slp.cfg +6 -7
  31. pyRDDLGym_jax/examples/configs/default_drp.cfg +5 -8
  32. pyRDDLGym_jax/examples/configs/default_replan.cfg +5 -8
  33. pyRDDLGym_jax/examples/configs/default_slp.cfg +5 -8
  34. pyRDDLGym_jax/examples/configs/tuning_drp.cfg +6 -8
  35. pyRDDLGym_jax/examples/configs/tuning_replan.cfg +6 -8
  36. pyRDDLGym_jax/examples/configs/tuning_slp.cfg +6 -8
  37. pyRDDLGym_jax/examples/run_plan.py +2 -2
  38. pyRDDLGym_jax/examples/run_tune.py +2 -2
  39. {pyrddlgym_jax-2.8.dist-info → pyrddlgym_jax-3.0.dist-info}/METADATA +22 -23
  40. pyrddlgym_jax-3.0.dist-info/RECORD +51 -0
  41. {pyrddlgym_jax-2.8.dist-info → pyrddlgym_jax-3.0.dist-info}/WHEEL +1 -1
  42. pyRDDLGym_jax/examples/run_gradient.py +0 -102
  43. pyrddlgym_jax-2.8.dist-info/RECORD +0 -50
  44. {pyrddlgym_jax-2.8.dist-info → pyrddlgym_jax-3.0.dist-info}/entry_points.txt +0 -0
  45. {pyrddlgym_jax-2.8.dist-info → pyrddlgym_jax-3.0.dist-info}/licenses/LICENSE +0 -0
  46. {pyrddlgym_jax-2.8.dist-info → pyrddlgym_jax-3.0.dist-info}/top_level.txt +0 -0
pyRDDLGym_jax/core/simulator.py
@@ -20,7 +20,7 @@
 
  import time
  import numpy as np
- from typing import Callable, Dict, Optional, Union
+ from typing import Callable, Dict, Optional, Tuple, Union
 
  import jax
 
@@ -103,7 +103,7 @@ class JaxRDDLSimulator(RDDLSimulator):
  self.terminals = jax.tree_util.tree_map(jax.jit, compiled.terminations)
  self.reward = jax.jit(compiled.reward)
  jax_cpfs = jax.tree_util.tree_map(jax.jit, compiled.cpfs)
- self.model_params = compiled.model_params
+ self.model_params = compiled.model_aux['params']
 
  # level analysis
  self.cpfs = []
@@ -116,6 +116,7 @@ class JaxRDDLSimulator(RDDLSimulator):
 
  # initialize all fluent and non-fluent values
  self.subs = self.init_values.copy()
+ self.fls, self.nfls = compiled.split_fluent_nonfluent(self.subs)
  self.state = None
  self.noop_actions = {var: values
  for (var, values) in self.init_values.items()
@@ -142,24 +143,23 @@ class JaxRDDLSimulator(RDDLSimulator):
  for (i, invariant) in enumerate(self.invariants):
  loc = self.invariant_names[i]
  sample, self.key, error, self.model_params = invariant(
- self.subs, self.model_params, self.key)
+ self.fls, self.nfls, self.model_params, self.key)
  self.handle_error_code(error, loc)
  if not bool(sample):
  if not silent:
- raise RDDLStateInvariantNotSatisfiedError(
- f'{loc} is not satisfied.')
+ raise RDDLStateInvariantNotSatisfiedError(f'{loc} is not satisfied.')
  return False
  return True
 
  def check_action_preconditions(self, actions: Args, silent: bool=False) -> bool:
  '''Throws an exception if the action preconditions are not satisfied.'''
- subs = self.subs
- subs.update(actions)
+ self.fls.update(actions)
+ self.subs.update(actions)
 
  for (i, precond) in enumerate(self.preconds):
  loc = self.precond_names[i]
  sample, self.key, error, self.model_params = precond(
- subs, self.model_params, self.key)
+ self.fls, self.nfls, self.model_params, self.key)
  self.handle_error_code(error, loc)
  if not bool(sample):
  if not silent:
@@ -173,7 +173,7 @@ class JaxRDDLSimulator(RDDLSimulator):
  for (i, terminal) in enumerate(self.terminals):
  loc = self.terminal_names[i]
  sample, self.key, error, self.model_params = terminal(
- self.subs, self.model_params, self.key)
+ self.fls, self.nfls, self.model_params, self.key)
  self.handle_error_code(error, loc)
  if bool(sample):
  return True
@@ -182,24 +182,26 @@ class JaxRDDLSimulator(RDDLSimulator):
  def sample_reward(self) -> float:
  '''Samples the current reward given the current state and action.'''
  reward, self.key, error, self.model_params = self.reward(
- self.subs, self.model_params, self.key)
+ self.fls, self.nfls, self.model_params, self.key)
  self.handle_error_code(error, 'reward function')
  return float(reward)
 
- def step(self, actions: Args) -> Args:
+ def step(self, actions: Args) -> Tuple[Args, float, bool]:
  '''Samples and returns the next state from the cpfs.
 
  :param actions: a dict mapping current action fluents to their values
  '''
  rddl = self.rddl
  keep_tensors = self.keep_tensors
- subs = self.subs
+ subs, fls, nfls = self.subs, self.fls, self.nfls
  subs.update(actions)
+ fls.update(actions)
 
  # compute CPFs in topological order
  for (cpf, expr, _) in self.cpfs:
- subs[cpf], self.key, error, self.model_params = expr(
- subs, self.model_params, self.key)
+ fls[cpf], self.key, error, self.model_params = expr(
+ fls, nfls, self.model_params, self.key)
+ subs[cpf] = fls[cpf]
  self.handle_error_code(error, f'CPF <{cpf}>')
 
  # sample reward
@@ -210,10 +212,11 @@ class JaxRDDLSimulator(RDDLSimulator):
  for (state, next_state) in rddl.next_state.items():
 
  # set state = state' for the next epoch
+ fls[state] = fls[next_state]
  subs[state] = subs[next_state]
 
  # convert object integer to string representation
- state_values = subs[state]
+ state_values = fls[state]
  if self.objects_as_strings:
  ptype = rddl.variable_ranges[state]
  if ptype not in RDDLValueInitializer.NUMPY_TYPES:
@@ -231,7 +234,7 @@ class JaxRDDLSimulator(RDDLSimulator):
  for var in rddl.observ_fluents:
 
  # convert object integer to string representation
- obs_values = subs[var]
+ obs_values = fls[var]
  if self.objects_as_strings:
  ptype = rddl.variable_ranges[var]
  if ptype not in RDDLValueInitializer.NUMPY_TYPES:
@@ -244,7 +247,7 @@ class JaxRDDLSimulator(RDDLSimulator):
  obs.update(rddl.ground_var_with_values(var, obs_values))
  else:
  obs = self.state
-
+
  done = self.check_terminal_states()
  return obs, reward, done
 
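Note on the core/simulator.py hunks above: in 3.0 every compiled JAX expression (CPFs, reward, invariants, preconditions, terminations) is called with separate fluent and non-fluent dictionaries plus the model parameters and a PRNG key, where 2.8 passed a single substitution dictionary, and step() now declares the (observations, reward, done) tuple it returns. The snippet below is a minimal, self-contained sketch of that calling convention only; stub_cpf and the fluent names are invented for illustration and are not produced by the package.

    # Hypothetical sketch of the 3.0 calling convention seen in the diff above.
    import jax
    import jax.numpy as jnp

    def stub_cpf(fls, nfls, model_params, key):
        # a compiled CPF in 3.0 receives (fluents, non-fluents, params, key) and
        # returns (value, key, error code, params); this stub mimics that shape
        value = fls['level'] + nfls['inflow']
        return value, key, 0, model_params

    fls = {'level': jnp.array(1.0)}     # state/action/interm fluents, updated each step
    nfls = {'inflow': jnp.array(0.5)}   # non-fluents, fixed for the instance
    model_params, key = {}, jax.random.PRNGKey(42)

    fls['level'], key, error, model_params = stub_cpf(fls, nfls, model_params, key)
    print(float(fls['level']))  # 1.5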
pyRDDLGym_jax/core/tuning.py
@@ -248,8 +248,8 @@ class JaxParameterTuning:
  policy = JaxOfflineController(
  planner=planner, key=subkey, tqdm_position=index,
  params=best_params, train_on_reset=False)
- total_reward = policy.evaluate(env, episodes=rollouts_per_trial,
- seed=np.array(subkey)[0])['mean']
+ total_reward = policy.evaluate(
+ env, episodes=rollouts_per_trial, seed=np.array(subkey)[0])['mean']
 
  # update average reward
  if verbose:
@@ -321,7 +321,8 @@ class JaxParameterTuning:
  index: int,
  iteration: int,
  kwargs: Kwargs,
- queue: object) -> Tuple[ParameterValues, float, int, int]:
+ queue: object,
+ show_dashboard: bool) -> Tuple[ParameterValues, float, int, int]:
  '''A pickleable objective function to evaluate a single hyper-parameter
  configuration.'''
 
@@ -345,7 +346,10 @@ class JaxParameterTuning:
  planner_args, _, train_args = load_config_from_string(config_string)
 
  # remove keywords that should not be in the tuner
- train_args.pop('dashboard', None)
+ if show_dashboard:
+ planner_args['dashboard'] = True
+ else:
+ planner_args['dashboard'] = None
  planner_args.pop('parallel_updates', None)
 
  # initialize env for evaluation (need fresh copy to avoid concurrency)
@@ -353,6 +357,7 @@ class JaxParameterTuning:
 
  # run planning algorithm
  planner = JaxBackpropPlanner(rddl=env.model, **planner_args)
+ planner.dashboard = None
  if online:
  average_reward = JaxParameterTuning.online_trials(
  env, planner, train_args, key, iteration, index, num_trials,
@@ -482,7 +487,7 @@ class JaxParameterTuning:
  # assign jobs to worker pool
  results = [
  pool.apply_async(JaxParameterTuning.objective_function,
- obj_args + (it, obj_kwargs, queue))
+ obj_args + (it, obj_kwargs, queue, show_dashboard))
  for obj_args in zip(suggested_params, subkeys, worker_ids)
  ]
 
@@ -502,8 +507,7 @@ class JaxParameterTuning:
  # extract and register the new evaluation
  params, target, index, pid = results.pop(i).get()
  optimizer.register(params, target)
- optimizer._gp.fit(
- optimizer.space.params, optimizer.space.target)
+ optimizer._gp.fit(optimizer.space.params, optimizer.space.target)
 
  # update acquisition function and suggest a new point
  suggested_params[index] = optimizer.suggest()
pyRDDLGym_jax/core/visualization.py
@@ -18,6 +18,8 @@ import os
  from datetime import datetime
  import math
  import numpy as np
+ import io
+ import pickle
  import time
  import threading
  from typing import Any, Dict, Optional, Tuple, TYPE_CHECKING
@@ -29,7 +31,7 @@ log = logging.getLogger('werkzeug')
  log.setLevel(logging.ERROR)
 
  import dash
- from dash.dcc import Interval, Graph, Store
+ from dash.dcc import Download, Interval, Graph, Store
  from dash.dependencies import Input, Output, State, ALL
  from dash.html import Div, B, H4, P, Hr
  import dash_bootstrap_components as dbc
@@ -48,6 +50,7 @@ POLICY_DIST_PLOTS_PER_ROW = 6
  ACTION_HEATMAP_HEIGHT = 400
  PROGRESS_FOR_NEXT_RETURN_DIST = 2
  PROGRESS_FOR_NEXT_POLICY_DIST = 10
+ PROGRESS_FOR_NEXT_BASIC_TIME_CURVE = 0.05
  REWARD_ERROR_DIST_SUBPLOTS = 20
  MODEL_STATE_ERROR_HEIGHT = 300
  POLICY_STATE_VIZ_MAX_HEIGHT = 800
@@ -77,6 +80,7 @@ class JaxPlannerDashboard:
  self.test_return = {}
  self.train_return = {}
  self.pgpe_return = {}
+ self.basic_time_curve_last_progress = {}
  self.return_dist = {}
  self.return_dist_ticks = {}
  self.return_dist_last_progress = {}
@@ -91,7 +95,8 @@ class JaxPlannerDashboard:
  self.train_reward_dist = {}
  self.test_reward_dist = {}
  self.train_state_fluents = {}
- self.test_state_fluents = {}
+ self.train_state_output = {}
+ self.test_state_output = {}
 
  self.tuning_gp_heatmaps = None
  self.tuning_gp_targets = None
@@ -269,6 +274,8 @@ class JaxPlannerDashboard:
  dbc.DropdownMenuItem("30s", id='30sec'),
  dbc.DropdownMenuItem("1m", id='1min'),
  dbc.DropdownMenuItem("5m", id='5min'),
+ dbc.DropdownMenuItem("30m", id='30min'),
+ dbc.DropdownMenuItem("1h", id='1h'),
  dbc.DropdownMenuItem("1d", id='1day')],
  label="Refresh: 2s",
  id='refresh-rate-dropdown',
@@ -281,7 +288,7 @@ class JaxPlannerDashboard:
  dbc.DropdownMenuItem("10", id='10pp'),
  dbc.DropdownMenuItem("25", id='25pp'),
  dbc.DropdownMenuItem("50", id='50pp')],
- label="Exp. Per Page: 10",
+ label="Results Per Page: 10",
  id='experiment-num-per-page-dropdown',
  nav=True
  )
@@ -328,6 +335,13 @@ class JaxPlannerDashboard:
  # policy
  dbc.Tab(dbc.Card(
  dbc.CardBody([
+ dbc.Row([
+ dbc.Col([
+ dbc.Button('Save Policy Weights',
+ id='policy-save-button'),
+ Download(id="download-policy")
+ ], width='auto')
+ ]),
  dbc.Row([
  Graph(id='action-output'),
  ]),
@@ -506,10 +520,12 @@ class JaxPlannerDashboard:
  Input("30sec", "n_clicks"),
  Input("1min", "n_clicks"),
  Input("5min", "n_clicks"),
+ Input("30min", "n_clicks"),
+ Input("1h", "n_clicks"),
  Input("1day", "n_clicks")],
  [State('refresh-interval', 'data')]
  )
- def click_refresh_rate(n05, n1, n2, n5, n10, n30, n1m, n5m, nd, data):
+ def click_refresh_rate(n05, n1, n2, n5, n10, n30, n1m, n5m, n30m, n1h, nd, data):
  ctx = dash.callback_context
  if not ctx.triggered:
  return data
@@ -530,6 +546,10 @@ class JaxPlannerDashboard:
  return 60000
  elif button_id == '5min':
  return 300000
+ elif button_id == '30min':
+ return 1800000
+ elif button_id == '1h':
+ return 3600000
  elif button_id == '1day':
  return 86400000
  return data
@@ -562,8 +582,14 @@ class JaxPlannerDashboard:
  return 'Refresh: 1m'
  elif selected_interval == 300000:
  return 'Refresh: 5m'
+ elif selected_interval == 1800000:
+ return 'Refresh: 30m'
+ elif selected_interval == 3600000:
+ return 'Refresh: 1h'
+ elif selected_interval == 86400000:
+ return 'Refresh: 1day'
  else:
- return 'Refresh: 2s'
+ return 'Refresh: n/a'
 
  # update the experiments per page
  @app.callback(
@@ -594,7 +620,7 @@ class JaxPlannerDashboard:
  [Input('experiment-num-per-page', 'data')]
  )
  def update_experiments_per_page(selected_num):
- return f'Exp. Per Page: {selected_num}'
+ return f'Results Per Page: {selected_num}'
 
  # update the experiment table
  @app.callback(
@@ -758,7 +784,7 @@ class JaxPlannerDashboard:
  if checked and self.action_output[row] is not None:
  num_plots = len(self.action_output[row])
  titles = []
- for (_, act, _) in self.action_output[row]:
+ for act in self.action_output[row].keys():
  titles.append(f'Values of Action-Fluents {act}')
  titles.append(f'Std. Dev. of Action-Fluents {act}')
  fig = make_subplots(
@@ -766,8 +792,7 @@ class JaxPlannerDashboard:
  shared_xaxes=True, horizontal_spacing=0.15,
  subplot_titles=titles
  )
- for (i, (action_output, action, action_labels)) \
- in enumerate(self.action_output[row]):
+ for (i, action_output) in enumerate(self.action_output[row].values()):
  action_values = np.mean(1. * action_output, axis=0).T
  action_errors = np.std(1. * action_output, axis=0).T
  fig.add_trace(go.Heatmap(
@@ -984,6 +1009,21 @@ class JaxPlannerDashboard:
  return fig
  return dash.no_update
 
+ # save policy button
+ @app.callback(
+ Output('download-policy', 'data'),
+ Input("policy-save-button", "n_clicks"),
+ prevent_initial_call=True
+ )
+ def save_policy_weights(n_clicks):
+ for (row, checked) in self.checked.copy().items():
+ if checked:
+ bytes_io = io.BytesIO()
+ pickle.dump(self.policy_params[row], bytes_io)
+ bytes_io.seek(0)
+ return dash.dcc.send_bytes(bytes_io.read(), "policy_params.pkl")
+ return dash.no_update
+
  # update the model parameter information
  @app.callback(
  Output('model-params-dropdown', 'children'),
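The 'Save Policy Weights' button added to core/visualization.py above serializes the checked experiment's stored policy parameters with pickle and serves them to the browser as policy_params.pkl. A file downloaded this way can be read back with the standard library; the exact structure of the unpickled object (typically a pytree of arrays per policy parameter) is an assumption here, since the diff only shows how it is written.

    # Minimal sketch: read back a file downloaded from the dashboard's policy tab.
    import pickle

    with open('policy_params.pkl', 'rb') as f:   # path of the downloaded file
        policy_params = pickle.load(f)

    print(type(policy_params))  # inspect what the planner stored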
@@ -1136,42 +1176,40 @@ class JaxPlannerDashboard:
  if not state: return fig
  for (row, checked) in self.checked.copy().items():
  if checked and row in self.train_state_fluents:
- train_values = self.train_state_fluents[row][state]
- test_values = self.test_state_fluents[row][state]
- train_values = 1 * train_values.reshape(train_values.shape[:2] + (-1,))
- test_values = 1 * test_values.reshape(test_values.shape[:2] + (-1,))
- num_epochs, num_states = train_values.shape[1:]
- step = 1
- if num_epochs > REWARD_ERROR_DIST_SUBPLOTS:
- step = num_epochs // REWARD_ERROR_DIST_SUBPLOTS
+ titles = [f'Values of Train State-Fluents {state}',
+ f'Values of Test State-Fluents {state}']
  fig = make_subplots(
- rows=num_states, cols=1, shared_xaxes=True,
- subplot_titles=self.rddl[row].variable_groundings[state]
+ rows=1, cols=2,
+ shared_xaxes=True, horizontal_spacing=0.15,
+ subplot_titles=titles
  )
- for istate in range(num_states):
- for epoch in range(0, num_epochs, step):
- fig.add_trace(go.Violin(
- y=train_values[:, epoch, istate], x0=epoch,
- side='negative', line_color='red',
- name=f'Train Epoch {epoch + 1}'
- ), row=istate + 1, col=1)
- fig.add_trace(go.Violin(
- y=test_values[:, epoch, istate], x0=epoch,
- side='positive', line_color='blue',
- name=f'Test Epoch {epoch + 1}'
- ), row=istate + 1, col=1)
- fig.update_traces(meanline_visible=True)
+ train_state_output = self.train_state_output[row][state]
+ test_state_output = self.test_state_output[row][state]
+ train_state_values = np.mean(1. * train_state_output, axis=0).T
+ test_state_values = np.mean(1. * test_state_output, axis=0).T
+ fig.add_trace(go.Heatmap(
+ z=train_state_values,
+ x=np.arange(train_state_values.shape[1]),
+ y=np.arange(train_state_values.shape[0]),
+ colorscale='Blues', colorbar_x=0.45,
+ colorbar_len=0.8 / 1,
+ colorbar_y=1 - (0.5) / 1
+ ), row=1, col=1)
+ fig.add_trace(go.Heatmap(
+ z=test_state_values,
+ x=np.arange(test_state_values.shape[1]),
+ y=np.arange(test_state_values.shape[0]),
+ colorscale='Blues', colorbar_len=0.8 / 1,
+ colorbar_y=1 - (0.5) / 1
+ ), row=1, col=2)
  fig.update_layout(
- title=dict(text=(f"Distribution of State-Fluent {state} "
- f"in Relaxed Model vs True Model")),
+ title=f"Values of State-Fluents {state}",
  xaxis=dict(title=dict(text="Decision Epoch")),
- yaxis=dict(title=dict(text="State-Fluent Value")),
  font=dict(size=PLOT_AXES_FONT_SIZE),
- height=MODEL_STATE_ERROR_HEIGHT * num_states,
- violingap=0, violinmode='overlay', showlegend=False,
- legend=dict(bgcolor='rgba(0,0,0,0)'),
+ height=ACTION_HEATMAP_HEIGHT * 1,
+ showlegend=False,
  template="plotly_white"
- )
+ )
  break
  return fig
 
@@ -1363,6 +1401,7 @@ class JaxPlannerDashboard:
  self.train_return[experiment_id] = []
  self.test_return[experiment_id] = []
  self.pgpe_return[experiment_id] = []
+ self.basic_time_curve_last_progress[experiment_id] = 0
  self.return_dist_ticks[experiment_id] = []
  self.return_dist_last_progress[experiment_id] = 0
  self.return_dist[experiment_id] = []
@@ -1410,64 +1449,63 @@ class JaxPlannerDashboard:
  '''Pass new information and update the dashboard for a given experiment.'''
 
  # data for return curves
+ progress = callback['progress']
  iteration = callback['iteration']
- self.xticks[experiment_id].append(iteration)
- self.train_return[experiment_id].append(callback['train_return'])
- self.test_return[experiment_id].append(callback['best_return'])
- self.pgpe_return[experiment_id].append(callback['pgpe_return'])
+ if progress - self.basic_time_curve_last_progress[experiment_id] >= PROGRESS_FOR_NEXT_BASIC_TIME_CURVE:
+ self.xticks[experiment_id].append(iteration)
+ self.train_return[experiment_id].append(np.min(callback['train_return']))
+ self.test_return[experiment_id].append(np.min(callback['best_return']))
+ self.pgpe_return[experiment_id].append(np.min(callback['pgpe_return']))
+ for (key, values) in callback['model_params'].items():
+ self.relaxed_exprs_values[experiment_id][key].append(values[0])
+ self.basic_time_curve_last_progress[experiment_id] = progress
 
  # data for return distributions
- progress = int(callback['progress'])
- if progress - self.return_dist_last_progress[experiment_id] \
- >= PROGRESS_FOR_NEXT_RETURN_DIST:
+ if progress - self.return_dist_last_progress[experiment_id] >= PROGRESS_FOR_NEXT_RETURN_DIST:
  self.return_dist_ticks[experiment_id].append(iteration)
  self.return_dist[experiment_id].append(
- np.sum(np.asarray(callback['reward']), axis=1))
+ np.sum(np.mean(callback['test_log']['reward'], axis=0), axis=1))
  self.return_dist_last_progress[experiment_id] = progress
 
+ # data for policy weight distributions
+ if progress - self.policy_params_last_progress[experiment_id] >= PROGRESS_FOR_NEXT_POLICY_DIST:
+ self.policy_params_ticks[experiment_id].append(iteration)
+ self.policy_params[experiment_id].append(callback['best_params'])
+ self.policy_params_last_progress[experiment_id] = progress
+
  # data for action heatmaps
- action_output = []
+ action_output = {}
  rddl = self.rddl[experiment_id]
  for action in rddl.action_fluents:
- action_values = np.asarray(callback['fluents'][action])
- action_output.append(
- (action_values.reshape(action_values.shape[:2] + (-1,)),
- action,
- rddl.variable_groundings[action])
- )
+ action_values = np.asarray(callback['test_log']['fluents'][action][0])
+ action_output[action] = action_values.reshape(action_values.shape[:2] + (-1,))
  self.action_output[experiment_id] = action_output
 
- # data for policy weight distributions
- if progress - self.policy_params_last_progress[experiment_id] \
- >= PROGRESS_FOR_NEXT_POLICY_DIST:
- self.policy_params_ticks[experiment_id].append(iteration)
- self.policy_params[experiment_id].append(callback['best_params'])
- self.policy_params_last_progress[experiment_id] = progress
+ # data for state heatmaps
+ train_state_output = {}
+ test_state_output = {}
+ for state in rddl.state_fluents:
+ state_values = np.asarray(callback['train_log']['fluents'][state][0])
+ train_state_output[state] = state_values.reshape(state_values.shape[:2] + (-1,))
+ state_values = np.asarray(callback['test_log']['fluents'][state][0])
+ test_state_output[state] = state_values.reshape(state_values.shape[:2] + (-1,))
+ self.train_state_output[experiment_id] = train_state_output
+ self.test_state_output[experiment_id] = test_state_output
 
- # data for model relaxations
- model_params = callback['model_params']
- for (key, values) in model_params.items():
- expr_id = int(str(key).split('_')[0])
- self.relaxed_exprs_values[experiment_id][expr_id].append(values.item())
- self.train_reward_dist[experiment_id] = callback['train_log']['reward']
- self.test_reward_dist[experiment_id] = callback['reward']
+ # data for reward distributions
+ self.train_reward_dist[experiment_id] = np.mean(callback['train_log']['reward'], axis=0)
+ self.test_reward_dist[experiment_id] = np.mean(callback['test_log']['reward'], axis=0)
  self.train_state_fluents[experiment_id] = {
- name: np.asarray(callback['train_log']['fluents'][name])
+ name: np.asarray(callback['train_log']['fluents'][name][0])
  for name in rddl.state_fluents
  }
- self.test_state_fluents[experiment_id] = {
- name: np.asarray(callback['fluents'][name])
- for name in self.train_state_fluents[experiment_id]
- }
-
  # update experiment table info
  self.status[experiment_id] = str(callback['status']).split('.')[1]
  self.duration[experiment_id] = callback["elapsed_time"]
- self.progress[experiment_id] = progress
+ self.progress[experiment_id] = int(progress)
  self.warnings = None
 
- def update_tuning(self, optimizer: Any,
- bounds: Dict[str, Tuple[float, float]]) -> None:
+ def update_tuning(self, optimizer: Any, bounds: Dict[str, Tuple[float, float]]) -> None:
  '''Updates the hyper-parameter tuning plots.'''
 
  self.tuning_gp_heatmaps = []
@@ -1475,8 +1513,7 @@ class JaxPlannerDashboard:
  if not optimizer.res: return
 
  self.tuning_gp_targets = optimizer.space.target.reshape((-1,))
- self.tuning_gp_predicted = \
- optimizer._gp.predict(optimizer.space.params).reshape((-1,))
+ self.tuning_gp_predicted = optimizer._gp.predict(optimizer.space.params).reshape((-1,))
  self.tuning_gp_params = {name: optimizer.space.params[:, i]
  for (i, name) in enumerate(optimizer.space.keys)}
 
pyRDDLGym_jax/entry_point.py
@@ -1,6 +1,5 @@
  import argparse
 
- from pyRDDLGym_jax.examples import run_plan, run_tune
 
  EPILOG = 'For complete documentation, see https://pyrddlgym.readthedocs.io/en/latest/jax.html.'
 
@@ -47,8 +46,10 @@ def main():
  # dispatch
  args = parser.parse_args()
  if args.jaxplan == "plan":
+ from pyRDDLGym_jax.examples import run_plan
  run_plan.main(args.domain, args.instance, args.method, args.episodes)
  elif args.jaxplan == "tune":
+ from pyRDDLGym_jax.examples import run_tune
  run_tune.main(args.domain, args.instance, args.method,
  args.trials, args.iters, args.workers, args.dashboard,
  args.filepath)
@@ -1,17 +1,15 @@
- [Model]
- logic='FuzzyLogic'
- comparison_kwargs={'weight': 20}
- rounding_kwargs={'weight': 20}
- control_kwargs={'weight': 20}
+ [Compiler]
+ method='DefaultJaxRDDLCompilerWithGrad'
+ sigmoid_weight=20
 
- [Optimizer]
+ [Planner]
  method='JaxDeepReactivePolicy'
- method_kwargs={'topology': [32, 16]}
+ method_kwargs={'topology': [32, 32]}
  optimizer='rmsprop'
  optimizer_kwargs={'learning_rate': 0.005}
  batch_size_train=1
  batch_size_test=1
 
- [Training]
+ [Optimize]
  key=42
  epochs=1000
@@ -1,10 +1,8 @@
- [Model]
- logic='FuzzyLogic'
- comparison_kwargs={'weight': 20}
- rounding_kwargs={'weight': 20}
- control_kwargs={'weight': 20}
+ [Compiler]
+ method='DefaultJaxRDDLCompilerWithGrad'
+ sigmoid_weight=20
 
- [Optimizer]
+ [Planner]
  method='JaxStraightLinePlan'
  method_kwargs={}
  optimizer='rmsprop'
@@ -13,7 +11,7 @@ batch_size_train=1
  batch_size_test=1
  rollout_horizon=30
 
- [Training]
+ [Optimize]
  key=42
  train_seconds=0.5
  print_summary=False
@@ -1,10 +1,8 @@
- [Model]
- logic='FuzzyLogic'
- comparison_kwargs={'weight': 20}
- rounding_kwargs={'weight': 20}
- control_kwargs={'weight': 20}
+ [Compiler]
+ method='DefaultJaxRDDLCompilerWithGrad'
+ sigmoid_weight=20
 
- [Optimizer]
+ [Planner]
  method='JaxStraightLinePlan'
  method_kwargs={}
  optimizer='rmsprop'
@@ -12,7 +10,8 @@ optimizer_kwargs={'learning_rate': 0.001}
  batch_size_train=1
  batch_size_test=1
  clip_grad=1.0
+ pgpe_kwargs={'optimizer_kwargs_mu': {'learning_rate': 0.01}, 'optimizer_kwargs_sigma': {'learning_rate': 0.01}}
 
- [Training]
+ [Optimize]
  key=42
- epochs=5000
+ epochs=3000
@@ -1,10 +1,9 @@
- [Model]
- logic='FuzzyLogic'
- comparison_kwargs={'weight': 5}
- rounding_kwargs={'weight': 5}
- control_kwargs={'weight': 5}
+ [Compiler]
+ method='DefaultJaxRDDLCompilerWithGrad'
+ bernoulli_sigmoid_weight=5
+ sigmoid_weight=5
 
- [Optimizer]
+ [Planner]
  method='JaxDeepReactivePolicy'
  method_kwargs={'topology': [64, 64]}
  optimizer='rmsprop'
@@ -12,7 +11,7 @@ optimizer_kwargs={'learning_rate': 0.001}
  batch_size_train=1
  batch_size_test=1
 
- [Training]
+ [Optimize]
  key=42
  epochs=6000
- train_seconds=60
+ train_seconds=90
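Taken together, the configuration hunks above replace the old [Model]/[Optimizer]/[Training] sections with [Compiler]/[Planner]/[Optimize]. Below is a minimal sketch of a complete config in the new layout, assembled only from section names and keys that appear in this diff; it is an illustration of the format rather than a recommended configuration (the tuning code above consumes such text via load_config_from_string), and it is parsed here with the standard library only.

    # Hypothetical 3.0-style config string; values are copied from the hunks above.
    import configparser

    CONFIG = """
    [Compiler]
    method='DefaultJaxRDDLCompilerWithGrad'
    sigmoid_weight=20

    [Planner]
    method='JaxDeepReactivePolicy'
    method_kwargs={'topology': [32, 32]}
    optimizer='rmsprop'
    optimizer_kwargs={'learning_rate': 0.005}
    batch_size_train=1
    batch_size_test=1

    [Optimize]
    key=42
    epochs=1000
    """

    # strip the presentation indent before parsing, then show the three sections
    parser = configparser.ConfigParser()
    parser.read_string("\n".join(line.strip() for line in CONFIG.splitlines()))
    print(parser.sections())  # ['Compiler', 'Planner', 'Optimize']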