pyRDDLGym-jax 1.2-py3-none-any.whl → 1.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyRDDLGym_jax/__init__.py CHANGED
@@ -1 +1 @@
-__version__ = '1.2'
+__version__ = '1.3'
pyRDDLGym_jax/core/planner.py CHANGED
@@ -655,7 +655,10 @@ class JaxStraightLinePlan(JaxPlan):
                 if ranges[var] == 'bool':
                     param_flat = jnp.ravel(param)
                     if noop[var]:
-                        param_flat = (-param_flat) if wrap_sigmoid else 1.0 - param_flat
+                        if wrap_sigmoid:
+                            param_flat = -param_flat
+                        else:
+                            param_flat = 1.0 - param_flat
                     scores.append(param_flat)
             scores = jnp.concatenate(scores)
             descending = jnp.sort(scores)[::-1]
@@ -666,7 +669,10 @@ class JaxStraightLinePlan(JaxPlan):
                 new_params = {}
                 for (var, param) in params.items():
                     if ranges[var] == 'bool':
-                        new_param = param + (surplus if noop[var] else -surplus)
+                        if noop[var]:
+                            new_param = param + surplus
+                        else:
+                            new_param = param - surplus
                         new_param = _jax_project_bool_to_box(var, new_param, hyperparams)
                     else:
                         new_param = param
@@ -687,57 +693,73 @@ class JaxStraightLinePlan(JaxPlan):
         elif use_constraint_satisfaction and not self._use_new_projection:
 
             # calculate the surplus of actions above max-nondef-actions
-            def _jax_wrapped_sogbofa_surplus(params, hyperparams):
-                sum_action, count = 0.0, 0
-                for (var, param) in params.items():
+            def _jax_wrapped_sogbofa_surplus(actions):
+                sum_action, k = 0.0, 0
+                for (var, action) in actions.items():
                     if ranges[var] == 'bool':
-                        action = _jax_bool_param_to_action(var, param, hyperparams)
                         if noop[var]:
-                            sum_action += jnp.size(action) - jnp.sum(action)
-                            count += jnp.sum(action < 1)
-                        else:
-                            sum_action += jnp.sum(action)
-                            count += jnp.sum(action > 0)
+                            action = 1 - action
+                        sum_action += jnp.sum(action)
+                        k += jnp.count_nonzero(action)
                 surplus = jnp.maximum(sum_action - allowed_actions, 0.0)
-                count = jnp.maximum(count, 1)
-                return surplus / count
+                return surplus, k
 
             # return whether the surplus is positive or reached compute limit
             max_constraint_iter = self._max_constraint_iter
 
             def _jax_wrapped_sogbofa_continue(values):
-                it, _, _, surplus = values
-                return jnp.logical_and(it < max_constraint_iter, surplus > 0)
+                it, _, surplus, k = values
+                return jnp.logical_and(
+                    it < max_constraint_iter, jnp.logical_and(surplus > 0, k > 0))
 
             # reduce all bool action values by the surplus clipping at minimum
             # for no-op = True, do the opposite, i.e. increase all
             # bool action values by surplus clipping at maximum
             def _jax_wrapped_sogbofa_subtract_surplus(values):
-                it, params, hyperparams, surplus = values
-                new_params = {}
-                for (var, param) in params.items():
+                it, actions, surplus, k = values
+                amount = surplus / k
+                new_actions = {}
+                for (var, action) in actions.items():
                     if ranges[var] == 'bool':
-                        action = _jax_bool_param_to_action(var, param, hyperparams)
-                        new_action = action + (surplus if noop[var] else -surplus)
-                        new_action = jnp.clip(new_action, min_action, max_action)
-                        new_param = _jax_bool_action_to_param(var, new_action, hyperparams)
+                        if noop[var]:
+                            new_actions[var] = jnp.minimum(action + amount, 1)
+                        else:
+                            new_actions[var] = jnp.maximum(action - amount, 0)
                     else:
-                        new_param = param
-                    new_params[var] = new_param
-                new_surplus = _jax_wrapped_sogbofa_surplus(new_params, hyperparams)
+                        new_actions[var] = action
+                new_surplus, new_k = _jax_wrapped_sogbofa_surplus(new_actions)
                 new_it = it + 1
-                return new_it, new_params, hyperparams, new_surplus
+                return new_it, new_actions, new_surplus, new_k
 
             # apply the surplus to the actions until it becomes zero
             def _jax_wrapped_sogbofa_project(params, hyperparams):
-                surplus = _jax_wrapped_sogbofa_surplus(params, hyperparams)
-                _, params, _, surplus = jax.lax.while_loop(
+
+                # convert parameters to actions
+                actions = {}
+                for (var, param) in params.items():
+                    if ranges[var] == 'bool':
+                        actions[var] = _jax_bool_param_to_action(var, param, hyperparams)
+                    else:
+                        actions[var] = param
+
+                # run SOGBOFA loop on the actions to get adjusted actions
+                surplus, k = _jax_wrapped_sogbofa_surplus(actions)
+                _, actions, surplus, k = jax.lax.while_loop(
                     cond_fun=_jax_wrapped_sogbofa_continue,
                     body_fun=_jax_wrapped_sogbofa_subtract_surplus,
-                    init_val=(0, params, hyperparams, surplus)
+                    init_val=(0, actions, surplus, k)
                 )
                 converged = jnp.logical_not(surplus > 0)
-                return params, converged
+
+                # convert the adjusted actions back to parameters
+                new_params = {}
+                for (var, action) in actions.items():
+                    if ranges[var] == 'bool':
+                        action = jnp.clip(action, min_action, max_action)
+                        new_params[var] = _jax_bool_action_to_param(var, action, hyperparams)
+                    else:
+                        new_params[var] = action
+                return new_params, converged
 
             # clip actions to valid bounds and satisfy constraint on max actions
             def _jax_wrapped_slp_project_to_max_constraint(params, hyperparams):
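The reworked projection above operates directly in action space: it measures the surplus of total action mass over the budget, spreads that surplus evenly across the k currently active actions, subtracts, and repeats inside a `jax.lax.while_loop` until the surplus vanishes or no active actions remain. A minimal self-contained sketch of this pattern, simplified to the noop = False direction only (the function and variable names here are illustrative, not the library's API):

```python
import jax
import jax.numpy as jnp

def _surplus(actions, allowed_actions):
    # total action mass above the budget, and the number of active entries
    total = sum(jnp.sum(a) for a in actions.values())
    k = sum(jnp.count_nonzero(a) for a in actions.values())
    return jnp.maximum(total - allowed_actions, 0.0), k

def project_max_actions(actions, allowed_actions, max_iter=100):
    # iterate while surplus remains, some actions are still active,
    # and the compute budget is not exhausted
    def _continue(val):
        it, _, surplus, k = val
        return jnp.logical_and(
            it < max_iter, jnp.logical_and(surplus > 0, k > 0))

    def _subtract_surplus(val):
        it, acts, surplus, k = val
        amount = surplus / k   # spread the surplus evenly over active actions
        acts = {var: jnp.maximum(a - amount, 0.0) for (var, a) in acts.items()}
        surplus, k = _surplus(acts, allowed_actions)
        return it + 1, acts, surplus, k

    surplus, k = _surplus(actions, allowed_actions)
    _, actions, surplus, _ = jax.lax.while_loop(
        _continue, _subtract_surplus, (0, actions, surplus, k))
    return actions, jnp.logical_not(surplus > 0)

actions = {'put': jnp.array([0.7, 0.6, 0.2]), 'take': jnp.array([0.9])}
projected, converged = project_max_actions(actions, allowed_actions=2)
# total mass 2.4 exceeds the budget by 0.4; one pass subtracts 0.4 / 4 = 0.1
# from each active entry, giving [0.6, 0.5, 0.1] and [0.8], which sum to 2.0
```

Compared with the 1.2 code, the loop now carries actions rather than sigmoid parameters, so the parameter-to-action conversion happens once per projection instead of once per loop iteration.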
@@ -1415,6 +1437,7 @@ r"""
 
         # optimization
         self.update = self._jax_update(train_loss)
+        self.check_zero_grad = self._jax_check_zero_gradients()
 
     def _jax_return(self, use_symlog):
         gamma = self.rddl.discount
@@ -1497,6 +1520,18 @@ r"""
 
         return jax.jit(_jax_wrapped_plan_update)
 
+    def _jax_check_zero_gradients(self):
+
+        def _jax_wrapped_zero_gradient(grad):
+            return jnp.allclose(grad, 0)
+
+        def _jax_wrapped_zero_gradients(grad):
+            leaves, _ = jax.tree_util.tree_flatten(
+                jax.tree_map(_jax_wrapped_zero_gradient, grad))
+            return jnp.all(jnp.asarray(leaves))
+
+        return jax.jit(_jax_wrapped_zero_gradients)
+
     def _batched_init_subs(self, subs):
         rddl = self.rddl
         n_train, n_test = self.batch_size_train, self.batch_size_test
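The new `check_zero_grad` replaces the per-leaf NumPy test that version 1.2 ran inline in the optimization loop (see the hunks below) with a single jitted reduction over the gradient pytree. A minimal sketch of the same pattern on a toy gradient dictionary, assuming nothing beyond standard JAX pytree utilities:

```python
import jax
import jax.numpy as jnp

@jax.jit
def all_gradients_zero(grads):
    # map every leaf to a scalar bool, then reduce across all leaves
    leaves = jax.tree_util.tree_leaves(
        jax.tree_util.tree_map(lambda g: jnp.allclose(g, 0), grads))
    return jnp.all(jnp.asarray(leaves))

grads = {'w': jnp.zeros((3, 2)), 'b': jnp.zeros(2)}
print(all_gradients_zero(grads))        # True: every leaf is zero
grads['b'] = grads['b'].at[0].set(1e-3)
print(all_gradients_zero(grads))        # False: one leaf is nonzero
```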
@@ -1795,7 +1830,6 @@ r"""
         rolling_test_loss = RollingMean(test_rolling_window)
         log = {}
         status = JaxPlannerStatus.NORMAL
-        is_all_zero_fn = lambda x: np.allclose(x, 0)
 
         # initialize stopping criterion
         if stopping_rule is not None:
@@ -1836,9 +1870,7 @@ r"""
            # ==================================================================
 
            # no progress
-            grad_norm_zero, _ = jax.tree_util.tree_flatten(
-                jax.tree_map(is_all_zero_fn, train_log['grad']))
-            if np.all(grad_norm_zero):
+            if self.check_zero_grad(train_log['grad']):
                status = JaxPlannerStatus.NO_PROGRESS
 
            # constraint satisfaction problem
@@ -2035,8 +2067,8 @@ r"""
                # must be numeric array
                # exception is for POMDPs at 1st epoch when observ-fluents are None
                dtype = np.atleast_1d(values).dtype
-                if not jnp.issubdtype(dtype, jnp.number) \
-                and not jnp.issubdtype(dtype, jnp.bool_):
+                if not np.issubdtype(dtype, np.number) \
+                and not np.issubdtype(dtype, np.bool_):
                    if step == 0 and var in self.rddl.observ_fluents:
                        subs[var] = self.test_compiled.init_values[var]
                    else:
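The dtype being tested comes from `np.atleast_1d`, so it is a NumPy dtype and NumPy's own `issubdtype` is the natural check; in particular, an object dtype (such as an array holding `None` observ-fluents) is neither numeric nor boolean. A small illustration of the check in isolation:

```python
import numpy as np

numeric = np.atleast_1d([1.0, 2.0]).dtype    # float64
print(np.issubdtype(numeric, np.number))     # True: keep the values as-is

unset = np.atleast_1d([None]).dtype          # object dtype
print(np.issubdtype(unset, np.number)
      or np.issubdtype(unset, np.bool_))     # False: fall back to defaults
```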
@@ -2077,10 +2109,11 @@ def mean_variance_utility(returns: jnp.ndarray, beta: float) -> float:
 
 @jax.jit
 def cvar_utility(returns: jnp.ndarray, alpha: float) -> float:
-    alpha_mask = jax.lax.stop_gradient(
-        returns <= jnp.percentile(returns, q=100 * alpha))
-    return jnp.sum(returns * alpha_mask) / jnp.sum(alpha_mask)
-
+    var = jnp.percentile(returns, q=100 * alpha)
+    mask = returns <= var
+    weights = mask / jnp.maximum(1, jnp.sum(mask))
+    return jnp.sum(returns * weights)
+
 
 # ***********************************************************************
 # ALL VERSIONS OF CONTROLLERS
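The rewritten `cvar_utility` averages the worst alpha-fraction of returns, i.e. the returns at or below the empirical value-at-risk, and guards the division with `jnp.maximum(1, ...)` so an empty mask cannot divide by zero. A worked example on made-up returns:

```python
import jax.numpy as jnp

returns = jnp.array([-10., -2., 0., 3., 5., 8., 9., 12., 15., 20.])
alpha = 0.2

var = jnp.percentile(returns, q=100 * alpha)    # about -0.4 for this sample
mask = returns <= var                           # selects -10.0 and -2.0
weights = mask / jnp.maximum(1, jnp.sum(mask))  # each gets weight 1/2
print(jnp.sum(returns * weights))               # -6.0, mean of the worst 20%
```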
pyRDDLGym_jax-1.2.dist-info/METADATA → pyRDDLGym_jax-1.3.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: pyRDDLGym-jax
-Version: 1.2
+Version: 1.3
 Summary: pyRDDLGym-jax: automatic differentiation for solving sequential planning problems in JAX.
 Home-page: https://github.com/pyrddlgym-project/pyRDDLGym-jax
 Author: Michael Gimelfarb, Ayal Taitler, Scott Sanner
pyRDDLGym_jax-1.2.dist-info/RECORD → pyRDDLGym_jax-1.3.dist-info/RECORD RENAMED
@@ -1,9 +1,9 @@
-pyRDDLGym_jax/__init__.py,sha256=LTT-ZpL6vrKdC5t0O71pJnk3zMhDf1eXkNmoLoIRupo,19
+pyRDDLGym_jax/__init__.py,sha256=p_veRZMP15-djJyMuDHT7Ul1RbCCHpYsZ9LO0GD1URo,19
 pyRDDLGym_jax/entry_point.py,sha256=dxDlO_5gneEEViwkLCg30Z-KVzUgdRXaKuFjoZklkA0,974
 pyRDDLGym_jax/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pyRDDLGym_jax/core/compiler.py,sha256=qy1TSivdpuZxWecDl5HEM0PXX45JB7DHzV7uAB8kmbE,88696
 pyRDDLGym_jax/core/logic.py,sha256=iYvLgWyQd_mrkwwoeRWao9NzjmhsObQnPq4DphILw1Q,38425
-pyRDDLGym_jax/core/planner.py,sha256=oKs9js7xyIc9-bxQFZSQNBw9s1nWQlz4DjENwEgSojY,100672
+pyRDDLGym_jax/core/planner.py,sha256=TFFy91aCzRW600k_eP-7i2Gvp9wpNVjXlXtBnt9x03M,101744
 pyRDDLGym_jax/core/simulator.py,sha256=JpmwfPqYPBfEhmQ04ufBeclZOQ-U1ZiyAtLf1AIwO2M,8462
 pyRDDLGym_jax/core/tuning.py,sha256=LBhoVQZWWhYQj89gpM2B4xVHlYlKDt4psw4Be9cBbSY,23685
 pyRDDLGym_jax/core/visualization.py,sha256=uKhC8z0TeX9BklPNoxSVt0g5pkqhgxrQClQAih78ybY,68292
@@ -41,9 +41,9 @@ pyRDDLGym_jax/examples/configs/default_slp.cfg,sha256=mJo0woDevhQCSQfJg30ULVy9qG
 pyRDDLGym_jax/examples/configs/tuning_drp.cfg,sha256=CQMpSCKTkGioO7U82mHMsYWFRsutULx0V6Wrl3YzV2U,504
 pyRDDLGym_jax/examples/configs/tuning_replan.cfg,sha256=m_0nozFg_GVld0tGv92Xao_KONFJDq_vtiJKt5isqI8,501
 pyRDDLGym_jax/examples/configs/tuning_slp.cfg,sha256=KHu8II6CA-h_HblwvWHylNRjSvvGS3VHxN7JQNR4p_Q,464
-pyRDDLGym_jax-1.2.dist-info/LICENSE,sha256=Y0Gi6H6mLOKN-oIKGZulQkoTJyPZeAaeuZu7FXH-meg,1095
-pyRDDLGym_jax-1.2.dist-info/METADATA,sha256=oWVOtC5AvAm2Xvdd507gXr3b6_aZLaH7LnOj6hADdgQ,15090
-pyRDDLGym_jax-1.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-pyRDDLGym_jax-1.2.dist-info/entry_points.txt,sha256=Q--z9QzqDBz1xjswPZ87PU-pib-WPXx44hUWAFoBGBA,59
-pyRDDLGym_jax-1.2.dist-info/top_level.txt,sha256=n_oWkP_BoZK0VofvPKKmBZ3NPk86WFNvLhi1BktCbVQ,14
-pyRDDLGym_jax-1.2.dist-info/RECORD,,
+pyRDDLGym_jax-1.3.dist-info/LICENSE,sha256=Y0Gi6H6mLOKN-oIKGZulQkoTJyPZeAaeuZu7FXH-meg,1095
+pyRDDLGym_jax-1.3.dist-info/METADATA,sha256=Colu-byYJ4RF5sr1qOVKg9VhCbrLnv32OvHt_A9KtLE,15090
+pyRDDLGym_jax-1.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+pyRDDLGym_jax-1.3.dist-info/entry_points.txt,sha256=Q--z9QzqDBz1xjswPZ87PU-pib-WPXx44hUWAFoBGBA,59
+pyRDDLGym_jax-1.3.dist-info/top_level.txt,sha256=n_oWkP_BoZK0VofvPKKmBZ3NPk86WFNvLhi1BktCbVQ,14
+pyRDDLGym_jax-1.3.dist-info/RECORD,,