PyPI - pyRDDLGym-jax - Versions diffs - 1.3__py3-none-any.whl → 2.1__py3-none-any.whl - Mend

pyRDDLGym-jax 1.3__py3-none-any.whl → 2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

pyRDDLGym_jax/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = '1.3'
1	+ __version__ = '2.1'

pyRDDLGym_jax/core/compiler.py CHANGED Viewed

@@ -1,3 +1,18 @@
+# ***********************************************************************
+# JAXPLAN
+#
+# Author: Michael Gimelfarb
+#
+# REFERENCES:
+#
+# [1] Gimelfarb, Michael, Ayal Taitler, and Scott Sanner. "JaxPlan and GurobiPlan:
+# Optimization Baselines for Replanning in Discrete and Mixed Discrete-Continuous
+# Probabilistic Domains." Proceedings of the International Conference on Automated
+# Planning and Scheduling. Vol. 34. 2024.
+#
+# ***********************************************************************
 from functools import partial
 import traceback
 from typing import Any, Callable, Dict, List, Optional
@@ -5,7 +20,6 @@ from typing import Any, Callable, Dict, List, Optional
 import jax
 import jax.numpy as jnp
 import jax.random as random
-import jax.scipy as scipy
 from pyRDDLGym.core.compiler.initializer import RDDLValueInitializer
 from pyRDDLGym.core.compiler.levels import RDDLLevelAnalysis
@@ -28,8 +42,7 @@ try:
     from tensorflow_probability.substrates import jax as tfp
 except Exception:
     raise_warning('Failed to import tensorflow-probability: '
-                  'compilation of some complex distributions '
-                  '(Binomial, Negative-Binomial, Multinomial) will fail.', 'red')
+                  'compilation of some probability distributions will fail.', 'red')
     traceback.print_exc()
     tfp = None
@@ -39,102 +52,6 @@ class JaxRDDLCompiler:
     All operations are identical to their numpy equivalents.
     '''
-    MODEL_PARAM_TAG_SEPARATOR = '___'
-    # ===========================================================================
-    # EXACT RDDL TO JAX COMPILATION RULES BY DEFAULT
-    # ===========================================================================
-    @staticmethod
-    def wrap_logic(func):
-        def exact_func(id, init_params):
-            return func
-        return exact_func
-    EXACT_RDDL_TO_JAX_NEGATIVE = wrap_logic.__func__(ExactLogic.exact_unary_function(jnp.negative))
-    EXACT_RDDL_TO_JAX_ARITHMETIC = {
-        '+': wrap_logic.__func__(ExactLogic.exact_binary_function(jnp.add)),
-        '-': wrap_logic.__func__(ExactLogic.exact_binary_function(jnp.subtract)),
-        '*': wrap_logic.__func__(ExactLogic.exact_binary_function(jnp.multiply)),
-        '/': wrap_logic.__func__(ExactLogic.exact_binary_function(jnp.divide))
-    }
-    EXACT_RDDL_TO_JAX_RELATIONAL = {
-        '>=': wrap_logic.__func__(ExactLogic.exact_binary_function(jnp.greater_equal)),
-        '<=': wrap_logic.__func__(ExactLogic.exact_binary_function(jnp.less_equal)),
-        '<': wrap_logic.__func__(ExactLogic.exact_binary_function(jnp.less)),
-        '>': wrap_logic.__func__(ExactLogic.exact_binary_function(jnp.greater)),
-        '==': wrap_logic.__func__(ExactLogic.exact_binary_function(jnp.equal)),
-        '~=': wrap_logic.__func__(ExactLogic.exact_binary_function(jnp.not_equal))
-    }
-    EXACT_RDDL_TO_JAX_LOGICAL_NOT = wrap_logic.__func__(ExactLogic.exact_unary_function(jnp.logical_not))
-    EXACT_RDDL_TO_JAX_LOGICAL = {
-        '^': wrap_logic.__func__(ExactLogic.exact_binary_function(jnp.logical_and)),
-        '&': wrap_logic.__func__(ExactLogic.exact_binary_function(jnp.logical_and)),
-        '|': wrap_logic.__func__(ExactLogic.exact_binary_function(jnp.logical_or)),
-        '~': wrap_logic.__func__(ExactLogic.exact_binary_function(jnp.logical_xor)),
-        '=>': wrap_logic.__func__(ExactLogic.exact_binary_implies),
-        '<=>': wrap_logic.__func__(ExactLogic.exact_binary_function(jnp.equal))
-    }
-    EXACT_RDDL_TO_JAX_AGGREGATION = {
-        'sum': wrap_logic.__func__(ExactLogic.exact_aggregation(jnp.sum)),
-        'avg': wrap_logic.__func__(ExactLogic.exact_aggregation(jnp.mean)),
-        'prod': wrap_logic.__func__(ExactLogic.exact_aggregation(jnp.prod)),
-        'minimum': wrap_logic.__func__(ExactLogic.exact_aggregation(jnp.min)),
-        'maximum': wrap_logic.__func__(ExactLogic.exact_aggregation(jnp.max)),
-        'forall': wrap_logic.__func__(ExactLogic.exact_aggregation(jnp.all)),
-        'exists': wrap_logic.__func__(ExactLogic.exact_aggregation(jnp.any)),
-        'argmin': wrap_logic.__func__(ExactLogic.exact_aggregation(jnp.argmin)),
-        'argmax': wrap_logic.__func__(ExactLogic.exact_aggregation(jnp.argmax))
-    }
-    EXACT_RDDL_TO_JAX_UNARY = {
-        'abs': wrap_logic.__func__(ExactLogic.exact_unary_function(jnp.abs)),
-        'sgn': wrap_logic.__func__(ExactLogic.exact_unary_function(jnp.sign)),
-        'round': wrap_logic.__func__(ExactLogic.exact_unary_function(jnp.round)),
-        'floor': wrap_logic.__func__(ExactLogic.exact_unary_function(jnp.floor)),
-        'ceil': wrap_logic.__func__(ExactLogic.exact_unary_function(jnp.ceil)),
-        'cos': wrap_logic.__func__(ExactLogic.exact_unary_function(jnp.cos)),
-        'sin': wrap_logic.__func__(ExactLogic.exact_unary_function(jnp.sin)),
-        'tan': wrap_logic.__func__(ExactLogic.exact_unary_function(jnp.tan)),
-        'acos': wrap_logic.__func__(ExactLogic.exact_unary_function(jnp.arccos)),
-        'asin': wrap_logic.__func__(ExactLogic.exact_unary_function(jnp.arcsin)),
-        'atan': wrap_logic.__func__(ExactLogic.exact_unary_function(jnp.arctan)),
-        'cosh': wrap_logic.__func__(ExactLogic.exact_unary_function(jnp.cosh)),
-        'sinh': wrap_logic.__func__(ExactLogic.exact_unary_function(jnp.sinh)),
-        'tanh': wrap_logic.__func__(ExactLogic.exact_unary_function(jnp.tanh)),
-        'exp': wrap_logic.__func__(ExactLogic.exact_unary_function(jnp.exp)),
-        'ln': wrap_logic.__func__(ExactLogic.exact_unary_function(jnp.log)),
-        'sqrt': wrap_logic.__func__(ExactLogic.exact_unary_function(jnp.sqrt)),
-        'lngamma': wrap_logic.__func__(ExactLogic.exact_unary_function(scipy.special.gammaln)),
-        'gamma': wrap_logic.__func__(ExactLogic.exact_unary_function(scipy.special.gamma))
-    }
-    @staticmethod
-    def _jax_wrapped_calc_log_exact(x, y, params):
-        return jnp.log(x) / jnp.log(y), params
-    EXACT_RDDL_TO_JAX_BINARY = {
-        'div': wrap_logic.__func__(ExactLogic.exact_binary_function(jnp.floor_divide)),
-        'mod': wrap_logic.__func__(ExactLogic.exact_binary_function(jnp.mod)),
-        'fmod': wrap_logic.__func__(ExactLogic.exact_binary_function(jnp.mod)),
-        'min': wrap_logic.__func__(ExactLogic.exact_binary_function(jnp.minimum)),
-        'max': wrap_logic.__func__(ExactLogic.exact_binary_function(jnp.maximum)),
-        'pow': wrap_logic.__func__(ExactLogic.exact_binary_function(jnp.power)),
-        'log': wrap_logic.__func__(_jax_wrapped_calc_log_exact.__func__),
-        'hypot': wrap_logic.__func__(ExactLogic.exact_binary_function(jnp.hypot)),
-    }
-    EXACT_RDDL_TO_JAX_IF = wrap_logic.__func__(ExactLogic.exact_if_then_else)
-    EXACT_RDDL_TO_JAX_SWITCH = wrap_logic.__func__(ExactLogic.exact_switch)
-    EXACT_RDDL_TO_JAX_BERNOULLI = wrap_logic.__func__(ExactLogic.exact_bernoulli)
-    EXACT_RDDL_TO_JAX_DISCRETE = wrap_logic.__func__(ExactLogic.exact_discrete)
-    EXACT_RDDL_TO_JAX_POISSON = wrap_logic.__func__(ExactLogic.exact_poisson)
-    EXACT_RDDL_TO_JAX_GEOMETRIC = wrap_logic.__func__(ExactLogic.exact_geometric)
     def __init__(self, rddl: RDDLLiftedModel,
                  allow_synchronous_state: bool=True,
                  logger: Optional[Logger]=None,
@@ -174,8 +91,7 @@ class JaxRDDLCompiler:
         self.init_values = initializer.initialize()
         # compute dependency graph for CPFs and sort them by evaluation order
-        sorter = RDDLLevelAnalysis(
-            rddl, allow_synchronous_state=allow_synchronous_state)
+        sorter = RDDLLevelAnalysis(rddl, allow_synchronous_state=allow_synchronous_state)
         self.levels = sorter.compute_levels()
         # trace expressions to cache information to be used later
@@ -187,28 +103,17 @@ class JaxRDDLCompiler:
             rddl=self.rddl,
             init_values=self.init_values,
             levels=self.levels,
-            trace_info=self.traced)
+            trace_info=self.traced
+        )
         constraints = RDDLConstraints(simulator, vectorized=True)
         self.constraints = constraints
         # basic operations - these can be override in subclasses
         self.compile_non_fluent_exact = compile_non_fluent_exact
-        self.NEGATIVE = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_NEGATIVE
-        self.ARITHMETIC_OPS = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_ARITHMETIC.copy()
-        self.RELATIONAL_OPS = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_RELATIONAL.copy()
-        self.LOGICAL_NOT = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_LOGICAL_NOT
-        self.LOGICAL_OPS = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_LOGICAL.copy()
-        self.AGGREGATION_OPS = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_AGGREGATION.copy()
         self.AGGREGATION_BOOL = {'forall', 'exists'}
-        self.KNOWN_UNARY = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_UNARY.copy()
-        self.KNOWN_BINARY = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_BINARY.copy()
-        self.IF_HELPER = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_IF
-        self.SWITCH_HELPER = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_SWITCH
-        self.BERNOULLI_HELPER = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_BERNOULLI
-        self.DISCRETE_HELPER = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_DISCRETE
-        self.POISSON_HELPER = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_POISSON
-        self.GEOMETRIC_HELPER = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_GEOMETRIC
+        self.EXACT_OPS = ExactLogic(use64bit=self.use64bit).get_operator_dicts()
+        self.OPS = self.EXACT_OPS
     # ===========================================================================
     # main compilation subroutines
     # ===========================================================================
@@ -377,7 +282,8 @@ class JaxRDDLCompiler:
         # compile constraint information
         if constraint_func:
-            inequality_fns, equality_fns = self._jax_nonlinear_constraints(init_params_constr)
+            inequality_fns, equality_fns = self._jax_nonlinear_constraints(
+                init_params_constr)
         else:
             inequality_fns, equality_fns = None, None
@@ -524,7 +430,7 @@ class JaxRDDLCompiler:
                 _jax_wrapped_single_step_policy,
                 in_axes=(0, None, None, None, 0, None)
             )(keys, policy_params, hyperparams, step, subs, model_params)
-            model_params = jax.tree_map(lambda x: jnp.mean(x, axis=0), model_params)
+            model_params = jax.tree_map(partial(jnp.mean, axis=0), model_params)
             carry = (key, policy_params, hyperparams, subs, model_params)
             return carry, log
@@ -571,7 +477,11 @@ class JaxRDDLCompiler:
         for (id, value) in self.model_params.items():
             expr_id = int(str(id).split('_')[0])
             expr = self.traced.lookup(expr_id)
-            result[id] = {'id': expr_id, 'rddl_op': ' '.join(expr.etype), 'init_value': value}
+            result[id] = {
+                'id': expr_id,
+                'rddl_op': ' '.join(expr.etype),
+                'init_value': value
+            }
         return result
     @staticmethod
@@ -722,7 +632,7 @@ class JaxRDDLCompiler:
         return _jax_wrapped_cast
     def _fix_dtype(self, value):
-        dtype = jnp.atleast_1d(value).dtype
+        dtype = jnp.result_type(value)
         if jnp.issubdtype(dtype, jnp.integer):
             return self.INT
         elif jnp.issubdtype(dtype, jnp.floating):
@@ -870,11 +780,11 @@ class JaxRDDLCompiler:
         # if expression is non-fluent, always use the exact operation
         if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(expr):
-            valid_ops = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_ARITHMETIC
-            negative_op = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_NEGATIVE
+            valid_ops = self.EXACT_OPS['arithmetic']
+            negative_op = self.EXACT_OPS['negative']
         else:
-            valid_ops = self.ARITHMETIC_OPS
-            negative_op = self.NEGATIVE
+            valid_ops = self.OPS['arithmetic']
+            negative_op = self.OPS['negative']
         JaxRDDLCompiler._check_valid_op(expr, valid_ops)
         # recursively compile arguments
@@ -901,9 +811,9 @@ class JaxRDDLCompiler:
         # if expression is non-fluent, always use the exact operation
         if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(expr):
-            valid_ops = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_RELATIONAL
+            valid_ops = self.EXACT_OPS['relational']
         else:
-            valid_ops = self.RELATIONAL_OPS
+            valid_ops = self.OPS['relational']
         JaxRDDLCompiler._check_valid_op(expr, valid_ops)
         # recursively compile arguments
@@ -919,11 +829,11 @@ class JaxRDDLCompiler:
         # if expression is non-fluent, always use the exact operation
         if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(expr):
-            valid_ops = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_LOGICAL
-            logical_not_op = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_LOGICAL_NOT
+            valid_ops = self.EXACT_OPS['logical']
+            logical_not_op = self.EXACT_OPS['logical_not']
         else:
-            valid_ops = self.LOGICAL_OPS
-            logical_not_op = self.LOGICAL_NOT
+            valid_ops = self.OPS['logical']
+            logical_not_op = self.OPS['logical_not']
         JaxRDDLCompiler._check_valid_op(expr, valid_ops)
         # recursively compile arguments
@@ -951,9 +861,9 @@ class JaxRDDLCompiler:
         # if expression is non-fluent, always use the exact operation
         if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(expr):
-            valid_ops = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_AGGREGATION
+            valid_ops = self.EXACT_OPS['aggregation']
         else:
-            valid_ops = self.AGGREGATION_OPS
+            valid_ops = self.OPS['aggregation']
         JaxRDDLCompiler._check_valid_op(expr, valid_ops)
         is_floating = op not in self.AGGREGATION_BOOL
@@ -980,11 +890,11 @@ class JaxRDDLCompiler:
         # if expression is non-fluent, always use the exact operation
         if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(expr):
-            unary_ops = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_UNARY
-            binary_ops = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_BINARY
+            unary_ops = self.EXACT_OPS['unary']
+            binary_ops = self.EXACT_OPS['binary']
         else:
-            unary_ops = self.KNOWN_UNARY
-            binary_ops = self.KNOWN_BINARY
+            unary_ops = self.OPS['unary']
+            binary_ops = self.OPS['binary']
         # recursively compile arguments
         if op in unary_ops:
@@ -1026,9 +936,9 @@ class JaxRDDLCompiler:
         # if predicate is non-fluent, always use the exact operation
         if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(pred):
-            if_op = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_IF
+            if_op = self.EXACT_OPS['control']['if']
         else:
-            if_op = self.IF_HELPER
+            if_op = self.OPS['control']['if']
         jax_op = if_op(expr.id, init_params)
         # recursively compile arguments
@@ -1054,9 +964,9 @@ class JaxRDDLCompiler:
         # if predicate is non-fluent, always use the exact operation
         # case conditions are currently only literals so they are non-fluent
         if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(pred):
-            switch_op = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_SWITCH
+            switch_op = self.EXACT_OPS['control']['switch']
         else:
-            switch_op = self.SWITCH_HELPER
+            switch_op = self.OPS['control']['switch']
         jax_op = switch_op(expr.id, init_params)
         # recursively compile predicate
@@ -1078,8 +988,7 @@ class JaxRDDLCompiler:
             for (i, jax_case) in enumerate(jax_cases):
                 sample_cases[i], key, err_case, params = jax_case(x, params, key)
                 err |= err_case
-            sample_cases = jnp.asarray(
-                sample_cases, dtype=self._fix_dtype(sample_cases))
+            sample_cases = jnp.asarray(sample_cases, dtype=self._fix_dtype(sample_cases))
             # predicate (enum) is an integer - use it to extract from case array
             sample, params = jax_op(sample_pred, sample_cases, params)
@@ -1098,6 +1007,7 @@ class JaxRDDLCompiler:
     # Bernoulli: complete (subclass uses Gumbel-softmax)
     # Normal: complete
     # Exponential: complete
+    # Geometric: complete
     # Weibull: complete
     # Pareto: complete
     # Gumbel: complete
@@ -1110,14 +1020,18 @@ class JaxRDDLCompiler:
     # Discrete(p): complete (subclass uses Gumbel-softmax)
     # UnnormDiscrete(p): complete (subclass uses Gumbel-softmax)
+    # distributions which seem to support backpropagation (need more testing):
+    # Beta
+    # Student
+    # Gamma
+    # ChiSquare
+    # Dirichlet
+    # Poisson (subclass uses Gumbel-softmax or Poisson process trick)
     # distributions with incomplete reparameterization support (TODO):
-    # Binomial: (use truncation and Gumbel-softmax)
-    # NegativeBinomial: (no reparameterization)
-    # Poisson: (use truncation and Gumbel-softmax)
-    # Gamma, ChiSquare: (no shape reparameterization)
-    # Beta: (no reparameterization)
-    # Geometric: (implement safe floor)
-    # Student: (no reparameterization)
+    # Binomial
+    # NegativeBinomial
+    # Multinomial
     def _jax_random(self, expr, init_params):
         _, name = expr.etype
@@ -1173,8 +1087,7 @@ class JaxRDDLCompiler:
             return self._jax_discrete_pvar(expr, init_params, unnorm=True)
         else:
             raise RDDLNotImplementedError(
-                f'Distribution {name} is not supported.\n' +
-                print_stack_trace(expr))
+                f'Distribution {name} is not supported.\n' + print_stack_trace(expr))
     def _jax_kron(self, expr, init_params):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_KRON_DELTA']
@@ -1251,8 +1164,7 @@ class JaxRDDLCompiler:
         def _jax_wrapped_distribution_exp(x, params, key):
             scale, key, err, params = jax_scale(x, params, key)
             key, subkey = random.split(key)
-            Exp1 = random.exponential(
-                key=subkey, shape=jnp.shape(scale), dtype=self.REAL)
+            Exp1 = random.exponential(key=subkey, shape=jnp.shape(scale), dtype=self.REAL)
             sample = scale * Exp1
             out_of_bounds = jnp.logical_not(jnp.all(scale > 0))
             err |= (out_of_bounds * ERR)
@@ -1273,8 +1185,8 @@ class JaxRDDLCompiler:
             shape, key, err1, params = jax_shape(x, params, key)
             scale, key, err2, params = jax_scale(x, params, key)
             key, subkey = random.split(key)
-            U = random.uniform(key=subkey, shape=jnp.shape(scale), dtype=self.REAL)
-            sample = scale * jnp.power(-jnp.log(U), 1.0 / shape)
+            sample = random.weibull_min(
+                key=subkey, scale=scale, concentration=shape, dtype=self.REAL)
             out_of_bounds = jnp.logical_not(jnp.all((shape > 0) & (scale > 0)))
             err = err1 | err2 | (out_of_bounds * ERR)
             return sample, key, err, params
@@ -1288,9 +1200,9 @@ class JaxRDDLCompiler:
         # if probability is non-fluent, always use the exact operation
         if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(arg_prob):
-            bern_op = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_BERNOULLI
+            bern_op = self.EXACT_OPS['sampling']['Bernoulli']
         else:
-            bern_op = self.BERNOULLI_HELPER
+            bern_op = self.OPS['sampling']['Bernoulli']
         jax_op = bern_op(expr.id, init_params)
         # recursively compile arguments
@@ -1313,9 +1225,9 @@ class JaxRDDLCompiler:
         # if rate is non-fluent, always use the exact operation
         if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(arg_rate):
-            poisson_op = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_POISSON
+            poisson_op = self.EXACT_OPS['sampling']['Poisson']
         else:
-            poisson_op = self.POISSON_HELPER
+            poisson_op = self.OPS['sampling']['Poisson']
         jax_op = poisson_op(expr.id, init_params)
         # recursively compile arguments
@@ -1326,7 +1238,6 @@ class JaxRDDLCompiler:
             rate, key, err, params = jax_rate(x, params, key)
             key, subkey = random.split(key)
             sample, params = jax_op(subkey, rate, params)
-            sample = sample.astype(self.INT)
             out_of_bounds = jnp.logical_not(jnp.all(rate >= 0))
             err |= (out_of_bounds * ERR)
             return sample, key, err, params
@@ -1358,20 +1269,26 @@ class JaxRDDLCompiler:
     def _jax_binomial(self, expr, init_params):
         ERR = JaxRDDLCompiler.ERROR_CODES['INVALID_PARAM_BINOMIAL']
         JaxRDDLCompiler._check_num_args(expr, 2)
         arg_trials, arg_prob = expr.args
+        # if prob is non-fluent, always use the exact operation
+        if self.compile_non_fluent_exact \
+        and not self.traced.cached_is_fluent(arg_trials) \
+        and not self.traced.cached_is_fluent(arg_prob):
+            bin_op = self.EXACT_OPS['sampling']['Binomial']
+        else:
+            bin_op = self.OPS['sampling']['Binomial']
+        jax_op = bin_op(expr.id, init_params)
         jax_trials = self._jax(arg_trials, init_params)
         jax_prob = self._jax(arg_prob, init_params)
-        # uses the JAX substrate of tensorflow-probability
+        # uses reduction for constant trials
         def _jax_wrapped_distribution_binomial(x, params, key):
             trials, key, err2, params = jax_trials(x, params, key)
             prob, key, err1, params = jax_prob(x, params, key)
-            trials = jnp.asarray(trials, dtype=self.REAL)
-            prob = jnp.asarray(prob, dtype=self.REAL)
             key, subkey = random.split(key)
-            dist = tfp.distributions.Binomial(total_count=trials, probs=prob)
-            sample = dist.sample(seed=subkey).astype(self.INT)
+            sample, params = jax_op(subkey, trials, prob, params)
             out_of_bounds = jnp.logical_not(jnp.all(
                 (prob >= 0) & (prob <= 1) & (trials >= 0)))
             err = err1 | err2 | (out_of_bounds * ERR)
@@ -1395,10 +1312,9 @@ class JaxRDDLCompiler:
             prob = jnp.asarray(prob, dtype=self.REAL)
             key, subkey = random.split(key)
             dist = tfp.distributions.NegativeBinomial(total_count=trials, probs=prob)
-            sample = dist.sample(seed=subkey).astype(self.INT)
+            sample = jnp.asarray(dist.sample(seed=subkey), dtype=self.INT)
             out_of_bounds = jnp.logical_not(jnp.all(
-                (prob >= 0) & (prob <= 1) & (trials > 0))
-            )
+                (prob >= 0) & (prob <= 1) & (trials > 0)))
             err = err1 | err2 | (out_of_bounds * ERR)
             return sample, key, err, params
@@ -1431,9 +1347,9 @@ class JaxRDDLCompiler:
         # if prob is non-fluent, always use the exact operation
         if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(arg_prob):
-            geom_op = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_GEOMETRIC
+            geom_op = self.EXACT_OPS['sampling']['Geometric']
         else:
-            geom_op = self.GEOMETRIC_HELPER
+            geom_op = self.OPS['sampling']['Geometric']
         jax_op = geom_op(expr.id, init_params)
         # recursively compile arguments
@@ -1443,7 +1359,6 @@ class JaxRDDLCompiler:
             prob, key, err, params = jax_prob(x, params, key)
             key, subkey = random.split(key)
             sample, params = jax_op(subkey, prob, params)
-            sample = sample.astype(self.INT)
             out_of_bounds = jnp.logical_not(jnp.all((prob >= 0) & (prob <= 1)))
             err |= (out_of_bounds * ERR)
             return sample, key, err, params
@@ -1482,8 +1397,7 @@ class JaxRDDLCompiler:
         def _jax_wrapped_distribution_t(x, params, key):
             df, key, err, params = jax_df(x, params, key)
             key, subkey = random.split(key)
-            sample = random.t(
-                key=subkey, df=df, shape=jnp.shape(df), dtype=self.REAL)
+            sample = random.t(key=subkey, df=df, shape=jnp.shape(df), dtype=self.REAL)
             out_of_bounds = jnp.logical_not(jnp.all(df > 0))
             err |= (out_of_bounds * ERR)
             return sample, key, err, params
@@ -1503,8 +1417,7 @@ class JaxRDDLCompiler:
             mean, key, err1, params = jax_mean(x, params, key)
             scale, key, err2, params = jax_scale(x, params, key)
             key, subkey = random.split(key)
-            Gumbel01 = random.gumbel(
-                key=subkey, shape=jnp.shape(mean), dtype=self.REAL)
+            Gumbel01 = random.gumbel(key=subkey, shape=jnp.shape(mean), dtype=self.REAL)
             sample = mean + scale * Gumbel01
             out_of_bounds = jnp.logical_not(jnp.all(scale > 0))
             err = err1 | err2 | (out_of_bounds * ERR)
@@ -1525,8 +1438,7 @@ class JaxRDDLCompiler:
             mean, key, err1, params = jax_mean(x, params, key)
             scale, key, err2, params = jax_scale(x, params, key)
             key, subkey = random.split(key)
-            Laplace01 = random.laplace(
-                key=subkey, shape=jnp.shape(mean), dtype=self.REAL)
+            Laplace01 = random.laplace(key=subkey, shape=jnp.shape(mean), dtype=self.REAL)
             sample = mean + scale * Laplace01
             out_of_bounds = jnp.logical_not(jnp.all(scale > 0))
             err = err1 | err2 | (out_of_bounds * ERR)
@@ -1547,8 +1459,7 @@ class JaxRDDLCompiler:
             mean, key, err1, params = jax_mean(x, params, key)
             scale, key, err2, params = jax_scale(x, params, key)
             key, subkey = random.split(key)
-            Cauchy01 = random.cauchy(
-                key=subkey, shape=jnp.shape(mean), dtype=self.REAL)
+            Cauchy01 = random.cauchy(key=subkey, shape=jnp.shape(mean), dtype=self.REAL)
             sample = mean + scale * Cauchy01
             out_of_bounds = jnp.logical_not(jnp.all(scale > 0))
             err = err1 | err2 | (out_of_bounds * ERR)
@@ -1570,7 +1481,7 @@ class JaxRDDLCompiler:
             scale, key, err2, params = jax_scale(x, params, key)
             key, subkey = random.split(key)
             U = random.uniform(key=subkey, shape=jnp.shape(scale), dtype=self.REAL)
-            sample = jnp.log(1.0 - jnp.log(U) / shape) / scale
+            sample = jnp.log(1.0 - jnp.log1p(-U) / shape) / scale
             out_of_bounds = jnp.logical_not(jnp.all((shape > 0) & (scale > 0)))
             err = err1 | err2 | (out_of_bounds * ERR)
             return sample, key, err, params
@@ -1631,9 +1542,9 @@ class JaxRDDLCompiler:
         has_fluent_arg = any(self.traced.cached_is_fluent(arg)
                              for arg in ordered_args)
         if self.compile_non_fluent_exact and not has_fluent_arg:
-            discrete_op = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_DISCRETE
+            discrete_op = self.EXACT_OPS['sampling']['Discrete']
         else:
-            discrete_op = self.DISCRETE_HELPER
+            discrete_op = self.OPS['sampling']['Discrete']
         jax_op = discrete_op(expr.id, init_params)
         # compile probability expressions
@@ -1672,9 +1583,9 @@ class JaxRDDLCompiler:
         # if probabilities are non-fluent, then always sample exact
         if self.compile_non_fluent_exact and not self.traced.cached_is_fluent(arg):
-            discrete_op = JaxRDDLCompiler.EXACT_RDDL_TO_JAX_DISCRETE
+            discrete_op = self.EXACT_OPS['sampling']['Discrete']
         else:
-            discrete_op = self.DISCRETE_HELPER
+            discrete_op = self.OPS['sampling']['Discrete']
         jax_op = discrete_op(expr.id, init_params)
         # compile probability function
@@ -1716,8 +1627,7 @@ class JaxRDDLCompiler:
             return self._jax_multinomial(expr, init_params)
         else:
             raise RDDLNotImplementedError(
-                f'Distribution {name} is not supported.\n' +
-                print_stack_trace(expr))
+                f'Distribution {name} is not supported.\n' + print_stack_trace(expr))
     def _jax_multivariate_normal(self, expr, init_params):
         _, args = expr.args
@@ -1771,7 +1681,7 @@ class JaxRDDLCompiler:
             # sample StudentT(0, 1, df) -- broadcast df to same shape as cov
             sample_df = sample_df[..., jnp.newaxis, jnp.newaxis]
-            sample_df = jnp.broadcast_to(sample_df, shape=sample_mean.shape + (1,))
+            sample_df = jnp.broadcast_to(sample_df, shape=jnp.shape(sample_mean) + (1,))
             key, subkey = random.split(key)
             Z = random.t(
                 key=subkey,
@@ -1826,7 +1736,7 @@ class JaxRDDLCompiler:
             prob = jnp.asarray(prob, dtype=self.REAL)
             key, subkey = random.split(key)
             dist = tfp.distributions.Multinomial(total_count=trials, probs=prob)
-            sample = dist.sample(seed=subkey).astype(self.INT)
+            sample = jnp.asarray(dist.sample(seed=subkey), dtype=self.INT)
             sample = jnp.moveaxis(sample, source=-1, destination=index)
             out_of_bounds = jnp.logical_not(jnp.all(
                 (prob >= 0)

pyRDDLGym-jax 1.3__py3-none-any.whl → 2.1__py3-none-any.whl

pyRDDLGym-jax 1.3__py3-none-any.whl → 2.1__py3-none-any.whl