pymc-extras 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. pymc_extras/__init__.py +29 -0
  2. pymc_extras/distributions/__init__.py +40 -0
  3. pymc_extras/distributions/continuous.py +351 -0
  4. pymc_extras/distributions/discrete.py +399 -0
  5. pymc_extras/distributions/histogram_utils.py +163 -0
  6. pymc_extras/distributions/multivariate/__init__.py +3 -0
  7. pymc_extras/distributions/multivariate/r2d2m2cp.py +446 -0
  8. pymc_extras/distributions/timeseries.py +356 -0
  9. pymc_extras/gp/__init__.py +18 -0
  10. pymc_extras/gp/latent_approx.py +183 -0
  11. pymc_extras/inference/__init__.py +18 -0
  12. pymc_extras/inference/find_map.py +431 -0
  13. pymc_extras/inference/fit.py +44 -0
  14. pymc_extras/inference/laplace.py +570 -0
  15. pymc_extras/inference/pathfinder.py +134 -0
  16. pymc_extras/inference/smc/__init__.py +13 -0
  17. pymc_extras/inference/smc/sampling.py +451 -0
  18. pymc_extras/linearmodel.py +130 -0
  19. pymc_extras/model/__init__.py +0 -0
  20. pymc_extras/model/marginal/__init__.py +0 -0
  21. pymc_extras/model/marginal/distributions.py +276 -0
  22. pymc_extras/model/marginal/graph_analysis.py +372 -0
  23. pymc_extras/model/marginal/marginal_model.py +595 -0
  24. pymc_extras/model/model_api.py +56 -0
  25. pymc_extras/model/transforms/__init__.py +0 -0
  26. pymc_extras/model/transforms/autoreparam.py +434 -0
  27. pymc_extras/model_builder.py +759 -0
  28. pymc_extras/preprocessing/__init__.py +0 -0
  29. pymc_extras/preprocessing/standard_scaler.py +17 -0
  30. pymc_extras/printing.py +182 -0
  31. pymc_extras/statespace/__init__.py +13 -0
  32. pymc_extras/statespace/core/__init__.py +7 -0
  33. pymc_extras/statespace/core/compile.py +48 -0
  34. pymc_extras/statespace/core/representation.py +438 -0
  35. pymc_extras/statespace/core/statespace.py +2268 -0
  36. pymc_extras/statespace/filters/__init__.py +15 -0
  37. pymc_extras/statespace/filters/distributions.py +453 -0
  38. pymc_extras/statespace/filters/kalman_filter.py +820 -0
  39. pymc_extras/statespace/filters/kalman_smoother.py +126 -0
  40. pymc_extras/statespace/filters/utilities.py +59 -0
  41. pymc_extras/statespace/models/ETS.py +670 -0
  42. pymc_extras/statespace/models/SARIMAX.py +536 -0
  43. pymc_extras/statespace/models/VARMAX.py +393 -0
  44. pymc_extras/statespace/models/__init__.py +6 -0
  45. pymc_extras/statespace/models/structural.py +1651 -0
  46. pymc_extras/statespace/models/utilities.py +387 -0
  47. pymc_extras/statespace/utils/__init__.py +0 -0
  48. pymc_extras/statespace/utils/constants.py +74 -0
  49. pymc_extras/statespace/utils/coord_tools.py +0 -0
  50. pymc_extras/statespace/utils/data_tools.py +182 -0
  51. pymc_extras/utils/__init__.py +23 -0
  52. pymc_extras/utils/linear_cg.py +290 -0
  53. pymc_extras/utils/pivoted_cholesky.py +69 -0
  54. pymc_extras/utils/prior.py +200 -0
  55. pymc_extras/utils/spline.py +131 -0
  56. pymc_extras/version.py +11 -0
  57. pymc_extras/version.txt +1 -0
  58. pymc_extras-0.2.0.dist-info/LICENSE +212 -0
  59. pymc_extras-0.2.0.dist-info/METADATA +99 -0
  60. pymc_extras-0.2.0.dist-info/RECORD +101 -0
  61. pymc_extras-0.2.0.dist-info/WHEEL +5 -0
  62. pymc_extras-0.2.0.dist-info/top_level.txt +2 -0
  63. tests/__init__.py +13 -0
  64. tests/distributions/__init__.py +19 -0
  65. tests/distributions/test_continuous.py +185 -0
  66. tests/distributions/test_discrete.py +210 -0
  67. tests/distributions/test_discrete_markov_chain.py +258 -0
  68. tests/distributions/test_multivariate.py +304 -0
  69. tests/model/__init__.py +0 -0
  70. tests/model/marginal/__init__.py +0 -0
  71. tests/model/marginal/test_distributions.py +131 -0
  72. tests/model/marginal/test_graph_analysis.py +182 -0
  73. tests/model/marginal/test_marginal_model.py +867 -0
  74. tests/model/test_model_api.py +29 -0
  75. tests/statespace/__init__.py +0 -0
  76. tests/statespace/test_ETS.py +411 -0
  77. tests/statespace/test_SARIMAX.py +405 -0
  78. tests/statespace/test_VARMAX.py +184 -0
  79. tests/statespace/test_coord_assignment.py +116 -0
  80. tests/statespace/test_distributions.py +270 -0
  81. tests/statespace/test_kalman_filter.py +326 -0
  82. tests/statespace/test_representation.py +175 -0
  83. tests/statespace/test_statespace.py +818 -0
  84. tests/statespace/test_statespace_JAX.py +156 -0
  85. tests/statespace/test_structural.py +829 -0
  86. tests/statespace/utilities/__init__.py +0 -0
  87. tests/statespace/utilities/shared_fixtures.py +9 -0
  88. tests/statespace/utilities/statsmodel_local_level.py +42 -0
  89. tests/statespace/utilities/test_helpers.py +310 -0
  90. tests/test_blackjax_smc.py +222 -0
  91. tests/test_find_map.py +98 -0
  92. tests/test_histogram_approximation.py +109 -0
  93. tests/test_laplace.py +238 -0
  94. tests/test_linearmodel.py +208 -0
  95. tests/test_model_builder.py +306 -0
  96. tests/test_pathfinder.py +45 -0
  97. tests/test_pivoted_cholesky.py +24 -0
  98. tests/test_printing.py +98 -0
  99. tests/test_prior_from_trace.py +172 -0
  100. tests/test_splines.py +77 -0
  101. tests/utils.py +31 -0
@@ -0,0 +1,15 @@
1
+ from pymc_extras.statespace.filters.distributions import LinearGaussianStateSpace
2
+ from pymc_extras.statespace.filters.kalman_filter import (
3
+ SquareRootFilter,
4
+ StandardFilter,
5
+ UnivariateFilter,
6
+ )
7
+ from pymc_extras.statespace.filters.kalman_smoother import KalmanSmoother
8
+
9
+ __all__ = [
10
+ "StandardFilter",
11
+ "UnivariateFilter",
12
+ "KalmanSmoother",
13
+ "SquareRootFilter",
14
+ "LinearGaussianStateSpace",
15
+ ]
@@ -0,0 +1,453 @@
1
+ import numpy as np
2
+ import pymc as pm
3
+ import pytensor
4
+ import pytensor.tensor as pt
5
+
6
+ from pymc import intX
7
+ from pymc.distributions.dist_math import check_parameters
8
+ from pymc.distributions.distribution import Continuous, SymbolicRandomVariable
9
+ from pymc.distributions.multivariate import MvNormal
10
+ from pymc.distributions.shape_utils import get_support_shape_1d
11
+ from pymc.logprob.abstract import _logprob
12
+ from pytensor.graph.basic import Node
13
+ from pytensor.tensor.random.basic import MvNormalRV
14
+
15
+ floatX = pytensor.config.floatX
16
+ COV_ZERO_TOL = 0
17
+
18
+ lgss_shape_message = (
19
+ "The LinearGaussianStateSpace distribution needs shape information to be constructed. "
20
+ "Ensure that all input matrices have shape information specified."
21
+ )
22
+
23
+
24
+ def make_signature(sequence_names):
25
+ states = "s"
26
+ obs = "p"
27
+ exog = "r"
28
+ time = "t"
29
+ state_and_obs = "n"
30
+
31
+ matrix_to_shape = {
32
+ "x0": (states,),
33
+ "P0": (states, states),
34
+ "c": (states,),
35
+ "d": (obs,),
36
+ "T": (states, states),
37
+ "Z": (obs, states),
38
+ "R": (states, exog),
39
+ "H": (obs, obs),
40
+ "Q": (exog, exog),
41
+ }
42
+
43
+ for matrix in sequence_names:
44
+ base_shape = matrix_to_shape[matrix]
45
+ matrix_to_shape[matrix] = (time, *base_shape)
46
+
47
+ signature = ",".join(["(" + ",".join(shapes) + ")" for shapes in matrix_to_shape.values()])
48
+
49
+ return f"{signature},[rng]->[rng],({time},{state_and_obs})"
50
+
51
+
52
class MvNormalSVDRV(MvNormalRV):
    """Multivariate normal RV op distinguished only by its type.

    Identical in interface to the base ``MvNormalRV``; it exists so that the
    JAX dispatcher below can register a sampler that uses ``method="svd"``,
    which tolerates low-rank (singular) covariance matrices.
    """

    name = "multivariate_normal"
    # Core signature: mean (n), covariance (n, n) -> draw (n).
    signature = "(n),(n,n)->(n)"
    dtype = "floatX"
    _print_name = ("MultivariateNormal", "\\operatorname{MultivariateNormal}")
57
+
58
+
59
class MvNormalSVD(MvNormal):
    """Dummy distribution intended to be rewritten into a JAX multivariate_normal with method="svd".

    A JAX MvNormal robust to low-rank covariance matrices.

    Behaves exactly like ``pm.MvNormal`` except that its ``rv_op`` is a
    ``MvNormalSVDRV``, which the JAX backend dispatches to an SVD-based sampler.
    """

    rv_op = MvNormalSVDRV()
66
+
67
+
68
# Register a JAX sampler for MvNormalSVDRV only when JAX is installed; the
# module must still import cleanly in a JAX-free environment.
try:
    import jax.random

    from pytensor.link.jax.dispatch.random import jax_sample_fn

    @jax_sample_fn.register(MvNormalSVDRV)
    def jax_sample_fn_mvnormal_svd(op, node):
        """Return a JAX sampling function that draws with ``method="svd"``.

        The SVD decomposition of the covariance is slower than the default
        Cholesky but works for positive *semi*-definite (low-rank) matrices.
        """

        def sample_fn(rng, size, dtype, *parameters):
            # Split the carried key so the returned state differs from the
            # key actually consumed by this draw.
            rng_key = rng["jax_state"]
            rng_key, sampling_key = jax.random.split(rng_key, 2)
            sample = jax.random.multivariate_normal(
                sampling_key, *parameters, shape=size, dtype=dtype, method="svd"
            )
            rng["jax_state"] = rng_key
            return (rng, sample)

        return sample_fn

except ImportError:
    # JAX not available; sampling falls back to the default backends.
    pass
88
+
89
+
90
class LinearGaussianStateSpaceRV(SymbolicRandomVariable):
    """Symbolic RV wrapping the scan that simulates a linear Gaussian state space.

    Outputs are ``[rng, statespace_draw]``; ``default_output = 1`` makes the
    draw (not the RNG) what callers receive.
    """

    default_output = 1
    # Bug fix: the print name was misspelled "LinearGuassianStateSpace".
    _print_name = ("LinearGaussianStateSpace", "\\operatorname{LinearGaussianStateSpace}")

    def update(self, node: Node):
        """Map the input RNG (last input) to the updated RNG (first output)."""
        return {node.inputs[-1]: node.outputs[0]}
96
+
97
+
98
class _LinearGaussianStateSpace(Continuous):
    """Internal "combined" distribution over stacked latent and observed states.

    Each draw at time t is ``concatenate([latent_states, observed_states])``.
    This distribution is an implementation detail of ``LinearGaussianStateSpace``,
    which slices the combined draw back apart; it should not be used directly.
    """

    def __new__(
        cls,
        name,
        a0,
        P0,
        c,
        d,
        T,
        Z,
        R,
        H,
        Q,
        steps=None,
        mode=None,
        sequence_names=None,
        append_x0=True,
        **kwargs,
    ):
        # Ignore dims in support shape because they are just passed along to the "observed" and "latent" distributions
        # created by LinearGaussianStateSpace. This "combined" distribution shouldn't ever be directly used.
        steps = get_support_shape_1d(
            support_shape=steps,
            shape=None,
            dims=None,
            observed=kwargs.get("observed", None),
            support_shape_offset=0,
        )

        return super().__new__(
            cls,
            name,
            a0,
            P0,
            c,
            d,
            T,
            Z,
            R,
            H,
            Q,
            steps=steps,
            mode=mode,
            sequence_names=sequence_names,
            append_x0=append_x0,
            **kwargs,
        )

    @classmethod
    def dist(
        cls,
        a0,
        P0,
        c,
        d,
        T,
        Z,
        R,
        H,
        Q,
        steps=None,
        mode=None,
        sequence_names=None,
        append_x0=True,
        **kwargs,
    ):
        # Resolve the number of time steps from either `steps` or `shape`;
        # one of the two must be provided.
        steps = get_support_shape_1d(
            support_shape=steps, shape=kwargs.get("shape", None), support_shape_offset=0
        )

        if steps is None:
            raise ValueError("Must specify steps or shape parameter")

        steps = pt.as_tensor_variable(intX(steps), ndim=0)

        return super().dist(
            [a0, P0, c, d, T, Z, R, H, Q, steps],
            mode=mode,
            sequence_names=sequence_names,
            append_x0=append_x0,
            **kwargs,
        )

    @classmethod
    def rv_op(
        cls,
        a0,
        P0,
        c,
        d,
        T,
        Z,
        R,
        H,
        Q,
        steps,
        size=None,
        mode=None,
        sequence_names=None,
        append_x0=True,
    ):
        """Build the scan graph that simulates the state space forward in time.

        Matrices named in ``sequence_names`` are treated as time-varying scan
        sequences (leading time dimension); all others are non-sequences.
        """
        if sequence_names is None:
            sequence_names = []

        # Fresh type-only clones of the inputs; the scan is built on these and
        # the real inputs are substituted when the op is applied at the end.
        a0_, P0_, c_, d_, T_, Z_, R_, H_, Q_ = map(
            lambda x: x.type(), [a0, P0, c, d, T, Z, R, H, Q]
        )

        # Names are load-bearing: sort_args below recovers the canonical
        # ordering inside the scan by variable name.
        c_.name = "c"
        d_.name = "d"
        T_.name = "T"
        Z_.name = "Z"
        R_.name = "R"
        H_.name = "H"
        Q_.name = "Q"

        sequences = [
            x
            for x, name in zip([c_, d_, T_, Z_, R_, H_, Q_], ["c", "d", "T", "Z", "R", "H", "Q"])
            if name in sequence_names
        ]
        non_sequences = [x for x in [c_, d_, T_, Z_, R_, H_, Q_] if x not in sequences]

        rng = pytensor.shared(np.random.default_rng())

        def sort_args(args):
            # Scan passes sequences and non-sequences in an order that depends
            # on which matrices are time-varying; restore canonical order.
            sorted_args = []

            # Inside the scan, outputs_info variables get a time step appended to their name
            # e.g. x -> x[t]. Remove this so we can identify variables by name.
            arg_names = [x.name.replace("[t]", "") for x in args]

            # c, d, T, Z, R, H, Q is the "canonical" ordering
            for name in ["c", "d", "T", "Z", "R", "H", "Q"]:
                idx = arg_names.index(name)
                sorted_args.append(args[idx])

            return sorted_args

        n_seq = len(sequence_names)

        def step_fn(*args):
            # Scan argument layout: [sequences..., recurrent state, non_sequences..., rng]
            seqs, state, non_seqs = args[:n_seq], args[n_seq], args[n_seq + 1 :]
            non_seqs, rng = non_seqs[:-1], non_seqs[-1]

            c, d, T, Z, R, H, Q = sort_args(seqs + non_seqs)
            k = T.shape[0]
            # The carried state is [latent; observed]; only the latent part
            # feeds the transition equation.
            a = state[:k]

            # Chain the RNG through both draws so they are independent.
            middle_rng, a_innovation = MvNormalSVD.dist(mu=0, cov=Q, rng=rng).owner.outputs
            next_rng, y_innovation = MvNormalSVD.dist(mu=0, cov=H, rng=middle_rng).owner.outputs

            # State transition: a[t+1] = c + T a[t] + R eps,  eps ~ N(0, Q)
            a_mu = c + T @ a
            a_next = a_mu + R @ a_innovation

            # Observation: y[t+1] = d + Z a[t+1] + eta,  eta ~ N(0, H)
            y_mu = d + Z @ a_next
            y_next = y_mu + y_innovation

            next_state = pt.concatenate([a_next, y_next], axis=0)

            return next_state, {rng: next_rng}

        # For time-varying Z/H, the initial observation uses the t=0 slice.
        Z_init = Z_ if Z_ in non_sequences else Z_[0]
        H_init = H_ if H_ in non_sequences else H_[0]

        init_x_ = MvNormalSVD.dist(a0_, P0_, rng=rng)
        init_y_ = MvNormalSVD.dist(Z_init @ init_x_, H_init, rng=rng)

        init_dist_ = pt.concatenate([init_x_, init_y_], axis=0)

        statespace, updates = pytensor.scan(
            step_fn,
            outputs_info=[init_dist_],
            sequences=None if len(sequences) == 0 else sequences,
            non_sequences=[*non_sequences, rng],
            n_steps=steps,
            mode=mode,
            strict=True,
        )

        # Optionally prepend the initial draw, giving steps + 1 time points.
        if append_x0:
            statespace_ = pt.concatenate([init_dist_[None], statespace], axis=0)
            statespace_ = pt.specify_shape(statespace_, (steps + 1, None))
        else:
            statespace_ = statespace
            statespace_ = pt.specify_shape(statespace_, (steps, None))

        (ss_rng,) = tuple(updates.values())
        linear_gaussian_ss_op = LinearGaussianStateSpaceRV(
            inputs=[a0_, P0_, c_, d_, T_, Z_, R_, H_, Q_, steps, rng],
            outputs=[ss_rng, statespace_],
            extended_signature=make_signature(sequence_names),
        )

        # Apply the op to the actual inputs (replacing the type-only clones).
        linear_gaussian_ss = linear_gaussian_ss_op(a0, P0, c, d, T, Z, R, H, Q, steps, rng)
        return linear_gaussian_ss
294
+
295
+
296
class LinearGaussianStateSpace(Continuous):
    """
    Linear Gaussian Statespace distribution

    User-facing wrapper around ``_LinearGaussianStateSpace``. It draws the
    combined (latent + observed) trajectory, slices it into its two parts,
    registers them as ``{name}_latent`` and ``{name}_observed`` Deterministics,
    and returns ``(latent_states, obs_states)``.
    """

    def __new__(
        cls,
        name,
        a0,
        P0,
        c,
        d,
        T,
        Z,
        R,
        H,
        Q,
        *,
        steps,
        k_endog=None,
        sequence_names=None,
        mode=None,
        append_x0=True,
        **kwargs,
    ):
        dims = kwargs.pop("dims", None)
        latent_dims = None
        obs_dims = None
        if dims is not None:
            if len(dims) != 3:
                # Bug fix: the ValueError was previously constructed but never
                # raised, so invalid dims were silently accepted.
                raise ValueError(
                    "LinearGaussianStateSpace expects 3 dims: time, all_states, and observed_states"
                )
            time_dim, state_dim, obs_dim = dims
            latent_dims = [time_dim, state_dim]
            obs_dims = [time_dim, obs_dim]

        latent_obs_combined = _LinearGaussianStateSpace(
            f"{name}_combined",
            a0,
            P0,
            c,
            d,
            T,
            Z,
            R,
            H,
            Q,
            steps=steps,
            mode=mode,
            sequence_names=sequence_names,
            append_x0=append_x0,
            **kwargs,
        )
        # With append_x0 the initial draw is prepended, so the time dimension
        # is steps + 1 rather than steps.
        latent_obs_combined = pt.specify_shape(latent_obs_combined, (steps + int(append_x0), None))
        if k_endog is None:
            k_endog = cls._get_k_endog(H)
        # The last k_endog entries of the combined state vector are observed.
        latent_slice = slice(None, -k_endog)
        obs_slice = slice(-k_endog, None)

        latent_states = latent_obs_combined[..., latent_slice]
        obs_states = latent_obs_combined[..., obs_slice]

        latent_states = pm.Deterministic(f"{name}_latent", latent_states, dims=latent_dims)
        obs_states = pm.Deterministic(f"{name}_observed", obs_states, dims=obs_dims)

        return latent_states, obs_states

    @classmethod
    def dist(cls, a0, P0, c, d, T, Z, R, H, Q, *, steps=None, **kwargs):
        """Return unnamed (latent, observed) draws without registering Deterministics."""
        latent_obs_combined = _LinearGaussianStateSpace.dist(
            a0, P0, c, d, T, Z, R, H, Q, steps=steps, **kwargs
        )
        # Here the split uses the number of latent states (from T) rather than
        # the number of observed states (from H) as in __new__.
        k_states = T.type.shape[0]

        latent_states = latent_obs_combined[..., :k_states]
        obs_states = latent_obs_combined[..., k_states:]

        return latent_states, obs_states

    @classmethod
    def _get_k_states(cls, T):
        """Number of latent states, read from T's static shape; raises if unknown."""
        k_states = T.type.shape[0]
        if k_states is None:
            raise ValueError(lgss_shape_message)
        return k_states

    @classmethod
    def _get_k_endog(cls, H):
        """Number of observed states, read from H's static shape; raises if unknown."""
        k_endog = H.type.shape[0]
        if k_endog is None:
            raise ValueError(lgss_shape_message)

        return k_endog
391
+
392
+
393
class KalmanFilterRV(SymbolicRandomVariable):
    """Symbolic RV for a sequence of MvNormal draws from Kalman filter outputs.

    Inputs are (means, covariances, precomputed logp, rng); outputs are
    ``[rng, draws]`` with ``default_output = 1`` selecting the draws.
    """

    default_output = 1
    _print_name = ("KalmanFilter", "\\operatorname{KalmanFilter}")
    # t = time, s = state dimension; the rng is threaded through explicitly.
    extended_signature = "(t,s),(t,s,s),(t),[rng]->[rng],(t,s)"

    def update(self, node: Node):
        """Map the input RNG (last input) to the updated RNG (first output)."""
        return {node.inputs[-1]: node.outputs[0]}
400
+
401
+
402
class SequenceMvNormal(Continuous):
    """Sequence of independent multivariate normals, one per time step.

    Used to turn Kalman filter/smoother outputs (a mean and covariance per
    timestep, plus an already-computed log-probability) into a single
    sampleable random variable.
    """

    def __new__(cls, *args, **kwargs):
        return super().__new__(cls, *args, **kwargs)

    @classmethod
    def dist(cls, mus, covs, logp, **kwargs):
        # logp is carried as a parameter; the registered _logprob simply
        # returns it (with shape checks) instead of recomputing it.
        return super().dist([mus, covs, logp], **kwargs)

    @classmethod
    def rv_op(cls, mus, covs, logp, size=None):
        # Batch dimensions (if any) will be on the far left, but scan requires time to be there instead
        if mus.ndim > 2:
            mus = pt.moveaxis(mus, -2, 0)
        if covs.ndim > 3:
            covs = pt.moveaxis(covs, -3, 0)

        # Type-only clones: the scan graph is built on these and the real
        # inputs are substituted when the op is applied below.
        mus_, covs_ = mus.type(), covs.type()

        logp_ = logp.type()
        rng = pytensor.shared(np.random.default_rng())

        def step(mu, cov, rng):
            # One SVD-robust MvNormal draw per timestep, threading the rng.
            new_rng, mvn = MvNormalSVD.dist(mu=mu, cov=cov, rng=rng).owner.outputs
            return mvn, {rng: new_rng}

        mvn_seq, updates = pytensor.scan(
            step, sequences=[mus_, covs_], non_sequences=[rng], strict=True, n_steps=mus_.shape[0]
        )
        mvn_seq = pt.specify_shape(mvn_seq, mus.type.shape)

        # Move time axis back to position -2 so batches are on the left
        if mvn_seq.ndim > 2:
            mvn_seq = pt.moveaxis(mvn_seq, 0, -2)

        (seq_mvn_rng,) = tuple(updates.values())

        mvn_seq_op = KalmanFilterRV(
            inputs=[mus_, covs_, logp_, rng], outputs=[seq_mvn_rng, mvn_seq], ndim_supp=2
        )

        mvn_seq = mvn_seq_op(mus, covs, logp, rng)
        return mvn_seq
444
+
445
+
446
@_logprob.register(KalmanFilterRV)
def sequence_mvnormal_logp(op, values, mus, covs, logp, rng, **kwargs):
    """Return the precomputed logp carried by the RV, guarded by shape checks.

    The Kalman filter already computed the log-likelihood, so this just
    validates that the observed values, means, and covariances agree on the
    number of timesteps before passing `logp` through.
    """
    return check_parameters(
        logp,
        pt.eq(values[0].shape[0], mus.shape[0]),
        pt.eq(covs.shape[0], mus.shape[0]),
        msg="Observed data and parameters must have the same number of timesteps (dimension 0)",
    )