PyPI - pymc-extras - Versions diffs - 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl - Mend

pymc-extras 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

pymc_extras/__init__.py +5 -1
pymc_extras/distributions/timeseries.py +1 -1
pymc_extras/inference/fit.py +0 -4
pymc_extras/inference/pathfinder/__init__.py +3 -0
pymc_extras/inference/pathfinder/importance_sampling.py +139 -0
pymc_extras/inference/pathfinder/lbfgs.py +190 -0
pymc_extras/inference/pathfinder/pathfinder.py +1746 -0
pymc_extras/model/marginal/distributions.py +100 -3
pymc_extras/model/marginal/graph_analysis.py +8 -9
pymc_extras/model/marginal/marginal_model.py +437 -424
pymc_extras/model/model_api.py +18 -2
pymc_extras/statespace/core/statespace.py +79 -36
pymc_extras/statespace/models/structural.py +21 -6
pymc_extras/utils/model_equivalence.py +66 -0
pymc_extras/version.txt +1 -1
{pymc_extras-0.2.0.dist-info → pymc_extras-0.2.2.dist-info}/METADATA +15 -5
{pymc_extras-0.2.0.dist-info → pymc_extras-0.2.2.dist-info}/RECORD +28 -24
{pymc_extras-0.2.0.dist-info → pymc_extras-0.2.2.dist-info}/WHEEL +1 -1
tests/model/marginal/test_distributions.py +12 -11
tests/model/marginal/test_marginal_model.py +301 -201
tests/model/test_model_api.py +9 -0
tests/statespace/test_statespace.py +54 -0
tests/statespace/test_structural.py +10 -3
tests/test_pathfinder.py +135 -7
tests/test_pivoted_cholesky.py +1 -1
tests/utils.py +0 -31
pymc_extras/inference/pathfinder.py +0 -134
{pymc_extras-0.2.0.dist-info → pymc_extras-0.2.2.dist-info}/LICENSE +0 -0
{pymc_extras-0.2.0.dist-info → pymc_extras-0.2.2.dist-info}/top_level.txt +0 -0

tests/model/test_model_api.py CHANGED Viewed

@@ -25,5 +25,14 @@ def test_logp():
     mw2 = model_wrapped2(coords=coords)
+    @pmx.as_model()
+    def model_wrapped3(mu):
+        pm.Normal("x", mu, 1.0, dims="obs")
+    mw3 = model_wrapped3(0.0, coords=coords)
+    mw4 = model_wrapped3(np.array([np.nan]), coords=coords)
     np.testing.assert_equal(model.point_logps(), mw.point_logps())
     np.testing.assert_equal(mw.point_logps(), mw2.point_logps())
+    assert mw3["mu"] in mw3.data_vars
+    assert "mu" not in mw4

tests/statespace/test_statespace.py CHANGED Viewed

@@ -1,3 +1,4 @@
+from collections.abc import Sequence
 from functools import partial
 import numpy as np
@@ -349,6 +350,59 @@ def test_sampling_methods(group, kind, ss_mod, idata, rng):
             assert not np.any(np.isnan(test_idata[f"{group}_{output}"].values))
+@pytest.mark.filterwarnings("ignore:Provided data contains missing values")
+def test_sample_conditional_with_time_varying():
+    class TVCovariance(PyMCStateSpace):
+        def __init__(self):
+            super().__init__(k_states=1, k_endog=1, k_posdef=1)
+        def make_symbolic_graph(self) -> None:
+            self.ssm["transition", 0, 0] = 1.0
+            self.ssm["design", 0, 0] = 1.0
+            sigma_cov = self.make_and_register_variable("sigma_cov", (None,))
+            self.ssm["state_cov"] = sigma_cov[:, None, None] ** 2
+        @property
+        def param_names(self) -> list[str]:
+            return ["sigma_cov"]
+        @property
+        def coords(self) -> dict[str, Sequence[str]]:
+            return make_default_coords(self)
+        @property
+        def state_names(self) -> list[str]:
+            return ["level"]
+        @property
+        def observed_states(self) -> list[str]:
+            return ["level"]
+        @property
+        def shock_names(self) -> list[str]:
+            return ["level"]
+    ss_mod = TVCovariance()
+    empty_data = pd.DataFrame(
+        np.nan, index=pd.date_range("2020-01-01", periods=100, freq="D"), columns=["data"]
+    )
+    coords = ss_mod.coords
+    coords["time"] = empty_data.index
+    with pm.Model(coords=coords) as mod:
+        log_sigma_cov = pm.Normal("log_sigma_cov", mu=0, sigma=0.1, dims=["time"])
+        pm.Deterministic("sigma_cov", pm.math.exp(log_sigma_cov.cumsum()), dims=["time"])
+        ss_mod.build_statespace_graph(data=empty_data)
+        prior = pm.sample_prior_predictive(10)
+    ss_mod.sample_unconditional_prior(prior)
+    ss_mod.sample_conditional_prior(prior)
 def _make_time_idx(mod, use_datetime_index=True):
     if use_datetime_index:
         mod._fit_coords["time"] = nile.index

tests/statespace/test_structural.py CHANGED Viewed

@@ -2,6 +2,7 @@ import functools as ft
 import warnings
 from collections import defaultdict
+from copyreg import remove_extension
 from typing import Optional
 import numpy as np
@@ -592,13 +593,18 @@ def test_autoregressive_model(order, rng):
 @pytest.mark.parametrize("s", [10, 25, 50])
 @pytest.mark.parametrize("innovations", [True, False])
-def test_time_seasonality(s, innovations, rng):
+@pytest.mark.parametrize("remove_first_state", [True, False])
+def test_time_seasonality(s, innovations, remove_first_state, rng):
     def random_word(rng):
         return "".join(rng.choice(list("abcdefghijklmnopqrstuvwxyz")) for _ in range(5))
     state_names = [random_word(rng) for _ in range(s)]
     mod = st.TimeSeasonality(
-        season_length=s, innovations=innovations, name="season", state_names=state_names
+        season_length=s,
+        innovations=innovations,
+        name="season",
+        state_names=state_names,
+        remove_first_state=remove_first_state,
     )
     x0 = np.zeros(mod.k_states, dtype=floatX)
     x0[0] = 1
@@ -615,7 +621,8 @@ def test_time_seasonality(s, innovations, rng):
     # Check coords
     mod.build(verbose=False)
     _assert_basic_coords_correct(mod)
-    assert mod.coords["season_state"] == state_names[1:]
+    test_slice = slice(1, None) if remove_first_state else slice(None)
+    assert mod.coords["season_state"] == state_names[test_slice]
 def get_shift_factor(s):

tests/test_pathfinder.py CHANGED Viewed

@@ -18,12 +18,12 @@ import numpy as np
 import pymc as pm
 import pytest
+pytestmark = pytest.mark.filterwarnings("ignore:compile_pymc was renamed to compile:FutureWarning")
 import pymc_extras as pmx
-@pytest.mark.skipif(sys.platform == "win32", reason="JAX not supported on windows.")
-def test_pathfinder():
-    # Data of the Eight Schools Model
+def eight_schools_model() -> pm.Model:
     J = 8
     y = np.array([28.0, 8.0, -3.0, 7.0, -1.0, 1.0, 18.0, 12.0])
     sigma = np.array([15.0, 10.0, 16.0, 11.0, 9.0, 11.0, 10.0, 18.0])
@@ -35,11 +35,139 @@ def test_pathfinder():
         theta = pm.Normal("theta", mu=0, sigma=1, shape=J)
         obs = pm.Normal("obs", mu=mu + tau * theta, sigma=sigma, shape=J, observed=y)
-        idata = pmx.fit(method="pathfinder", random_seed=41)
+    return model
+@pytest.fixture
+def reference_idata():
+    model = eight_schools_model()
+    with model:
+        idata = pmx.fit(
+            method="pathfinder",
+            num_paths=50,
+            jitter=10.0,
+            random_seed=41,
+            inference_backend="pymc",
+        )
+    return idata
+@pytest.mark.parametrize("inference_backend", ["pymc", "blackjax"])
+def test_pathfinder(inference_backend, reference_idata):
+    if inference_backend == "blackjax" and sys.platform == "win32":
+        pytest.skip("JAX not supported on windows")
+    if inference_backend == "blackjax":
+        model = eight_schools_model()
+        with model:
+            idata = pmx.fit(
+                method="pathfinder",
+                num_paths=50,
+                jitter=10.0,
+                random_seed=41,
+                inference_backend=inference_backend,
+            )
+    else:
+        idata = reference_idata
+        np.testing.assert_allclose(idata.posterior["mu"].mean(), 5.0, atol=1.6)
+        np.testing.assert_allclose(idata.posterior["tau"].mean(), 4.15, atol=1.5)
     assert idata.posterior["mu"].shape == (1, 1000)
     assert idata.posterior["tau"].shape == (1, 1000)
     assert idata.posterior["theta"].shape == (1, 1000, 8)
-    # FIXME: pathfinder doesn't find a reasonable mean! Fix bug or choose model pathfinder can handle
-    # np.testing.assert_allclose(idata.posterior["mu"].mean(), 5.0)
-    np.testing.assert_allclose(idata.posterior["tau"].mean(), 4.15, atol=0.5)
+@pytest.mark.parametrize("concurrent", ["thread", "process"])
+def test_concurrent_results(reference_idata, concurrent):
+    model = eight_schools_model()
+    with model:
+        idata_conc = pmx.fit(
+            method="pathfinder",
+            num_paths=50,
+            jitter=10.0,
+            random_seed=41,
+            inference_backend="pymc",
+            concurrent=concurrent,
+        )
+    np.testing.assert_allclose(
+        reference_idata.posterior.mu.data.mean(),
+        idata_conc.posterior.mu.data.mean(),
+        atol=0.4,
+    )
+    np.testing.assert_allclose(
+        reference_idata.posterior.tau.data.mean(),
+        idata_conc.posterior.tau.data.mean(),
+        atol=0.4,
+    )
+def test_seed(reference_idata):
+    model = eight_schools_model()
+    with model:
+        idata_41 = pmx.fit(
+            method="pathfinder",
+            num_paths=50,
+            jitter=10.0,
+            random_seed=41,
+            inference_backend="pymc",
+        )
+        idata_123 = pmx.fit(
+            method="pathfinder",
+            num_paths=50,
+            jitter=10.0,
+            random_seed=123,
+            inference_backend="pymc",
+        )
+    assert not np.allclose(idata_41.posterior.mu.data.mean(), idata_123.posterior.mu.data.mean())
+    assert np.allclose(idata_41.posterior.mu.data.mean(), idata_41.posterior.mu.data.mean())
+def test_bfgs_sample():
+    import pytensor.tensor as pt
+    from pymc_extras.inference.pathfinder.pathfinder import (
+        alpha_recover,
+        bfgs_sample,
+        inverse_hessian_factors,
+    )
+    """test BFGS sampling"""
+    Lp1, N = 8, 10
+    L = Lp1 - 1
+    J = 6
+    num_samples = 1000
+    # mock data
+    x_data = np.random.randn(Lp1, N)
+    g_data = np.random.randn(Lp1, N)
+    # get factors
+    x_full = pt.as_tensor(x_data, dtype="float64")
+    g_full = pt.as_tensor(g_data, dtype="float64")
+    epsilon = 1e-11
+    x = x_full[1:]
+    g = g_full[1:]
+    alpha, S, Z, update_mask = alpha_recover(x_full, g_full, epsilon)
+    beta, gamma = inverse_hessian_factors(alpha, S, Z, update_mask, J)
+    # sample
+    phi, logq = bfgs_sample(
+        num_samples=num_samples,
+        x=x,
+        g=g,
+        alpha=alpha,
+        beta=beta,
+        gamma=gamma,
+    )
+    # check shapes
+    assert beta.eval().shape == (L, N, 2 * J)
+    assert gamma.eval().shape == (L, 2 * J, 2 * J)
+    assert phi.eval().shape == (L, num_samples, N)
+    assert logq.eval().shape == (L, num_samples)

tests/test_pivoted_cholesky.py CHANGED Viewed

@@ -8,7 +8,7 @@
 #     pass
 # import numpy as np
 #
-# import pymc_experimental as pmx
+# import pymc_extras as pmx
 #
 #
 # def test_match_gpytorch_linearcg_output():

tests/utils.py CHANGED Viewed

@@ -1,31 +0,0 @@
-from collections.abc import Sequence
-from pytensor.compile import SharedVariable
-from pytensor.graph import Constant, graph_inputs
-from pytensor.graph.basic import Variable, equal_computations
-from pytensor.tensor.random.type import RandomType
-def equal_computations_up_to_root(
-    xs: Sequence[Variable], ys: Sequence[Variable], ignore_rng_values=True
-) -> bool:
-    # Check if graphs are equivalent even if root variables have distinct identities
-    x_graph_inputs = [var for var in graph_inputs(xs) if not isinstance(var, Constant)]
-    y_graph_inputs = [var for var in graph_inputs(ys) if not isinstance(var, Constant)]
-    if len(x_graph_inputs) != len(y_graph_inputs):
-        return False
-    for x, y in zip(x_graph_inputs, y_graph_inputs):
-        if x.type != y.type:
-            return False
-        if x.name != y.name:
-            return False
-        if isinstance(x, SharedVariable):
-            if not isinstance(y, SharedVariable):
-                return False
-            if isinstance(x.type, RandomType) and ignore_rng_values:
-                continue
-            if not x.type.values_eq(x.get_value(), y.get_value()):
-                return False
-    return equal_computations(xs, ys, in_xs=x_graph_inputs, in_ys=y_graph_inputs)

pymc_extras/inference/pathfinder.py DELETED Viewed

@@ -1,134 +0,0 @@
-#   Copyright 2022 The PyMC Developers
-#
-#   Licensed under the Apache License, Version 2.0 (the "License");
-#   you may not use this file except in compliance with the License.
-#   You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#   Unless required by applicable law or agreed to in writing, software
-#   distributed under the License is distributed on an "AS IS" BASIS,
-#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#   See the License for the specific language governing permissions and
-#   limitations under the License.
-import collections
-import sys
-import arviz as az
-import blackjax
-import jax
-import numpy as np
-import pymc as pm
-from packaging import version
-from pymc.backends.arviz import coords_and_dims_for_inferencedata
-from pymc.blocking import DictToArrayBijection, RaveledVars
-from pymc.model import modelcontext
-from pymc.sampling.jax import get_jaxified_graph
-from pymc.util import RandomSeed, _get_seeds_per_chain, get_default_varnames
-def convert_flat_trace_to_idata(
-    samples,
-    include_transformed=False,
-    postprocessing_backend="cpu",
-    model=None,
-):
-    model = modelcontext(model)
-    ip = model.initial_point()
-    ip_point_map_info = pm.blocking.DictToArrayBijection.map(ip).point_map_info
-    trace = collections.defaultdict(list)
-    for sample in samples:
-        raveld_vars = RaveledVars(sample, ip_point_map_info)
-        point = DictToArrayBijection.rmap(raveld_vars, ip)
-        for p, v in point.items():
-            trace[p].append(v.tolist())
-    trace = {k: np.asarray(v)[None, ...] for k, v in trace.items()}
-    var_names = model.unobserved_value_vars
-    vars_to_sample = list(get_default_varnames(var_names, include_transformed=include_transformed))
-    print("Transforming variables...", file=sys.stdout)
-    jax_fn = get_jaxified_graph(inputs=model.value_vars, outputs=vars_to_sample)
-    result = jax.vmap(jax.vmap(jax_fn))(
-        *jax.device_put(list(trace.values()), jax.devices(postprocessing_backend)[0])
-    )
-    trace = {v.name: r for v, r in zip(vars_to_sample, result)}
-    coords, dims = coords_and_dims_for_inferencedata(model)
-    idata = az.from_dict(trace, dims=dims, coords=coords)
-    return idata
-def fit_pathfinder(
-    samples=1000,
-    random_seed: RandomSeed | None = None,
-    postprocessing_backend="cpu",
-    model=None,
-    **pathfinder_kwargs,
-):
-    """
-    Fit the pathfinder algorithm as implemented in blackjax
-    Requires the JAX backend
-    Parameters
-    ----------
-    samples : int
-        Number of samples to draw from the fitted approximation.
-    random_seed : int
-        Random seed to set.
-    postprocessing_backend : str
-        Where to compute transformations of the trace.
-        "cpu" or "gpu".
-    pathfinder_kwargs:
-        kwargs for blackjax.vi.pathfinder.approximate
-    Returns
-    -------
-    arviz.InferenceData
-    Reference
-    ---------
-    https://arxiv.org/abs/2108.03782
-    """
-    # Temporarily helper
-    if version.parse(blackjax.__version__).major < 1:
-        raise ImportError("fit_pathfinder requires blackjax 1.0 or above")
-    model = modelcontext(model)
-    ip = model.initial_point()
-    ip_map = DictToArrayBijection.map(ip)
-    new_logprob, new_input = pm.pytensorf.join_nonshared_inputs(
-        ip, (model.logp(),), model.value_vars, ()
-    )
-    logprob_fn_list = get_jaxified_graph([new_input], new_logprob)
-    def logprob_fn(x):
-        return logprob_fn_list(x)[0]
-    [pathfinder_seed, sample_seed] = _get_seeds_per_chain(random_seed, 2)
-    print("Running pathfinder...", file=sys.stdout)
-    pathfinder_state, _ = blackjax.vi.pathfinder.approximate(
-        rng_key=jax.random.key(pathfinder_seed),
-        logdensity_fn=logprob_fn,
-        initial_position=ip_map.data,
-        **pathfinder_kwargs,
-    )
-    samples, _ = blackjax.vi.pathfinder.sample(
-        rng_key=jax.random.key(sample_seed),
-        state=pathfinder_state,
-        num_samples=samples,
-    )
-    idata = convert_flat_trace_to_idata(
-        samples,
-        postprocessing_backend=postprocessing_backend,
-        model=model,
-    )
-    return idata

{pymc_extras-0.2.0.dist-info → pymc_extras-0.2.2.dist-info}/LICENSE RENAMED Viewed

File without changes

{pymc_extras-0.2.0.dist-info → pymc_extras-0.2.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

pymc-extras 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

pymc-extras 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl