PyPI - pymc-extras - Versions diffs - 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl - Mend

pymc-extras 0.2.0py3-none-any.whl → 0.2.2py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

pymc_extras/__init__.py +5 -1
pymc_extras/distributions/timeseries.py +1 -1
pymc_extras/inference/fit.py +0 -4
pymc_extras/inference/pathfinder/__init__.py +3 -0
pymc_extras/inference/pathfinder/importance_sampling.py +139 -0
pymc_extras/inference/pathfinder/lbfgs.py +190 -0
pymc_extras/inference/pathfinder/pathfinder.py +1746 -0
pymc_extras/model/marginal/distributions.py +100 -3
pymc_extras/model/marginal/graph_analysis.py +8 -9
pymc_extras/model/marginal/marginal_model.py +437 -424
pymc_extras/model/model_api.py +18 -2
pymc_extras/statespace/core/statespace.py +79 -36
pymc_extras/statespace/models/structural.py +21 -6
pymc_extras/utils/model_equivalence.py +66 -0
pymc_extras/version.txt +1 -1
{pymc_extras-0.2.0.dist-info → pymc_extras-0.2.2.dist-info}/METADATA +15 -5
{pymc_extras-0.2.0.dist-info → pymc_extras-0.2.2.dist-info}/RECORD +28 -24
{pymc_extras-0.2.0.dist-info → pymc_extras-0.2.2.dist-info}/WHEEL +1 -1
tests/model/marginal/test_distributions.py +12 -11
tests/model/marginal/test_marginal_model.py +301 -201
tests/model/test_model_api.py +9 -0
tests/statespace/test_statespace.py +54 -0
tests/statespace/test_structural.py +10 -3
tests/test_pathfinder.py +135 -7
tests/test_pivoted_cholesky.py +1 -1
tests/utils.py +0 -31
pymc_extras/inference/pathfinder.py +0 -134
{pymc_extras-0.2.0.dist-info → pymc_extras-0.2.2.dist-info}/LICENSE +0 -0
{pymc_extras-0.2.0.dist-info → pymc_extras-0.2.2.dist-info}/top_level.txt +0 -0

tests/model/marginal/test_marginal_model.py CHANGED Viewed

@@ -9,25 +9,28 @@ import pytensor.tensor as pt
 import pytest
 from arviz import InferenceData, dict_to_dataset
+from pymc import Model, draw
 from pymc.distributions import transforms
 from pymc.distributions.transforms import ordered
-from pymc.model.fgraph import fgraph_from_model
-from pymc.pytensorf import inputvars
+from pymc.initial_point import make_initial_point_expression
+from pymc.pytensorf import constant_fold, inputvars
 from pymc.util import UNSET
 from scipy.special import log_softmax, logsumexp
 from scipy.stats import halfnorm, norm
+from pymc_extras.model.marginal.distributions import MarginalRV
 from pymc_extras.model.marginal.marginal_model import (
-    MarginalModel,
     marginalize,
+    recover_marginals,
+    unmarginalize,
 )
-from tests.utils import equal_computations_up_to_root
+from pymc_extras.utils.model_equivalence import equivalent_models
 def test_basic_marginalized_rv():
     data = [2] * 5
-    with MarginalModel() as m:
+    with Model() as m:
         sigma = pm.HalfNormal("sigma")
         idx = pm.Categorical("idx", p=[0.1, 0.3, 0.6])
         mu = pt.switch(
@@ -42,79 +45,105 @@ def test_basic_marginalized_rv():
         y = pm.Normal("y", mu=mu, sigma=sigma)
         z = pm.Normal("z", y, observed=data)
-    m.marginalize([idx])
-    assert idx not in m.free_RVs
-    assert [rv.name for rv in m.marginalized_rvs] == ["idx"]
+    marginal_m = marginalize(m, [idx])
+    assert isinstance(marginal_m["y"].owner.op, MarginalRV)
+    assert ["idx"] not in [rv.name for rv in marginal_m.free_RVs]
+    # Test forward draws
+    y_draws, z_draws = draw(
+        [marginal_m["y"], marginal_m["z"]],
+        # Make sigma very small to make draws deterministic
+        givens={marginal_m["sigma"]: 0.001},
+        draws=1000,
+        random_seed=54,
+    )
+    assert sorted(np.unique(y_draws.round()) == [-1.0, 0.0, 1.0])
+    assert z_draws[y_draws < 0].mean() < z_draws[y_draws > 0].mean()
+    # Test initial_point
+    ips = make_initial_point_expression(
+        # Use basic_RVs to include the observed RV
+        free_rvs=marginal_m.basic_RVs,
+        rvs_to_transforms=marginal_m.rvs_to_transforms,
+        initval_strategies={},
+    )
+    # After simplification, we should have only constants in the graph (except alloc which isn't constant folded):
+    ip_sigma, ip_y, ip_z = constant_fold(ips)
+    np.testing.assert_allclose(ip_sigma, 1.0)
+    np.testing.assert_allclose(ip_y, 1.0)
+    np.testing.assert_allclose(ip_z, np.full((5,), 1.0))
+    marginal_ip = marginal_m.initial_point()
+    expected_ip = m.initial_point()
+    expected_ip.pop("idx")
+    assert marginal_ip == expected_ip
     # Test logp
-    with pm.Model() as m_ref:
+    with pm.Model() as ref_m:
         sigma = pm.HalfNormal("sigma")
         y = pm.NormalMixture("y", w=[0.1, 0.3, 0.6], mu=[-1, 0, 1], sigma=sigma)
         z = pm.Normal("z", y, observed=data)
-    test_point = m_ref.initial_point()
-    ref_logp = m_ref.compile_logp()(test_point)
-    ref_dlogp = m_ref.compile_dlogp([m_ref["y"]])(test_point)
-    # Assert we can marginalize and unmarginalize internally non-destructively
-    for i in range(3):
-        np.testing.assert_almost_equal(
-            m.compile_logp()(test_point),
-            ref_logp,
-        )
-        np.testing.assert_almost_equal(
-            m.compile_dlogp([m["y"]])(test_point),
-            ref_dlogp,
-        )
+    np.testing.assert_almost_equal(
+        marginal_m.compile_logp()(marginal_ip),
+        ref_m.compile_logp()(marginal_ip),
+    )
+    np.testing.assert_almost_equal(
+        marginal_m.compile_dlogp([marginal_m["y"]])(marginal_ip),
+        ref_m.compile_dlogp([ref_m["y"]])(marginal_ip),
+    )
 def test_one_to_one_marginalized_rvs():
     """Test case with multiple, independent marginalized RVs."""
-    with MarginalModel() as m:
+    with Model() as m:
         sigma = pm.HalfNormal("sigma")
         idx1 = pm.Bernoulli("idx1", p=0.75)
         x = pm.Normal("x", mu=idx1, sigma=sigma)
         idx2 = pm.Bernoulli("idx2", p=0.75, shape=(5,))
         y = pm.Normal("y", mu=(idx2 * 2 - 1), sigma=sigma, shape=(5,))
-    m.marginalize([idx1, idx2])
-    m["x"].owner is not m["y"].owner
-    _m = m.clone()._marginalize()
-    _m["x"].owner is not _m["y"].owner
+    marginal_m = marginalize(m, [idx1, idx2])
+    assert isinstance(marginal_m["x"].owner.op, MarginalRV)
+    assert isinstance(marginal_m["y"].owner.op, MarginalRV)
+    assert marginal_m["x"].owner is not marginal_m["y"].owner
-    with pm.Model() as m_ref:
+    with pm.Model() as ref_m:
         sigma = pm.HalfNormal("sigma")
         x = pm.NormalMixture("x", w=[0.25, 0.75], mu=[0, 1], sigma=sigma)
         y = pm.NormalMixture("y", w=[0.25, 0.75], mu=[-1, 1], sigma=sigma, shape=(5,))
     # Test logp
-    test_point = m_ref.initial_point()
-    x_logp, y_logp = m.compile_logp(vars=[m["x"], m["y"]], sum=False)(test_point)
-    x_ref_log, y_ref_logp = m_ref.compile_logp(vars=[m_ref["x"], m_ref["y"]], sum=False)(test_point)
+    test_point = ref_m.initial_point()
+    x_logp, y_logp = marginal_m.compile_logp(vars=[marginal_m["x"], marginal_m["y"]], sum=False)(
+        test_point
+    )
+    x_ref_log, y_ref_logp = ref_m.compile_logp(vars=[ref_m["x"], ref_m["y"]], sum=False)(test_point)
     np.testing.assert_array_almost_equal(x_logp, x_ref_log.sum())
     np.testing.assert_array_almost_equal(y_logp, y_ref_logp)
 def test_one_to_many_marginalized_rvs():
     """Test that marginalization works when there is more than one dependent RV"""
-    with MarginalModel() as m:
+    with Model() as m:
         sigma = pm.HalfNormal("sigma")
         idx = pm.Bernoulli("idx", p=0.75)
         x = pm.Normal("x", mu=idx, sigma=sigma)
         y = pm.Normal("y", mu=(idx * 2 - 1), sigma=sigma, shape=(5,))
-    ref_logp_x_y_fn = m.compile_logp([idx, x, y])
-    with pytest.warns(UserWarning, match="There are multiple dependent variables"):
-        m.marginalize([idx])
+    marginal_m = marginalize(m, [idx])
-    m["x"].owner is not m["y"].owner
-    _m = m.clone()._marginalize()
-    _m["x"].owner is _m["y"].owner
+    marginal_x = marginal_m["x"]
+    marginal_y = marginal_m["y"]
+    assert isinstance(marginal_x.owner.op, MarginalRV)
+    assert isinstance(marginal_y.owner.op, MarginalRV)
+    assert marginal_x.owner is marginal_y.owner
-    tp = m.initial_point()
+    ref_logp_x_y_fn = m.compile_logp([idx, x, y])
+    tp = marginal_m.initial_point()
     ref_logp_x_y = logsumexp([ref_logp_x_y_fn({**tp, **{"idx": idx}}) for idx in (0, 1)])
-    logp_x_y = m.compile_logp([x, y])(tp)
+    with pytest.warns(UserWarning, match="There are multiple dependent variables"):
+        logp_x_y = marginal_m.compile_logp([marginal_x, marginal_y])(tp)
     np.testing.assert_array_almost_equal(logp_x_y, ref_logp_x_y)
@@ -122,7 +151,7 @@ def test_one_to_many_unaligned_marginalized_rvs():
     """Test that marginalization works when there is more than one dependent RV with batch dimensions that are not aligned"""
     def build_model(build_batched: bool):
-        with MarginalModel() as m:
+        with Model() as m:
             if build_batched:
                 idx = pm.Bernoulli("idx", p=[0.75, 0.4], shape=(3, 2))
             else:
@@ -134,44 +163,41 @@ def test_one_to_many_unaligned_marginalized_rvs():
         return m
-    m = build_model(build_batched=True)
-    ref_m = build_model(build_batched=False)
+    marginal_m = marginalize(build_model(build_batched=True), ["idx"])
+    ref_m = marginalize(build_model(build_batched=False), [f"idx_{i}" for i in range(6)])
-    with pytest.warns(UserWarning, match="There are multiple dependent variables"):
-        m.marginalize(["idx"])
-        ref_m.marginalize([f"idx_{i}" for i in range(6)])
+    test_point = marginal_m.initial_point()
-    test_point = m.initial_point()
-    np.testing.assert_allclose(
-        m.compile_logp()(test_point),
-        ref_m.compile_logp()(test_point),
-    )
+    with pytest.warns(UserWarning, match="There are multiple dependent variables"):
+        np.testing.assert_allclose(
+            marginal_m.compile_logp()(test_point),
+            ref_m.compile_logp()(test_point),
+        )
 def test_many_to_one_marginalized_rvs():
     """Test when random variables depend on multiple marginalized variables"""
-    with MarginalModel() as m:
+    with Model() as m:
         x = pm.Bernoulli("x", 0.1)
         y = pm.Bernoulli("y", 0.3)
         z = pm.DiracDelta("z", c=x + y)
-    m.marginalize([x, y])
-    logp = m.compile_logp()
+    logp_fn = marginalize(m, [x, y]).compile_logp()
-    np.testing.assert_allclose(np.exp(logp({"z": 0})), 0.9 * 0.7)
-    np.testing.assert_allclose(np.exp(logp({"z": 1})), 0.9 * 0.3 + 0.1 * 0.7)
-    np.testing.assert_allclose(np.exp(logp({"z": 2})), 0.1 * 0.3)
+    np.testing.assert_allclose(np.exp(logp_fn({"z": 0})), 0.9 * 0.7)
+    np.testing.assert_allclose(np.exp(logp_fn({"z": 1})), 0.9 * 0.3 + 0.1 * 0.7)
+    np.testing.assert_allclose(np.exp(logp_fn({"z": 2})), 0.1 * 0.3)
 @pytest.mark.parametrize("batched", (False, "left", "right"))
 def test_nested_marginalized_rvs(batched):
     """Test that marginalization works when there are nested marginalized RVs"""
-    def build_model(build_batched: bool) -> MarginalModel:
+    def build_model(build_batched: bool) -> Model:
         idx_shape = (3,) if build_batched else ()
         sub_idx_shape = (5,) if not build_batched else (5, 3) if batched == "left" else (3, 5)
-        with MarginalModel() as m:
+        with Model() as m:
             sigma = pm.HalfNormal("sigma")
             idx = pm.Bernoulli("idx", p=0.75, shape=idx_shape)
@@ -186,10 +212,33 @@ def test_nested_marginalized_rvs(batched):
         return m
-    m = build_model(build_batched=batched)
-    with pytest.warns(UserWarning, match="There are multiple dependent variables"):
-        m.marginalize(["idx", "sub_idx"])
-    assert sorted(m.name for m in m.marginalized_rvs) == ["idx", "sub_idx"]
+    marginal_m = marginalize(build_model(build_batched=batched), ["idx", "sub_idx"])
+    assert all(rv.name not in ("idx", "sub_idx") for rv in marginal_m.free_RVs)
+    # Test forward draws and initial_point, shouldn't depend on batching, so we only test one case
+    if not batched:
+        # Test forward draws
+        dep_draws, sub_dep_draws = draw(
+            [marginal_m["dep"], marginal_m["sub_dep"]],
+            # Make sigma very small to make draws deterministic
+            givens={marginal_m["sigma"]: 0.001},
+            draws=1000,
+            random_seed=214,
+        )
+        assert sorted(np.unique(dep_draws.round()) == [-1000.0, 1000.0])
+        assert sorted(np.unique(sub_dep_draws.round()) == [-1000.0, -900.0, 1000.0, 1100.0])
+        # Test initial_point
+        ips = make_initial_point_expression(
+            free_rvs=marginal_m.free_RVs,
+            rvs_to_transforms=marginal_m.rvs_to_transforms,
+            initval_strategies={},
+        )
+        # After simplification, we should have only constants in the graph
+        ip_sigma, ip_dep, ip_sub_dep = constant_fold(ips)
+        np.testing.assert_allclose(ip_sigma, 1.0)
+        np.testing.assert_allclose(ip_dep, 1000.0)
+        np.testing.assert_allclose(ip_sub_dep, np.full((5,), 1100.0))
     # Test logp
     ref_m = build_model(build_batched=False)
@@ -210,14 +259,70 @@ def test_nested_marginalized_rvs(batched):
     if batched:
         ref_logp *= 3
-    test_point = m.initial_point()
+    test_point = marginal_m.initial_point()
     test_point["dep"] = np.full_like(test_point["dep"], 1000)
     test_point["sub_dep"] = np.full_like(test_point["sub_dep"], 1000 + 100)
-    logp = m.compile_logp(vars=[m["dep"], m["sub_dep"]])(test_point)
+    with pytest.warns(UserWarning, match="There are multiple dependent variables"):
+        logp = marginal_m.compile_logp(vars=[marginal_m["dep"], marginal_m["sub_dep"]])(test_point)
     np.testing.assert_almost_equal(logp, ref_logp)
+def test_interdependent_rvs():
+    """Test Marginalization when dependent RVs are interdependent."""
+    with Model() as m:
+        idx = pm.Bernoulli("idx", p=0.75)
+        x = pm.Normal("x", mu=idx * 2, sigma=1e-3)
+        # Y depends on both x and idx
+        y = pm.Normal("y", mu=x * idx * 2, sigma=1e-3)
+    marginal_m = marginalize(m, "idx")
+    marginal_x = marginal_m["x"]
+    marginal_y = marginal_m["y"]
+    assert isinstance(marginal_x.owner.op, MarginalRV)
+    assert isinstance(marginal_y.owner.op, MarginalRV)
+    assert marginal_x.owner is marginal_y.owner
+    # Test forward draws
+    x_draws, y_draws = draw([marginal_x, marginal_y], draws=1000, random_seed=54)
+    assert sorted(np.unique(x_draws.round())) == [0, 2]
+    assert sorted(np.unique(y_draws.round())) == [0, 4]
+    assert np.unique(y_draws[x_draws < 1].round()) == [0]
+    assert np.unique(y_draws[x_draws > 1].round()) == [4]
+    # Test initial_point
+    ips = make_initial_point_expression(
+        free_rvs=marginal_m.free_RVs,
+        rvs_to_transforms={},
+        initval_strategies={},
+    )
+    # After simplification, we should have only constants in the graph
+    ip_x, ip_y = constant_fold(ips)
+    np.testing.assert_allclose(ip_x, 2.0)
+    np.testing.assert_allclose(ip_y, 4.0)
+    # Test custom initval strategy
+    ips = make_initial_point_expression(
+        # Test that order does not matter
+        free_rvs=marginal_m.free_RVs[::-1],
+        rvs_to_transforms={},
+        initval_strategies={marginal_x: pt.constant(5.0)},
+    )
+    ip_y, ip_x = constant_fold(ips)
+    np.testing.assert_allclose(ip_x, 5.0)
+    np.testing.assert_allclose(ip_y, 10.0)
+    # Test logp
+    test_point = marginal_m.initial_point()
+    ref_logp_fn = m.compile_logp([m["idx"], m["x"], m["y"]])
+    ref_logp = logsumexp([ref_logp_fn({**test_point, **{"idx": idx}}) for idx in (0, 1)])
+    with pytest.warns(UserWarning, match="There are multiple dependent variables"):
+        logp = marginal_m.compile_logp([marginal_m["x"], marginal_m["y"]])(test_point)
+    np.testing.assert_almost_equal(logp, ref_logp)
 @pytest.mark.parametrize("advanced_indexing", (False, True))
 def test_marginalized_index_as_key(advanced_indexing):
     """Test we can marginalize graphs where indexing is used as a mapping."""
@@ -232,13 +337,13 @@ def test_marginalized_index_as_key(advanced_indexing):
         y_val = -1
         shape = ()
-    with MarginalModel() as m:
+    with Model() as m:
         x = pm.Categorical("x", p=w, shape=shape)
         y = pm.Normal("y", mu[x].T, sigma=1, observed=y_val)
-    m.marginalize(x)
+    marginal_m = marginalize(m, x)
-    marginal_logp = m.compile_logp(sum=False)({})[0]
+    marginal_logp = marginal_m.compile_logp(sum=False)({})[0]
     ref_logp = pm.logp(pm.NormalMixture.dist(w=w, mu=mu.T, sigma=1, shape=shape), y_val).eval()
     np.testing.assert_allclose(marginal_logp, ref_logp)
@@ -247,8 +352,8 @@ def test_marginalized_index_as_key(advanced_indexing):
 def test_marginalized_index_as_value_and_key():
     """Test we can marginalize graphs where marginalized_rv is indexed."""
-    def build_model(build_batched: bool) -> MarginalModel:
-        with MarginalModel() as m:
+    def build_model(build_batched: bool) -> Model:
+        with Model() as m:
             if build_batched:
                 latent_state = pm.Bernoulli("latent_state", p=0.3, size=(4,))
             else:
@@ -270,16 +375,16 @@ def test_marginalized_index_as_value_and_key():
     m = build_model(build_batched=True)
     ref_m = build_model(build_batched=False)
-    m.marginalize(["latent_state"])
-    ref_m.marginalize([f"latent_state_{i}" for i in range(4)])
+    m = marginalize(m, ["latent_state"])
+    ref_m = marginalize(ref_m, [f"latent_state_{i}" for i in range(4)])
     test_point = {"picked_intensity": 1}
     np.testing.assert_allclose(
         m.compile_logp()(test_point),
         ref_m.compile_logp()(test_point),
     )
-    m.marginalize(["picked_intensity"])
-    ref_m.marginalize(["picked_intensity"])
+    m = marginalize(m, ["picked_intensity"])
+    ref_m = marginalize(ref_m, ["picked_intensity"])
     test_point = {}
     np.testing.assert_allclose(
         m.compile_logp()(test_point),
@@ -291,99 +396,99 @@ class TestNotSupportedMixedDims:
     """Test lack of support for models where batch dims of marginalized variables are mixed."""
     def test_mixed_dims_via_transposed_dot(self):
-        with MarginalModel() as m:
+        with Model() as m:
             idx = pm.Bernoulli("idx", p=0.7, shape=2)
             y = pm.Normal("y", mu=idx @ idx.T)
-            with pytest.raises(NotImplementedError):
-                m.marginalize(idx)
+        with pytest.raises(NotImplementedError):
+            marginalize(m, idx)
     def test_mixed_dims_via_indexing(self):
         mean = pt.as_tensor([[0.1, 0.9], [0.6, 0.4]])
-        with MarginalModel() as m:
+        with Model() as m:
             idx = pm.Bernoulli("idx", p=0.7, shape=2)
             y = pm.Normal("y", mu=mean[idx, :] + mean[:, idx])
-            with pytest.raises(NotImplementedError):
-                m.marginalize(idx)
+        with pytest.raises(NotImplementedError):
+            marginalize(m, idx)
-        with MarginalModel() as m:
+        with Model() as m:
             idx = pm.Bernoulli("idx", p=0.7, shape=2)
             y = pm.Normal("y", mu=mean[idx, None] + mean[None, idx])
-            with pytest.raises(NotImplementedError):
-                m.marginalize(idx)
+        with pytest.raises(NotImplementedError):
+            marginalize(m, idx)
-        with MarginalModel() as m:
+        with Model() as m:
             idx = pm.Bernoulli("idx", p=0.7, shape=2)
             mu = pt.specify_broadcastable(mean[:, None][idx], 1) + pt.specify_broadcastable(
                 mean[None, :][:, idx], 0
             )
             y = pm.Normal("y", mu=mu)
-            with pytest.raises(NotImplementedError):
-                m.marginalize(idx)
+        with pytest.raises(NotImplementedError):
+            marginalize(m, idx)
-        with MarginalModel() as m:
+        with Model() as m:
             idx = pm.Bernoulli("idx", p=0.7, shape=2)
             y = pm.Normal("y", mu=idx[0] + idx[1])
-            with pytest.raises(NotImplementedError):
-                m.marginalize(idx)
+        with pytest.raises(NotImplementedError):
+            marginalize(m, idx)
     def test_mixed_dims_via_vector_indexing(self):
-        with MarginalModel() as m:
+        with Model() as m:
             idx = pm.Bernoulli("idx", p=0.7, shape=2)
             y = pm.Normal("y", mu=idx[[0, 1, 0, 0]])
-            with pytest.raises(NotImplementedError):
-                m.marginalize(idx)
+        with pytest.raises(NotImplementedError):
+            marginalize(m, idx)
-        with MarginalModel() as m:
+        with Model() as m:
             idx = pm.Categorical("key", p=[0.1, 0.3, 0.6], shape=(2, 2))
             y = pm.Normal("y", pt.as_tensor([[0, 1], [2, 3]])[idx.astype(bool)])
-            with pytest.raises(NotImplementedError):
-                m.marginalize(idx)
+        with pytest.raises(NotImplementedError):
+            marginalize(m, idx)
     def test_mixed_dims_via_support_dimension(self):
-        with MarginalModel() as m:
+        with Model() as m:
             x = pm.Bernoulli("x", p=0.7, shape=3)
             y = pm.Dirichlet("y", a=x * 10 + 1)
-            with pytest.raises(NotImplementedError):
-                m.marginalize(x)
+        with pytest.raises(NotImplementedError):
+            marginalize(m, x)
     def test_mixed_dims_via_nested_marginalization(self):
-        with MarginalModel() as m:
+        with Model() as m:
             x = pm.Bernoulli("x", p=0.7, shape=(3,))
             y = pm.Bernoulli("y", p=0.7, shape=(2,))
             z = pm.Normal("z", mu=pt.add.outer(x, y), shape=(3, 2))
-            with pytest.raises(NotImplementedError):
-                m.marginalize([x, y])
+        with pytest.raises(NotImplementedError):
+            marginalize(m, [x, y])
 def test_marginalized_deterministic_and_potential():
     rng = np.random.default_rng(299)
-    with MarginalModel() as m:
+    with Model() as m:
         x = pm.Bernoulli("x", p=0.7)
         y = pm.Normal("y", x)
         z = pm.Normal("z", x)
         det = pm.Deterministic("det", y + z)
         pot = pm.Potential("pot", y + z + 1)
-    with pytest.warns(UserWarning, match="There are multiple dependent variables"):
-        m.marginalize([x])
+    marginal_m = marginalize(m, [x])
     y_draw, z_draw, det_draw, pot_draw = pm.draw([y, z, det, pot], draws=5, random_seed=rng)
     np.testing.assert_almost_equal(y_draw + z_draw, det_draw)
     np.testing.assert_almost_equal(det_draw, pot_draw - 1)
-    y_value = m.rvs_to_values[y]
-    z_value = m.rvs_to_values[z]
-    det_value, pot_value = m.replace_rvs_by_values([det, pot])
+    y_value = marginal_m.rvs_to_values[marginal_m["y"]]
+    z_value = marginal_m.rvs_to_values[marginal_m["z"]]
+    det_value, pot_value = marginal_m.replace_rvs_by_values([marginal_m["det"], marginal_m["pot"]])
     assert set(inputvars([det_value, pot_value])) == {y_value, z_value}
     assert det_value.eval({y_value: 2, z_value: 5}) == 7
     assert pot_value.eval({y_value: 2, z_value: 5}) == 8
 def test_not_supported_marginalized_deterministic_and_potential():
-    with MarginalModel() as m:
+    with Model() as m:
         x = pm.Bernoulli("x", p=0.7)
         y = pm.Normal("y", x)
         det = pm.Deterministic("det", x + y)
@@ -391,9 +496,9 @@ def test_not_supported_marginalized_deterministic_and_potential():
     with pytest.raises(
         NotImplementedError, match="Cannot marginalize x due to dependent Deterministic det"
     ):
-        m.marginalize([x])
+        marginalize(m, [x])
-    with MarginalModel() as m:
+    with Model() as m:
         x = pm.Bernoulli("x", p=0.7)
         y = pm.Normal("y", x)
         pot = pm.Potential("pot", x + y)
@@ -401,7 +506,7 @@ def test_not_supported_marginalized_deterministic_and_potential():
     with pytest.raises(
         NotImplementedError, match="Cannot marginalize x due to dependent Potential pot"
     ):
-        m.marginalize([x])
+        marginalize(m, [x])
 @pytest.mark.parametrize(
@@ -410,15 +515,15 @@ def test_not_supported_marginalized_deterministic_and_potential():
         (None, does_not_warn()),
         (UNSET, does_not_warn()),
         (transforms.log, does_not_warn()),
-        (transforms.Chain([transforms.log, transforms.logodds]), does_not_warn()),
+        (transforms.Chain([transforms.logodds, transforms.log]), does_not_warn()),
         (
-            transforms.Interval(0, 1),
+            transforms.Interval(0, 2),
             pytest.warns(
                 UserWarning, match="which depends on the marginalized idx may no longer work"
             ),
         ),
         (
-            transforms.Chain([transforms.log, transforms.Interval(0, 1)]),
+            transforms.Chain([transforms.log, transforms.Interval(-1, 1)]),
             pytest.warns(
                 UserWarning, match="which depends on the marginalized idx may no longer work"
             ),
@@ -428,7 +533,7 @@ def test_not_supported_marginalized_deterministic_and_potential():
 def test_marginalized_transforms(transform, expected_warning):
     w = [0.1, 0.3, 0.6]
     data = [0, 5, 10]
-    initval = 0.5  # Value that will be negative on the unconstrained space
+    initval = 0.7  # Value that will be negative on the unconstrained space
     with pm.Model() as m_ref:
         sigma = pm.Mixture(
@@ -440,7 +545,7 @@ def test_marginalized_transforms(transform, expected_warning):
         )
         y = pm.Normal("y", 0, sigma, observed=data)
-    with MarginalModel() as m:
+    with Model() as m:
         idx = pm.Categorical("idx", p=w)
         sigma = pm.HalfNormal(
             "sigma",
@@ -453,32 +558,32 @@ def test_marginalized_transforms(transform, expected_warning):
                     3,
                 ),
             ),
-            initval=initval,
             default_transform=transform,
         )
         y = pm.Normal("y", 0, sigma, observed=data)
     with expected_warning:
-        m.marginalize([idx])
+        marginal_m = marginalize(m, [idx])
-    ip = m.initial_point()
+    marginal_m.set_initval(marginal_m["sigma"], initval)
+    ip = marginal_m.initial_point()
     if transform is not None:
         if transform is UNSET:
             transform_name = "log"
         else:
             transform_name = transform.name
-        assert f"sigma_{transform_name}__" in ip
-    np.testing.assert_allclose(m.compile_logp()(ip), m_ref.compile_logp()(ip))
+        assert -np.inf < ip[f"sigma_{transform_name}__"] < 0.0
+    np.testing.assert_allclose(marginal_m.compile_logp()(ip), m_ref.compile_logp()(ip))
 def test_data_container():
     """Test that marginalize can handle models with Data containers."""
-    with MarginalModel(coords={"obs": [0]}) as marginal_m:
+    with Model(coords={"obs": [0]}) as m:
         x = pm.Data("x", 2.5)
         idx = pm.Bernoulli("idx", p=0.7, dims="obs")
         y = pm.Normal("y", idx * x, dims="obs")
-    marginal_m.marginalize([idx])
+    marginal_m = marginalize(m, [idx])
     logp_fn = marginal_m.compile_logp()
@@ -501,7 +606,7 @@ def test_mutable_indexing_jax_backend():
     pytest.importorskip("jax")
     from pymc.sampling.jax import get_jaxified_logp
-    with MarginalModel() as model:
+    with Model() as model:
         data = pm.Data("data", np.zeros(10))
         cat_effect = pm.Normal("cat_effect", sigma=1, shape=5)
@@ -509,38 +614,8 @@ def test_mutable_indexing_jax_backend():
         is_outlier = pm.Bernoulli("is_outlier", 0.4, shape=10)
         pm.LogNormal("y", mu=cat_effect[cat_effect_idx], sigma=1 + is_outlier, observed=data)
-    model.marginalize(["is_outlier"])
-    get_jaxified_logp(model)
-def test_marginal_model_func():
-    def create_model(model_class):
-        with model_class(coords={"trial": range(10)}) as m:
-            idx = pm.Bernoulli("idx", p=0.5, dims="trial")
-            mu = pt.where(idx, 1, -1)
-            sigma = pm.HalfNormal("sigma")
-            y = pm.Normal("y", mu=mu, sigma=sigma, dims="trial", observed=[1] * 10)
-        return m
-    marginal_m = marginalize(create_model(pm.Model), ["idx"])
-    assert isinstance(marginal_m, MarginalModel)
-    reference_m = create_model(MarginalModel)
-    reference_m.marginalize(["idx"])
-    # Check forward graph representation is the same
-    marginal_fgraph, _ = fgraph_from_model(marginal_m)
-    reference_fgraph, _ = fgraph_from_model(reference_m)
-    assert equal_computations_up_to_root(marginal_fgraph.outputs, reference_fgraph.outputs)
-    # Check logp graph is the same
-    # This fails because OpFromGraphs comparison is broken
-    # assert equal_computations_up_to_root([marginal_m.logp()], [reference_m.logp()])
-    ip = marginal_m.initial_point()
-    np.testing.assert_allclose(
-        marginal_m.compile_logp()(ip),
-        reference_m.compile_logp()(ip),
-    )
+    marginal_model = marginalize(model, ["is_outlier"])
+    get_jaxified_logp(marginal_model)
 class TestFullModels:
@@ -559,10 +634,10 @@ class TestFullModels:
         # fmt: on
         years = np.arange(1851, 1962)
-        with MarginalModel() as disaster_model:
+        with Model() as disaster_model:
             switchpoint = pm.DiscreteUniform("switchpoint", lower=years.min(), upper=years.max())
-            early_rate = pm.Exponential("early_rate", 1.0, initval=3)
-            late_rate = pm.Exponential("late_rate", 1.0, initval=1)
+            early_rate = pm.Exponential("early_rate", 1.0)
+            late_rate = pm.Exponential("late_rate", 1.0)
             rate = pm.math.switch(switchpoint >= years, early_rate, late_rate)
             with pytest.warns(Warning):
                 disasters = pm.Poisson("disasters", rate, observed=disaster_data)
@@ -573,17 +648,21 @@ class TestFullModels:
         m, years = disaster_model
         ip = m.initial_point()
+        ip["late_rate_log__"] += 1.0  # Make early and endpoint ip different
         ip.pop("switchpoint")
         ref_logp_fn = m.compile_logp(
             [m["switchpoint"], m["disasters_observed"], m["disasters_unobserved"]]
         )
         ref_logp = logsumexp([ref_logp_fn({**ip, **{"switchpoint": year}}) for year in years])
-        with pytest.warns(UserWarning, match="There are multiple dependent variables"):
-            m.marginalize(m["switchpoint"])
+        marginal_m = marginalize(m, m["switchpoint"])
-        logp = m.compile_logp([m["disasters_observed"], m["disasters_unobserved"]])(ip)
-        np.testing.assert_almost_equal(logp, ref_logp)
+        with pytest.warns(UserWarning, match="There are multiple dependent variables"):
+            marginal_m_logp = marginal_m.compile_logp(
+                [marginal_m["disasters_observed"], marginal_m["disasters_unobserved"]]
+            )(ip)
+        np.testing.assert_almost_equal(marginal_m_logp, ref_logp)
     @pytest.mark.slow
     def test_change_point_model_sampling(self, disaster_model):
@@ -596,13 +675,13 @@ class TestFullModels:
                 sample=("draw", "chain")
             )
-        with pytest.warns(UserWarning, match="There are multiple dependent variables"):
-            m.marginalize([m["switchpoint"]])
+        marginal_m = marginalize(m, "switchpoint")
-        with m:
-            after_marg = pm.sample(chains=2, random_seed=rng).posterior.stack(
-                sample=("draw", "chain")
-            )
+        with marginal_m:
+            with pytest.warns(UserWarning, match="There are multiple dependent variables"):
+                after_marg = pm.sample(chains=2, random_seed=rng).posterior.stack(
+                    sample=("draw", "chain")
+                )
         np.testing.assert_allclose(
             before_marg["early_rate"].mean(), after_marg["early_rate"].mean(), rtol=1e-2
@@ -618,7 +697,7 @@ class TestFullModels:
     @pytest.mark.parametrize("univariate", (True, False))
     def test_vector_univariate_mixture(self, univariate):
-        with MarginalModel() as m:
+        with Model() as m:
             idx = pm.Bernoulli("idx", p=0.5, shape=(2,) if univariate else ())
             def dist(idx, size):
@@ -630,8 +709,8 @@ class TestFullModels:
             pm.CustomDist("norm", idx, dist=dist)
-        m.marginalize(idx)
-        logp_fn = m.compile_logp()
+        marginal_m = marginalize(m, idx)
+        logp_fn = marginal_m.compile_logp()
         if univariate:
             with pm.Model() as ref_m:
@@ -659,16 +738,17 @@ class TestFullModels:
             np.testing.assert_allclose(logp_fn(pt), ref_logp_fn(pt))
     def test_k_censored_clusters_model(self):
-        def build_model(build_batched: bool) -> MarginalModel:
-            data = np.array([[-1.0, -1.0], [0.0, 0.0], [1.0, 1.0]])
-            nobs = data.shape[0]
-            n_clusters = 5
+        data = np.array([[-1.0, -1.0], [0.0, 0.0], [1.0, 1.0]])
+        nobs = data.shape[0]
+        n_clusters = 5
+        def build_model(build_batched: bool) -> Model:
             coords = {
                 "cluster": range(n_clusters),
                 "ndim": ("x", "y"),
                 "obs": range(nobs),
             }
-            with MarginalModel(coords=coords) as m:
+            with Model(coords=coords) as m:
                 if build_batched:
                     idx = pm.Categorical("idx", p=np.ones(n_clusters) / n_clusters, dims=["obs"])
                 else:
@@ -683,7 +763,6 @@ class TestFullModels:
                     "mu_x",
                     dims=["cluster"],
                     transform=ordered,
-                    initval=np.linspace(-1, 1, n_clusters),
                 )
                 mu_y = pm.Normal("mu_y", dims=["cluster"])
                 mu = pm.math.stack([mu_x, mu_y], axis=-1)  # (cluster, ndim)
@@ -702,12 +781,10 @@ class TestFullModels:
             return m
-        m = build_model(build_batched=True)
-        ref_m = build_model(build_batched=False)
-        m.marginalize([m["idx"]])
-        ref_m.marginalize([n for n in ref_m.named_vars if n.startswith("idx_")])
+        m = marginalize(build_model(build_batched=True), "idx")
+        m.set_initval(m["mu_x"], np.linspace(-1, 1, n_clusters))
+        ref_m = marginalize(build_model(build_batched=False), [f"idx_{i}" for i in range(nobs)])
         test_point = m.initial_point()
         np.testing.assert_almost_equal(
             m.compile_logp()(test_point),
@@ -715,9 +792,32 @@ class TestFullModels:
         )
+def test_unmarginalize():
+    with pm.Model() as m:
+        idx = pm.Bernoulli("idx", p=0.5)
+        sub_idx = pm.Bernoulli("sub_idx", p=pt.as_tensor([0.3, 0.7])[idx])
+        x = pm.Normal("x", mu=(idx + sub_idx) - 1)
+    marginal_m = marginalize(m, [idx, sub_idx])
+    assert not equivalent_models(marginal_m, m)
+    unmarginal_m = unmarginalize(marginal_m)
+    assert equivalent_models(unmarginal_m, m)
+    unmarginal_idx_explicit = unmarginalize(marginal_m, ("idx", "sub_idx"))
+    assert equivalent_models(unmarginal_idx_explicit, m)
+    # Test partial unmarginalize
+    unmarginal_idx = unmarginalize(marginal_m, "idx")
+    assert equivalent_models(unmarginal_idx, marginalize(m, "sub_idx"))
+    unmarginal_sub_idx = unmarginalize(marginal_m, "sub_idx")
+    assert equivalent_models(unmarginal_sub_idx, marginalize(m, "idx"))
 class TestRecoverMarginals:
     def test_basic(self):
-        with MarginalModel() as m:
+        with Model() as m:
             sigma = pm.HalfNormal("sigma")
             p = np.array([0.5, 0.2, 0.3])
             k = pm.Categorical("k", p=p)
@@ -725,11 +825,11 @@ class TestRecoverMarginals:
             mu_ = pt.as_tensor_variable(mu)
             y = pm.Normal("y", mu=mu_[k], sigma=sigma)
-        m.marginalize([k])
+        marginal_m = marginalize(m, [k])
         rng = np.random.default_rng(211)
-        with m:
+        with marginal_m:
             prior = pm.sample_prior_predictive(
                 draws=20,
                 random_seed=rng,
@@ -737,7 +837,7 @@ class TestRecoverMarginals:
             )
             idata = InferenceData(posterior=dict_to_dataset(prior))
-        idata = m.recover_marginals(idata, return_samples=True)
+        idata = recover_marginals(marginal_m, idata, return_samples=True)
         post = idata.posterior
         assert "k" in post
         assert "lp_k" in post
@@ -763,15 +863,15 @@ class TestRecoverMarginals:
     def test_coords(self):
         """Test if coords can be recovered with marginalized value had it originally"""
-        with MarginalModel(coords={"year": [1990, 1991, 1992]}) as m:
+        with Model(coords={"year": [1990, 1991, 1992]}) as m:
             sigma = pm.HalfNormal("sigma")
             idx = pm.Bernoulli("idx", p=0.75, dims="year")
             x = pm.Normal("x", mu=idx, sigma=sigma, dims="year")
-        m.marginalize([idx])
+        marginal_m = marginalize(m, [idx])
         rng = np.random.default_rng(211)
-        with m:
+        with marginal_m:
             prior = pm.sample_prior_predictive(
                 draws=20,
                 random_seed=rng,
@@ -781,23 +881,23 @@ class TestRecoverMarginals:
                 posterior=dict_to_dataset({k: np.expand_dims(prior[k], axis=0) for k in prior})
             )
-        idata = m.recover_marginals(idata, return_samples=True)
+        idata = recover_marginals(marginal_m, idata, return_samples=True)
         post = idata.posterior
         assert post.idx.dims == ("chain", "draw", "year")
         assert post.lp_idx.dims == ("chain", "draw", "year", "lp_idx_dim")
     def test_batched(self):
         """Test that marginalization works for batched random variables"""
-        with MarginalModel() as m:
+        with Model() as m:
             sigma = pm.HalfNormal("sigma")
             idx = pm.Bernoulli("idx", p=0.7, shape=(3, 2))
             y = pm.Normal("y", mu=idx.T, sigma=sigma, shape=(2, 3))
-        m.marginalize([idx])
+        marginal_m = marginalize(m, [idx])
         rng = np.random.default_rng(211)
-        with m:
+        with marginal_m:
             prior = pm.sample_prior_predictive(
                 draws=20,
                 random_seed=rng,
@@ -807,7 +907,7 @@ class TestRecoverMarginals:
                 posterior=dict_to_dataset({k: np.expand_dims(prior[k], axis=0) for k in prior})
             )
-        idata = m.recover_marginals(idata, return_samples=True)
+        idata = recover_marginals(marginal_m, idata, return_samples=True)
         post = idata.posterior
         assert post["y"].shape == (1, 20, 2, 3)
         assert post["idx"].shape == (1, 20, 3, 2)
@@ -816,16 +916,16 @@ class TestRecoverMarginals:
     def test_nested(self):
         """Test that marginalization works when there are nested marginalized RVs"""
-        with MarginalModel() as m:
+        with Model() as m:
             idx = pm.Bernoulli("idx", p=0.75)
             sub_idx = pm.Bernoulli("sub_idx", p=pt.switch(pt.eq(idx, 0), 0.15, 0.95))
             sub_dep = pm.Normal("y", mu=idx + sub_idx, sigma=1.0)
-        m.marginalize([idx, sub_idx])
+        marginal_m = marginalize(m, [idx, sub_idx])
         rng = np.random.default_rng(211)
-        with m:
+        with marginal_m:
             prior = pm.sample_prior_predictive(
                 draws=20,
                 random_seed=rng,
@@ -833,7 +933,7 @@ class TestRecoverMarginals:
             )
             idata = InferenceData(posterior=dict_to_dataset(prior))
-        idata = m.recover_marginals(idata, return_samples=True)
+        idata = recover_marginals(marginal_m, idata, return_samples=True)
         post = idata.posterior
         assert "idx" in post
         assert "lp_idx" in post

pymc-extras 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

pymc-extras 0.2.0-py3-none-any.whl → 0.2.2-py3-none-any.whl