pymc-extras 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. pymc_extras/__init__.py +29 -0
  2. pymc_extras/distributions/__init__.py +40 -0
  3. pymc_extras/distributions/continuous.py +351 -0
  4. pymc_extras/distributions/discrete.py +399 -0
  5. pymc_extras/distributions/histogram_utils.py +163 -0
  6. pymc_extras/distributions/multivariate/__init__.py +3 -0
  7. pymc_extras/distributions/multivariate/r2d2m2cp.py +446 -0
  8. pymc_extras/distributions/timeseries.py +356 -0
  9. pymc_extras/gp/__init__.py +18 -0
  10. pymc_extras/gp/latent_approx.py +183 -0
  11. pymc_extras/inference/__init__.py +18 -0
  12. pymc_extras/inference/find_map.py +431 -0
  13. pymc_extras/inference/fit.py +44 -0
  14. pymc_extras/inference/laplace.py +570 -0
  15. pymc_extras/inference/pathfinder.py +134 -0
  16. pymc_extras/inference/smc/__init__.py +13 -0
  17. pymc_extras/inference/smc/sampling.py +451 -0
  18. pymc_extras/linearmodel.py +130 -0
  19. pymc_extras/model/__init__.py +0 -0
  20. pymc_extras/model/marginal/__init__.py +0 -0
  21. pymc_extras/model/marginal/distributions.py +276 -0
  22. pymc_extras/model/marginal/graph_analysis.py +372 -0
  23. pymc_extras/model/marginal/marginal_model.py +595 -0
  24. pymc_extras/model/model_api.py +56 -0
  25. pymc_extras/model/transforms/__init__.py +0 -0
  26. pymc_extras/model/transforms/autoreparam.py +434 -0
  27. pymc_extras/model_builder.py +759 -0
  28. pymc_extras/preprocessing/__init__.py +0 -0
  29. pymc_extras/preprocessing/standard_scaler.py +17 -0
  30. pymc_extras/printing.py +182 -0
  31. pymc_extras/statespace/__init__.py +13 -0
  32. pymc_extras/statespace/core/__init__.py +7 -0
  33. pymc_extras/statespace/core/compile.py +48 -0
  34. pymc_extras/statespace/core/representation.py +438 -0
  35. pymc_extras/statespace/core/statespace.py +2268 -0
  36. pymc_extras/statespace/filters/__init__.py +15 -0
  37. pymc_extras/statespace/filters/distributions.py +453 -0
  38. pymc_extras/statespace/filters/kalman_filter.py +820 -0
  39. pymc_extras/statespace/filters/kalman_smoother.py +126 -0
  40. pymc_extras/statespace/filters/utilities.py +59 -0
  41. pymc_extras/statespace/models/ETS.py +670 -0
  42. pymc_extras/statespace/models/SARIMAX.py +536 -0
  43. pymc_extras/statespace/models/VARMAX.py +393 -0
  44. pymc_extras/statespace/models/__init__.py +6 -0
  45. pymc_extras/statespace/models/structural.py +1651 -0
  46. pymc_extras/statespace/models/utilities.py +387 -0
  47. pymc_extras/statespace/utils/__init__.py +0 -0
  48. pymc_extras/statespace/utils/constants.py +74 -0
  49. pymc_extras/statespace/utils/coord_tools.py +0 -0
  50. pymc_extras/statespace/utils/data_tools.py +182 -0
  51. pymc_extras/utils/__init__.py +23 -0
  52. pymc_extras/utils/linear_cg.py +290 -0
  53. pymc_extras/utils/pivoted_cholesky.py +69 -0
  54. pymc_extras/utils/prior.py +200 -0
  55. pymc_extras/utils/spline.py +131 -0
  56. pymc_extras/version.py +11 -0
  57. pymc_extras/version.txt +1 -0
  58. pymc_extras-0.2.0.dist-info/LICENSE +212 -0
  59. pymc_extras-0.2.0.dist-info/METADATA +99 -0
  60. pymc_extras-0.2.0.dist-info/RECORD +101 -0
  61. pymc_extras-0.2.0.dist-info/WHEEL +5 -0
  62. pymc_extras-0.2.0.dist-info/top_level.txt +2 -0
  63. tests/__init__.py +13 -0
  64. tests/distributions/__init__.py +19 -0
  65. tests/distributions/test_continuous.py +185 -0
  66. tests/distributions/test_discrete.py +210 -0
  67. tests/distributions/test_discrete_markov_chain.py +258 -0
  68. tests/distributions/test_multivariate.py +304 -0
  69. tests/model/__init__.py +0 -0
  70. tests/model/marginal/__init__.py +0 -0
  71. tests/model/marginal/test_distributions.py +131 -0
  72. tests/model/marginal/test_graph_analysis.py +182 -0
  73. tests/model/marginal/test_marginal_model.py +867 -0
  74. tests/model/test_model_api.py +29 -0
  75. tests/statespace/__init__.py +0 -0
  76. tests/statespace/test_ETS.py +411 -0
  77. tests/statespace/test_SARIMAX.py +405 -0
  78. tests/statespace/test_VARMAX.py +184 -0
  79. tests/statespace/test_coord_assignment.py +116 -0
  80. tests/statespace/test_distributions.py +270 -0
  81. tests/statespace/test_kalman_filter.py +326 -0
  82. tests/statespace/test_representation.py +175 -0
  83. tests/statespace/test_statespace.py +818 -0
  84. tests/statespace/test_statespace_JAX.py +156 -0
  85. tests/statespace/test_structural.py +829 -0
  86. tests/statespace/utilities/__init__.py +0 -0
  87. tests/statespace/utilities/shared_fixtures.py +9 -0
  88. tests/statespace/utilities/statsmodel_local_level.py +42 -0
  89. tests/statespace/utilities/test_helpers.py +310 -0
  90. tests/test_blackjax_smc.py +222 -0
  91. tests/test_find_map.py +98 -0
  92. tests/test_histogram_approximation.py +109 -0
  93. tests/test_laplace.py +238 -0
  94. tests/test_linearmodel.py +208 -0
  95. tests/test_model_builder.py +306 -0
  96. tests/test_pathfinder.py +45 -0
  97. tests/test_pivoted_cholesky.py +24 -0
  98. tests/test_printing.py +98 -0
  99. tests/test_prior_from_trace.py +172 -0
  100. tests/test_splines.py +77 -0
  101. tests/utils.py +31 -0
tests/test_model_builder.py ADDED
@@ -0,0 +1,306 @@
+ # Copyright 2023 The PyMC Developers
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #   http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import hashlib
+ import json
+ import sys
+ import tempfile
+
+ import numpy as np
+ import pandas as pd
+ import pymc as pm
+ import pytest
+
+ from pymc_extras.model_builder import ModelBuilder
+
+
+ @pytest.fixture(scope="module")
+ def toy_X():
+     x = np.linspace(start=0, stop=1, num=100)
+     X = pd.DataFrame({"input": x})
+     return X
+
+
+ @pytest.fixture(scope="module")
+ def toy_y(toy_X):
+     y = 5 * toy_X["input"] + 3
+     y = y + np.random.normal(0, 1, size=len(toy_X))
+     y = pd.Series(y, name="output")
+     return y
+
+
+ def get_unfitted_model_instance(X, y):
+     """Creates an unfitted model instance to which idata can be copied in
+     and then used as a fitted model instance. That way a fitted model
+     can be used multiple times without having to run `fit` multiple times."""
+     sampler_config = {
+         "draws": 20,
+         "tune": 10,
+         "chains": 2,
+         "target_accept": 0.95,
+     }
+     model_config = {
+         "a": {"loc": 0, "scale": 10, "dims": ("numbers",)},
+         "b": {"loc": 0, "scale": 10},
+         "obs_error": 2,
+     }
+     model = test_ModelBuilder(
+         model_config=model_config, sampler_config=sampler_config, test_parameter="test_parameter"
+     )
+     # Do the things that `model.fit` does except sample to create idata.
+     model._generate_and_preprocess_model_data(X, y.values.flatten())
+     model.build_model(X, y)
+     return model
+
+
+ @pytest.fixture(scope="module")
+ def fitted_model_instance_base(toy_X, toy_y):
+     """Because fitting takes a relatively long time, this is intended to
+     be used only once and then have new instances created and fit data patched in
+     for tests that use a fitted model instance. Tests should use
+     `fitted_model_instance` instead of this."""
+     model = get_unfitted_model_instance(toy_X, toy_y)
+     model.fit(toy_X, toy_y)
+     return model
+
+
+ @pytest.fixture
+ def fitted_model_instance(toy_X, toy_y, fitted_model_instance_base):
+     """Get a fitted model instance. A new instance is created and fit data is
+     patched in, so tests using this fixture can modify the model object without
+     affecting other tests."""
+     model = get_unfitted_model_instance(toy_X, toy_y)
+     model.idata = fitted_model_instance_base.idata.copy()
+     return model
+
+
+ class test_ModelBuilder(ModelBuilder):
+     def __init__(self, model_config=None, sampler_config=None, test_parameter=None):
+         self.test_parameter = test_parameter
+         super().__init__(model_config=model_config, sampler_config=sampler_config)
+
+     _model_type = "test_model"
+     version = "0.1"
+
+     def build_model(self, X: pd.DataFrame, y: pd.Series, model_config=None):
+         coords = {"numbers": np.arange(len(X))}
+         self.generate_and_preprocess_model_data(X, y)
+         with pm.Model(coords=coords) as self.model:
+             if model_config is None:
+                 model_config = self.model_config
+             x = pm.Data("x", self.X["input"].values)
+             y_data = pm.Data("y_data", self.y)
+
+             # prior parameters
+             a_loc = model_config["a"]["loc"]
+             a_scale = model_config["a"]["scale"]
+             b_loc = model_config["b"]["loc"]
+             b_scale = model_config["b"]["scale"]
+             obs_error = model_config["obs_error"]
+
+             # priors
+             a = pm.Normal("a", a_loc, sigma=a_scale, dims=model_config["a"]["dims"])
+             b = pm.Normal("b", b_loc, sigma=b_scale)
+             obs_error = pm.HalfNormal("σ_model_fmc", obs_error)
+
+             # observed data
+             output = pm.Normal("output", a + b * x, obs_error, shape=x.shape, observed=y_data)
+
+     def _save_input_params(self, idata):
+         idata.attrs["test_parameter"] = json.dumps(self.test_parameter)
+
+     @property
+     def output_var(self):
+         return "output"
+
+     def _data_setter(self, x: pd.Series, y: pd.Series = None):
+         with self.model:
+             pm.set_data({"x": x.values})
+             if y is not None:
+                 pm.set_data({"y_data": y.values})
+
+     @property
+     def _serializable_model_config(self):
+         return self.model_config
+
+     def generate_and_preprocess_model_data(self, X: pd.DataFrame, y: pd.Series):
+         self.X = X
+         self.y = y
+
+     @staticmethod
+     def get_default_model_config() -> dict:
+         return {
+             "a": {"loc": 0, "scale": 10, "dims": ("numbers",)},
+             "b": {"loc": 0, "scale": 10},
+             "obs_error": 2,
+         }
+
+     def _generate_and_preprocess_model_data(
+         self, X: pd.DataFrame | pd.Series, y: pd.Series
+     ) -> None:
+         self.X = X
+         self.y = y
+
+     @staticmethod
+     def get_default_sampler_config() -> dict:
+         return {
+             "draws": 10,
+             "tune": 10,
+             "chains": 3,
+             "target_accept": 0.95,
+         }
+
+
+ def test_save_input_params(fitted_model_instance):
+     assert fitted_model_instance.idata.attrs["test_parameter"] == '"test_parameter"'
+
+
+ @pytest.mark.skipif(
+     sys.platform == "win32", reason="Permissions for temp files not granted on windows CI."
+ )
+ def test_save_load(fitted_model_instance):
+     temp = tempfile.NamedTemporaryFile(mode="w", encoding="utf-8", delete=False)
+     fitted_model_instance.save(temp.name)
+     test_builder2 = test_ModelBuilder.load(temp.name)
+     assert fitted_model_instance.idata.groups() == test_builder2.idata.groups()
+     assert fitted_model_instance.id == test_builder2.id
+     x_pred = np.random.uniform(low=0, high=1, size=100)
+     prediction_data = pd.DataFrame({"input": x_pred})
+     pred1 = fitted_model_instance.predict(prediction_data["input"])
+     pred2 = test_builder2.predict(prediction_data["input"])
+     assert pred1.shape == pred2.shape
+     temp.close()
+
+
+ def test_initial_build_and_fit(fitted_model_instance, check_idata=True) -> ModelBuilder:
+     if check_idata:
+         assert fitted_model_instance.idata is not None
+         assert "posterior" in fitted_model_instance.idata.groups()
+
+
+ def test_save_without_fit_raises_runtime_error():
+     model_builder = test_ModelBuilder()
+     with pytest.raises(RuntimeError):
+         model_builder.save("saved_model")
+
+
+ def test_empty_sampler_config_fit(toy_X, toy_y):
+     sampler_config = {}
+     model_builder = test_ModelBuilder(sampler_config=sampler_config)
+     model_builder.idata = model_builder.fit(X=toy_X, y=toy_y)
+     assert model_builder.idata is not None
+     assert "posterior" in model_builder.idata.groups()
+
+
+ def test_fit(fitted_model_instance):
+     prediction_data = pd.DataFrame({"input": np.random.uniform(low=0, high=1, size=100)})
+     pred = fitted_model_instance.predict(prediction_data["input"])
+     post_pred = fitted_model_instance.sample_posterior_predictive(
+         prediction_data["input"], extend_idata=True, combined=True
+     )
+     assert post_pred[fitted_model_instance.output_var].shape[0] == prediction_data.input.shape[0]
+
+
+ def test_fit_no_y(toy_X):
+     model_builder = test_ModelBuilder()
+     model_builder.idata = model_builder.fit(X=toy_X, chains=1, tune=1, draws=1)
+     assert model_builder.model is not None
+     assert model_builder.idata is not None
+     assert "posterior" in model_builder.idata.groups()
+
+
+ def test_predict(fitted_model_instance):
+     x_pred = np.random.uniform(low=0, high=1, size=100)
+     prediction_data = pd.DataFrame({"input": x_pred})
+     pred = fitted_model_instance.predict(prediction_data["input"])
+     # Perform elementwise comparison using numpy
+     assert isinstance(pred, np.ndarray)
+     assert len(pred) > 0
+
+
+ @pytest.mark.parametrize("combined", [True, False])
+ def test_sample_posterior_predictive(fitted_model_instance, combined):
+     n_pred = 100
+     x_pred = np.random.uniform(low=0, high=1, size=n_pred)
+     prediction_data = pd.DataFrame({"input": x_pred})
+     pred = fitted_model_instance.sample_posterior_predictive(
+         prediction_data["input"], combined=combined, extend_idata=True
+     )
+     chains = fitted_model_instance.idata.sample_stats.sizes["chain"]
+     draws = fitted_model_instance.idata.sample_stats.sizes["draw"]
+     expected_shape = (n_pred, chains * draws) if combined else (chains, draws, n_pred)
+     assert pred[fitted_model_instance.output_var].shape == expected_shape
+     assert np.issubdtype(pred[fitted_model_instance.output_var].dtype, np.floating)
+
+
+ @pytest.mark.parametrize("group", ["prior_predictive", "posterior_predictive"])
+ @pytest.mark.parametrize("extend_idata", [True, False])
+ def test_sample_xxx_extend_idata_param(fitted_model_instance, group, extend_idata):
+     output_var = fitted_model_instance.output_var
+     idata_prev = fitted_model_instance.idata[group][output_var]
+
+     # Since coordinates are provided, the dimension must match
+     n_pred = 100  # Must match toy_X
+     x_pred = np.random.uniform(0, 1, n_pred)
+
+     prediction_data = pd.DataFrame({"input": x_pred})
+     if group == "prior_predictive":
+         prediction_method = fitted_model_instance.sample_prior_predictive
+     else:  # group == "posterior_predictive"
+         prediction_method = fitted_model_instance.sample_posterior_predictive
+
+     pred = prediction_method(prediction_data["input"], combined=False, extend_idata=extend_idata)
+
+     pred_unstacked = pred[output_var].values
+     idata_now = fitted_model_instance.idata[group][output_var].values
+
+     if extend_idata:
+         # After sampling, data in the model should be the same as the predictions
+         np.testing.assert_array_equal(idata_now, pred_unstacked)
+         # Data in the model should NOT be the same as before
+         if idata_now.shape == idata_prev.values.shape:
+             assert np.sum(np.abs(idata_now - idata_prev.values) < 1e-5) <= 2
+     else:
+         # After sampling, data in the model should be the same as it was before
+         np.testing.assert_array_equal(idata_now, idata_prev.values)
+         # Data in the model should NOT be the same as the predictions
+         if idata_now.shape == pred_unstacked.shape:
+             assert np.sum(np.abs(idata_now - pred_unstacked) < 1e-5) <= 2
+
+
+ def test_model_config_formatting():
+     model_config = {
+         "a": {
+             "loc": [0, 0],
+             "scale": 10,
+             "dims": [
+                 "x",
+             ],
+         },
+     }
+     model_builder = test_ModelBuilder()
+     converted_model_config = model_builder._model_config_formatting(model_config)
+     np.testing.assert_equal(converted_model_config["a"]["dims"], ("x",))
+     np.testing.assert_equal(converted_model_config["a"]["loc"], np.array([0, 0]))
+
+
+ def test_id():
+     model_builder = test_ModelBuilder()
+     expected_id = hashlib.sha256(
+         str(model_builder.model_config.values()).encode()
+         + model_builder.version.encode()
+         + model_builder._model_type.encode()
+     ).hexdigest()[:16]
+
+     assert model_builder.id == expected_id
tests/test_pathfinder.py ADDED
@@ -0,0 +1,45 @@
+ # Copyright 2022 The PyMC Developers
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #   http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import sys
+
+ import numpy as np
+ import pymc as pm
+ import pytest
+
+ import pymc_extras as pmx
+
+
+ @pytest.mark.skipif(sys.platform == "win32", reason="JAX not supported on windows.")
+ def test_pathfinder():
+     # Data of the Eight Schools Model
+     J = 8
+     y = np.array([28.0, 8.0, -3.0, 7.0, -1.0, 1.0, 18.0, 12.0])
+     sigma = np.array([15.0, 10.0, 16.0, 11.0, 9.0, 11.0, 10.0, 18.0])
+
+     with pm.Model() as model:
+         mu = pm.Normal("mu", mu=0.0, sigma=10.0)
+         tau = pm.HalfCauchy("tau", 5.0)
+
+         theta = pm.Normal("theta", mu=0, sigma=1, shape=J)
+         obs = pm.Normal("obs", mu=mu + tau * theta, sigma=sigma, shape=J, observed=y)
+
+         idata = pmx.fit(method="pathfinder", random_seed=41)
+
+     assert idata.posterior["mu"].shape == (1, 1000)
+     assert idata.posterior["tau"].shape == (1, 1000)
+     assert idata.posterior["theta"].shape == (1, 1000, 8)
+     # FIXME: pathfinder doesn't find a reasonable mean! Fix bug or choose a model pathfinder can handle
+     # np.testing.assert_allclose(idata.posterior["mu"].mean(), 5.0)
+     np.testing.assert_allclose(idata.posterior["tau"].mean(), 4.15, atol=0.5)
tests/test_pivoted_cholesky.py ADDED
@@ -0,0 +1,24 @@
+ # try:
+ #     import gpytorch
+ #     import torch
+ # except ImportError as e:
+ #     # print(
+ #     #     f"Please install Pytorch and GPyTorch to use this pivoted Cholesky implementation. Error {e}"
+ #     # )
+ #     pass
+ # import numpy as np
+ #
+ # import pymc_experimental as pmx
+ #
+ #
+ # def test_match_gpytorch_linearcg_output():
+ #     N = 10
+ #     rank = 5
+ #     np.random.seed(1234)  # nans with seed 1234
+ #     K = np.random.randn(N, N)
+ #     K = K @ K.T + N * np.eye(N)
+ #     K_torch = torch.from_numpy(K)
+ #
+ #     L_gpt = gpytorch.pivoted_cholesky(K_torch, rank=rank, error_tol=1e-3)
+ #     L_np, _ = pmx.utils.pivoted_cholesky(K, max_iter=rank, error_tol=1e-3)
+ #     assert np.allclose(L_gpt, L_np.T)
tests/test_printing.py ADDED
@@ -0,0 +1,98 @@
+ import numpy as np
+ import pymc as pm
+
+ from rich.console import Console
+
+ from pymc_extras.printing import model_table
+
+
+ def get_text(table) -> str:
+     console = Console(width=80)
+     with console.capture() as capture:
+         console.print(table)
+     return capture.get()
+
+
+ def test_model_table():
+     with pm.Model(coords={"trial": range(6), "subject": range(20)}) as model:
+         x_data = pm.Data("x_data", np.random.normal(size=(6, 20)), dims=("trial", "subject"))
+         y_data = pm.Data("y_data", np.random.normal(size=(6, 20)), dims=("trial", "subject"))
+
+         mu = pm.Normal("mu", mu=0, sigma=1)
+         sigma = pm.HalfNormal("sigma", sigma=1)
+         global_intercept = pm.Normal("global_intercept", mu=0, sigma=1)
+         intercept_subject = pm.Normal("intercept_subject", mu=0, sigma=1, shape=(20, 1))
+         beta_subject = pm.Normal("beta_subject", mu=mu, sigma=sigma, dims="subject")
+
+         mu_trial = pm.Deterministic(
+             "mu_trial",
+             global_intercept.squeeze() + intercept_subject + beta_subject * x_data,
+             dims=["trial", "subject"],
+         )
+         noise = pm.Exponential("noise", lam=1)
+         y = pm.Normal("y", mu=mu_trial, sigma=noise, observed=y_data, dims=("trial", "subject"))
+
+         pm.Potential("beta_subject_penalty", -pm.math.abs(beta_subject), dims="subject")
+
+     table_txt = get_text(model_table(model))
+     expected = """ Variable Expression Dimensions
+ ────────────────────────────────────────────────────────────────────────────────
+ x_data = Data trial[6] × subject[20]
+ y_data = Data trial[6] × subject[20]
+
+ mu ~ Normal(0, 1)
+ sigma ~ HalfNormal(0, 1)
+ global_intercept ~ Normal(0, 1)
+ intercept_subject ~ Normal(0, 1) [20, 1]
+ beta_subject ~ Normal(mu, sigma) subject[20]
+ noise ~ Exponential(f())
+ Parameter count = 44
+
+ mu_trial = f(intercept_subject, trial[6] × subject[20]
+ beta_subject,
+ global_intercept)
+
+ beta_subject_penalty = Potential(f(beta_subject)) subject[20]
+
+ y ~ Normal(mu_trial, noise) trial[6] × subject[20]
+ """
+     assert [s.strip() for s in table_txt.splitlines()] == [s.strip() for s in expected.splitlines()]
+
+     table_txt = get_text(model_table(model, split_groups=False))
+     expected = """ Variable Expression Dimensions
+ ────────────────────────────────────────────────────────────────────────────────
+ x_data = Data trial[6] × subject[20]
+ y_data = Data trial[6] × subject[20]
+ mu ~ Normal(0, 1)
+ sigma ~ HalfNormal(0, 1)
+ global_intercept ~ Normal(0, 1)
+ intercept_subject ~ Normal(0, 1) [20, 1]
+ beta_subject ~ Normal(mu, sigma) subject[20]
+ mu_trial = f(intercept_subject, trial[6] × subject[20]
+ beta_subject,
+ global_intercept)
+ noise ~ Exponential(f())
+ y ~ Normal(mu_trial, noise) trial[6] × subject[20]
+ beta_subject_penalty = Potential(f(beta_subject)) subject[20]
+ Parameter count = 44
+ """
+     assert [s.strip() for s in table_txt.splitlines()] == [s.strip() for s in expected.splitlines()]
+
+     table_txt = get_text(
+         model_table(model, split_groups=False, truncate_deterministic=30, parameter_count=False)
+     )
+     expected = """ Variable Expression Dimensions
+ ────────────────────────────────────────────────────────────────────────────
+ x_data = Data trial[6] × subject[20]
+ y_data = Data trial[6] × subject[20]
+ mu ~ Normal(0, 1)
+ sigma ~ HalfNormal(0, 1)
+ global_intercept ~ Normal(0, 1)
+ intercept_subject ~ Normal(0, 1) [20, 1]
+ beta_subject ~ Normal(mu, sigma) subject[20]
+ mu_trial = f(intercept_subject, ...) trial[6] × subject[20]
+ noise ~ Exponential(f())
+ y ~ Normal(mu_trial, noise) trial[6] × subject[20]
+ beta_subject_penalty = Potential(f(beta_subject)) subject[20]
+ """
+     assert [s.strip() for s in table_txt.splitlines()] == [s.strip() for s in expected.splitlines()]
tests/test_prior_from_trace.py ADDED
@@ -0,0 +1,172 @@
+ # Copyright 2022 The PyMC Developers
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #   http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+
+ import arviz as az
+ import numpy as np
+ import pymc as pm
+ import pytest
+
+ from pymc.distributions import transforms
+
+ import pymc_extras as pmx
+
+
+ @pytest.mark.parametrize(
+     "case",
+     [
+         (("a", dict(name="b")), dict(name="b", transform=None, dims=None)),
+         (("a", None), dict(name="a", transform=None, dims=None)),
+         (("a", transforms.log), dict(name="a", transform=transforms.log, dims=None)),
+         (
+             ("a", dict(transform=transforms.log)),
+             dict(name="a", transform=transforms.log, dims=None),
+         ),
+         (("a", dict(name="b")), dict(name="b", transform=None, dims=None)),
+         (("a", dict(name="b", dims="test")), dict(name="b", transform=None, dims="test")),
+         (("a", ("test",)), dict(name="a", transform=None, dims=("test",))),
+     ],
+ )
+ def test_parsing_arguments(case):
+     inp, out = case
+     test = pmx.utils.prior._arg_to_param_cfg(*inp)
+     assert test == out
+
+
+ @pytest.fixture
+ def coords():
+     return dict(test=range(3), simplex=range(4))
+
+
+ @pytest.fixture(
+     params=[
+         [
+             ("t",),
+             dict(
+                 a="d",
+                 b=dict(transform=transforms.log, dims=("test",)),
+                 c=dict(transform=transforms.simplex, dims=("simplex",)),
+             ),
+         ],
+         [("t",), dict()],
+     ]
+ )
+ def user_param_cfg(request):
+     return request.param
+
+
+ @pytest.fixture
+ def param_cfg(user_param_cfg):
+     return pmx.utils.prior._parse_args(user_param_cfg[0], **user_param_cfg[1])
+
+
+ @pytest.fixture
+ def transformed_data(param_cfg, coords):
+     vars = dict()
+     for k, cfg in param_cfg.items():
+         if cfg["dims"] is not None:
+             extra_dims = [len(coords[d]) for d in cfg["dims"]]
+             if cfg["transform"] is not None:
+                 t = np.random.randn(*extra_dims)
+                 extra_dims = tuple(cfg["transform"].forward(t).shape.eval())
+         else:
+             extra_dims = []
+         orig = np.random.randn(4, 100, *extra_dims)
+         vars[k] = orig
+     return vars
+
+
+ @pytest.fixture
+ def idata(transformed_data, param_cfg):
+     vars = dict()
+     for k, orig in transformed_data.items():
+         cfg = param_cfg[k]
+         if cfg["transform"] is not None:
+             var = cfg["transform"].backward(orig).eval()
+         else:
+             var = orig
+         assert not np.isnan(var).any()
+         vars[k] = var
+     return az.convert_to_inference_data(vars)
+
+
+ def test_idata_for_tests(idata, param_cfg):
+     assert set(idata.posterior.keys()) == set(param_cfg)
+     assert len(idata.posterior.coords["chain"]) == 4
+     assert len(idata.posterior.coords["draw"]) == 100
+
+
+ def test_args_compose():
+     cfg = pmx.utils.prior._parse_args(
+         var_names=["a"],
+         b=("test",),
+         c=transforms.log,
+         d="e",
+         f=dict(dims="test"),
+         g=dict(name="h", dims="test", transform=transforms.log),
+     )
+     assert cfg == dict(
+         a=dict(name="a", dims=None, transform=None),
+         b=dict(name="b", dims=("test",), transform=None),
+         c=dict(name="c", dims=None, transform=transforms.log),
+         d=dict(name="e", dims=None, transform=None),
+         f=dict(name="f", dims="test", transform=None),
+         g=dict(name="h", dims="test", transform=transforms.log),
+     )
+
+
+ def test_transform_idata(transformed_data, idata, param_cfg):
+     flat_info = pmx.utils.prior._flatten(idata, **param_cfg)
+     expected_shape = 0
+     for v in transformed_data.values():
+         expected_shape += int(np.prod(v.shape[2:]))
+     assert flat_info["data"].shape[1] == expected_shape
+     assert len(flat_info["info"]) == len(param_cfg)
+     assert "sinfo" in flat_info["info"][0]
+     assert "vinfo" in flat_info["info"][0]
+
+
+ @pytest.fixture
+ def flat_info(idata, param_cfg):
+     return pmx.utils.prior._flatten(idata, **param_cfg)
+
+
+ def test_mean_chol(flat_info):
+     mean, chol = pmx.utils.prior._mean_chol(flat_info["data"])
+     assert mean.shape == (flat_info["data"].shape[1],)
+     assert chol.shape == (flat_info["data"].shape[1],) * 2
+
+
+ def test_mvn_prior_from_flat_info(flat_info, coords, param_cfg):
+     with pm.Model(coords=coords) as model:
+         priors = pmx.utils.prior._mvn_prior_from_flat_info("trace_prior_", flat_info)
+         test_prior = pm.sample_prior_predictive(1)
+     names = [p["name"] for p in param_cfg.values()]
+     assert set(model.named_vars) == {"trace_prior_", *names}
+
+
+ def test_prior_from_idata(idata, user_param_cfg, coords, param_cfg):
+     with pm.Model(coords=coords) as model:
+         priors = pmx.utils.prior.prior_from_idata(
+             idata, var_names=user_param_cfg[0], **user_param_cfg[1]
+         )
+         test_prior = pm.sample_prior_predictive(1)
+     names = [p["name"] for p in param_cfg.values()]
+     assert set(model.named_vars) == {"trace_prior_", *names}
+
+
+ def test_empty(idata, coords):
+     with pm.Model(coords=coords):
+         priors = pmx.utils.prior.prior_from_idata(idata)
+     assert not priors