pymc-extras 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,5 +25,14 @@ def test_logp():
25
25
 
26
26
  mw2 = model_wrapped2(coords=coords)
27
27
 
28
+ @pmx.as_model()
29
+ def model_wrapped3(mu):
30
+ pm.Normal("x", mu, 1.0, dims="obs")
31
+
32
+ mw3 = model_wrapped3(0.0, coords=coords)
33
+ mw4 = model_wrapped3(np.array([np.nan]), coords=coords)
34
+
28
35
  np.testing.assert_equal(model.point_logps(), mw.point_logps())
29
36
  np.testing.assert_equal(mw.point_logps(), mw2.point_logps())
37
+ assert mw3["mu"] in mw3.data_vars
38
+ assert "mu" not in mw4
@@ -1,3 +1,4 @@
1
+ from collections.abc import Sequence
1
2
  from functools import partial
2
3
 
3
4
  import numpy as np
@@ -349,6 +350,59 @@ def test_sampling_methods(group, kind, ss_mod, idata, rng):
349
350
  assert not np.any(np.isnan(test_idata[f"{group}_{output}"].values))
350
351
 
351
352
 
353
@pytest.mark.filterwarnings("ignore:Provided data contains missing values")
def test_sample_conditional_with_time_varying():
    """Smoke-test prior sampling with a time-varying state covariance.

    A minimal one-state model is defined whose ``state_cov`` entry is built
    from a registered ``sigma_cov`` variable with one value per time step.
    The model is built on 100 daily observations that are all NaN, a prior
    predictive sample is drawn, and both the unconditional and conditional
    prior samplers are run on it. The test makes no assertions on values; it
    passes when none of these calls raises.
    """

    class TVCovariance(PyMCStateSpace):
        """Single-state model with a per-time-step state covariance."""

        def __init__(self):
            super().__init__(k_states=1, k_endog=1, k_posdef=1)

        @property
        def param_names(self) -> list[str]:
            return ["sigma_cov"]

        @property
        def state_names(self) -> list[str]:
            return ["level"]

        @property
        def observed_states(self) -> list[str]:
            return ["level"]

        @property
        def shock_names(self) -> list[str]:
            return ["level"]

        @property
        def coords(self) -> dict[str, Sequence[str]]:
            return make_default_coords(self)

        def make_symbolic_graph(self) -> None:
            self.ssm["transition", 0, 0] = 1.0
            self.ssm["design", 0, 0] = 1.0

            # One sigma per time step; the two added axes turn the vector
            # into a stack of 1x1 matrices before squaring.
            sigma = self.make_and_register_variable("sigma_cov", (None,))
            self.ssm["state_cov"] = sigma[:, None, None] ** 2

    ss_mod = TVCovariance()

    # All-NaN data: every observation is missing.
    nan_frame = pd.DataFrame(
        np.nan,
        index=pd.date_range("2020-01-01", periods=100, freq="D"),
        columns=["data"],
    )

    model_coords = ss_mod.coords
    model_coords["time"] = nan_frame.index

    with pm.Model(coords=model_coords):
        log_sigma_cov = pm.Normal("log_sigma_cov", mu=0, sigma=0.1, dims=["time"])
        pm.Deterministic("sigma_cov", pm.math.exp(log_sigma_cov.cumsum()), dims=["time"])

        ss_mod.build_statespace_graph(data=nan_frame)

        prior_draws = pm.sample_prior_predictive(10)

    # NOTE(review): block nesting was reconstructed from a flattened diff
    # view; confirm that these two calls belong outside the model context.
    ss_mod.sample_unconditional_prior(prior_draws)
    ss_mod.sample_conditional_prior(prior_draws)
404
+
405
+
352
406
  def _make_time_idx(mod, use_datetime_index=True):
353
407
  if use_datetime_index:
354
408
  mod._fit_coords["time"] = nile.index
@@ -2,6 +2,7 @@ import functools as ft
2
2
  import warnings
3
3
 
4
4
  from collections import defaultdict
5
+ from copyreg import remove_extension
5
6
  from typing import Optional
6
7
 
7
8
  import numpy as np
@@ -592,13 +593,18 @@ def test_autoregressive_model(order, rng):
592
593
 
593
594
  @pytest.mark.parametrize("s", [10, 25, 50])
594
595
  @pytest.mark.parametrize("innovations", [True, False])
595
- def test_time_seasonality(s, innovations, rng):
596
+ @pytest.mark.parametrize("remove_first_state", [True, False])
597
+ def test_time_seasonality(s, innovations, remove_first_state, rng):
596
598
  def random_word(rng):
597
599
  return "".join(rng.choice(list("abcdefghijklmnopqrstuvwxyz")) for _ in range(5))
598
600
 
599
601
  state_names = [random_word(rng) for _ in range(s)]
600
602
  mod = st.TimeSeasonality(
601
- season_length=s, innovations=innovations, name="season", state_names=state_names
603
+ season_length=s,
604
+ innovations=innovations,
605
+ name="season",
606
+ state_names=state_names,
607
+ remove_first_state=remove_first_state,
602
608
  )
603
609
  x0 = np.zeros(mod.k_states, dtype=floatX)
604
610
  x0[0] = 1
@@ -615,7 +621,8 @@ def test_time_seasonality(s, innovations, rng):
615
621
  # Check coords
616
622
  mod.build(verbose=False)
617
623
  _assert_basic_coords_correct(mod)
618
- assert mod.coords["season_state"] == state_names[1:]
624
+ test_slice = slice(1, None) if remove_first_state else slice(None)
625
+ assert mod.coords["season_state"] == state_names[test_slice]
619
626
 
620
627
 
621
628
  def get_shift_factor(s):
tests/test_pathfinder.py CHANGED
@@ -18,12 +18,12 @@ import numpy as np
18
18
  import pymc as pm
19
19
  import pytest
20
20
 
21
+ pytestmark = pytest.mark.filterwarnings("ignore:compile_pymc was renamed to compile:FutureWarning")
22
+
21
23
  import pymc_extras as pmx
22
24
 
23
25
 
24
- @pytest.mark.skipif(sys.platform == "win32", reason="JAX not supported on windows.")
25
- def test_pathfinder():
26
- # Data of the Eight Schools Model
26
+ def eight_schools_model() -> pm.Model:
27
27
  J = 8
28
28
  y = np.array([28.0, 8.0, -3.0, 7.0, -1.0, 1.0, 18.0, 12.0])
29
29
  sigma = np.array([15.0, 10.0, 16.0, 11.0, 9.0, 11.0, 10.0, 18.0])
@@ -35,11 +35,139 @@ def test_pathfinder():
35
35
  theta = pm.Normal("theta", mu=0, sigma=1, shape=J)
36
36
  obs = pm.Normal("obs", mu=mu + tau * theta, sigma=sigma, shape=J, observed=y)
37
37
 
38
- idata = pmx.fit(method="pathfinder", random_seed=41)
38
+ return model
39
+
40
+
41
@pytest.fixture
def reference_idata():
    """Return a pathfinder fit of the eight-schools model (PyMC backend).

    The fit uses 50 paths, a jitter of 10.0 and random seed 41. Tests that
    request this fixture use the result as the baseline to compare against.
    """
    fit_options = dict(
        method="pathfinder",
        num_paths=50,
        jitter=10.0,
        random_seed=41,
        inference_backend="pymc",
    )
    with eight_schools_model():
        return pmx.fit(**fit_options)
53
+
54
+
55
+ @pytest.mark.parametrize("inference_backend", ["pymc", "blackjax"])
56
+ def test_pathfinder(inference_backend, reference_idata):
57
+ if inference_backend == "blackjax" and sys.platform == "win32":
58
+ pytest.skip("JAX not supported on windows")
59
+
60
+ if inference_backend == "blackjax":
61
+ model = eight_schools_model()
62
+ with model:
63
+ idata = pmx.fit(
64
+ method="pathfinder",
65
+ num_paths=50,
66
+ jitter=10.0,
67
+ random_seed=41,
68
+ inference_backend=inference_backend,
69
+ )
70
+ else:
71
+ idata = reference_idata
72
+ np.testing.assert_allclose(idata.posterior["mu"].mean(), 5.0, atol=1.6)
73
+ np.testing.assert_allclose(idata.posterior["tau"].mean(), 4.15, atol=1.5)
39
74
 
40
75
  assert idata.posterior["mu"].shape == (1, 1000)
41
76
  assert idata.posterior["tau"].shape == (1, 1000)
42
77
  assert idata.posterior["theta"].shape == (1, 1000, 8)
43
- # FIXME: pathfinder doesn't find a reasonable mean! Fix bug or choose model pathfinder can handle
44
- # np.testing.assert_allclose(idata.posterior["mu"].mean(), 5.0)
45
- np.testing.assert_allclose(idata.posterior["tau"].mean(), 4.15, atol=0.5)
78
+
79
+
80
@pytest.mark.parametrize("concurrent", ["thread", "process"])
def test_concurrent_results(reference_idata, concurrent):
    """Compare a concurrent pathfinder fit with the reference fit.

    The eight-schools model is fitted with the same settings as the
    ``reference_idata`` fixture plus the given ``concurrent`` mode. The
    posterior means of ``mu`` and ``tau`` must agree with the reference
    within an absolute tolerance of 0.4.
    """
    with eight_schools_model():
        idata_conc = pmx.fit(
            method="pathfinder",
            num_paths=50,
            jitter=10.0,
            random_seed=41,
            inference_backend="pymc",
            concurrent=concurrent,
        )

    for var_name in ("mu", "tau"):
        reference_mean = reference_idata.posterior[var_name].data.mean()
        concurrent_mean = idata_conc.posterior[var_name].data.mean()
        np.testing.assert_allclose(reference_mean, concurrent_mean, atol=0.4)
104
+
105
+
106
def test_seed(reference_idata):
    """Check that ``random_seed`` determines the pathfinder result.

    Two fits of the eight-schools model are run with identical settings but
    different seeds (41 and 123). The posterior mean of ``mu`` must differ
    between the two seeds, and the seed-41 fit must reproduce the
    ``reference_idata`` fixture, which is fitted with the same settings and
    seed.
    """
    fit_options = dict(
        method="pathfinder",
        num_paths=50,
        jitter=10.0,
        inference_backend="pymc",
    )
    with eight_schools_model():
        idata_41 = pmx.fit(random_seed=41, **fit_options)
        idata_123 = pmx.fit(random_seed=123, **fit_options)

    mu_mean_41 = idata_41.posterior.mu.data.mean()
    mu_mean_123 = idata_123.posterior.mu.data.mean()
    mu_mean_reference = reference_idata.posterior.mu.data.mean()

    # Different seeds must give different draws.
    assert not np.allclose(mu_mean_41, mu_mean_123)

    # Same seed must reproduce the result. Comparing against the independent
    # reference fit makes this a real check; comparing idata_41 with itself
    # would always pass.
    assert np.allclose(mu_mean_41, mu_mean_reference)
128
+
129
+
130
def test_bfgs_sample():
    """Check the output shapes of the BFGS factor and sampling helpers.

    Random positions and gradients of shape ``(L + 1, N)`` are passed through
    ``alpha_recover`` and ``inverse_hessian_factors``, then ``bfgs_sample``
    draws ``num_samples`` samples. Only the shapes of the evaluated results
    are asserted, not their values.
    """
    import pytensor.tensor as pt

    from pymc_extras.inference.pathfinder.pathfinder import (
        alpha_recover,
        bfgs_sample,
        inverse_hessian_factors,
    )

    Lp1, N = 8, 10
    L = Lp1 - 1
    J = 6
    num_samples = 1000

    # Mock data from a seeded generator so that a failure can be reproduced.
    rng = np.random.default_rng(0)
    x_data = rng.standard_normal((Lp1, N))
    g_data = rng.standard_normal((Lp1, N))

    # get factors
    x_full = pt.as_tensor(x_data, dtype="float64")
    g_full = pt.as_tensor(g_data, dtype="float64")
    epsilon = 1e-11

    # The factor helpers receive the full arrays; the sampler receives them
    # with the first row dropped, giving the leading dimension L.
    x = x_full[1:]
    g = g_full[1:]
    alpha, S, Z, update_mask = alpha_recover(x_full, g_full, epsilon)
    beta, gamma = inverse_hessian_factors(alpha, S, Z, update_mask, J)

    # sample
    phi, logq = bfgs_sample(
        num_samples=num_samples,
        x=x,
        g=g,
        alpha=alpha,
        beta=beta,
        gamma=gamma,
    )

    # check shapes
    assert beta.eval().shape == (L, N, 2 * J)
    assert gamma.eval().shape == (L, 2 * J, 2 * J)
    assert phi.eval().shape == (L, num_samples, N)
    assert logq.eval().shape == (L, num_samples)
@@ -8,7 +8,7 @@
8
8
  # pass
9
9
  # import numpy as np
10
10
  #
11
- # import pymc_experimental as pmx
11
+ # import pymc_extras as pmx
12
12
  #
13
13
  #
14
14
  # def test_match_gpytorch_linearcg_output():
tests/utils.py CHANGED
@@ -1,31 +0,0 @@
1
- from collections.abc import Sequence
2
-
3
- from pytensor.compile import SharedVariable
4
- from pytensor.graph import Constant, graph_inputs
5
- from pytensor.graph.basic import Variable, equal_computations
6
- from pytensor.tensor.random.type import RandomType
7
-
8
-
9
- def equal_computations_up_to_root(
10
- xs: Sequence[Variable], ys: Sequence[Variable], ignore_rng_values=True
11
- ) -> bool:
12
- # Check if graphs are equivalent even if root variables have distinct identities
13
-
14
- x_graph_inputs = [var for var in graph_inputs(xs) if not isinstance(var, Constant)]
15
- y_graph_inputs = [var for var in graph_inputs(ys) if not isinstance(var, Constant)]
16
- if len(x_graph_inputs) != len(y_graph_inputs):
17
- return False
18
- for x, y in zip(x_graph_inputs, y_graph_inputs):
19
- if x.type != y.type:
20
- return False
21
- if x.name != y.name:
22
- return False
23
- if isinstance(x, SharedVariable):
24
- if not isinstance(y, SharedVariable):
25
- return False
26
- if isinstance(x.type, RandomType) and ignore_rng_values:
27
- continue
28
- if not x.type.values_eq(x.get_value(), y.get_value()):
29
- return False
30
-
31
- return equal_computations(xs, ys, in_xs=x_graph_inputs, in_ys=y_graph_inputs)
@@ -1,134 +0,0 @@
1
- # Copyright 2022 The PyMC Developers
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
- import collections
16
- import sys
17
-
18
- import arviz as az
19
- import blackjax
20
- import jax
21
- import numpy as np
22
- import pymc as pm
23
-
24
- from packaging import version
25
- from pymc.backends.arviz import coords_and_dims_for_inferencedata
26
- from pymc.blocking import DictToArrayBijection, RaveledVars
27
- from pymc.model import modelcontext
28
- from pymc.sampling.jax import get_jaxified_graph
29
- from pymc.util import RandomSeed, _get_seeds_per_chain, get_default_varnames
30
-
31
-
32
- def convert_flat_trace_to_idata(
33
- samples,
34
- include_transformed=False,
35
- postprocessing_backend="cpu",
36
- model=None,
37
- ):
38
- model = modelcontext(model)
39
- ip = model.initial_point()
40
- ip_point_map_info = pm.blocking.DictToArrayBijection.map(ip).point_map_info
41
- trace = collections.defaultdict(list)
42
- for sample in samples:
43
- raveld_vars = RaveledVars(sample, ip_point_map_info)
44
- point = DictToArrayBijection.rmap(raveld_vars, ip)
45
- for p, v in point.items():
46
- trace[p].append(v.tolist())
47
-
48
- trace = {k: np.asarray(v)[None, ...] for k, v in trace.items()}
49
-
50
- var_names = model.unobserved_value_vars
51
- vars_to_sample = list(get_default_varnames(var_names, include_transformed=include_transformed))
52
- print("Transforming variables...", file=sys.stdout)
53
- jax_fn = get_jaxified_graph(inputs=model.value_vars, outputs=vars_to_sample)
54
- result = jax.vmap(jax.vmap(jax_fn))(
55
- *jax.device_put(list(trace.values()), jax.devices(postprocessing_backend)[0])
56
- )
57
- trace = {v.name: r for v, r in zip(vars_to_sample, result)}
58
- coords, dims = coords_and_dims_for_inferencedata(model)
59
- idata = az.from_dict(trace, dims=dims, coords=coords)
60
-
61
- return idata
62
-
63
-
64
- def fit_pathfinder(
65
- samples=1000,
66
- random_seed: RandomSeed | None = None,
67
- postprocessing_backend="cpu",
68
- model=None,
69
- **pathfinder_kwargs,
70
- ):
71
- """
72
- Fit the pathfinder algorithm as implemented in blackjax
73
-
74
- Requires the JAX backend
75
-
76
- Parameters
77
- ----------
78
- samples : int
79
- Number of samples to draw from the fitted approximation.
80
- random_seed : int
81
- Random seed to set.
82
- postprocessing_backend : str
83
- Where to compute transformations of the trace.
84
- "cpu" or "gpu".
85
- pathfinder_kwargs:
86
- kwargs for blackjax.vi.pathfinder.approximate
87
-
88
- Returns
89
- -------
90
- arviz.InferenceData
91
-
92
- Reference
93
- ---------
94
- https://arxiv.org/abs/2108.03782
95
- """
96
- # Temporarily helper
97
- if version.parse(blackjax.__version__).major < 1:
98
- raise ImportError("fit_pathfinder requires blackjax 1.0 or above")
99
-
100
- model = modelcontext(model)
101
-
102
- ip = model.initial_point()
103
- ip_map = DictToArrayBijection.map(ip)
104
-
105
- new_logprob, new_input = pm.pytensorf.join_nonshared_inputs(
106
- ip, (model.logp(),), model.value_vars, ()
107
- )
108
-
109
- logprob_fn_list = get_jaxified_graph([new_input], new_logprob)
110
-
111
- def logprob_fn(x):
112
- return logprob_fn_list(x)[0]
113
-
114
- [pathfinder_seed, sample_seed] = _get_seeds_per_chain(random_seed, 2)
115
-
116
- print("Running pathfinder...", file=sys.stdout)
117
- pathfinder_state, _ = blackjax.vi.pathfinder.approximate(
118
- rng_key=jax.random.key(pathfinder_seed),
119
- logdensity_fn=logprob_fn,
120
- initial_position=ip_map.data,
121
- **pathfinder_kwargs,
122
- )
123
- samples, _ = blackjax.vi.pathfinder.sample(
124
- rng_key=jax.random.key(sample_seed),
125
- state=pathfinder_state,
126
- num_samples=samples,
127
- )
128
-
129
- idata = convert_flat_trace_to_idata(
130
- samples,
131
- postprocessing_backend=postprocessing_backend,
132
- model=model,
133
- )
134
- return idata