pymc-extras 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pymc_extras/__init__.py +29 -0
- pymc_extras/distributions/__init__.py +40 -0
- pymc_extras/distributions/continuous.py +351 -0
- pymc_extras/distributions/discrete.py +399 -0
- pymc_extras/distributions/histogram_utils.py +163 -0
- pymc_extras/distributions/multivariate/__init__.py +3 -0
- pymc_extras/distributions/multivariate/r2d2m2cp.py +446 -0
- pymc_extras/distributions/timeseries.py +356 -0
- pymc_extras/gp/__init__.py +18 -0
- pymc_extras/gp/latent_approx.py +183 -0
- pymc_extras/inference/__init__.py +18 -0
- pymc_extras/inference/find_map.py +431 -0
- pymc_extras/inference/fit.py +44 -0
- pymc_extras/inference/laplace.py +570 -0
- pymc_extras/inference/pathfinder.py +134 -0
- pymc_extras/inference/smc/__init__.py +13 -0
- pymc_extras/inference/smc/sampling.py +451 -0
- pymc_extras/linearmodel.py +130 -0
- pymc_extras/model/__init__.py +0 -0
- pymc_extras/model/marginal/__init__.py +0 -0
- pymc_extras/model/marginal/distributions.py +276 -0
- pymc_extras/model/marginal/graph_analysis.py +372 -0
- pymc_extras/model/marginal/marginal_model.py +595 -0
- pymc_extras/model/model_api.py +56 -0
- pymc_extras/model/transforms/__init__.py +0 -0
- pymc_extras/model/transforms/autoreparam.py +434 -0
- pymc_extras/model_builder.py +759 -0
- pymc_extras/preprocessing/__init__.py +0 -0
- pymc_extras/preprocessing/standard_scaler.py +17 -0
- pymc_extras/printing.py +182 -0
- pymc_extras/statespace/__init__.py +13 -0
- pymc_extras/statespace/core/__init__.py +7 -0
- pymc_extras/statespace/core/compile.py +48 -0
- pymc_extras/statespace/core/representation.py +438 -0
- pymc_extras/statespace/core/statespace.py +2268 -0
- pymc_extras/statespace/filters/__init__.py +15 -0
- pymc_extras/statespace/filters/distributions.py +453 -0
- pymc_extras/statespace/filters/kalman_filter.py +820 -0
- pymc_extras/statespace/filters/kalman_smoother.py +126 -0
- pymc_extras/statespace/filters/utilities.py +59 -0
- pymc_extras/statespace/models/ETS.py +670 -0
- pymc_extras/statespace/models/SARIMAX.py +536 -0
- pymc_extras/statespace/models/VARMAX.py +393 -0
- pymc_extras/statespace/models/__init__.py +6 -0
- pymc_extras/statespace/models/structural.py +1651 -0
- pymc_extras/statespace/models/utilities.py +387 -0
- pymc_extras/statespace/utils/__init__.py +0 -0
- pymc_extras/statespace/utils/constants.py +74 -0
- pymc_extras/statespace/utils/coord_tools.py +0 -0
- pymc_extras/statespace/utils/data_tools.py +182 -0
- pymc_extras/utils/__init__.py +23 -0
- pymc_extras/utils/linear_cg.py +290 -0
- pymc_extras/utils/pivoted_cholesky.py +69 -0
- pymc_extras/utils/prior.py +200 -0
- pymc_extras/utils/spline.py +131 -0
- pymc_extras/version.py +11 -0
- pymc_extras/version.txt +1 -0
- pymc_extras-0.2.0.dist-info/LICENSE +212 -0
- pymc_extras-0.2.0.dist-info/METADATA +99 -0
- pymc_extras-0.2.0.dist-info/RECORD +101 -0
- pymc_extras-0.2.0.dist-info/WHEEL +5 -0
- pymc_extras-0.2.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +13 -0
- tests/distributions/__init__.py +19 -0
- tests/distributions/test_continuous.py +185 -0
- tests/distributions/test_discrete.py +210 -0
- tests/distributions/test_discrete_markov_chain.py +258 -0
- tests/distributions/test_multivariate.py +304 -0
- tests/model/__init__.py +0 -0
- tests/model/marginal/__init__.py +0 -0
- tests/model/marginal/test_distributions.py +131 -0
- tests/model/marginal/test_graph_analysis.py +182 -0
- tests/model/marginal/test_marginal_model.py +867 -0
- tests/model/test_model_api.py +29 -0
- tests/statespace/__init__.py +0 -0
- tests/statespace/test_ETS.py +411 -0
- tests/statespace/test_SARIMAX.py +405 -0
- tests/statespace/test_VARMAX.py +184 -0
- tests/statespace/test_coord_assignment.py +116 -0
- tests/statespace/test_distributions.py +270 -0
- tests/statespace/test_kalman_filter.py +326 -0
- tests/statespace/test_representation.py +175 -0
- tests/statespace/test_statespace.py +818 -0
- tests/statespace/test_statespace_JAX.py +156 -0
- tests/statespace/test_structural.py +829 -0
- tests/statespace/utilities/__init__.py +0 -0
- tests/statespace/utilities/shared_fixtures.py +9 -0
- tests/statespace/utilities/statsmodel_local_level.py +42 -0
- tests/statespace/utilities/test_helpers.py +310 -0
- tests/test_blackjax_smc.py +222 -0
- tests/test_find_map.py +98 -0
- tests/test_histogram_approximation.py +109 -0
- tests/test_laplace.py +238 -0
- tests/test_linearmodel.py +208 -0
- tests/test_model_builder.py +306 -0
- tests/test_pathfinder.py +45 -0
- tests/test_pivoted_cholesky.py +24 -0
- tests/test_printing.py +98 -0
- tests/test_prior_from_trace.py +172 -0
- tests/test_splines.py +77 -0
- tests/utils.py +31 -0
@@ -0,0 +1,446 @@
# Copyright 2023 The PyMC Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from collections import namedtuple
from collections.abc import Sequence

import numpy as np
import pymc as pm
import pytensor.tensor as pt

__all__ = ["R2D2M2CP"]


def _psivar2musigma(
    psi: pt.TensorVariable,
    explained_var: pt.TensorVariable,
    psi_mask: pt.TensorLike | None,
) -> tuple[pt.TensorVariable, pt.TensorVariable]:
    sign = pt.sign(psi - 0.5)
    if psi_mask is not None:
        # any computation might be ignored for ~psi_mask
        # sign and explained_var are used
        psi = pt.where(psi_mask, psi, 0.5)
    pi = pt.erfinv(2 * psi - 1)
    f = (1 / (2 * pi**2 + 1)) ** 0.5
    sigma = explained_var**0.5 * f
    mu = sigma * pi * 2**0.5
    if psi_mask is not None:
        return (
            pt.where(psi_mask, mu, sign * explained_var**0.5),
            pt.where(psi_mask, sigma, 0),
        )
    else:
        return mu, sigma
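
The mapping above is built so that a coefficient with prior ``Normal(mu, sigma)`` satisfies ``P(X > 0) = psi`` while its second moment equals the variance allotted to it. A minimal numeric sanity check of those two identities (not part of the package; SciPy is used here only for a reference normal CDF):

.. code-block:: python

    import numpy as np
    from scipy.special import erfinv
    from scipy.stats import norm

    psi, explained_var = 0.8, 2.0
    pi = erfinv(2 * psi - 1)
    sigma = explained_var**0.5 * (1 / (2 * pi**2 + 1)) ** 0.5
    mu = sigma * pi * 2**0.5

    # P(X > 0) recovers psi
    assert np.isclose(norm.sf(0, loc=mu, scale=sigma), psi)
    # E[X^2] = mu^2 + sigma^2 recovers the explained variance
    assert np.isclose(mu**2 + sigma**2, explained_var)
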
def _R2D2M2CP_beta(
    name: str,
    output_sigma: pt.TensorVariable,
    input_sigma: pt.TensorVariable,
    r2: pt.TensorVariable,
    phi: pt.TensorVariable,
    psi: pt.TensorVariable,
    *,
    psi_mask,
    dims: str | Sequence[str],
    centered=False,
) -> pt.TensorVariable:
    """R2D2M2CP beta prior.

    Parameters
    ----------
    name: str
        Name for the distribution
    output_sigma: tensor
        standard deviation of the outcome
    input_sigma: tensor
        standard deviation of the explanatory variables
    r2: tensor
        expected R2 for the linear regression
    phi: tensor
        variance weights that sum to 1
    psi: tensor
        probability of each coefficient being positive
    """
    explained_variance = phi * pt.expand_dims(r2 * output_sigma**2, (-1,))
    mu_param, std_param = _psivar2musigma(psi, explained_variance, psi_mask=psi_mask)
    if not centered:
        with pm.Model(name):
            if psi_mask is not None and psi_mask.any():
                # limit case where some probs are not 1 or 0
                # setsubtensor is required
                r_idx = psi_mask.nonzero()
                with pm.Model("raw"):
                    raw = pm.Normal("masked", shape=len(r_idx[0]))
                raw = pt.set_subtensor(pt.zeros_like(mu_param)[r_idx], raw)
                raw = pm.Deterministic("raw", raw, dims=dims)
            elif psi_mask is not None:
                # all variables are deterministic
                raw = pt.zeros_like(mu_param)
            else:
                raw = pm.Normal("raw", dims=dims)
        beta = pm.Deterministic(name, (raw * std_param + mu_param) / input_sigma, dims=dims)
    else:
        if psi_mask is not None and psi_mask.any():
            # limit case where some probs are not 1 or 0
            # setsubtensor is required
            r_idx = psi_mask.nonzero()
            with pm.Model(name):
                mean = (mu_param / input_sigma)[r_idx]
                sigma = (std_param / input_sigma)[r_idx]
                masked = pm.Normal(
                    "masked",
                    mean,
                    sigma,
                    shape=len(r_idx[0]),
                )
            beta = pt.set_subtensor(mean, masked)
            beta = pm.Deterministic(name, beta, dims=dims)
        elif psi_mask is not None:
            # all variables are deterministic
            beta = pm.Deterministic(name, (mu_param / input_sigma), dims=dims)
        else:
            beta = pm.Normal(name, mu_param / input_sigma, std_param / input_sigma, dims=dims)
    return beta
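
A note on the two branches above: the non-centered branch draws a standard-normal ``raw`` variable and rescales it, which is distributionally identical to the centered draw from ``Normal(mu_param / input_sigma, std_param / input_sigma)`` but often has better sampling geometry. A toy illustration of the equivalence (assumed values, not package code):

.. code-block:: python

    import numpy as np

    rng = np.random.default_rng(0)
    mu, std, input_sigma = 1.2, 0.4, 2.0
    raw = rng.standard_normal(100_000)
    noncentered = (raw * std + mu) / input_sigma
    centered = rng.normal(mu / input_sigma, std / input_sigma, size=100_000)

    print(noncentered.mean(), centered.mean())  # both close to mu / input_sigma
    print(noncentered.std(), centered.std())    # both close to std / input_sigma
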
def _broadcast_as_dims(
    *values: np.ndarray,
    dims: Sequence[str],
) -> tuple[np.ndarray, ...] | np.ndarray:
    model = pm.modelcontext(None)
    shape = [len(model.coords[d]) for d in dims]
    ret = tuple(np.broadcast_to(v, shape) for v in values)
    # strip output
    if len(values) == 1:
        ret = ret[0]
    return ret
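
``_broadcast_as_dims`` only stretches scalar or array inputs to the shape implied by the model coordinates. A minimal NumPy illustration of the underlying call (outside any model):

.. code-block:: python

    import numpy as np

    # a scalar broadcast against a 3-element coordinate becomes a length-3 array
    print(np.broadcast_to(0.5, [3]))  # [0.5 0.5 0.5]
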
def _psi_masked(
    positive_probs: pt.TensorLike,
    positive_probs_std: pt.TensorLike,
    *,
    dims: Sequence[str],
) -> tuple[pt.TensorLike | None, pt.TensorVariable]:
    if not (
        isinstance(positive_probs, pt.Constant) and isinstance(positive_probs_std, pt.Constant)
    ):
        raise TypeError(
            "Only constant values for positive_probs and positive_probs_std are accepted"
        )
    positive_probs, positive_probs_std = _broadcast_as_dims(
        positive_probs.data, positive_probs_std.data, dims=dims
    )
    mask = ~np.bitwise_or(positive_probs == 1, positive_probs == 0)
    if np.bitwise_and(~mask, positive_probs_std != 0).any():
        raise ValueError("Can't have both positive_probs == '1 or 0' and positive_probs_std != 0")
    if (~mask).any() and mask.any():
        # limit case where some probs are not 1 or 0
        # setsubtensor is required
        r_idx = mask.nonzero()
        with pm.Model("psi"):
            psi = pm.Beta(
                "masked",
                mu=positive_probs[r_idx],
                sigma=positive_probs_std[r_idx],
                shape=len(r_idx[0]),
            )
        psi = pt.set_subtensor(pt.as_tensor(positive_probs)[r_idx], psi)
        psi = pm.Deterministic("psi", psi, dims=dims)
    elif (~mask).all():
        # limit case where all the probs are exactly 0 or 1
        psi = pt.as_tensor(positive_probs)
    else:
        psi = pm.Beta("psi", mu=positive_probs, sigma=positive_probs_std, dims=dims)
        mask = None
    return mask, psi
def _psi(
    positive_probs: pt.TensorLike,
    positive_probs_std: pt.TensorLike | None,
    *,
    dims: Sequence[str],
) -> tuple[pt.TensorLike | None, pt.TensorVariable]:
    if positive_probs_std is not None:
        mask, psi = _psi_masked(
            positive_probs=pt.as_tensor(positive_probs),
            positive_probs_std=pt.as_tensor(positive_probs_std),
            dims=dims,
        )
    else:
        positive_probs = pt.as_tensor(positive_probs)
        if not isinstance(positive_probs, pt.Constant):
            raise TypeError("Only constant values for positive_probs are allowed")
        psi = _broadcast_as_dims(positive_probs.data, dims=dims)
        mask = np.atleast_1d(~np.bitwise_or(psi == 1, psi == 0))
        if mask.all():
            mask = None
    return mask, psi
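
In both helpers the mask singles out the coefficients whose sign is genuinely uncertain: entries with ``positive_probs`` exactly 0 or 1 become deterministic sign constraints, everything else stays random, and a mask of all ``True`` collapses to ``None``. A small sketch of the rule with assumed inputs (not package code):

.. code-block:: python

    import numpy as np

    positive_probs = np.array([1.0, 0.5, 0.0, 0.9])
    mask = ~np.bitwise_or(positive_probs == 1, positive_probs == 0)
    print(mask)  # [False  True False  True]
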
def _phi(
    variables_importance: pt.TensorLike | None,
    variance_explained: pt.TensorLike | None,
    importance_concentration: pt.TensorLike | None,
    *,
    dims: Sequence[str],
) -> pt.TensorVariable:
    *broadcast_dims, dim = dims
    model = pm.modelcontext(None)
    if variables_importance is not None:
        if variance_explained is not None:
            raise TypeError("Can't use variable importance with variance explained")
        if len(model.coords[dim]) <= 1:
            raise TypeError("Can't use variable importance with less than two variables")
        variables_importance = pt.as_tensor(variables_importance)
        if importance_concentration is not None:
            variables_importance *= importance_concentration
        return pm.Dirichlet("phi", variables_importance, dims=[*broadcast_dims, dim])
    elif variance_explained is not None:
        if len(model.coords[dim]) <= 1:
            raise TypeError("Can't use variance explained with less than two variables")
        phi = pt.as_tensor(variance_explained)
    else:
        phi = _broadcast_as_dims(1.0, dims=dims)
    if importance_concentration is not None:
        return pm.Dirichlet("phi", importance_concentration * phi, dims=[*broadcast_dims, dim])
    else:
        return phi
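
``phi`` is the simplex of per-variable variance shares, and ``importance_concentration`` multiplies the Dirichlet concentration vector, so larger values pull draws more tightly toward the normalized importance estimate. A quick NumPy illustration with assumed numbers:

.. code-block:: python

    import numpy as np

    rng = np.random.default_rng(1)
    importance = np.array([10.0, 1.0, 34.0])
    for concentration in (1.0, 10.0):
        draws = rng.dirichlet(importance * concentration, size=5_000)
        print(concentration, draws.std(axis=0))  # spread shrinks as concentration grows
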
R2D2M2CPOut = namedtuple("R2D2M2CPOut", ["eps", "beta"])


def R2D2M2CP(
    name: str,
    output_sigma: pt.TensorLike,
    input_sigma: pt.TensorLike,
    *,
    dims: Sequence[str],
    r2: pt.TensorLike,
    variables_importance: pt.TensorLike | None = None,
    variance_explained: pt.TensorLike | None = None,
    importance_concentration: pt.TensorLike | None = None,
    r2_std: pt.TensorLike | None = None,
    positive_probs: pt.TensorLike | None = 0.5,
    positive_probs_std: pt.TensorLike | None = None,
    centered: bool = False,
) -> R2D2M2CPOut:
    """R2D2M2CP Prior.

    Parameters
    ----------
    name : str
        Name for the distribution
    output_sigma : Tensor
        Output standard deviation
    input_sigma : Tensor
        Input standard deviation
    dims : Union[str, Sequence[str]]
        Dims for the distribution
    r2 : Tensor
        :math:`R^2` estimate
    variables_importance : Tensor, optional
        Optional positive estimate of the variables' importance, by default None
    variance_explained : Tensor, optional
        Alternative estimate of the variables' importance: a point estimate of the
        variance explained per variable, should sum to one, by default None
    importance_concentration : Tensor, optional
        Confidence around the variance explained or variables importance estimate
    r2_std : Tensor, optional
        Optional uncertainty over :math:`R^2`, by default None
    positive_probs : Tensor, optional
        Optional probability of each variable's contribution being positive, by default 0.5
    positive_probs_std : Tensor, optional
        Optional uncertainty over the effect direction probability, by default None
    centered : bool, optional
        Centered or non-centered parametrization of the distribution, non-centered by
        default. It is advised to check both.

    Returns
    -------
    residual_sigma, coefficients
        Output variance (sigma squared) is split into residual variance and explained variance.

    Raises
    ------
    TypeError
        If the parametrization is wrong.

    Notes
    -----
    The R2D2M2CP prior is a modification of the R2D2M2 prior.

    - ``(R2D2M2)`` CP is taken from https://arxiv.org/abs/2208.07132
    - R2D2M2 ``(CP)``, (Correlation Probability) is proposed and implemented by Max Kochurov (@ferrine)

    Examples
    --------
    The arguments are explained in a synthetic example below.

    .. warning::

        To use the prior in a linear regression

        - make sure :math:`X` is centered around zero
        - the intercept represents the prior predictive mean when :math:`X` is centered
        - setting named dims is required

    .. code-block:: python

        import pymc_extras as pmx
        import pymc as pm
        import numpy as np
        X = np.random.randn(10, 3)
        b = np.random.randn(3)
        y = X @ b + np.random.randn(10) * 0.04 + 5
        with pm.Model(coords=dict(variables=["a", "b", "c"])) as model:
            eps, beta = pmx.distributions.R2D2M2CP(
                "beta",
                y.std(),
                X.std(0),
                dims="variables",
                # NOTE: global shrinkage
                r2=0.8,
                # NOTE: if you are unsure about r2
                r2_std=0.2,
                # NOTE: if you know where a variable should go
                # if you do not know, leave as 0.5
                positive_probs=[0.8, 0.5, 0.1],
                # NOTE: if you have different opinions about
                # where a variable should go.
                # NOTE: if you put 0.5 previously,
                # just put 0.1 there, but other
                # sigmas should work fine too
                positive_probs_std=[0.3, 0.1, 0.2],
                # NOTE: variable importances are relative to each other,
                # but larger numbers put "more" weight on the relation
                # use
                # * 1-10 for small confidence
                # * 10-30 for moderate confidence
                # * 30+ for high confidence
                # EXAMPLE:
                # "a" - is likely to be useful
                # "b" - no idea if it is useful
                # "c" - a must have in the relation
                variables_importance=[10, 1, 34],
                # NOTE: try both
                centered=True
            )
            # intercept prior centering should be around prior predictive mean
            intercept = y.mean()
            # regressors should be centered around zero
            Xc = X - X.mean(0)
            obs = pm.Normal("obs", intercept + Xc @ beta, eps, observed=y)

    Special cases arise from choosing a specific set of arguments.

    Here the prior distribution of beta is ``Normal(0, y.std() * r2 ** .5)``

    .. code-block:: python

        with pm.Model(coords=dict(variables=["a", "b", "c"])) as model:
            eps, beta = pmx.distributions.R2D2M2CP(
                "beta",
                y.std(),
                X.std(0),
                dims="variables",
                # NOTE: global shrinkage
                r2=0.8,
                # NOTE: if you are unsure about r2
                r2_std=0.2,
                centered=False
            )
            # intercept prior centering should be around prior predictive mean
            intercept = y.mean()
            # regressors should be centered around zero
            Xc = X - X.mean(0)
            obs = pm.Normal("obs", intercept + Xc @ beta, eps, observed=y)


    It is fine to leave some of the ``_std`` arguments unspecified.
    You can also specify only ``positive_probs``; all the variables are then
    assumed to explain the same amount of variance (equal importance).

    .. code-block:: python

        with pm.Model(coords=dict(variables=["a", "b", "c"])) as model:
            eps, beta = pmx.distributions.R2D2M2CP(
                "beta",
                y.std(),
                X.std(0),
                dims="variables",
                # NOTE: global shrinkage
                r2=0.8,
                # NOTE: if you are unsure about r2
                r2_std=0.2,
                # NOTE: if you know where a variable should go
                # if you do not know, leave as 0.5
                positive_probs=[0.8, 0.5, 0.1],
                # NOTE: try both
                centered=True
            )
            intercept = y.mean()
            obs = pm.Normal("obs", intercept + X @ beta, eps, observed=y)

    Notes
    -----
    To reference the R2D2M2CP implementation, you can use the following bibtex entry:

    .. code-block::

        @misc{pymc-extras-r2d2m2cp,
            title = {pymc-devs/pymc-extras: {P}ull {R}equest 137, {R2D2M2CP}},
            url = {https://github.com/pymc-devs/pymc-extras/pull/137},
            author = {Max Kochurov},
            howpublished = {GitHub},
            year = {2023}
        }
    """
    if not isinstance(dims, list | tuple):
        dims = (dims,)
    *broadcast_dims, dim = dims
    input_sigma = pt.as_tensor(input_sigma)
    output_sigma = pt.as_tensor(output_sigma)
    with pm.Model(name):
        if r2_std is not None:
            r2 = pm.Beta("r2", mu=r2, sigma=r2_std, dims=broadcast_dims)
        phi = _phi(
            variables_importance=variables_importance,
            variance_explained=variance_explained,
            importance_concentration=importance_concentration,
            dims=dims,
        )
        mask, psi = _psi(
            positive_probs=positive_probs, positive_probs_std=positive_probs_std, dims=dims
        )

    beta = _R2D2M2CP_beta(
        name,
        output_sigma,
        input_sigma,
        r2,
        phi,
        psi,
        dims=[*broadcast_dims, dim],
        centered=centered,
        psi_mask=mask,
    )
    resid_sigma = (1 - r2) ** 0.5 * output_sigma
    return R2D2M2CPOut(resid_sigma, beta)
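
The returned ``eps`` is the residual scale implied by the :math:`R^2` split: residual variance plus explained variance recovers the total output variance. A one-line check with assumed numbers:

.. code-block:: python

    output_sigma, r2 = 2.0, 0.8
    resid_sigma = (1 - r2) ** 0.5 * output_sigma  # what R2D2M2CP returns as eps
    explained_var = r2 * output_sigma**2          # the share spread across phi
    assert abs(resid_sigma**2 + explained_var - output_sigma**2) < 1e-12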