CUQIpy 1.3.0.post0.dev298__py3-none-any.whl → 1.4.0.post0.dev61__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cuqi/__init__.py +1 -0
- cuqi/_version.py +3 -3
- cuqi/density/_density.py +9 -1
- cuqi/distribution/_distribution.py +24 -15
- cuqi/distribution/_joint_distribution.py +96 -11
- cuqi/distribution/_posterior.py +9 -0
- cuqi/experimental/__init__.py +1 -2
- cuqi/experimental/_recommender.py +4 -4
- cuqi/implicitprior/__init__.py +1 -1
- cuqi/implicitprior/_restorator.py +35 -1
- cuqi/legacy/__init__.py +2 -0
- cuqi/legacy/sampler/__init__.py +11 -0
- cuqi/legacy/sampler/_conjugate.py +55 -0
- cuqi/legacy/sampler/_conjugate_approx.py +52 -0
- cuqi/legacy/sampler/_cwmh.py +196 -0
- cuqi/legacy/sampler/_gibbs.py +231 -0
- cuqi/legacy/sampler/_hmc.py +335 -0
- cuqi/legacy/sampler/_langevin_algorithm.py +198 -0
- cuqi/legacy/sampler/_laplace_approximation.py +184 -0
- cuqi/legacy/sampler/_mh.py +190 -0
- cuqi/legacy/sampler/_pcn.py +244 -0
- cuqi/legacy/sampler/_rto.py +284 -0
- cuqi/legacy/sampler/_sampler.py +182 -0
- cuqi/likelihood/_likelihood.py +1 -1
- cuqi/model/_model.py +212 -77
- cuqi/pde/__init__.py +4 -0
- cuqi/pde/_observation_map.py +36 -0
- cuqi/pde/_pde.py +52 -21
- cuqi/problem/_problem.py +87 -80
- cuqi/sampler/__init__.py +120 -8
- cuqi/sampler/_conjugate.py +376 -35
- cuqi/sampler/_conjugate_approx.py +40 -16
- cuqi/sampler/_cwmh.py +132 -138
- cuqi/{experimental/mcmc → sampler}/_direct.py +1 -1
- cuqi/sampler/_gibbs.py +269 -130
- cuqi/sampler/_hmc.py +328 -201
- cuqi/sampler/_langevin_algorithm.py +282 -98
- cuqi/sampler/_laplace_approximation.py +87 -117
- cuqi/sampler/_mh.py +47 -157
- cuqi/sampler/_pcn.py +56 -211
- cuqi/sampler/_rto.py +206 -140
- cuqi/sampler/_sampler.py +540 -135
- {cuqipy-1.3.0.post0.dev298.dist-info → cuqipy-1.4.0.post0.dev61.dist-info}/METADATA +1 -1
- {cuqipy-1.3.0.post0.dev298.dist-info → cuqipy-1.4.0.post0.dev61.dist-info}/RECORD +47 -45
- cuqi/experimental/mcmc/__init__.py +0 -122
- cuqi/experimental/mcmc/_conjugate.py +0 -396
- cuqi/experimental/mcmc/_conjugate_approx.py +0 -76
- cuqi/experimental/mcmc/_cwmh.py +0 -190
- cuqi/experimental/mcmc/_gibbs.py +0 -374
- cuqi/experimental/mcmc/_hmc.py +0 -460
- cuqi/experimental/mcmc/_langevin_algorithm.py +0 -382
- cuqi/experimental/mcmc/_laplace_approximation.py +0 -154
- cuqi/experimental/mcmc/_mh.py +0 -80
- cuqi/experimental/mcmc/_pcn.py +0 -89
- cuqi/experimental/mcmc/_rto.py +0 -306
- cuqi/experimental/mcmc/_sampler.py +0 -564
- {cuqipy-1.3.0.post0.dev298.dist-info → cuqipy-1.4.0.post0.dev61.dist-info}/WHEEL +0 -0
- {cuqipy-1.3.0.post0.dev298.dist-info → cuqipy-1.4.0.post0.dev61.dist-info}/licenses/LICENSE +0 -0
- {cuqipy-1.3.0.post0.dev298.dist-info → cuqipy-1.4.0.post0.dev61.dist-info}/top_level.txt +0 -0
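Reading the listing: the cuqi.experimental.mcmc modules are removed (one of them, _direct.py, is explicitly moved into cuqi.sampler), cuqi.sampler itself is heavily reworked, and the previous sampler implementations are preserved under a new cuqi.legacy.sampler package, two of which appear as the new-file hunks below. A minimal migration sketch, assuming the legacy classes are re-exported from cuqi.legacy.sampler as the hunks below indicate; the sampler choice, test problem, and sample counts are placeholders taken from the docstrings, not prescribed by this diff:

    import cuqi

    # Example target, as used in the NUTS docstring in the hunk below
    tp = cuqi.testproblem.WangCubic()
    target = tp.posterior

    # Pre-1.4 experimental location (removed in this diff):
    #   from cuqi.experimental.mcmc import NUTS
    # Legacy location added in this diff (assumed re-export via cuqi/legacy/sampler/__init__.py):
    from cuqi.legacy.sampler import NUTS

    sampler = NUTS(target)
    samples = sampler.sample(1000, 200)   # placeholder sizes: N samples after Nb burn-in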

cuqi/legacy/sampler/_hmc.py (new file)
@@ -0,0 +1,335 @@

import numpy as np
from cuqi.legacy.sampler import Sampler


# another implementation is in https://github.com/mfouesneau/NUTS
class NUTS(Sampler):
    """No-U-Turn Sampler (Hoffman and Gelman, 2014).

    Samples a distribution given its logpdf and gradient using a Hamiltonian Monte Carlo (HMC) algorithm with automatic parameter tuning.

    For more details see: Hoffman, M. D., & Gelman, A. (2014). The no-U-turn sampler: Adaptively setting path lengths in Hamiltonian Monte Carlo. Journal of Machine Learning Research, 15, 1593-1623.

    Parameters
    ----------

    target : `cuqi.distribution.Distribution`
        The target distribution to sample. Must have logpdf and gradient method. Custom logpdfs and gradients are supported by using a :class:`cuqi.distribution.UserDefinedDistribution`.

    x0 : ndarray
        Initial parameters. *Optional*

    max_depth : int
        Maximum depth of the tree.

    adapt_step_size : bool or float
        Whether to adapt the step size.
        If True, the step size is adapted automatically.
        If False, the step size is fixed to the initially estimated value.
        If set to a scalar, that value is used as the step size and no adaptation is performed.

    opt_acc_rate : float
        The optimal acceptance rate to reach if using adaptive step size.
        Suggested values are 0.6 (default) or 0.8 (as in Stan).

    callback : callable, *Optional*
        If set, this function will be called after every sample.
        The signature of the callback function is `callback(sample, sample_index)`,
        where `sample` is the current sample and `sample_index` is the index of the sample.
        An example is shown in demos/demo31_callback.py.

    Example
    -------
    .. code-block:: python

        # Import cuqi
        import cuqi

        # Define a target distribution
        tp = cuqi.testproblem.WangCubic()
        target = tp.posterior

        # Set up sampler
        sampler = cuqi.legacy.sampler.NUTS(target)

        # Sample
        samples = sampler.sample(10000, 5000)

        # Plot samples
        samples.plot_pair()

    After running the NUTS sampler, run diagnostics can be accessed via the
    following attributes:

    .. code-block:: python

        # Number of tree nodes created each NUTS iteration
        sampler.num_tree_node_list

        # Step size used in each NUTS iteration
        sampler.epsilon_list

        # Suggested step size during adaptation (the value of this step size is
        # only used after adaptation). The suggested step size is None if
        # adaptation is not requested.
        sampler.epsilon_bar_list

        # The iteration numbers can be accessed via
        sampler.iteration_list

    """
    def __init__(self, target, x0=None, max_depth=15, adapt_step_size=True, opt_acc_rate=0.6, **kwargs):
        super().__init__(target, x0=x0, **kwargs)
        self.max_depth = max_depth
        self.adapt_step_size = adapt_step_size
        self.opt_acc_rate = opt_acc_rate
        # if this flag is True, both the samples and the burn-in will be returned
        # otherwise, the burn-in will be truncated
        self._return_burnin = False

        # NUTS run diagnostics
        # number of tree nodes created each NUTS iteration
        self._num_tree_node = 0
        # Create lists to store NUTS run diagnostics
        self._create_run_diagnostic_attributes()

    def _create_run_diagnostic_attributes(self):
        """A method to create attributes to store NUTS run diagnostics."""
        self._reset_run_diagnostic_attributes()

    def _reset_run_diagnostic_attributes(self):
        """A method to reset attributes to store NUTS run diagnostics."""
        # NUTS iterations
        self.iteration_list = []
        # List to store number of tree nodes created each NUTS iteration
        self.num_tree_node_list = []
        # List of step size used in each NUTS iteration
        self.epsilon_list = []
        # List of burn-in step size suggestions during adaptation
        # only used when adaptation is done
        # remains fixed after adaptation (after burn-in)
        self.epsilon_bar_list = []

    def _update_run_diagnostic_attributes(self, k, n_tree, eps, eps_bar):
        """A method to update attributes to store NUTS run diagnostics."""
        # Store the current iteration number k
        self.iteration_list.append(k)
        # Store the number of tree nodes created in iteration k
        self.num_tree_node_list.append(n_tree)
        # Store the step size used in iteration k
        self.epsilon_list.append(eps)
        # Store the step size suggestion during adaptation in iteration k
        self.epsilon_bar_list.append(eps_bar)

    def _nuts_target(self, x):  # returns log-posterior evaluation and gradient as a tuple
        return self.target.logd(x), self.target.gradient(x)

    def _sample_adapt(self, N, Nb):
        return self._sample(N, Nb)

    def _sample(self, N, Nb):
        # Reset run diagnostic attributes
        self._reset_run_diagnostic_attributes()

        if self.adapt_step_size is True and Nb == 0:
            raise ValueError("Adaptive step size is True but number of burn-in steps is 0. Please set Nb > 0.")

        # Allocation
        Ns = Nb+N  # total number of samples (burn-in + kept samples)
        theta = np.empty((self.dim, Ns))
        joint_eval = np.empty(Ns)
        step_sizes = np.empty(Ns)

        # Initial state
        theta[:, 0] = self.x0
        joint_eval[0], grad = self._nuts_target(self.x0)

        # Step size variables
        epsilon, epsilon_bar = None, None

        # parameters for dual averaging
        if (self.adapt_step_size == True):
            epsilon = self._FindGoodEpsilon(theta[:, 0], joint_eval[0], grad)
            mu = np.log(10*epsilon)
            gamma, t_0, kappa = 0.05, 10, 0.75  # kappa in (0.5, 1]
            epsilon_bar, H_bar = 1, 0
            delta = self.opt_acc_rate  # https://mc-stan.org/docs/2_18/reference-manual/hmc-algorithm-parameters.html
            step_sizes[0] = epsilon
        elif (self.adapt_step_size == False):
            epsilon = self._FindGoodEpsilon(theta[:, 0], joint_eval[0], grad)
        else:
            epsilon = self.adapt_step_size  # if scalar, the user specifies the step size

        # run NUTS
        for k in range(1, Ns):
            # reset number of tree nodes for each iteration
            self._num_tree_node = 0

            theta_k, joint_k = theta[:, k-1], joint_eval[k-1]  # initial position (parameters)
            r_k = self._Kfun(1, 'sample')                      # resample momentum vector
            Ham = joint_k - self._Kfun(r_k, 'eval')            # Hamiltonian

            # slice variable
            log_u = Ham - np.random.exponential(1, size=1)  # u = np.log(np.random.uniform(0, np.exp(H)))

            # initialization
            j, s, n = 0, 1, 1
            theta[:, k], joint_eval[k] = theta_k, joint_k
            theta_minus, theta_plus = np.copy(theta_k), np.copy(theta_k)
            grad_minus, grad_plus = np.copy(grad), np.copy(grad)
            r_minus, r_plus = np.copy(r_k), np.copy(r_k)

            # run NUTS
            while (s == 1) and (j <= self.max_depth):
                # sample a direction
                v = int(2*(np.random.rand() < 0.5)-1)

                # build tree: doubling procedure
                if (v == -1):
                    theta_minus, r_minus, grad_minus, _, _, _, \
                    theta_prime, joint_prime, grad_prime, n_prime, s_prime, alpha, n_alpha = \
                        self._BuildTree(theta_minus, r_minus, grad_minus, Ham, log_u, v, j, epsilon)
                else:
                    _, _, _, theta_plus, r_plus, grad_plus, \
                    theta_prime, joint_prime, grad_prime, n_prime, s_prime, alpha, n_alpha = \
                        self._BuildTree(theta_plus, r_plus, grad_plus, Ham, log_u, v, j, epsilon)

                # Metropolis step
                alpha2 = min(1, (n_prime/n))  # min(0, np.log(n_p) - np.log(n))
                if (s_prime == 1) and (np.random.rand() <= alpha2):
                    theta[:, k] = theta_prime
                    joint_eval[k] = joint_prime
                    grad = np.copy(grad_prime)

                # update number of particles, tree level, and stopping criterion
                n += n_prime
                dtheta = theta_plus - theta_minus
                s = s_prime * int((dtheta @ r_minus.T) >= 0) * int((dtheta @ r_plus.T) >= 0)
                j += 1

            # update run diagnostic attributes
            self._update_run_diagnostic_attributes(
                k, self._num_tree_node, epsilon, epsilon_bar)

            # adapt epsilon during burn-in using dual averaging
            if (k <= Nb) and (self.adapt_step_size == True):
                eta1 = 1/(k + t_0)
                H_bar = (1-eta1)*H_bar + eta1*(delta - (alpha/n_alpha))
                epsilon = np.exp(mu - (np.sqrt(k)/gamma)*H_bar)
                eta = k**(-kappa)
                epsilon_bar = np.exp(eta*np.log(epsilon) + (1-eta)*np.log(epsilon_bar))
            elif (k == Nb+1) and (self.adapt_step_size == True):
                epsilon = epsilon_bar  # fix epsilon after burn-in
            step_sizes[k] = epsilon

            # msg
            self._print_progress(k+1, Ns)  # k+1 is the sample number, k is the index assuming x0 is the first sample
            self._call_callback(theta[:, k], k)

            if np.isnan(joint_eval[k]):
                raise NameError('NaN potential func')

        # apply burn-in
        if not self._return_burnin:
            theta = theta[:, Nb:]
            joint_eval = joint_eval[Nb:]
        return theta, joint_eval, step_sizes

    #=========================================================================
    # auxiliary standard Gaussian PDF: kinetic energy function
    # d_log_2pi = d*np.log(2*np.pi)
    def _Kfun(self, r, flag):
        if flag == 'eval':  # evaluate
            return 0.5*(r.T @ r)  # + d_log_2pi
        if flag == 'sample':  # sample
            return np.random.standard_normal(size=self.dim)

    #=========================================================================
    def _FindGoodEpsilon(self, theta, joint, grad, epsilon=1):
        r = self._Kfun(1, 'sample')          # resample a momentum
        Ham = joint - self._Kfun(r, 'eval')  # initial Hamiltonian
        _, r_prime, joint_prime, grad_prime = self._Leapfrog(theta, r, grad, epsilon)

        # trick to make sure the step is not huge, leading to infinite values of the likelihood
        k = 1
        while np.isinf(joint_prime) or np.isinf(grad_prime).any():
            k *= 0.5
            _, r_prime, joint_prime, grad_prime = self._Leapfrog(theta, r, grad, epsilon*k)
        epsilon = 0.5*k*epsilon

        # doubles/halves the value of epsilon until the acceptance probability of the proposal crosses 0.5
        Ham_prime = joint_prime - self._Kfun(r_prime, 'eval')
        log_ratio = Ham_prime - Ham
        a = 1 if log_ratio > np.log(0.5) else -1
        while (a*log_ratio > -a*np.log(2)):
            epsilon = (2**a)*epsilon
            _, r_prime, joint_prime, _ = self._Leapfrog(theta, r, grad, epsilon)
            Ham_prime = joint_prime - self._Kfun(r_prime, 'eval')
            log_ratio = Ham_prime - Ham
        return epsilon

    #=========================================================================
    def _Leapfrog(self, theta_old, r_old, grad_old, epsilon):
        # symplectic integrator: trajectories preserve phase space volume
        r_new = r_old + 0.5*epsilon*grad_old                # half-step
        theta_new = theta_old + epsilon*r_new               # full-step
        joint_new, grad_new = self._nuts_target(theta_new)  # new gradient
        r_new += 0.5*epsilon*grad_new                       # half-step
        return theta_new, r_new, joint_new, grad_new

    #=========================================================================
    # @functools.lru_cache(maxsize=128)
    def _BuildTree(self, theta, r, grad, Ham, log_u, v, j, epsilon, Delta_max=1000):
        # Increment the number of tree nodes counter
        self._num_tree_node += 1

        if (j == 0):  # base case
            # single leapfrog step in the direction v
            theta_prime, r_prime, joint_prime, grad_prime = self._Leapfrog(theta, r, grad, v*epsilon)
            Ham_prime = joint_prime - self._Kfun(r_prime, 'eval')  # Hamiltonian evaluation
            n_prime = int(log_u <= Ham_prime)             # if particle is in the slice
            s_prime = int(log_u < Delta_max + Ham_prime)  # divergence check
            diff_Ham = Ham_prime - Ham

            # Compute the acceptance probability
            # alpha_prime = min(1, np.exp(diff_Ham))
            # written in a stable way to avoid overflow when computing
            # exp(diff_Ham) for large values of diff_Ham
            alpha_prime = 1 if diff_Ham > 0 else np.exp(diff_Ham)
            n_alpha_prime = 1

            theta_minus, theta_plus = theta_prime, theta_prime
            r_minus, r_plus = r_prime, r_prime
            grad_minus, grad_plus = grad_prime, grad_prime
        else:
            # recursion: build the left/right subtrees
            theta_minus, r_minus, grad_minus, theta_plus, r_plus, grad_plus, \
            theta_prime, joint_prime, grad_prime, n_prime, s_prime, alpha_prime, n_alpha_prime = \
                self._BuildTree(theta, r, grad, Ham, log_u, v, j-1, epsilon)
            if (s_prime == 1):  # only continue if the stopping criterion was not met in the first subtree
                if (v == -1):
                    theta_minus, r_minus, grad_minus, _, _, _, \
                    theta_2prime, joint_2prime, grad_2prime, n_2prime, s_2prime, alpha_2prime, n_alpha_2prime = \
                        self._BuildTree(theta_minus, r_minus, grad_minus, Ham, log_u, v, j-1, epsilon)
                else:
                    _, _, _, theta_plus, r_plus, grad_plus, \
                    theta_2prime, joint_2prime, grad_2prime, n_2prime, s_2prime, alpha_2prime, n_alpha_2prime = \
                        self._BuildTree(theta_plus, r_plus, grad_plus, Ham, log_u, v, j-1, epsilon)

                # Metropolis step
                alpha2 = n_2prime / max(1, (n_prime + n_2prime))
                if (np.random.rand() <= alpha2):
                    theta_prime = np.copy(theta_2prime)
                    joint_prime = np.copy(joint_2prime)
                    grad_prime = np.copy(grad_2prime)

                # update number of particles and stopping criterion
                alpha_prime += alpha_2prime
                n_alpha_prime += n_alpha_2prime
                dtheta = theta_plus - theta_minus
                s_prime = s_2prime * int((dtheta @ r_minus.T) >= 0) * int((dtheta @ r_plus.T) >= 0)
                n_prime += n_2prime
        return theta_minus, r_minus, grad_minus, theta_plus, r_plus, grad_plus, \
            theta_prime, joint_prime, grad_prime, n_prime, s_prime, alpha_prime, n_alpha_prime
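As a reading aid (not part of the package diff), the burn-in branch of `_sample` above implements the dual-averaging step-size adaptation of Hoffman & Gelman (2014). In the code's own variable names, iteration k of the burn-in performs

    \eta_1 = \frac{1}{k + t_0}, \qquad
    \bar H_k = (1-\eta_1)\,\bar H_{k-1} + \eta_1\Bigl(\delta - \frac{\alpha}{n_\alpha}\Bigr),

    \log \epsilon_k = \mu - \frac{\sqrt{k}}{\gamma}\,\bar H_k, \qquad
    \log \bar\epsilon_k = k^{-\kappa}\,\log \epsilon_k + \bigl(1 - k^{-\kappa}\bigr)\,\log \bar\epsilon_{k-1},

with \delta equal to opt_acc_rate; after burn-in the step size is frozen at \epsilon = \bar\epsilon.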

cuqi/legacy/sampler/_langevin_algorithm.py (new file)
@@ -0,0 +1,198 @@

import numpy as np
import cuqi
from cuqi.legacy.sampler import Sampler


class ULA(Sampler):
    """Unadjusted Langevin algorithm (ULA) (Roberts and Tweedie, 1996)

    Samples a distribution given its logpdf and gradient (up to a constant) based on
    the Langevin diffusion dL_t = dW_t + 1/2*∇ target.logd(L_t) dt, where L_t is
    the Langevin diffusion and W_t is the `dim`-dimensional standard Brownian motion.

    For more details see: Roberts, G. O., & Tweedie, R. L. (1996). Exponential convergence
    of Langevin distributions and their discrete approximations. Bernoulli, 341-363.

    Parameters
    ----------

    target : `cuqi.distribution.Distribution`
        The target distribution to sample. Must have logd and gradient method. Custom logpdfs
        and gradients are supported by using a :class:`cuqi.distribution.UserDefinedDistribution`.

    x0 : ndarray
        Initial parameters. *Optional*

    scale : float
        The Langevin diffusion discretization time step (in practice, a scale of 1/dim**2 is
        recommended but not guaranteed to be the optimal choice).

    dim : int
        Dimension of parameter space. Required if target logpdf and gradient are callable
        functions. *Optional*.

    callback : callable, *Optional*
        If set, this function will be called after every sample.
        The signature of the callback function is `callback(sample, sample_index)`,
        where `sample` is the current sample and `sample_index` is the index of the sample.
        An example is shown in demos/demo31_callback.py.


    Example
    -------
    .. code-block:: python

        import numpy as np
        import cuqi

        # Parameters
        dim = 5              # Dimension of distribution
        mu = np.arange(dim)  # Mean of Gaussian
        std = 1              # Standard deviation of Gaussian

        # Logpdf function
        logpdf_func = lambda x: -1/(std**2)*np.sum((x-mu)**2)
        gradient_func = lambda x: -2/(std**2)*(x - mu)

        # Define distribution from logpdf and gradient as UserDefinedDistribution
        target = cuqi.distribution.UserDefinedDistribution(dim=dim, logpdf_func=logpdf_func,
            gradient_func=gradient_func)

        # Set up sampler
        sampler = cuqi.legacy.sampler.ULA(target, scale=1/dim**2)

        # Sample
        samples = sampler.sample(2000)

    A Deblur example can be found in demos/demo27_ULA.py
    """
    def __init__(self, target, scale, x0=None, dim=None, rng=None, **kwargs):
        super().__init__(target, x0=x0, dim=dim, **kwargs)
        self.scale = scale
        self.rng = rng

    def _sample_adapt(self, N, Nb):
        return self._sample(N, Nb)

    def _sample(self, N, Nb):
        # allocation
        Ns = Nb+N
        samples = np.empty((self.dim, Ns))
        target_eval = np.empty(Ns)
        g_target_eval = np.empty((self.dim, Ns))
        acc = np.zeros(Ns)

        # initial state
        samples[:, 0] = self.x0
        target_eval[0], g_target_eval[:, 0] = self.target.logd(self.x0), self.target.gradient(self.x0)
        acc[0] = 1

        # ULA
        for s in range(Ns-1):
            samples[:, s+1], target_eval[s+1], g_target_eval[:, s+1], acc[s+1] = \
                self.single_update(samples[:, s], target_eval[s], g_target_eval[:, s])
            self._print_progress(s+2, Ns)  # s+2 is the sample number, s+1 is the index assuming x0 is the first sample
            self._call_callback(samples[:, s+1], s+1)

        # apply burn-in
        samples = samples[:, Nb:]
        target_eval = target_eval[Nb:]
        acc = acc[Nb:]
        return samples, target_eval, np.mean(acc)

    def single_update(self, x_t, target_eval_t, g_target_eval_t):
        # approximate Langevin diffusion
        xi = cuqi.distribution.Normal(mean=np.zeros(self.dim), std=np.sqrt(self.scale)).sample(rng=self.rng)
        x_star = x_t + 0.5*self.scale*g_target_eval_t + xi
        logpi_eval_star, g_logpi_star = self.target.logd(x_star), self.target.gradient(x_star)

        # msg
        if np.isnan(logpi_eval_star):
            raise NameError('NaN potential func. Consider using a smaller scale parameter')

        return x_star, logpi_eval_star, g_logpi_star, 1  # sample always accepted without Metropolis correction


class MALA(ULA):
    """ Metropolis-adjusted Langevin algorithm (MALA) (Roberts and Tweedie, 1996)

    Samples a distribution given its logd and gradient (up to a constant) based on
    the Langevin diffusion dL_t = dW_t + 1/2*∇ target.logd(L_t) dt, where L_t is
    the Langevin diffusion and W_t is the `dim`-dimensional standard Brownian motion.
    The sample is then accepted or rejected according to the Metropolis–Hastings algorithm.

    For more details see: Roberts, G. O., & Tweedie, R. L. (1996). Exponential convergence
    of Langevin distributions and their discrete approximations. Bernoulli, 341-363.

    Parameters
    ----------

    target : `cuqi.distribution.Distribution`
        The target distribution to sample. Must have logpdf and gradient method. Custom logpdfs
        and gradients are supported by using a :class:`cuqi.distribution.UserDefinedDistribution`.

    x0 : ndarray
        Initial parameters. *Optional*

    scale : float
        The Langevin diffusion discretization time step.

    dim : int
        Dimension of parameter space. Required if target logpdf and gradient are callable
        functions. *Optional*.

    callback : callable, *Optional*
        If set, this function will be called after every sample.
        The signature of the callback function is `callback(sample, sample_index)`,
        where `sample` is the current sample and `sample_index` is the index of the sample.
        An example is shown in demos/demo31_callback.py.


    Example
    -------
    .. code-block:: python

        import numpy as np
        import cuqi

        # Parameters
        dim = 5              # Dimension of distribution
        mu = np.arange(dim)  # Mean of Gaussian
        std = 1              # Standard deviation of Gaussian

        # Logpdf function
        logpdf_func = lambda x: -1/(std**2)*np.sum((x-mu)**2)
        gradient_func = lambda x: -2/(std**2)*(x-mu)

        # Define distribution from logpdf as UserDefinedDistribution (sample and gradients also supported)
        target = cuqi.distribution.UserDefinedDistribution(dim=dim, logpdf_func=logpdf_func,
            gradient_func=gradient_func)

        # Set up sampler
        sampler = cuqi.legacy.sampler.MALA(target, scale=1/5**2)

        # Sample
        samples = sampler.sample(2000)

    A Deblur example can be found in demos/demo28_MALA.py
    """
    def __init__(self, target, scale, x0=None, dim=None, rng=None, **kwargs):
        super().__init__(target, scale, x0=x0, dim=dim, rng=rng, **kwargs)

    def single_update(self, x_t, target_eval_t, g_target_eval_t):
        # approximate Langevin diffusion
        xi = cuqi.distribution.Normal(mean=np.zeros(self.dim), std=np.sqrt(self.scale)).sample(rng=self.rng)
        x_star = x_t + (self.scale/2)*g_target_eval_t + xi
        logpi_eval_star, g_logpi_star = self.target.logd(x_star), self.target.gradient(x_star)

        # Metropolis step
        log_target_ratio = logpi_eval_star - target_eval_t
        log_prop_ratio = self.log_proposal(x_t, x_star, g_logpi_star) \
            - self.log_proposal(x_star, x_t, g_target_eval_t)
        log_alpha = min(0, log_target_ratio + log_prop_ratio)

        # accept/reject
        log_u = np.log(cuqi.distribution.Uniform(low=0, high=1).sample(rng=self.rng))
        if (log_u <= log_alpha) and not np.isnan(logpi_eval_star):
            return x_star, logpi_eval_star, g_logpi_star, 1
        else:
            return x_t.copy(), target_eval_t, g_target_eval_t.copy(), 0

    def log_proposal(self, theta_star, theta_k, g_logpi_k):
        mu = theta_k + ((self.scale)/2)*g_logpi_k
        misfit = theta_star - mu
        return -0.5*((1/(self.scale))*(misfit.T @ misfit))
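Again as a reading aid (not part of the package diff), `single_update` in both classes draws the Euler–Maruyama discretization of the Langevin diffusion quoted in the docstrings,

    x^\star = x_t + \tfrac{\text{scale}}{2}\,\nabla \log \pi(x_t) + \sqrt{\text{scale}}\;\xi, \qquad \xi \sim \mathcal{N}(0, I_{\text{dim}}),

and MALA's `log_proposal` evaluates the log of the corresponding Gaussian transition density up to a constant, \log q(x' \mid x) = -\lVert x' - x - \tfrac{\text{scale}}{2}\nabla\log\pi(x)\rVert^2 / (2\,\text{scale}). The Metropolis step then accepts with probability \exp(\log\alpha), where \log\alpha = \min\bigl(0,\ \log\pi(x^\star) - \log\pi(x_t) + \log q(x_t \mid x^\star) - \log q(x^\star \mid x_t)\bigr); ULA skips this correction and always accepts.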