gsMap 1.71__py3-none-any.whl → 1.71.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsMap/GNN/__init__.py +0 -0
- gsMap/GNN/adjacency_matrix.py +75 -75
- gsMap/GNN/model.py +90 -89
- gsMap/GNN/train.py +0 -0
- gsMap/__init__.py +5 -5
- gsMap/__main__.py +2 -2
- gsMap/cauchy_combination_test.py +141 -141
- gsMap/config.py +805 -805
- gsMap/diagnosis.py +273 -273
- gsMap/find_latent_representation.py +133 -133
- gsMap/format_sumstats.py +407 -407
- gsMap/generate_ldscore.py +618 -618
- gsMap/latent_to_gene.py +234 -234
- gsMap/main.py +31 -31
- gsMap/report.py +160 -160
- gsMap/run_all_mode.py +194 -194
- gsMap/setup.py +0 -0
- gsMap/spatial_ldsc_multiple_sumstats.py +380 -380
- gsMap/templates/report_template.html +198 -198
- gsMap/utils/__init__.py +0 -0
- gsMap/utils/generate_r2_matrix.py +735 -735
- gsMap/utils/jackknife.py +514 -514
- gsMap/utils/make_annotations.py +518 -518
- gsMap/utils/manhattan_plot.py +639 -639
- gsMap/utils/regression_read.py +294 -294
- gsMap/visualize.py +198 -198
- {gsmap-1.71.dist-info → gsmap-1.71.1.dist-info}/LICENSE +21 -21
- {gsmap-1.71.dist-info → gsmap-1.71.1.dist-info}/METADATA +2 -2
- gsmap-1.71.1.dist-info/RECORD +31 -0
- gsmap-1.71.dist-info/RECORD +0 -31
- {gsmap-1.71.dist-info → gsmap-1.71.1.dist-info}/WHEEL +0 -0
- {gsmap-1.71.dist-info → gsmap-1.71.1.dist-info}/entry_points.txt +0 -0
gsMap/utils/jackknife.py
CHANGED
@@ -1,514 +1,514 @@
|
|
1
|
-
'''
|
2
|
-
(c) 2014 Brendan Bulik-Sullivan and Hilary Finucane
|
3
|
-
|
4
|
-
Fast block jackknives.
|
5
|
-
|
6
|
-
Everything in this module deals with 2D numpy arrays. 1D data are represented as arrays
|
7
|
-
with dimension (N, 1) or (1, N), to avoid bugs arising from numpy treating (N, ) as
|
8
|
-
a fundamentally different shape from (N, 1). The convention in this module is for the
|
9
|
-
first dimension to represent # of data points (or # of blocks in a block jackknife, since
|
10
|
-
a block is like a datapoint), and for the second dimension to represent the dimensionality
|
11
|
-
of the data.
|
12
|
-
|
13
|
-
'''
|
14
|
-
|
15
|
-
from __future__ import division
|
16
|
-
import numpy as np
|
17
|
-
from scipy.optimize import nnls
|
18
|
-
np.seterr(divide='raise', invalid='raise')
|
19
|
-
xrange = range
|
20
|
-
|
21
|
-
def _check_shape(x, y):
|
22
|
-
'''Check that x and y have the correct shapes (for regression jackknives).'''
|
23
|
-
if len(x.shape) != 2 or len(y.shape) != 2:
|
24
|
-
raise ValueError('x and y must be 2D arrays.')
|
25
|
-
if x.shape[0] != y.shape[0]:
|
26
|
-
raise ValueError(
|
27
|
-
'Number of datapoints in x != number of datapoints in y.')
|
28
|
-
if y.shape[1] != 1:
|
29
|
-
raise ValueError('y must have shape (n_snp, 1)')
|
30
|
-
n, p = x.shape
|
31
|
-
if p > n:
|
32
|
-
raise ValueError('More dimensions than datapoints.')
|
33
|
-
|
34
|
-
return (n, p)
|
35
|
-
|
36
|
-
|
37
|
-
def _check_shape_block(xty_block_values, xtx_block_values):
|
38
|
-
'''Check that xty_block_values and xtx_block_values have correct shapes.'''
|
39
|
-
if xtx_block_values.shape[0:2] != xty_block_values.shape:
|
40
|
-
raise ValueError(
|
41
|
-
'Shape of xty_block_values must equal shape of first two dimensions of xty_block_values.')
|
42
|
-
if len(xtx_block_values.shape) < 3:
|
43
|
-
raise ValueError('xtx_block_values must be a 3D array.')
|
44
|
-
if xtx_block_values.shape[1] != xtx_block_values.shape[2]:
|
45
|
-
raise ValueError(
|
46
|
-
'Last two axes of xtx_block_values must have same dimension.')
|
47
|
-
|
48
|
-
return xtx_block_values.shape[0:2]
|
49
|
-
|
50
|
-
|
51
|
-
class Jackknife(object):

    '''
    Base class for jackknife objects. Input involves x,y, so this base class is tailored
    for statistics computed from independent and dependent variables (e.g., regressions).
    The delete_values_to_pseudovalues and jknife methods will still be useful for other
    sorts of statistics, but the __init__ method will need to be overridden.

    Parameters
    ----------
    x : np.matrix with shape (n, p)
        Independent variable.
    y : np.matrix with shape (n, 1)
        Dependent variable.
    n_blocks : int, optional
        Number of jackknife blocks. Only used when separators is None.
    separators : sequence of ints, optional
        Explicit block boundaries. The smallest must be 0 and the largest must be n.
        Takes precedence over n_blocks when both are given.

    Attributes
    ----------
    n_blocks : int
        Number of jackknife blocks
    p : int
        Dimensionality of the independent variable
    N : int
        Number of datapoints (equal to x.shape[0])
    separators : sequence of ints
        Block boundaries (sorted copy of the separators argument, or the output of
        get_separators).

    Methods
    -------
    jknife(pseudovalues):
        Computes jackknife estimate and variance from the jackknife pseudovalues.
    delete_values_to_pseudovalues(delete_values, est):
        Converts delete values and the whole-data estimate to pseudovalues.
    get_separators(N, n_blocks):
        Returns (approximately) evenly-spaced jackknife block boundaries.

    Raises
    ------
    ValueError :
        If x and y fail the shape check, if separators does not span [0, n], if
        neither n_blocks nor separators is given, or if there are more blocks than
        data points.

    '''

    def __init__(self, x, y, n_blocks=None, separators=None):
        self.N, self.p = _check_shape(x, y)
        if separators is not None:
            if max(separators) != self.N:
                raise ValueError(
                    'Max(separators) must be equal to number of data points.')
            if min(separators) != 0:
                raise ValueError('Min(separators) must be equal to 0.')
            self.separators = sorted(separators)
            self.n_blocks = len(separators) - 1
        elif n_blocks is not None:
            self.n_blocks = n_blocks
            self.separators = self.get_separators(self.N, self.n_blocks)
        else:
            raise ValueError('Must specify either n_blocks or separators.')

        if self.n_blocks > self.N:
            raise ValueError('More blocks than data points.')

    @classmethod
    def jknife(cls, pseudovalues):
        '''
        Converts pseudovalues to jackknife estimate and variance.

        Parameters
        ----------
        pseudovalues : np.matrix of floats with shape (n_blocks, p)

        Returns
        -------
        jknife_est : np.matrix with shape (1, p)
            Jackknifed estimate (column means of the pseudovalues).
        jknife_var : np.matrix with shape (1, p)
            Variance of jackknifed estimate (diagonal of jknife_cov).
        jknife_se : np.matrix with shape (1, p)
            Standard error of jackknifed estimate, equal to sqrt(jknife_var).
        jknife_cov : np.matrix with shape (p, p)
            Covariance matrix of jackknifed estimate.

        '''
        n_blocks = pseudovalues.shape[0]
        # np.cov treats rows as variables, hence the transpose; dividing by n_blocks
        # turns the covariance of the pseudovalues into that of their mean.
        jknife_cov = np.atleast_2d(np.cov(pseudovalues.T, ddof=1) / n_blocks)
        jknife_var = np.atleast_2d(np.diag(jknife_cov))
        jknife_se = np.atleast_2d(np.sqrt(jknife_var))
        jknife_est = np.atleast_2d(np.mean(pseudovalues, axis=0))
        return (jknife_est, jknife_var, jknife_se, jknife_cov)

    @classmethod
    def delete_values_to_pseudovalues(cls, delete_values, est):
        '''
        Converts whole-data estimate and delete values to pseudovalues.

        Parameters
        ----------
        delete_values : np.matrix with shape (n_blocks, p)
            Delete values.
        est : np.matrix with shape (1, p):
            Whole-data estimate.

        Returns
        -------
        pseudovalues : np.matrix with shape (n_blocks, p)
            Pseudovalues, n_blocks * est - (n_blocks - 1) * delete_values.

        Raises
        ------
        ValueError :
            If est.shape != (1, delete_values.shape[1])

        '''
        n_blocks, p = delete_values.shape
        if est.shape != (1, p):
            raise ValueError(
                'Different number of parameters in delete_values than in est.')

        return n_blocks * est - (n_blocks - 1) * delete_values

    @classmethod
    def get_separators(cls, N, n_blocks):
        '''Define evenly-spaced block boundaries: n_blocks + 1 ints from 0 to N.'''
        return np.floor(np.linspace(0, N, n_blocks + 1)).astype(int)
|
170
|
-
|
171
|
-
|
172
|
-
class LstsqJackknifeSlow(Jackknife):

    '''
    Slow linear-regression block jackknife. Each delete value is obtained by refitting
    the regression with one block of rows removed, instead of being derived from
    per-block summaries. Useful for testing and for non-negative least squares.

    Inherits from Jackknife class.

    Parameters
    ----------
    x : np.matrix with shape (n, p)
        Independent variable.
    y : np.matrix with shape (n, 1)
        Dependent variable.
    n_blocks : int, optional
        Number of jackknife blocks
    nn : bool
        If true, fit with scipy.optimize.nnls instead of np.linalg.lstsq.
    separators : sequence of ints, optional
        Explicit block boundaries (forwarded to Jackknife.__init__).

    Attributes
    ----------
    est : np.matrix with shape (1, p)
        Whole-data regression estimate.
    delete_values : np.matrix with shape (n_blocks, p)
        Jackknife delete values. After __init__ this instance attribute shadows the
        delete_values classmethod on the instance.
    pseudovalues : np.matrix with shape (n_blocks, p)
        Jackknife pseudovalues.
    jknife_est : np.matrix with shape (1, p)
        Jackknifed estimate.
    jknife_var : np.matrix with shape (1, p)
        Variance of jackknifed estimate.
    jknife_se : np.matrix with shape (1, p)
        Standard error of jackknifed estimate, equal to sqrt(jknife_var).
    jknife_cov : np.matrix with shape (p, p)
        Covariance matrix of jackknifed estimate.

    Methods
    -------
    delete_values(x, y, func, s):
        Compute delete values of func(x, y) the slow way, with blocks defined by s.

    '''

    def __init__(self, x, y, n_blocks=None, nn=False, separators=None):
        Jackknife.__init__(self, x, y, n_blocks, separators)

        def _fit_nnls(x, y):
            # non-negative least squares; y is flattened to 1D for the solver
            return np.atleast_2d(nnls(x, np.array(y).T[0])[0])

        def _fit_lstsq(x, y):
            return np.atleast_2d(
                np.linalg.lstsq(x, np.array(y).T[0])[0])

        func = _fit_nnls if nn else _fit_lstsq

        self.est = func(x, y)
        self.delete_values = self.delete_values(x, y, func, self.separators)
        self.pseudovalues = self.delete_values_to_pseudovalues(
            self.delete_values, self.est)
        summary = self.jknife(self.pseudovalues)
        (self.jknife_est, self.jknife_var, self.jknife_se, self.jknife_cov) = summary

    @classmethod
    def delete_values(cls, x, y, func, s):
        '''
        Compute delete values by deleting one block at a time.

        Parameters
        ----------
        x : np.matrix with shape (n, p)
            Independent variable.
        y : np.matrix with shape (n, 1)
            Dependent variable.
        func : function (n, p) , (n, 1) --> (1, p)
            Function of x and y to be jackknifed.
        s : list of ints
            Block separators.

        Returns
        -------
        delete_values : np.matrix with shape (n_blocks, p)
            Delete block values (with n_blocks blocks defined by parameter s).

        Raises
        ------
        ValueError :
            If x and y fail the shape check performed by _check_shape.

        '''
        _check_shape(x, y)
        per_block = []
        for i in range(len(s) - 1):
            lo, hi = s[i], s[i + 1]
            # Keep every row outside the half-open block [lo, hi).
            x_kept = np.vstack([x[0:lo, ...], x[hi:, ...]])
            y_kept = np.vstack([y[0:lo, ...], y[hi:, ...]])
            per_block.append(func(x_kept, y_kept))

        return np.concatenate(per_block, axis=0)
|
262
|
-
|
263
|
-
|
264
|
-
class LstsqJackknifeFast(Jackknife):

    '''
    Fast block jackknife for linear regression. X^T X and X^T Y are computed once per
    block; the whole-data estimate and every delete value are then solved from sums of
    those block values.

    Inherits from Jackknife class.

    Parameters
    ----------
    x : np.matrix with shape (n, p)
        Independent variable.
    y : np.matrix with shape (n, 1)
        Dependent variable.
    n_blocks : int, optional
        Number of jackknife blocks
    separators : sequence of ints, optional
        Explicit block boundaries (forwarded to Jackknife.__init__).

    Attributes
    ----------
    est : np.matrix with shape (1, p)
        Whole-data regression estimate.
    delete_values : np.matrix with shape (n_blocks, p)
        Jackknife delete values.
    pseudovalues : np.matrix with shape (n_blocks, p)
        Jackknife pseudovalues.
    jknife_est : np.matrix with shape (1, p)
        Jackknifed estimate.
    jknife_var : np.matrix with shape (1, p)
        Variance of jackknifed estimate.
    jknife_se : np.matrix with shape (1, p)
        Standard error of jackknifed estimate, equal to sqrt(jknife_var).
    jknife_cov : np.matrix with shape (p, p)
        Covariance matrix of jackknifed estimate.

    Methods
    -------
    block_values(x, y, s) :
        Computes block values for the regression y~x.
    block_values_to_est(xty_block_values, xtx_block_values) :
        Computes whole-data estimate from block values.
    block_values_to_delete_values(xty_block_values, xtx_block_values) :
        Computes delete values from block values.

    '''

    def __init__(self, x, y, n_blocks=None, separators=None):
        Jackknife.__init__(self, x, y, n_blocks, separators)
        xty, xtx = self.block_values(x, y, self.separators)
        self.est = self.block_values_to_est(xty, xtx)
        self.delete_values = self.block_values_to_delete_values(xty, xtx)
        self.pseudovalues = self.delete_values_to_pseudovalues(
            self.delete_values, self.est)
        summary = self.jknife(self.pseudovalues)
        (self.jknife_est, self.jknife_var, self.jknife_se, self.jknife_cov) = summary

    @classmethod
    def block_values(cls, x, y, s):
        '''
        Compute block values.

        Parameters
        ----------
        x : np.matrix with shape (n, p)
            Independent variable.
        y : np.matrix with shape (n, 1)
            Dependent variable.
        s : list of ints
            Block separators; block i covers rows s[i]:s[i + 1].

        Returns
        -------
        xty_block_values : np.matrix with shape (n_blocks, p)
            Block values of X^T Y.
        xtx_block_values : 3d np array with shape (n_blocks, p, p)
            Block values of X^T X.

        Raises
        ------
        ValueError :
            If x and y fail the shape check performed by _check_shape.

        '''
        _, p = _check_shape(x, y)
        n_blocks = len(s) - 1
        xtx_block_values = np.zeros((n_blocks, p, p))
        xty_block_values = np.zeros((n_blocks, p))
        for i in range(n_blocks):
            x_blk = x[s[i]:s[i + 1], ...]
            y_blk = y[s[i]:s[i + 1], ...]
            xty_block_values[i, ...] = np.dot(x_blk.T, y_blk).reshape((1, p))
            xtx_block_values[i, ...] = np.dot(x_blk.T, x_blk)

        return (xty_block_values, xtx_block_values)

    @classmethod
    def block_values_to_est(cls, xty_block_values, xtx_block_values):
        '''
        Converts block values to the whole-data linear regression estimate.

        Parameters
        ----------
        xty_block_values : np.matrix with shape (n_blocks, p)
            Block values of X^T Y.
        xtx_block_values : 3D np.array with shape (n_blocks, p, p)
            Block values of X^T X

        Returns
        -------
        est : np.matrix with shape (1, p)
            Whole data estimate.

        Raises
        ------
        LinAlgError :
            If design matrix is singular.
        ValueError :
            If the last two dimensions of xtx_block_values are not equal or if the first two
            dimensions of xtx_block_values do not equal the shape of xty_block_values.

        '''
        _, p = _check_shape_block(xty_block_values, xtx_block_values)
        # Summing over blocks recovers the whole-data X^T Y and X^T X.
        xty_total = np.sum(xty_block_values, axis=0)
        xtx_total = np.sum(xtx_block_values, axis=0)
        return np.linalg.solve(xtx_total, xty_total).reshape((1, p))

    @classmethod
    def block_values_to_delete_values(cls, xty_block_values, xtx_block_values):
        '''
        Converts block values to delete values.

        Parameters
        ----------
        xty_block_values : np.matrix with shape (n_blocks, p)
            Block values of X^T Y.
        xtx_block_values : 3D np.array with shape (n_blocks, p, p)
            Block values of X^T X

        Returns
        -------
        delete_values : np.matrix with shape (n_blocks, p)
            Delete Values.

        Raises
        ------
        LinAlgError :
            If delete design matrix is singular.
        ValueError :
            If the last two dimensions of xtx_block_values are not equal or if the first two
            dimensions of xtx_block_values do not equal the shape of xty_block_values.

        '''
        n_blocks, p = _check_shape_block(xty_block_values, xtx_block_values)
        xty_tot = np.sum(xty_block_values, axis=0)
        xtx_tot = np.sum(xtx_block_values, axis=0)
        delete_values = np.zeros((n_blocks, p))
        for j in range(n_blocks):
            # Subtracting block j from the totals gives the leave-block-out system.
            xty_without_j = xty_tot - xty_block_values[j]
            xtx_without_j = xtx_tot - xtx_block_values[j]
            solution = np.linalg.solve(xtx_without_j, xty_without_j)
            delete_values[j, ...] = solution.reshape((1, p))

        return delete_values
|
427
|
-
|
428
|
-
|
429
|
-
class RatioJackknife(Jackknife):

    '''
    Block jackknife ratio estimate.

    Inherits from Jackknife, but __init__ does not call Jackknife.__init__; only the
    inherited jknife classmethod is used.

    Parameters
    ----------
    est : np.array with shape (1, p)
        Whole data ratio estimate
    numer_delete_values : np.matrix with shape (n_blocks, p)
        Delete values for the numerator.
    denom_delete_values: np.matrix with shape (n_blocks, p)
        Delete values for the denominator.

    Methods
    -------
    delete_values_to_pseudovalues(est, denom, numer):
        Converts denominator/ numerator delete values and the whole-data estimate to
        pseudovalues.

    Raises
    ------
    ValueError :
        If the delete value arrays differ in shape or are not 2D, or if est does not
        have shape (1, p).
    FloatingPointError :
        If any entry of denom_delete_values is zero (this relies on the module-level
        np.seterr(divide='raise', invalid='raise') setting).

    Note that it is possible for the denominator to cross zero (i.e., be both positive
    and negative) and still have a finite ratio estimate and SE, for example if the
    numerator is fixed to 0 and the denominator is either -1 or 1. If the denominator
    is noisily close to zero, then it is unlikely that the denominator will yield zero
    exactly (and therefore yield an inf or nan), but delete values will be of the form
    (numerator / close to zero) and -(numerator / close to zero), i.e., (big) and -(big),
    and so the jackknife will (correctly) yield huge SE.

    '''

    def __init__(self, est, numer_delete_values, denom_delete_values):
        numer_shape = numer_delete_values.shape
        if numer_shape != denom_delete_values.shape:
            raise ValueError(
                'numer_delete_values.shape != denom_delete_values.shape.')
        if len(numer_shape) != 2:
            raise ValueError('Delete values must be matrices.')
        est_matches = (len(est.shape) == 2 and est.shape[0] == 1
                       and est.shape[1] == numer_shape[1])
        if not est_matches:
            raise ValueError(
                'Shape of est does not match shape of delete values.')

        self.n_blocks = numer_shape[0]
        self.est = est
        self.pseudovalues = self.delete_values_to_pseudovalues(
            self.est, denom_delete_values, numer_delete_values)
        summary = self.jknife(self.pseudovalues)
        (self.jknife_est, self.jknife_var, self.jknife_se, self.jknife_cov) = summary

    @classmethod
    def delete_values_to_pseudovalues(cls, est, denom, numer):
        '''
        Converts delete values to pseudovalues.

        Parameters
        ----------
        est : np.matrix with shape (1, p)
            Whole-data ratio estimate.
        denom : np.matrix with shape (n_blocks, p)
            Denominator delete values.
        numer : np.matrix with shape (n_blocks, p)
            Numerator delete values.

        Returns
        -------
        pseudovalues : np.array with shape (n_blocks, p)
            Ratio Jackknife Pseudovalues. Row j is
            n_blocks * est - (n_blocks - 1) * numer[j] / denom[j].

        '''
        n_blocks, p = denom.shape
        remaining = n_blocks - 1
        pseudovalues = np.zeros((n_blocks, p))
        for j in range(n_blocks):
            block_ratio = remaining * numer[j, ...] / denom[j, ...]
            pseudovalues[j, ...] = n_blocks * est - block_ratio

        return pseudovalues
|
1
|
+
'''
|
2
|
+
(c) 2014 Brendan Bulik-Sullivan and Hilary Finucane
|
3
|
+
|
4
|
+
Fast block jackknives.
|
5
|
+
|
6
|
+
Everything in this module deals with 2D numpy arrays. 1D data are represented as arrays
|
7
|
+
with dimension (N, 1) or (1, N), to avoid bugs arising from numpy treating (N, ) as
|
8
|
+
a fundamentally different shape from (N, 1). The convention in this module is for the
|
9
|
+
first dimension to represent # of data points (or # of blocks in a block jackknife, since
|
10
|
+
a block is like a datapoint), and for the second dimension to represent the dimensionality
|
11
|
+
of the data.
|
12
|
+
|
13
|
+
'''
|
14
|
+
|
15
|
+
from __future__ import division
|
16
|
+
import numpy as np
|
17
|
+
from scipy.optimize import nnls
|
18
|
+
np.seterr(divide='raise', invalid='raise')
|
19
|
+
xrange = range
|
20
|
+
|
21
|
+
def _check_shape(x, y):
|
22
|
+
'''Check that x and y have the correct shapes (for regression jackknives).'''
|
23
|
+
if len(x.shape) != 2 or len(y.shape) != 2:
|
24
|
+
raise ValueError('x and y must be 2D arrays.')
|
25
|
+
if x.shape[0] != y.shape[0]:
|
26
|
+
raise ValueError(
|
27
|
+
'Number of datapoints in x != number of datapoints in y.')
|
28
|
+
if y.shape[1] != 1:
|
29
|
+
raise ValueError('y must have shape (n_snp, 1)')
|
30
|
+
n, p = x.shape
|
31
|
+
if p > n:
|
32
|
+
raise ValueError('More dimensions than datapoints.')
|
33
|
+
|
34
|
+
return (n, p)
|
35
|
+
|
36
|
+
|
37
|
+
def _check_shape_block(xty_block_values, xtx_block_values):
|
38
|
+
'''Check that xty_block_values and xtx_block_values have correct shapes.'''
|
39
|
+
if xtx_block_values.shape[0:2] != xty_block_values.shape:
|
40
|
+
raise ValueError(
|
41
|
+
'Shape of xty_block_values must equal shape of first two dimensions of xty_block_values.')
|
42
|
+
if len(xtx_block_values.shape) < 3:
|
43
|
+
raise ValueError('xtx_block_values must be a 3D array.')
|
44
|
+
if xtx_block_values.shape[1] != xtx_block_values.shape[2]:
|
45
|
+
raise ValueError(
|
46
|
+
'Last two axes of xtx_block_values must have same dimension.')
|
47
|
+
|
48
|
+
return xtx_block_values.shape[0:2]
|
49
|
+
|
50
|
+
|
51
|
+
class Jackknife(object):

    '''
    Base class for jackknife objects. Input involves x,y, so this base class is tailored
    for statistics computed from independent and dependent variables (e.g., regressions).
    The delete_values_to_pseudovalues and jknife methods will still be useful for other
    sorts of statistics, but the __init__ method will need to be overridden.

    Parameters
    ----------
    x : np.matrix with shape (n, p)
        Independent variable.
    y : np.matrix with shape (n, 1)
        Dependent variable.
    n_blocks : int, optional
        Number of jackknife blocks. Only used when separators is None.
    separators : sequence of ints, optional
        Explicit block boundaries. The smallest must be 0 and the largest must be n.
        Takes precedence over n_blocks when both are given.

    Attributes
    ----------
    n_blocks : int
        Number of jackknife blocks
    p : int
        Dimensionality of the independent variable
    N : int
        Number of datapoints (equal to x.shape[0])
    separators : sequence of ints
        Block boundaries (sorted copy of the separators argument, or the output of
        get_separators).

    Methods
    -------
    jknife(pseudovalues):
        Computes jackknife estimate and variance from the jackknife pseudovalues.
    delete_values_to_pseudovalues(delete_values, est):
        Converts delete values and the whole-data estimate to pseudovalues.
    get_separators(N, n_blocks):
        Returns (approximately) evenly-spaced jackknife block boundaries.

    Raises
    ------
    ValueError :
        If x and y fail the shape check, if separators does not span [0, n], if
        neither n_blocks nor separators is given, or if there are more blocks than
        data points.

    '''

    def __init__(self, x, y, n_blocks=None, separators=None):
        self.N, self.p = _check_shape(x, y)
        if separators is not None:
            if max(separators) != self.N:
                raise ValueError(
                    'Max(separators) must be equal to number of data points.')
            if min(separators) != 0:
                raise ValueError('Min(separators) must be equal to 0.')
            self.separators = sorted(separators)
            self.n_blocks = len(separators) - 1
        elif n_blocks is not None:
            self.n_blocks = n_blocks
            self.separators = self.get_separators(self.N, self.n_blocks)
        else:
            raise ValueError('Must specify either n_blocks or separators.')

        if self.n_blocks > self.N:
            raise ValueError('More blocks than data points.')

    @classmethod
    def jknife(cls, pseudovalues):
        '''
        Converts pseudovalues to jackknife estimate and variance.

        Parameters
        ----------
        pseudovalues : np.matrix of floats with shape (n_blocks, p)

        Returns
        -------
        jknife_est : np.matrix with shape (1, p)
            Jackknifed estimate (column means of the pseudovalues).
        jknife_var : np.matrix with shape (1, p)
            Variance of jackknifed estimate (diagonal of jknife_cov).
        jknife_se : np.matrix with shape (1, p)
            Standard error of jackknifed estimate, equal to sqrt(jknife_var).
        jknife_cov : np.matrix with shape (p, p)
            Covariance matrix of jackknifed estimate.

        '''
        n_blocks = pseudovalues.shape[0]
        # np.cov treats rows as variables, hence the transpose; dividing by n_blocks
        # turns the covariance of the pseudovalues into that of their mean.
        jknife_cov = np.atleast_2d(np.cov(pseudovalues.T, ddof=1) / n_blocks)
        jknife_var = np.atleast_2d(np.diag(jknife_cov))
        jknife_se = np.atleast_2d(np.sqrt(jknife_var))
        jknife_est = np.atleast_2d(np.mean(pseudovalues, axis=0))
        return (jknife_est, jknife_var, jknife_se, jknife_cov)

    @classmethod
    def delete_values_to_pseudovalues(cls, delete_values, est):
        '''
        Converts whole-data estimate and delete values to pseudovalues.

        Parameters
        ----------
        delete_values : np.matrix with shape (n_blocks, p)
            Delete values.
        est : np.matrix with shape (1, p):
            Whole-data estimate.

        Returns
        -------
        pseudovalues : np.matrix with shape (n_blocks, p)
            Pseudovalues, n_blocks * est - (n_blocks - 1) * delete_values.

        Raises
        ------
        ValueError :
            If est.shape != (1, delete_values.shape[1])

        '''
        n_blocks, p = delete_values.shape
        if est.shape != (1, p):
            raise ValueError(
                'Different number of parameters in delete_values than in est.')

        return n_blocks * est - (n_blocks - 1) * delete_values

    @classmethod
    def get_separators(cls, N, n_blocks):
        '''Define evenly-spaced block boundaries: n_blocks + 1 ints from 0 to N.'''
        return np.floor(np.linspace(0, N, n_blocks + 1)).astype(int)
|
170
|
+
|
171
|
+
|
172
|
+
class LstsqJackknifeSlow(Jackknife):

    '''
    Slow linear-regression block jackknife. Each delete value is obtained by refitting
    the regression with one block of rows removed, instead of being derived from
    per-block summaries. Useful for testing and for non-negative least squares.

    Inherits from Jackknife class.

    Parameters
    ----------
    x : np.matrix with shape (n, p)
        Independent variable.
    y : np.matrix with shape (n, 1)
        Dependent variable.
    n_blocks : int, optional
        Number of jackknife blocks
    nn : bool
        If true, fit with scipy.optimize.nnls instead of np.linalg.lstsq.
    separators : sequence of ints, optional
        Explicit block boundaries (forwarded to Jackknife.__init__).

    Attributes
    ----------
    est : np.matrix with shape (1, p)
        Whole-data regression estimate.
    delete_values : np.matrix with shape (n_blocks, p)
        Jackknife delete values. After __init__ this instance attribute shadows the
        delete_values classmethod on the instance.
    pseudovalues : np.matrix with shape (n_blocks, p)
        Jackknife pseudovalues.
    jknife_est : np.matrix with shape (1, p)
        Jackknifed estimate.
    jknife_var : np.matrix with shape (1, p)
        Variance of jackknifed estimate.
    jknife_se : np.matrix with shape (1, p)
        Standard error of jackknifed estimate, equal to sqrt(jknife_var).
    jknife_cov : np.matrix with shape (p, p)
        Covariance matrix of jackknifed estimate.

    Methods
    -------
    delete_values(x, y, func, s):
        Compute delete values of func(x, y) the slow way, with blocks defined by s.

    '''

    def __init__(self, x, y, n_blocks=None, nn=False, separators=None):
        Jackknife.__init__(self, x, y, n_blocks, separators)

        def _fit_nnls(x, y):
            # non-negative least squares; y is flattened to 1D for the solver
            return np.atleast_2d(nnls(x, np.array(y).T[0])[0])

        def _fit_lstsq(x, y):
            return np.atleast_2d(
                np.linalg.lstsq(x, np.array(y).T[0])[0])

        func = _fit_nnls if nn else _fit_lstsq

        self.est = func(x, y)
        self.delete_values = self.delete_values(x, y, func, self.separators)
        self.pseudovalues = self.delete_values_to_pseudovalues(
            self.delete_values, self.est)
        summary = self.jknife(self.pseudovalues)
        (self.jknife_est, self.jknife_var, self.jknife_se, self.jknife_cov) = summary

    @classmethod
    def delete_values(cls, x, y, func, s):
        '''
        Compute delete values by deleting one block at a time.

        Parameters
        ----------
        x : np.matrix with shape (n, p)
            Independent variable.
        y : np.matrix with shape (n, 1)
            Dependent variable.
        func : function (n, p) , (n, 1) --> (1, p)
            Function of x and y to be jackknifed.
        s : list of ints
            Block separators.

        Returns
        -------
        delete_values : np.matrix with shape (n_blocks, p)
            Delete block values (with n_blocks blocks defined by parameter s).

        Raises
        ------
        ValueError :
            If x and y fail the shape check performed by _check_shape.

        '''
        _check_shape(x, y)
        per_block = []
        for i in range(len(s) - 1):
            lo, hi = s[i], s[i + 1]
            # Keep every row outside the half-open block [lo, hi).
            x_kept = np.vstack([x[0:lo, ...], x[hi:, ...]])
            y_kept = np.vstack([y[0:lo, ...], y[hi:, ...]])
            per_block.append(func(x_kept, y_kept))

        return np.concatenate(per_block, axis=0)
|
262
|
+
|
263
|
+
|
264
|
+
class LstsqJackknifeFast(Jackknife):

    '''
    Fast block jackknife for the linear regression y ~ x.

    Inherits from Jackknife. The per-block products X^T Y and X^T X are
    computed once; the whole-data estimate and every delete-one-block estimate
    are then solved from sums of those block values.

    Parameters
    ----------
    x : 2D array with shape (n, p)
        Independent variable.
    y : 2D array with shape (n, 1)
        Dependent variable.
    n_blocks : int, optional
        Number of jackknife blocks. Forwarded to Jackknife.__init__.
    separators : list of ints, optional
        Block boundaries. Forwarded to Jackknife.__init__.

    Attributes
    ----------
    est : 2D array with shape (1, p)
        Whole-data least-squares estimate.
    delete_values : 2D array with shape (n_blocks, p)
        Jackknife delete values (estimate with one block left out).
    pseudovalues :
        Result of the inherited delete_values_to_pseudovalues(delete_values, est).
    jknife_est : 2D array with shape (1, p)
        Jackknifed estimate.
    jknife_var : 2D array with shape (1, p)
        Variance of jackknifed estimate.
    jknife_se : 2D array with shape (1, p)
        Standard error of jackknifed estimate, equal to sqrt(jknife_var).
    jknife_cov : 2D array with shape (p, p)
        Covariance matrix of jackknifed estimate.

    Methods
    -------
    block_values(x, y, s) :
        Computes the X^T Y and X^T X block values for the regression y~x.
    block_values_to_est(xty_block_values, xtx_block_values) :
        Computes whole-data estimate from block values.
    block_values_to_delete_values(xty_block_values, xtx_block_values) :
        Computes the delete-one-block estimates from block values.

    '''

    def __init__(self, x, y, n_blocks=None, separators=None):
        Jackknife.__init__(self, x, y, n_blocks, separators)
        # self.separators is not assigned in this class; presumably it is set
        # by Jackknife.__init__ -- confirm against the base class.
        xty_blocks, xtx_blocks = self.block_values(x, y, self.separators)
        self.est = self.block_values_to_est(xty_blocks, xtx_blocks)
        self.delete_values = self.block_values_to_delete_values(
            xty_blocks, xtx_blocks)
        self.pseudovalues = self.delete_values_to_pseudovalues(
            self.delete_values, self.est)
        jknife_summary = self.jknife(self.pseudovalues)
        (self.jknife_est, self.jknife_var,
         self.jknife_se, self.jknife_cov) = jknife_summary

    @classmethod
    def block_values(cls, x, y, s):
        '''
        Compute the per-block X^T Y and X^T X.

        Parameters
        ----------
        x : 2D array with shape (n, p)
            Independent variable.
        y : 2D array with shape (n, 1)
            Dependent variable.
        s : list of ints
            Block separators; block i covers rows s[i] up to (not including)
            s[i + 1], so there are len(s) - 1 blocks.

        Returns
        -------
        xty_block_values : 2D array with shape (n_blocks, p)
            Block values of X^T Y.
        xtx_block_values : 3D array with shape (n_blocks, p, p)
            Block values of X^T X.

        Raises
        ------
        ValueError :
            If x.shape[0] does not equal y.shape[0] or x and y are not 2D
            (per the original documentation of _check_shape).

        '''
        _, n_params = _check_shape(x, y)
        n_blocks = len(s) - 1
        xty_blocks = np.zeros((n_blocks, n_params))
        xtx_blocks = np.zeros((n_blocks, n_params, n_params))
        for idx in range(n_blocks):
            start, stop = s[idx], s[idx + 1]
            x_rows = x[start:stop, ...]
            y_rows = y[start:stop, ...]
            xty_blocks[idx, ...] = np.dot(
                x_rows.T, y_rows).reshape((1, n_params))
            xtx_blocks[idx, ...] = np.dot(x_rows.T, x_rows)

        return (xty_blocks, xtx_blocks)

    @classmethod
    def block_values_to_est(cls, xty_block_values, xtx_block_values):
        '''
        Convert block values to the whole-data linear regression estimate.

        Sums the block values over blocks and solves (X^T X) b = X^T Y.

        Parameters
        ----------
        xty_block_values : 2D array with shape (n_blocks, p)
            Block values of X^T Y.
        xtx_block_values : 3D array with shape (n_blocks, p, p)
            Block values of X^T X.

        Returns
        -------
        est : 2D array with shape (1, p)
            Whole-data estimate.

        Raises
        ------
        LinAlgError :
            If the summed X^T X is singular.
        ValueError :
            If the block-value shapes are inconsistent (per the original
            documentation of _check_shape_block).

        '''
        _, n_params = _check_shape_block(xty_block_values, xtx_block_values)
        total_xty = np.sum(xty_block_values, axis=0)
        total_xtx = np.sum(xtx_block_values, axis=0)
        coef = np.linalg.solve(total_xtx, total_xty)
        return coef.reshape((1, n_params))

    @classmethod
    def block_values_to_delete_values(cls, xty_block_values, xtx_block_values):
        '''
        Convert block values to delete values.

        Row j of the result is the regression estimate computed with block j
        removed, i.e. the solution of
        (sum(X^T X) - X^T X_j) b = (sum(X^T Y) - X^T Y_j).

        Parameters
        ----------
        xty_block_values : 2D array with shape (n_blocks, p)
            Block values of X^T Y.
        xtx_block_values : 3D array with shape (n_blocks, p, p)
            Block values of X^T X.

        Returns
        -------
        delete_values : 2D array with shape (n_blocks, p)
            Delete values.

        Raises
        ------
        LinAlgError :
            If any delete-one-block X^T X is singular.
        ValueError :
            If the block-value shapes are inconsistent (per the original
            documentation of _check_shape_block).

        '''
        n_blocks, n_params = _check_shape_block(
            xty_block_values, xtx_block_values)
        # Subtract every block from the totals in one broadcast step; row j of
        # each result is "everything except block j".
        xty_without = np.sum(xty_block_values, axis=0) - xty_block_values
        xtx_without = np.sum(xtx_block_values, axis=0) - xtx_block_values
        delete_values = np.zeros((n_blocks, n_params))
        for j in range(n_blocks):
            solution = np.linalg.solve(xtx_without[j], xty_without[j])
            delete_values[j, ...] = solution.reshape((1, n_params))

        return delete_values
|
427
|
+
|
428
|
+
|
429
|
+
class RatioJackknife(Jackknife):

    '''
    Block jackknife for a ratio estimate.

    Inherits from Jackknife. Pseudovalues are built from the whole-data ratio
    estimate and the per-block ratios numer_delete_values / denom_delete_values,
    then passed to the inherited jknife() method.

    Parameters
    ----------
    est : float or 2D array with shape (1, p)
        Whole data ratio estimate. A scalar is only valid when p == 1; it is
        promoted to an array with shape (1, 1).
    numer_delete_values : 2D array with shape (n_blocks, p)
        Delete values for the numerator.
    denom_delete_values : 2D array with shape (n_blocks, p)
        Delete values for the denominator.

    Attributes
    ----------
    n_blocks : int
        Number of rows of numer_delete_values.
    est : 2D array with shape (1, p)
        Whole data ratio estimate.
    pseudovalues : 2D array with shape (n_blocks, p)
        Ratio jackknife pseudovalues.
    jknife_est, jknife_var, jknife_se, jknife_cov :
        The four values returned by the inherited jknife(pseudovalues).

    Methods
    -------
    delete_values_to_pseudovalues(est, denom, numer):
        Converts denominator / numerator delete values and the whole-data
        estimate to pseudovalues.

    Raises
    ------
    ValueError :
        If the delete values differ in shape or are not 2D, or if est is not
        of shape (1, p).
    FloatingPointError :
        If any entry of denom_delete_values is zero AND numpy is configured to
        raise on division by zero (np.seterr(divide='raise')). With numpy's
        default error state the division yields inf/nan and a RuntimeWarning
        instead. NOTE(review): confirm whether this module calls np.seterr.

    Note that it is possible for the denominator to cross zero (i.e., be both positive
    and negative) and still have a finite ratio estimate and SE, for example if the
    numerator is fixed to 0 and the denominator is either -1 or 1. If the denominator
    is noisily close to zero, then it is unlikely that the denominator will yield zero
    exactly (and therefore yield an inf or nan), but delete values will be of the form
    (numerator / close to zero) and -(numerator / close to zero), i.e., (big) and -(big),
    and so the jackknife will (correctly) yield huge SE.

    '''

    def __init__(self, est, numer_delete_values, denom_delete_values):
        if numer_delete_values.shape != denom_delete_values.shape:
            raise ValueError(
                'numer_delete_values.shape != denom_delete_values.shape.')
        if len(numer_delete_values.shape) != 2:
            raise ValueError('Delete values must be matrices.')
        # The documented contract allows a plain float for est, but a Python
        # float has no .shape. Promote any 0-d input to shape (1, 1) so the
        # shape check below applies to it as well.
        if np.ndim(est) == 0:
            est = np.asarray(est, dtype=float).reshape((1, 1))
        if len(est.shape) != 2 or est.shape[0] != 1 or est.shape[1] != numer_delete_values.shape[1]:
            raise ValueError(
                'Shape of est does not match shape of delete values.')

        self.n_blocks = numer_delete_values.shape[0]
        self.est = est
        self.pseudovalues = self.delete_values_to_pseudovalues(self.est,
                                                               denom_delete_values, numer_delete_values)
        (self.jknife_est, self.jknife_var, self.jknife_se, self.jknife_cov) =\
            self.jknife(self.pseudovalues)

    @classmethod
    def delete_values_to_pseudovalues(cls, est, denom, numer):
        '''
        Converts delete values to pseudovalues.

        Row j of the result is
        n_blocks * est - (n_blocks - 1) * numer[j] / denom[j].

        Parameters
        ----------
        est : float or 2D array with shape (1, p)
            Whole-data ratio estimate.
        denom : 2D array with shape (n_blocks, p)
            Denominator delete values.
        numer : 2D array with shape (n_blocks, p)
            Numerator delete values.

        Returns
        -------
        pseudovalues : 2D array with shape (n_blocks, p)
            Ratio Jackknife Pseudovalues.

        Raises
        ------
        ValueError :
            If numer.shape != denom.shape.

        '''
        # Enforce the documented contract; without this check a numer with
        # extra rows would be silently truncated to denom's row count.
        if numer.shape != denom.shape:
            raise ValueError('numer.shape != denom.shape.')
        n_blocks, p = denom.shape
        pseudovalues = np.zeros((n_blocks, p))
        # est broadcasts across the block axis; the operation order matches the
        # row-by-row formula, so each entry is computed exactly as before.
        pseudovalues[...] = n_blocks * est - (n_blocks - 1) * numer / denom

        return pseudovalues
|