gsMap 1.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gsMap/jackknife.py ADDED
@@ -0,0 +1,514 @@
1
+ '''
2
+ (c) 2014 Brendan Bulik-Sullivan and Hilary Finucane
3
+
4
+ Fast block jackknives.
5
+
6
+ Everything in this module deals with 2D numpy arrays. 1D data are represented as arrays
7
+ with dimension (N, 1) or (1, N), to avoid bugs arising from numpy treating (N, ) as
8
+ a fundamentally different shape from (N, 1). The convention in this module is for the
9
+ first dimension to represent # of data points (or # of blocks in a block jackknife, since
10
+ a block is like a datapoint), and for the second dimension to represent the dimensionality
11
+ of the data.
12
+
13
+ '''
14
+
15
+ from __future__ import division
16
+ import numpy as np
17
+ from scipy.optimize import nnls
18
+ np.seterr(divide='raise', invalid='raise')
19
+ xrange = range
20
+
21
+ def _check_shape(x, y):
22
+ '''Check that x and y have the correct shapes (for regression jackknives).'''
23
+ if len(x.shape) != 2 or len(y.shape) != 2:
24
+ raise ValueError('x and y must be 2D arrays.')
25
+ if x.shape[0] != y.shape[0]:
26
+ raise ValueError(
27
+ 'Number of datapoints in x != number of datapoints in y.')
28
+ if y.shape[1] != 1:
29
+ raise ValueError('y must have shape (n_snp, 1)')
30
+ n, p = x.shape
31
+ if p > n:
32
+ raise ValueError('More dimensions than datapoints.')
33
+
34
+ return (n, p)
35
+
36
+
37
+ def _check_shape_block(xty_block_values, xtx_block_values):
38
+ '''Check that xty_block_values and xtx_block_values have correct shapes.'''
39
+ if xtx_block_values.shape[0:2] != xty_block_values.shape:
40
+ raise ValueError(
41
+ 'Shape of xty_block_values must equal shape of first two dimensions of xty_block_values.')
42
+ if len(xtx_block_values.shape) < 3:
43
+ raise ValueError('xtx_block_values must be a 3D array.')
44
+ if xtx_block_values.shape[1] != xtx_block_values.shape[2]:
45
+ raise ValueError(
46
+ 'Last two axes of xtx_block_values must have same dimension.')
47
+
48
+ return xtx_block_values.shape[0:2]
49
+
50
+
51
class Jackknife(object):

    '''
    Base class for jackknife objects. Input involves x,y, so this base class is tailored
    for statistics computed from independent and dependent variables (e.g., regressions).
    The delete_values_to_pseudovalues and jknife methods will still be useful for other
    sorts of statistics, but the __init__ method will need to be overridden.

    Parameters
    ----------
    x : np.matrix with shape (n, p)
        Independent variable.
    y : np.matrix with shape (n, 1)
        Dependent variable.
    n_blocks : int, optional
        Number of jackknife blocks. Used only when separators is None.
    separators : sequence of ints, optional
        Explicit block boundaries. Must have minimum 0 and maximum n. Takes
        precedence over n_blocks when both are given.

    Attributes
    ----------
    n_blocks : int
        Number of jackknife blocks
    p : int
        Dimensionality of the independent variable
    N : int
        Number of datapoints (equal to x.shape[0])
    separators : sequence of ints
        Block boundaries (sorted copy of the separators argument, or the
        output of get_separators).

    Methods
    -------
    jknife(pseudovalues):
        Computes jackknife estimate and variance from the jackknife pseudovalues.
    delete_values_to_pseudovalues(delete_values, est):
        Converts delete values and the whole-data estimate to pseudovalues.
    get_separators(N, n_blocks):
        Returns (approximately) evenly-spaced jackknife block boundaries.

    Raises
    ------
    ValueError :
        If x and y fail the shape checks, if the separators do not span
        [0, n], if neither n_blocks nor separators is given, or if there are
        more blocks than data points.

    '''

    def __init__(self, x, y, n_blocks=None, separators=None):
        self.N, self.p = _check_shape(x, y)
        if separators is not None:
            if max(separators) != self.N:
                raise ValueError(
                    'Max(separators) must be equal to number of data points.')
            if min(separators) != 0:
                # This branch checks the minimum; the message used to say "Max".
                raise ValueError('Min(separators) must be equal to 0.')
            self.separators = sorted(separators)
            self.n_blocks = len(separators) - 1
        elif n_blocks is not None:
            self.n_blocks = n_blocks
            self.separators = self.get_separators(self.N, self.n_blocks)
        else:
            raise ValueError('Must specify either n_blocks or separators.')

        if self.n_blocks > self.N:
            raise ValueError('More blocks than data points.')

    @classmethod
    def jknife(cls, pseudovalues):
        '''
        Converts pseudovalues to jackknife estimate and variance.

        Parameters
        ----------
        pseudovalues : np.matrix of floats with shape (n_blocks, p)

        Returns
        -------
        jknife_est : np.matrix with shape (1, p)
            Jackknifed estimate.
        jknife_var : np.matrix with shape (1, p)
            Variance of jackknifed estimate.
        jknife_se : np.matrix with shape (1, p)
            Standard error of jackknifed estimate, equal to sqrt(jknife_var).
        jknife_cov : np.matrix with shape (p, p)
            Covariance matrix of jackknifed estimate.

        '''
        n_blocks = pseudovalues.shape[0]
        # Sample covariance of the pseudovalues divided by the number of
        # blocks; atleast_2d keeps the 2D convention when p == 1.
        jknife_cov = np.atleast_2d(np.cov(pseudovalues.T, ddof=1) / n_blocks)
        jknife_var = np.atleast_2d(np.diag(jknife_cov))
        jknife_se = np.atleast_2d(np.sqrt(jknife_var))
        jknife_est = np.atleast_2d(np.mean(pseudovalues, axis=0))
        return (jknife_est, jknife_var, jknife_se, jknife_cov)

    @classmethod
    def delete_values_to_pseudovalues(cls, delete_values, est):
        '''
        Converts whole-data estimate and delete values to pseudovalues.

        Parameters
        ----------
        delete_values : np.matrix with shape (n_blocks, p)
            Delete values.
        est : np.matrix with shape (1, p):
            Whole-data estimate.

        Returns
        -------
        pseudovalues : np.matrix with shape (n_blocks, p)
            Pseudovalues, n_blocks * est - (n_blocks - 1) * delete_values.

        Raises
        ------
        ValueError :
            If est.shape != (1, delete_values.shape[1])

        '''
        n_blocks, p = delete_values.shape
        if est.shape != (1, p):
            raise ValueError(
                'Different number of parameters in delete_values than in est.')

        return n_blocks * est - (n_blocks - 1) * delete_values

    @classmethod
    def get_separators(cls, N, n_blocks):
        '''
        Define evenly-spaced block boundaries.

        Returns an integer array of n_blocks + 1 boundaries running from 0 to
        N, obtained by flooring n_blocks + 1 evenly spaced points.
        '''
        return np.floor(np.linspace(0, N, n_blocks + 1)).astype(int)
170
+
171
+
172
class LstsqJackknifeSlow(Jackknife):

    '''
    Slow linear-regression block jackknife. Delete values are obtained by refitting
    the regression with each block removed in turn, rather than being derived from
    block values. Useful for testing and for non-negative least squares.

    Inherits from Jackknife class.

    Parameters
    ----------
    x : np.matrix with shape (n, p)
        Independent variable.
    y : np.matrix with shape (n, 1)
        Dependent variable.
    n_blocks : int
        Number of jackknife blocks
    nn: bool
        If true, fit with non-negative least squares (scipy nnls) instead of
        ordinary least squares (np.linalg.lstsq).
    separators : sequence of ints, optional
        Explicit block boundaries, forwarded to Jackknife.__init__.

    Attributes
    ----------
    est : np.matrix with shape (1, p)
        Whole-data regression estimate.
    jknife_est : np.matrix with shape (1, p)
        Jackknifed estimate.
    jknife_var : np.matrix with shape (1, p)
        Variance of jackknifed estimate.
    jknife_se : np.matrix with shape (1, p)
        Standard error of jackknifed estimate, equal to sqrt(jknife_var).
    jknife_cov : np.matrix with shape (p, p)
        Covariance matrix of jackknifed estimate.
    delete_values : np.matrix with shape (n_blocks, p)
        Jackknife delete values. On an instance this attribute replaces the
        classmethod of the same name.
    pseudovalues : np.matrix with shape (n_blocks, p)
        Jackknife pseudovalues.

    Methods
    -------
    delete_values(x, y, func, s):
        Compute delete values of func(x, y) the slow way, with blocks defined by s.

    '''

    def __init__(self, x, y, n_blocks=None, nn=False, separators=None):
        Jackknife.__init__(self, x, y, n_blocks, separators)

        def _fit_nonnegative(design, response):
            # nnls wants a 1D right-hand side, hence the column extraction.
            return np.atleast_2d(nnls(design, np.array(response).T[0])[0])

        def _fit_least_squares(design, response):
            return np.atleast_2d(
                np.linalg.lstsq(design, np.array(response).T[0])[0])

        func = _fit_nonnegative if nn else _fit_least_squares

        self.est = func(x, y)
        # The classmethod is called first; its result is then stored under the
        # same name on the instance.
        self.delete_values = self.delete_values(x, y, func, self.separators)
        self.pseudovalues = self.delete_values_to_pseudovalues(
            self.delete_values, self.est)
        jknife_results = self.jknife(self.pseudovalues)
        (self.jknife_est, self.jknife_var,
         self.jknife_se, self.jknife_cov) = jknife_results

    @classmethod
    def delete_values(cls, x, y, func, s):
        '''
        Compute delete values by deleting one block at a time.

        Parameters
        ----------
        x : np.matrix with shape (n, p)
            Independent variable.
        y : np.matrix with shape (n, 1)
            Dependent variable.
        func : function (n, p) , (n, 1) --> (1, p)
            Function of x and y to be jackknifed.
        s : list of ints
            Block separators.

        Returns
        -------
        delete_values : np.matrix with shape (n_blocks, p)
            Delete block values (with n_blocks blocks defined by parameter s).

        Raises
        ------
        ValueError :
            If x and y fail the checks in _check_shape.

        '''
        _check_shape(x, y)
        per_block = []
        for i in range(len(s) - 1):
            start, stop = s[i], s[i + 1]
            # Stack the rows before and after block i, i.e. drop the block.
            x_kept = np.vstack([x[0:start, ...], x[stop:, ...]])
            y_kept = np.vstack([y[0:start, ...], y[stop:, ...]])
            per_block.append(func(x_kept, y_kept))

        return np.concatenate(per_block, axis=0)
262
+
263
+
264
class LstsqJackknifeFast(Jackknife):

    '''
    Fast block jackknife for linear regression.

    Per-block X^T Y and X^T X are computed once; the whole-data estimate and every
    delete value are then obtained by solving linear systems built from their sums.

    Inherits from Jackknife class.

    Parameters
    ----------
    x : np.matrix with shape (n, p)
        Independent variable.
    y : np.matrix with shape (n, 1)
        Dependent variable.
    n_blocks : int
        Number of jackknife blocks
    separators : sequence of ints, optional
        Explicit block boundaries, forwarded to Jackknife.__init__.

    Attributes
    ----------
    est : np.matrix with shape (1, p)
        Whole-data regression estimate.
    jknife_est : np.matrix with shape (1, p)
        Jackknifed estimate.
    jknife_var : np.matrix with shape (1, p)
        Variance of jackknifed estimate.
    jknife_se : np.matrix with shape (1, p)
        Standard error of jackknifed estimate, equal to sqrt(jknife_var).
    jknife_cov : np.matrix with shape (p, p)
        Covariance matrix of jackknifed estimate.
    delete_values : np.matrix with shape (n_blocks, p)
        Jackknife delete values.
    pseudovalues : np.matrix with shape (n_blocks, p)
        Jackknife pseudovalues.

    Methods
    -------
    block_values(x, y, s) :
        Computes block values for the regression y~x.
    block_values_to_est(xty_block_values, xtx_block_values) :
        Computes whole-data estimate from block values.
    block_values_to_delete_values(xty_block_values, xtx_block_values) :
        Computes delete values from block values.

    '''

    def __init__(self, x, y, n_blocks=None, separators=None):
        Jackknife.__init__(self, x, y, n_blocks, separators)
        xty_blocks, xtx_blocks = self.block_values(x, y, self.separators)
        self.est = self.block_values_to_est(xty_blocks, xtx_blocks)
        self.delete_values = self.block_values_to_delete_values(
            xty_blocks, xtx_blocks)
        self.pseudovalues = self.delete_values_to_pseudovalues(
            self.delete_values, self.est)
        jknife_results = self.jknife(self.pseudovalues)
        (self.jknife_est, self.jknife_var,
         self.jknife_se, self.jknife_cov) = jknife_results

    @classmethod
    def block_values(cls, x, y, s):
        '''
        Compute block values.

        Parameters
        ----------
        x : np.matrix with shape (n, p)
            Independent variable.
        y : np.matrix with shape (n, 1)
            Dependent variable.
        s : list of ints
            Block separators.

        Returns
        -------
        xty_block_values : np.matrix with shape (n_blocks, p)
            Block values of X^T Y.
        xtx_block_values : 3d np array with shape (n_blocks, p, p)
            Block values of X^T X.

        Raises
        ------
        ValueError :
            If x and y fail the checks in _check_shape.

        '''
        p = _check_shape(x, y)[1]
        n_blocks = len(s) - 1
        xtx_block_values = np.zeros((n_blocks, p, p))
        xty_block_values = np.zeros((n_blocks, p))
        for i, (start, stop) in enumerate(zip(s[:-1], s[1:])):
            # Rows belonging to block i.
            x_block = x[start:stop, ...]
            y_block = y[start:stop, ...]
            xty_block_values[i, ...] = np.dot(
                x_block.T, y_block).reshape((1, p))
            xtx_block_values[i, ...] = np.dot(x_block.T, x_block)

        return (xty_block_values, xtx_block_values)

    @classmethod
    def block_values_to_est(cls, xty_block_values, xtx_block_values):
        '''
        Converts block values to the whole-data linear regression estimate.

        Parameters
        ----------
        xty_block_values : np.matrix with shape (n_blocks, p)
            Block values of X^T Y.
        xtx_block_values : 3D np.array with shape (n_blocks, p, p)
            Block values of X^T X

        Returns
        -------
        est : np.matrix with shape (1, p)
            Whole data estimate.

        Raises
        ------
        LinAlgError :
            If design matrix is singular.
        ValueError :
            If the last two dimensions of xtx_block_values are not equal or if the first two
            dimensions of xtx_block_values do not equal the shape of xty_block_values.

        '''
        _, p = _check_shape_block(xty_block_values, xtx_block_values)
        # Summing over blocks recovers the full-data X^T Y and X^T X.
        xty_total = np.sum(xty_block_values, axis=0)
        xtx_total = np.sum(xtx_block_values, axis=0)
        solution = np.linalg.solve(xtx_total, xty_total)
        return solution.reshape((1, p))

    @classmethod
    def block_values_to_delete_values(cls, xty_block_values, xtx_block_values):
        '''
        Converts block values to delete values.

        Parameters
        ----------
        xty_block_values : np.matrix with shape (n_blocks, p)
            Block values of X^T Y.
        xtx_block_values : 3D np.array with shape (n_blocks, p, p)
            Block values of X^T X

        Returns
        -------
        delete_values : np.matrix with shape (n_blocks, p)
            Delete Values.

        Raises
        ------
        LinAlgError :
            If delete design matrix is singular.
        ValueError :
            If the last two dimensions of xtx_block_values are not equal or if the first two
            dimensions of xtx_block_values do not equal the shape of xty_block_values.

        '''
        n_blocks, p = _check_shape_block(xty_block_values, xtx_block_values)
        xty_total = np.sum(xty_block_values, axis=0)
        xtx_total = np.sum(xtx_block_values, axis=0)
        delete_values = np.zeros((n_blocks, p))
        for j in range(n_blocks):
            # Subtracting block j from the totals gives the leave-block-out system.
            xty_without_j = xty_total - xty_block_values[j]
            xtx_without_j = xtx_total - xtx_block_values[j]
            solution = np.linalg.solve(xtx_without_j, xty_without_j)
            delete_values[j, ...] = solution.reshape((1, p))

        return delete_values
427
+
428
+
429
class RatioJackknife(Jackknife):

    '''
    Block jackknife ratio estimate.

    Inherits from Jackknife class, but does not call Jackknife.__init__.

    Parameters
    ----------
    est : np.array with shape (1, p)
        Whole data ratio estimate
    numer_delete_values : np.matrix with shape (n_blocks, p)
        Delete values for the numerator.
    denom_delete_values: np.matrix with shape (n_blocks, p)
        Delete values for the denominator.

    Attributes
    ----------
    n_blocks : int
        Number of jackknife blocks (rows of numer_delete_values).
    est : np.array with shape (1, p)
        Whole data ratio estimate, as passed in.
    pseudovalues : np.array with shape (n_blocks, p)
        Ratio jackknife pseudovalues.
    jknife_est, jknife_var, jknife_se, jknife_cov :
        Outputs of Jackknife.jknife applied to the pseudovalues.

    Methods
    -------
    delete_values_to_pseudovalues(est, denom, numer):
        Converts denominator/ numerator delete values and the whole-data estimate to
        pseudovalues.

    Raises
    ------
    ValueError :
        If the delete-value arrays differ in shape or are not 2D, or if est does not
        have shape (1, p).
    FloatingPointError :
        If any entry of denom_delete_values is zero (given the module-level
        np.seterr(divide='raise', invalid='raise') setting).

    Note that it is possible for the denominator to cross zero (i.e., be both positive
    and negative) and still have a finite ratio estimate and SE, for example if the
    numerator is fixed to 0 and the denominator is either -1 or 1. If the denominator
    is noisily close to zero, then it is unlikely that the denominator will yield zero
    exactly (and therefore yield an inf or nan), but delete values will be of the form
    (numerator / close to zero) and -(numerator / close to zero), i.e., (big) and -(big),
    and so the jackknife will (correctly) yield huge SE.

    '''

    def __init__(self, est, numer_delete_values, denom_delete_values):
        numer_shape = numer_delete_values.shape
        if numer_shape != denom_delete_values.shape:
            raise ValueError(
                'numer_delete_values.shape != denom_delete_values.shape.')
        if len(numer_shape) != 2:
            raise ValueError('Delete values must be matrices.')
        if est.shape != (1, numer_shape[1]):
            raise ValueError(
                'Shape of est does not match shape of delete values.')

        self.n_blocks = numer_shape[0]
        self.est = est
        self.pseudovalues = self.delete_values_to_pseudovalues(
            self.est, denom_delete_values, numer_delete_values)
        jknife_results = self.jknife(self.pseudovalues)
        (self.jknife_est, self.jknife_var,
         self.jknife_se, self.jknife_cov) = jknife_results

    @classmethod
    def delete_values_to_pseudovalues(cls, est, denom, numer):
        '''
        Converts delete values to pseudovalues.

        Note that the argument order is (est, denom, numer), which differs from the
        two-argument method of the same name on the Jackknife base class.

        Parameters
        ----------
        est : np.matrix with shape (1, p)
            Whole-data ratio estimate.
        denom : np.matrix with shape (n_blocks, p)
            Denominator delete values.
        numer : np.matrix with shape (n_blocks, p)
            Numerator delete values.

        Returns
        -------
        pseudovalues : np.array with shape (n_blocks, p)
            Ratio Jackknife Pseudovalues; row j equals
            n_blocks * est - (n_blocks - 1) * numer[j] / denom[j].

        '''
        n_blocks, p = denom.shape
        pseudovalues = np.zeros((n_blocks, p))
        scaled_est = n_blocks * est
        weight = n_blocks - 1
        for j in range(n_blocks):
            delete_ratio = (weight * numer[j, ...]) / denom[j, ...]
            pseudovalues[j, ...] = scaled_est - delete_ratio

        return pseudovalues