edgepython-0.2.0-py3-none-any.whl
This diff shows the contents of package versions publicly released to one of the supported registries, as they appear in those registries; it is provided for informational purposes only.
- edgepython/__init__.py +114 -0
- edgepython/classes.py +517 -0
- edgepython/compressed_matrix.py +388 -0
- edgepython/dgelist.py +314 -0
- edgepython/dispersion.py +920 -0
- edgepython/dispersion_lowlevel.py +1066 -0
- edgepython/exact_test.py +525 -0
- edgepython/expression.py +323 -0
- edgepython/filtering.py +96 -0
- edgepython/gene_sets.py +1215 -0
- edgepython/glm_fit.py +653 -0
- edgepython/glm_levenberg.py +359 -0
- edgepython/glm_test.py +375 -0
- edgepython/io.py +1887 -0
- edgepython/limma_port.py +987 -0
- edgepython/normalization.py +546 -0
- edgepython/ql_weights.py +765 -0
- edgepython/results.py +236 -0
- edgepython/sc_fit.py +1511 -0
- edgepython/smoothing.py +474 -0
- edgepython/splicing.py +537 -0
- edgepython/utils.py +1050 -0
- edgepython/visualization.py +409 -0
- edgepython/weighted_lowess.py +323 -0
- edgepython-0.2.0.dist-info/METADATA +201 -0
- edgepython-0.2.0.dist-info/RECORD +29 -0
- edgepython-0.2.0.dist-info/WHEEL +5 -0
- edgepython-0.2.0.dist-info/licenses/LICENSE +674 -0
- edgepython-0.2.0.dist-info/top_level.txt +1 -0
edgepython/glm_levenberg.py

@@ -0,0 +1,359 @@

```python
# This code was written by Claude (Anthropic). The project was directed by Lior Pachter.
"""
Levenberg-Marquardt GLM fitting for negative binomial models.

Port of edgeR's mglmLevenberg and nbinomDeviance (C/C++ code reimplemented in NumPy).
"""

import numpy as np
from .compressed_matrix import (CompressedMatrix, compress_offsets,
                                compress_weights, compress_dispersions)

def mglm_levenberg(y, design, dispersion=0, offset=0, weights=None,
                   coef_start=None, start_method='null', maxit=200, tol=1e-06):
    """Fit genewise negative binomial GLMs using Levenberg damping.

    Port of edgeR's mglmLevenberg.

    Parameters
    ----------
    y : ndarray
        Count matrix (genes x samples).
    design : ndarray
        Design matrix (samples x coefficients).
    dispersion : float or ndarray
        NB dispersions.
    offset : float, ndarray, or CompressedMatrix
        Log-scale offsets.
    weights : ndarray, optional
        Observation weights.
    coef_start : ndarray, optional
        Starting coefficient values.
    start_method : str
        'null' or 'y' for initialization.
    maxit : int
        Maximum iterations.
    tol : float
        Convergence tolerance.

    Returns
    -------
    dict with 'coefficients', 'fitted.values', 'deviance', 'iter', 'failed'.
    """
    y = np.asarray(y, dtype=np.float64)
    if y.ndim == 1:
        y = y.reshape(1, -1)
    ngenes, nlibs = y.shape

    design = np.asarray(design, dtype=np.float64)
    if design.ndim == 1:
        design = design.reshape(-1, 1)
    ncoefs = design.shape[1]

    # Handle empty design
    if ncoefs == 0:
        offset_mat = _expand_compressed(offset, y.shape)
        fitted = np.exp(offset_mat)
        dev = nbinom_deviance(y, fitted, dispersion, weights)
        return {
            'coefficients': np.zeros((ngenes, 0)),
            'fitted.values': fitted,
            'deviance': dev,
            'iter': np.zeros(ngenes, dtype=int),
            'failed': np.zeros(ngenes, dtype=bool)
        }

    # Expand offset, dispersion, weights
    offset_mat = _expand_compressed(offset, y.shape)
    disp_mat = _expand_compressed(dispersion, y.shape)
    if weights is not None:
        w_mat = _expand_compressed(weights, y.shape)
    else:
        w_mat = np.ones_like(y)

    # Initialize coefficients
    if coef_start is not None:
        beta = np.asarray(coef_start, dtype=np.float64)
        if beta.ndim == 1:
            beta = np.tile(beta, (ngenes, 1))
    else:
        beta = _get_levenberg_start(y, offset_mat, disp_mat, w_mat, design,
                                    start_method == 'null')

    # Levenberg-Marquardt iteration for each gene
    coefficients = np.zeros((ngenes, ncoefs))
    fitted_values = np.zeros_like(y)
    n_iter = np.zeros(ngenes, dtype=int)
    failed = np.zeros(ngenes, dtype=bool)

    for g in range(ngenes):
        beta_g = beta[g].copy()
        y_g = y[g]
        offset_g = offset_mat[g]
        disp_g = disp_mat[g] if disp_mat.ndim == 2 else disp_mat
        w_g = w_mat[g] if w_mat.ndim == 2 else w_mat

        if np.isscalar(disp_g):
            disp_g = np.full(nlibs, disp_g)
        if np.isscalar(w_g):
            w_g = np.full(nlibs, w_g)

        converged = False
        lev = 1e-3  # Levenberg damping parameter

        for it in range(maxit):
            # Current fitted means under the log link
            eta = design @ beta_g + offset_g
            mu = np.exp(np.clip(eta, -500, 500))
            mu = np.maximum(mu, 1e-300)

            # Working weights: mu**2 / (mu * (1 + disp * mu)) simplifies to
            # mu / (1 + disp * mu); the simplified form also avoids float64
            # overflow in mu**2 when eta is near the +500 clip
            denom = 1 + disp_g * mu
            working_w = w_g * mu / denom
            working_w = np.maximum(working_w, 1e-300)

            # Working residuals
            z = (y_g - mu) / mu

            # Weighted least squares, without forming the diagonal weight
            # matrix explicitly
            XtWX = design.T @ (working_w[:, None] * design)
            XtWz = design.T @ (working_w * z)

            # Add Levenberg damping to the diagonal
            XtWX_lev = XtWX + lev * np.diag(np.diag(XtWX) + 1e-10)

            try:
                delta = np.linalg.solve(XtWX_lev, XtWz)
            except np.linalg.LinAlgError:
                failed[g] = True
                break

            # Deviance before the update
            dev_old = _unit_deviance_sum(y_g, mu, disp_g, w_g)

            # Trial update
            beta_new = beta_g + delta
            eta_new = design @ beta_new + offset_g
            mu_new = np.exp(np.clip(eta_new, -500, 500))
            mu_new = np.maximum(mu_new, 1e-300)
            dev_new = _unit_deviance_sum(y_g, mu_new, disp_g, w_g)

            if dev_new <= dev_old:
                # Accept the step and decrease damping
                beta_g = beta_new
                lev = max(lev / 10, 1e-10)
                if abs(dev_old - dev_new) < tol * (abs(dev_old) + 0.1):
                    converged = True
                    n_iter[g] = it + 1
                    break
            else:
                # Reject the step and increase damping
                lev = min(lev * 10, 1e10)

        if not converged and not failed[g]:
            n_iter[g] = maxit

        # Final coefficients and fitted values for this gene
        coefficients[g] = beta_g
        eta_final = design @ beta_g + offset_g
        fitted_values[g] = np.exp(np.clip(eta_final, -500, 500))

    deviance = nbinom_deviance(y, fitted_values, dispersion, weights)

    return {
        'coefficients': coefficients,
        'fitted.values': fitted_values,
        'deviance': deviance,
        'iter': n_iter,
        'failed': failed
    }
```
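
Each inner iteration solves the damped normal equations $(X^\top W X + \lambda\,\mathrm{diag}(X^\top W X))\,\delta = X^\top W z$ (with a tiny ridge for stability) and accepts the step only if the gene's deviance does not increase, shrinking $\lambda$ on success and inflating it on rejection. A minimal usage sketch (hypothetical data; the import path assumes the package layout listed above):

```python
import numpy as np
from edgepython.glm_levenberg import mglm_levenberg

# Hypothetical example: 5 genes x 6 samples, two groups of three
rng = np.random.default_rng(0)
y = rng.poisson(lam=20.0, size=(5, 6))
design = np.column_stack([np.ones(6),                 # intercept
                          np.repeat([0.0, 1.0], 3)])  # group indicator

fit = mglm_levenberg(y, design, dispersion=0.1,
                     offset=np.log(np.full(6, 1e6)))  # log library sizes
print(fit['coefficients'].shape)  # (5, 2)
print(fit['deviance'])            # one residual deviance per gene
```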

```python
def _get_levenberg_start(y, offset, dispersion, weights, design, use_null):
    """Get starting values for Levenberg-Marquardt."""
    ngenes, nlibs = y.shape
    ncoefs = design.shape[1]
    beta = np.zeros((ngenes, ncoefs))

    if use_null:
        # Start from the null model (intercept only via offset)
        for g in range(ngenes):
            lib_size = np.exp(offset[g] if offset.ndim == 2 else offset)
            total = np.sum(y[g])
            total_lib = np.sum(lib_size)
            if total > 0 and total_lib > 0:
                mu_hat = total / total_lib
                # Solve for beta[0] such that exp(X*beta + offset) ≈ y:
                # with a null start, all coefficients stay 0 except the intercept
                beta[g, 0] = np.log(mu_hat) if mu_hat > 0 else -20
    else:
        # Start from the observed y values
        for g in range(ngenes):
            lib_size = np.exp(offset[g] if offset.ndim == 2 else offset)
            y_norm = y[g] / np.maximum(lib_size, 1e-300)
            y_norm = np.maximum(y_norm, 1e-300)
            log_y = np.log(y_norm)
            try:
                beta[g] = np.linalg.lstsq(design, log_y, rcond=None)[0]
            except np.linalg.LinAlgError:
                beta[g, 0] = np.mean(log_y)

    return beta
```
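
With the default `'null'` start, the intercept is initialized at $\log(\sum_j y_j / \sum_j e^{o_j})$ with all other coefficients zero, which assumes the first design column is an intercept. A small check, calling the private helper directly purely for illustration:

```python
import numpy as np
from edgepython.glm_levenberg import _get_levenberg_start

y = np.array([[10.0, 20.0, 30.0]])
offset = np.log(np.full((1, 3), 1e6))  # equal library sizes
design = np.ones((3, 1))               # intercept-only design
beta = _get_levenberg_start(y, offset, np.zeros_like(y), np.ones_like(y),
                            design, True)
print(beta)  # [[log(60 / 3e6)]] ≈ [[-10.82]]
```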

```python
def nbinom_deviance(y, mean, dispersion=0, weights=None):
    """Residual deviances for row-wise negative binomial GLMs.

    Port of edgeR's nbinomDeviance. Fully vectorized over genes.
    """
    y = np.asarray(y, dtype=np.float64)
    mean = np.asarray(mean, dtype=np.float64)

    if y.ndim == 1:
        y = y.reshape(1, -1)
        mean = mean.reshape(1, -1)

    ngenes, nlibs = y.shape
    mean = np.maximum(mean, 1e-300)

    dispersion = np.atleast_1d(np.asarray(dispersion, dtype=np.float64))
    if dispersion.size == 1:
        disp = dispersion[0]
    elif dispersion.ndim == 1 and len(dispersion) == ngenes:
        disp = dispersion
    elif dispersion.shape == y.shape:
        disp = dispersion
    else:
        disp = np.broadcast_to(dispersion, ngenes).copy()

    if weights is not None:
        w = _expand_compressed(weights, y.shape)
    else:
        w = None

    # Compute the unit deviance for the entire matrix at once
    scalar_disp = np.isscalar(disp) or (isinstance(disp, np.ndarray) and disp.ndim == 0)
    d = float(disp) if scalar_disp else disp

    if scalar_disp and d == 0:
        # Poisson case
        unit_dev = np.zeros_like(y)
        pos = y > 0
        unit_dev[pos] = 2 * (y[pos] * np.log(y[pos] / mean[pos]) - (y[pos] - mean[pos]))
        unit_dev[~pos] = 2 * mean[~pos]
    elif scalar_disp:
        # Scalar NB dispersion - the most common case
        unit_dev = np.zeros_like(y)
        pos = y > 0
        if np.any(pos):
            unit_dev[pos] = 2 * (y[pos] * np.log(y[pos] / mean[pos]) -
                                 (y[pos] + 1.0 / d) * np.log((1 + d * y[pos]) /
                                                             (1 + d * mean[pos])))
        zero = ~pos
        if np.any(zero):
            unit_dev[zero] = 2.0 / d * np.log(1 + d * mean[zero])
    else:
        # Per-gene or per-element dispersion
        d_mat = d[:, None] if d.ndim == 1 else d  # (ngenes, 1) or (ngenes, nlibs)
        unit_dev = np.zeros_like(y)
        pos = y > 0
        if np.any(pos):
            d_pos = np.broadcast_to(d_mat, y.shape)[pos]
            unit_dev[pos] = 2 * (y[pos] * np.log(y[pos] / mean[pos]) -
                                 (y[pos] + 1.0 / d_pos) * np.log((1 + d_pos * y[pos]) /
                                                                 (1 + d_pos * mean[pos])))
        zero = ~pos
        if np.any(zero):
            d_zero = np.broadcast_to(d_mat, y.shape)[zero]
            unit_dev[zero] = 2.0 / d_zero * np.log(1 + d_zero * mean[zero])

    unit_dev = np.maximum(unit_dev, 0)
    if w is not None:
        return np.sum(w * unit_dev, axis=1)
    return np.sum(unit_dev, axis=1)
```
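
In the notation of the code above, the unit deviance for a single observation $y$ with mean $\mu$ and dispersion $\phi$ is

$$
d(y, \mu; \phi) =
\begin{cases}
2\left[\,y \log\dfrac{y}{\mu} - \left(y + \dfrac{1}{\phi}\right)\log\dfrac{1 + \phi y}{1 + \phi \mu}\,\right], & y > 0,\\[8pt]
\dfrac{2}{\phi}\log(1 + \phi\mu), & y = 0,
\end{cases}
$$

and the per-gene residual deviance is $D_g = \sum_j w_{gj}\, d(y_{gj}, \mu_{gj}; \phi_g)$. As $\phi \to 0$ this reduces to the Poisson deviance $2\left[y\log(y/\mu) - (y - \mu)\right]$ handled by the first branch.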

```python
def nbinom_unit_deviance(y, mean, dispersion=0):
    """Unit deviance for the negative binomial distribution.

    Port of edgeR's nbinomUnitDeviance.
    """
    return _unit_nb_deviance(y, mean, dispersion)
```
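
A quick numerical sanity check of the Poisson limit (hypothetical usage, not part of the package):

```python
import numpy as np
from edgepython.glm_levenberg import nbinom_unit_deviance

y = np.array([5.0, 0.0])
mu = np.array([3.0, 2.0])
print(nbinom_unit_deviance(y, mu, dispersion=1e-8))  # ~ [1.1083, 4.0]
print(nbinom_unit_deviance(y, mu, dispersion=0))     # exact Poisson branch
```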

```python
def _unit_nb_deviance(y, mu, dispersion):
    """Compute unit negative binomial deviance."""
    y = np.asarray(y, dtype=np.float64)
    mu = np.asarray(mu, dtype=np.float64)
    mu = np.maximum(mu, 1e-300)

    if np.isscalar(dispersion):
        disp = dispersion
    else:
        disp = np.asarray(dispersion, dtype=np.float64)

    # Poisson case
    if np.isscalar(disp) and disp == 0:
        dev = np.zeros_like(y)
        pos = y > 0
        dev[pos] = 2 * (y[pos] * np.log(y[pos] / mu[pos]) - (y[pos] - mu[pos]))
        dev[~pos] = 2 * mu[~pos]
        return dev

    # NB case
    dev = np.zeros_like(y)
    pos = y > 0
    zero = ~pos

    if np.isscalar(disp):
        # y > 0 part
        if np.any(pos):
            dev[pos] = 2 * (y[pos] * np.log(y[pos] / mu[pos]) -
                            (y[pos] + 1 / disp) * np.log((1 + disp * y[pos]) /
                                                         (1 + disp * mu[pos])))
        # y == 0 part
        if np.any(zero):
            dev[zero] = 2 / disp * np.log(1 + disp * mu[zero])
    else:
        # Elementwise dispersion; disp must match y's shape for the masking below
        if np.any(pos):
            dev[pos] = 2 * (y[pos] * np.log(y[pos] / mu[pos]) -
                            (y[pos] + 1 / disp[pos]) * np.log((1 + disp[pos] * y[pos]) /
                                                              (1 + disp[pos] * mu[pos])))
        if np.any(zero):
            d_z = disp[zero]
            dev[zero] = 2 / d_z * np.log(1 + d_z * mu[zero])

    return np.maximum(dev, 0)


def _unit_deviance_sum(y, mu, disp, weights):
    """Sum of weighted unit deviances."""
    ud = _unit_nb_deviance(y, mu, disp)
    return np.sum(weights * ud)


def _expand_compressed(x, shape):
    """Expand a scalar, vector, or CompressedMatrix to a full matrix."""
    if isinstance(x, CompressedMatrix):
        return x.as_matrix()
    x = np.asarray(x, dtype=np.float64)
    if x.ndim == 0 or x.size == 1:
        return np.full(shape, x.ravel()[0])
    if x.ndim == 1:
        # Per-sample vectors take precedence when ngenes == nlibs
        if len(x) == shape[1]:
            return np.tile(x, (shape[0], 1))
        elif len(x) == shape[0]:
            return np.tile(x.reshape(-1, 1), (1, shape[1]))
    if x.shape == shape:
        return x
    return np.broadcast_to(x, shape).copy()
```
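
`_expand_compressed` resolves ambiguous 1-D inputs by matching against the number of samples first, then the number of genes. A small illustration, calling the private helper directly (hypothetical usage):

```python
import numpy as np
from edgepython.glm_levenberg import _expand_compressed

shape = (2, 3)  # 2 genes x 3 samples
print(_expand_compressed(0.5, shape))                        # scalar filled to 2x3
print(_expand_compressed(np.array([1.0, 2.0, 3.0]), shape))  # per-sample rows
print(_expand_compressed(np.array([1.0, 2.0]), shape))       # per-gene columns
```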