DeConveil 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- DeConveil/__init__.py +7 -0
- DeConveil/dds.py +1279 -0
- DeConveil/default_inference.py +284 -0
- DeConveil/ds.py +758 -0
- DeConveil/grid_search.py +195 -0
- DeConveil/inference.py +373 -0
- DeConveil/utils_CNaware.py +809 -0
- DeConveil-0.1.0.dist-info/LICENSE +21 -0
- DeConveil-0.1.0.dist-info/METADATA +35 -0
- DeConveil-0.1.0.dist-info/RECORD +12 -0
- DeConveil-0.1.0.dist-info/WHEEL +5 -0
- DeConveil-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
from typing import Literal
|
|
2
|
+
from typing import Optional
|
|
3
|
+
from typing import Tuple
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
import pandas as pd
|
|
7
|
+
from joblib import Parallel # type: ignore
|
|
8
|
+
from joblib import delayed
|
|
9
|
+
from joblib import parallel_backend
|
|
10
|
+
from scipy.optimize import minimize # type: ignore
|
|
11
|
+
|
|
12
|
+
from deconveil import inference
|
|
13
|
+
from deconveil import utils_CNaware
|
|
14
|
+
from deconveil.utils_CNaware import fit_lin_mu
|
|
15
|
+
|
|
16
|
+
from pydeseq2 import utils
|
|
17
|
+
from pydeseq2.utils import get_num_processes
|
|
18
|
+
from pydeseq2.utils import fit_alpha_mle
|
|
19
|
+
from pydeseq2.utils import wald_test
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class DefInference(inference.Inference):
    """Default DESeq2-related inference methods, using scipy/sklearn/numpy.

    This object contains the interface to the default inference routines and uses
    joblib internally for parallelization. Inherit this class or its parent to write
    custom inference routines.

    Parameters
    ----------
    joblib_verbosity : int
        The verbosity level for joblib tasks. The higher the value, the more updates
        are reported. (default: ``0``).
    batch_size : int
        Number of tasks to allocate to each joblib parallel worker. (default: ``128``).
    n_cpus : int
        Number of cpus to use. If None, all available cpus will be used.
        (default: ``None``).
    backend : str
        Joblib backend. (default: ``"loky"``).
    """

    # Copy-number-aware replacements for pydeseq2's dispersion helpers; exposed as
    # staticmethods so they can be called without an instance.
    fit_rough_dispersions = staticmethod(utils_CNaware.fit_rough_dispersions)  # type: ignore
    fit_moments_dispersions2 = staticmethod(utils_CNaware.fit_moments_dispersions2)  # type: ignore

    def __init__(
        self,
        joblib_verbosity: int = 0,
        batch_size: int = 128,
        n_cpus: Optional[int] = None,
        backend: str = "loky",
    ):
        self._joblib_verbosity = joblib_verbosity
        self._batch_size = batch_size
        # Resolve None / requested count to an actual number of processes.
        self._n_cpus = utils.get_num_processes(n_cpus)
        self._backend = backend

    @property
    def n_cpus(self) -> int:  # noqa: D102
        return self._n_cpus

    @n_cpus.setter
    def n_cpus(self, n_cpus: int) -> None:
        self._n_cpus = utils.get_num_processes(n_cpus)

    def lin_reg_mu(  # noqa: D102
        self,
        counts: np.ndarray,
        size_factors: np.ndarray,
        design_matrix: np.ndarray,
        min_mu: float,
    ) -> np.ndarray:
        """Estimate mean expression per gene with a linear regression.

        Runs :func:`utils_CNaware.fit_lin_mu` independently on each gene
        (column of ``counts``) in parallel.

        Parameters
        ----------
        counts : ndarray
            Raw counts, samples in rows and genes in columns.
        size_factors : ndarray
            Per-sample normalization factors.
        design_matrix : ndarray
            Design matrix (samples x covariates).
        min_mu : float
            Lower threshold for the fitted means.

        Returns
        -------
        ndarray
            Fitted means, samples in rows and genes in columns.
        """
        with parallel_backend(self._backend, inner_max_num_threads=1):
            mu_hat_ = np.array(
                Parallel(
                    n_jobs=self.n_cpus,
                    verbose=self._joblib_verbosity,
                    batch_size=self._batch_size,
                )(
                    delayed(utils_CNaware.fit_lin_mu)(
                        counts=counts[:, i],
                        size_factors=size_factors,
                        design_matrix=design_matrix,
                        min_mu=min_mu,
                    )
                    for i in range(counts.shape[1])
                )
            )
        # Parallel returns one row per gene; transpose back to (samples, genes).
        return mu_hat_.T

    def irls_glm(
        self,
        counts: np.ndarray,
        size_factors: np.ndarray,
        design_matrix: np.ndarray,
        disp: np.ndarray,
        cnv: np.ndarray,
        min_mu: float,
        beta_tol: float,
        min_beta: float = -30,
        max_beta: float = 30,
        optimizer: Literal["BFGS", "L-BFGS-B"] = "L-BFGS-B",
        maxiter: int = 250,
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
        """Fit a CN-aware negative binomial GLM per gene via IRLS, in parallel.

        Parameters
        ----------
        counts : ndarray
            Raw counts (samples x genes).
        size_factors : ndarray
            Per-sample normalization factors.
        design_matrix : ndarray
            Design matrix (samples x covariates).
        disp : ndarray
            Per-gene dispersion estimates.
        cnv : ndarray
            Per-sample, per-gene copy-number values (samples x genes).
        min_mu : float
            Lower threshold for the fitted means.
        beta_tol : float
            Convergence tolerance on the coefficients.
        min_beta : float
            Lower bound on the coefficients. (default: ``-30``).
        max_beta : float
            Upper bound on the coefficients. (default: ``30``).
        optimizer : str
            scipy optimizer used as fallback. (default: ``"L-BFGS-B"``).
        maxiter : int
            Maximum number of IRLS iterations. (default: ``250``).

        Returns
        -------
        tuple
            ``(MLE_lfcs_, mu_hat_, hat_diagonals_, converged_)`` where the mean and
            hat-diagonal arrays are transposed back to (samples, genes).
        """
        with parallel_backend(self._backend, inner_max_num_threads=1):
            res = Parallel(
                n_jobs=self.n_cpus,
                verbose=self._joblib_verbosity,
                batch_size=self._batch_size,
            )(
                delayed(utils_CNaware.irls_glm)(
                    counts=counts[:, i],
                    size_factors=size_factors,
                    design_matrix=design_matrix,
                    disp=disp[i],
                    cnv=cnv[:, i],
                    min_mu=min_mu,
                    beta_tol=beta_tol,
                    min_beta=min_beta,
                    max_beta=max_beta,
                    optimizer=optimizer,
                    maxiter=maxiter,
                )
                for i in range(counts.shape[1])
            )
        # Each worker returns a 4-tuple; regroup per quantity across genes.
        res = zip(*res)
        MLE_lfcs_, mu_hat_, hat_diagonals_, converged_ = (np.array(m) for m in res)

        return (
            MLE_lfcs_,
            mu_hat_.T,
            hat_diagonals_.T,
            converged_,
        )

    def alpha_mle(  # noqa: D102
        self,
        counts: np.ndarray,
        design_matrix: np.ndarray,
        mu: np.ndarray,
        alpha_hat: np.ndarray,
        min_disp: float,
        max_disp: float,
        prior_disp_var: Optional[float] = None,
        cr_reg: bool = True,
        prior_reg: bool = False,
        optimizer: Literal["BFGS", "L-BFGS-B"] = "L-BFGS-B",
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Estimate per-gene dispersions by MLE, dispatching to pydeseq2.

        Runs :func:`pydeseq2.utils.fit_alpha_mle` independently on each gene
        in parallel and returns the dispersions together with per-gene
        convergence flags.
        """
        with parallel_backend(self._backend, inner_max_num_threads=1):
            res = Parallel(
                n_jobs=self.n_cpus,
                verbose=self._joblib_verbosity,
                batch_size=self._batch_size,
            )(
                delayed(utils.fit_alpha_mle)(
                    counts=counts[:, i],
                    design_matrix=design_matrix,
                    mu=mu[:, i],
                    alpha_hat=alpha_hat[i],
                    min_disp=min_disp,
                    max_disp=max_disp,
                    prior_disp_var=prior_disp_var,
                    cr_reg=cr_reg,
                    prior_reg=prior_reg,
                    optimizer=optimizer,
                )
                for i in range(counts.shape[1])
            )
        res = zip(*res)
        dispersions_, l_bfgs_b_converged_ = (np.array(m) for m in res)
        return dispersions_, l_bfgs_b_converged_

    def dispersion_trend_gamma_glm(  # noqa: D102
        self, covariates: pd.Series, targets: pd.Series
    ) -> Tuple[np.ndarray, np.ndarray, bool]:
        """Fit a gamma GLM relating per-gene dispersions to mean expression.

        Parameters
        ----------
        covariates : pandas.Series
            Per-gene covariate (typically normalized mean expression).
        targets : pandas.Series
            Per-gene dispersion estimates to fit against.

        Returns
        -------
        tuple
            ``(coeffs, fitted_values, success)``. On failure the coefficient and
            fitted arrays are NaN and ``success`` is ``False``.
        """
        covariates_w_intercept = covariates.to_frame()
        covariates_w_intercept.insert(0, "intercept", 1)
        covariates_fit = covariates_w_intercept.values
        targets_fit = targets.values

        def loss(coeffs):
            # Gamma GLM negative log-likelihood (up to constants), NaN-robust.
            mu = covariates_fit @ coeffs
            return np.nanmean(targets_fit / mu + np.log(mu), axis=0)

        def grad(coeffs):
            mu = covariates_fit @ coeffs
            return -np.nanmean(
                ((targets_fit / mu - 1)[:, None] * covariates_fit) / mu[:, None], axis=0
            )

        try:
            res = minimize(
                loss,
                x0=np.array([1.0, 1.0]),
                jac=grad,
                method="L-BFGS-B",
                # BUGFIX: scipy's L-BFGS-B requires one (low, high) pair per
                # element of x0 (len(bounds) == len(x0)); a single pair raised
                # ValueError. The positive lower bound keeps mu > 0 so the
                # log in the loss stays finite.
                bounds=[(1e-12, np.inf)] * 2,
            )
        except RuntimeWarning:
            # Could happen if the coefficients fall to zero; only triggers
            # when warnings are promoted to errors in the calling context.
            return np.array([np.nan, np.nan]), np.array([np.nan, np.nan]), False

        coeffs = res.x
        return coeffs, covariates_fit @ coeffs, res.success

    def wald_test(  # noqa: D102
        self,
        design_matrix: np.ndarray,
        disp: np.ndarray,
        lfc: np.ndarray,
        mu: np.ndarray,
        ridge_factor: np.ndarray,
        contrast: np.ndarray,
        lfc_null: np.ndarray,
        alt_hypothesis: Optional[
            Literal["greaterAbs", "lessAbs", "greater", "less"]
        ] = None,
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Run per-gene Wald tests in parallel.

        Dispatches to the module-level :func:`pydeseq2.utils.wald_test` (not this
        method — inside the method body the name resolves to the imported
        function) for each gene and stacks the results.

        Returns
        -------
        tuple
            ``(pvals, stats, se)`` arrays, one entry per gene.
        """
        num_genes = mu.shape[1]
        with parallel_backend(self._backend, inner_max_num_threads=1):
            res = Parallel(
                n_jobs=self.n_cpus,
                verbose=self._joblib_verbosity,
                batch_size=self._batch_size,
            )(
                delayed(wald_test)(
                    design_matrix=design_matrix,
                    disp=disp[i],
                    lfc=lfc[i],
                    mu=mu[:, i],
                    ridge_factor=ridge_factor,
                    contrast=contrast,
                    lfc_null=lfc_null,
                    alt_hypothesis=alt_hypothesis,
                )
                for i in range(num_genes)
            )
        res = zip(*res)
        pvals, stats, se = (np.array(m) for m in res)

        return pvals, stats, se

    def lfc_shrink_nbinom_glm(  # noqa: D102
        self,
        design_matrix: np.ndarray,
        counts: np.ndarray,
        cnv: np.ndarray,
        size: np.ndarray,
        offset: np.ndarray,
        prior_no_shrink_scale: float,
        prior_scale: float,
        optimizer: str,
        shrink_index: int,
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Shrink per-gene LFC estimates with a CN-aware negative binomial GLM.

        Dispatches to :func:`utils_CNaware.nbinomGLM` for each gene in parallel.

        Returns
        -------
        tuple
            ``(lfcs, inv_hessians, l_bfgs_b_converged_)`` arrays, one entry
            per gene.
        """
        with parallel_backend(self._backend, inner_max_num_threads=1):
            num_genes = counts.shape[1]
            res = Parallel(
                n_jobs=self.n_cpus,
                verbose=self._joblib_verbosity,
                batch_size=self._batch_size,
            )(
                delayed(utils_CNaware.nbinomGLM)(
                    design_matrix=design_matrix,
                    counts=counts[:, i],
                    cnv=cnv[:, i],
                    size=size[i],
                    offset=offset,
                    prior_no_shrink_scale=prior_no_shrink_scale,
                    prior_scale=prior_scale,
                    optimizer=optimizer,
                    shrink_index=shrink_index,
                )
                for i in range(num_genes)
            )
        res = zip(*res)
        lfcs, inv_hessians, l_bfgs_b_converged_ = (np.array(m) for m in res)
        return lfcs, inv_hessians, l_bfgs_b_converged_